store: issue a single entry for each revlog...
marmoute - r51389:e50d1fe7 default
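The first hunk below is repair.py: with the store now issuing a single entry for each revlog, `manifestrevlogs()` no longer needs the `is_revlog_main` check when walking `repo.store.datafiles()`. A minimal sketch of the resulting iteration, assuming the entry attributes shown in the hunk (`is_revlog`, `revlog_type`, `target_id`); the helper name is hypothetical and not part of the changeset:

from mercurial import store


def manifest_storages(repo):
    """Yield one manifest storage object per manifest revlog entry."""
    for entry in repo.store.datafiles():
        if not entry.is_revlog:
            continue
        # a single entry now describes the whole revlog, so there is no
        # per-file `is_revlog_main` flag left to filter on
        if entry.revlog_type == store.FILEFLAGS_MANIFESTLOG:
            yield repo.manifestlog.getstorage(entry.target_id)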
@@ -1,576 +1,574
1 # repair.py - functions for repository repair for mercurial
1 # repair.py - functions for repository repair for mercurial
2 #
2 #
3 # Copyright 2005, 2006 Chris Mason <mason@suse.com>
3 # Copyright 2005, 2006 Chris Mason <mason@suse.com>
4 # Copyright 2007 Olivia Mackall
4 # Copyright 2007 Olivia Mackall
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9
9
10 from .i18n import _
10 from .i18n import _
11 from .node import (
11 from .node import (
12 hex,
12 hex,
13 short,
13 short,
14 )
14 )
15 from . import (
15 from . import (
16 bundle2,
16 bundle2,
17 changegroup,
17 changegroup,
18 discovery,
18 discovery,
19 error,
19 error,
20 exchange,
20 exchange,
21 obsolete,
21 obsolete,
22 obsutil,
22 obsutil,
23 pathutil,
23 pathutil,
24 phases,
24 phases,
25 requirements,
25 requirements,
26 scmutil,
26 scmutil,
27 store,
27 store,
28 transaction,
28 transaction,
29 util,
29 util,
30 )
30 )
31 from .utils import (
31 from .utils import (
32 hashutil,
32 hashutil,
33 urlutil,
33 urlutil,
34 )
34 )
35
35
36
36
37 def backupbundle(
37 def backupbundle(
38 repo,
38 repo,
39 bases,
39 bases,
40 heads,
40 heads,
41 node,
41 node,
42 suffix,
42 suffix,
43 compress=True,
43 compress=True,
44 obsolescence=True,
44 obsolescence=True,
45 tmp_backup=False,
45 tmp_backup=False,
46 ):
46 ):
47 """create a bundle with the specified revisions as a backup"""
47 """create a bundle with the specified revisions as a backup"""
48
48
49 backupdir = b"strip-backup"
49 backupdir = b"strip-backup"
50 vfs = repo.vfs
50 vfs = repo.vfs
51 if not vfs.isdir(backupdir):
51 if not vfs.isdir(backupdir):
52 vfs.mkdir(backupdir)
52 vfs.mkdir(backupdir)
53
53
54 # Include a hash of all the nodes in the filename for uniqueness
54 # Include a hash of all the nodes in the filename for uniqueness
55 allcommits = repo.set(b'%ln::%ln', bases, heads)
55 allcommits = repo.set(b'%ln::%ln', bases, heads)
56 allhashes = sorted(c.hex() for c in allcommits)
56 allhashes = sorted(c.hex() for c in allcommits)
57 totalhash = hashutil.sha1(b''.join(allhashes)).digest()
57 totalhash = hashutil.sha1(b''.join(allhashes)).digest()
58 name = b"%s/%s-%s-%s.hg" % (
58 name = b"%s/%s-%s-%s.hg" % (
59 backupdir,
59 backupdir,
60 short(node),
60 short(node),
61 hex(totalhash[:4]),
61 hex(totalhash[:4]),
62 suffix,
62 suffix,
63 )
63 )
64
64
65 cgversion = changegroup.localversion(repo)
65 cgversion = changegroup.localversion(repo)
66 comp = None
66 comp = None
67 if cgversion != b'01':
67 if cgversion != b'01':
68 bundletype = b"HG20"
68 bundletype = b"HG20"
69 if compress:
69 if compress:
70 comp = b'BZ'
70 comp = b'BZ'
71 elif compress:
71 elif compress:
72 bundletype = b"HG10BZ"
72 bundletype = b"HG10BZ"
73 else:
73 else:
74 bundletype = b"HG10UN"
74 bundletype = b"HG10UN"
75
75
76 outgoing = discovery.outgoing(repo, missingroots=bases, ancestorsof=heads)
76 outgoing = discovery.outgoing(repo, missingroots=bases, ancestorsof=heads)
77 contentopts = {
77 contentopts = {
78 b'cg.version': cgversion,
78 b'cg.version': cgversion,
79 b'obsolescence': obsolescence,
79 b'obsolescence': obsolescence,
80 b'phases': True,
80 b'phases': True,
81 }
81 }
82 return bundle2.writenewbundle(
82 return bundle2.writenewbundle(
83 repo.ui,
83 repo.ui,
84 repo,
84 repo,
85 b'strip',
85 b'strip',
86 name,
86 name,
87 bundletype,
87 bundletype,
88 outgoing,
88 outgoing,
89 contentopts,
89 contentopts,
90 vfs,
90 vfs,
91 compression=comp,
91 compression=comp,
92 allow_internal=tmp_backup,
92 allow_internal=tmp_backup,
93 )
93 )
94
94
95
95
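backupbundle() above keeps each backup name unique by hashing every affected changeset: the file name combines the short form of the anchor node, the first four bytes of a sha1 over all covered node hashes, and the caller's suffix. A small illustrative sketch of that naming, reusing the imports already present in repair.py (the standalone helper is hypothetical):

from mercurial.node import hex, short
from mercurial.utils import hashutil


def backup_bundle_name(node, allhashes, suffix):
    # allhashes: hex node ids of every changeset covered by the backup
    totalhash = hashutil.sha1(b''.join(sorted(allhashes))).digest()
    return b"strip-backup/%s-%s-%s.hg" % (short(node), hex(totalhash[:4]), suffix)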
96 def _collectfiles(repo, striprev):
96 def _collectfiles(repo, striprev):
97 """find out the filelogs affected by the strip"""
97 """find out the filelogs affected by the strip"""
98 files = set()
98 files = set()
99
99
100 for x in range(striprev, len(repo)):
100 for x in range(striprev, len(repo)):
101 files.update(repo[x].files())
101 files.update(repo[x].files())
102
102
103 return sorted(files)
103 return sorted(files)
104
104
105
105
106 def _collectrevlog(revlog, striprev):
106 def _collectrevlog(revlog, striprev):
107 _, brokenset = revlog.getstrippoint(striprev)
107 _, brokenset = revlog.getstrippoint(striprev)
108 return [revlog.linkrev(r) for r in brokenset]
108 return [revlog.linkrev(r) for r in brokenset]
109
109
110
110
111 def _collectbrokencsets(repo, files, striprev):
111 def _collectbrokencsets(repo, files, striprev):
112 """return the changesets which will be broken by the truncation"""
112 """return the changesets which will be broken by the truncation"""
113 s = set()
113 s = set()
114
114
115 for revlog in manifestrevlogs(repo):
115 for revlog in manifestrevlogs(repo):
116 s.update(_collectrevlog(revlog, striprev))
116 s.update(_collectrevlog(revlog, striprev))
117 for fname in files:
117 for fname in files:
118 s.update(_collectrevlog(repo.file(fname), striprev))
118 s.update(_collectrevlog(repo.file(fname), striprev))
119
119
120 return s
120 return s
121
121
122
122
123 def strip(ui, repo, nodelist, backup=True, topic=b'backup'):
123 def strip(ui, repo, nodelist, backup=True, topic=b'backup'):
124 # This function requires the caller to lock the repo, but it operates
124 # This function requires the caller to lock the repo, but it operates
125 # within a transaction of its own, and thus requires there to be no current
125 # within a transaction of its own, and thus requires there to be no current
126 # transaction when it is called.
126 # transaction when it is called.
127 if repo.currenttransaction() is not None:
127 if repo.currenttransaction() is not None:
128 raise error.ProgrammingError(b'cannot strip from inside a transaction')
128 raise error.ProgrammingError(b'cannot strip from inside a transaction')
129
129
130 # Simple way to maintain backwards compatibility for this
130 # Simple way to maintain backwards compatibility for this
131 # argument.
131 # argument.
132 if backup in [b'none', b'strip']:
132 if backup in [b'none', b'strip']:
133 backup = False
133 backup = False
134
134
135 repo = repo.unfiltered()
135 repo = repo.unfiltered()
136 repo.destroying()
136 repo.destroying()
137 vfs = repo.vfs
137 vfs = repo.vfs
138 # load bookmark before changelog to avoid side effect from outdated
138 # load bookmark before changelog to avoid side effect from outdated
139 # changelog (see repo._refreshchangelog)
139 # changelog (see repo._refreshchangelog)
140 repo._bookmarks
140 repo._bookmarks
141 cl = repo.changelog
141 cl = repo.changelog
142
142
143 # TODO handle undo of merge sets
143 # TODO handle undo of merge sets
144 if isinstance(nodelist, bytes):
144 if isinstance(nodelist, bytes):
145 nodelist = [nodelist]
145 nodelist = [nodelist]
146 striplist = [cl.rev(node) for node in nodelist]
146 striplist = [cl.rev(node) for node in nodelist]
147 striprev = min(striplist)
147 striprev = min(striplist)
148
148
149 files = _collectfiles(repo, striprev)
149 files = _collectfiles(repo, striprev)
150 saverevs = _collectbrokencsets(repo, files, striprev)
150 saverevs = _collectbrokencsets(repo, files, striprev)
151
151
152 # Some revisions with rev > striprev may not be descendants of striprev.
152 # Some revisions with rev > striprev may not be descendants of striprev.
153 # We have to find these revisions and put them in a bundle, so that
153 # We have to find these revisions and put them in a bundle, so that
154 # we can restore them after the truncations.
154 # we can restore them after the truncations.
155 # To create the bundle we use repo.changegroupsubset which requires
155 # To create the bundle we use repo.changegroupsubset which requires
156 # the list of heads and bases of the set of interesting revisions.
156 # the list of heads and bases of the set of interesting revisions.
157 # (head = revision in the set that has no descendant in the set;
157 # (head = revision in the set that has no descendant in the set;
158 # base = revision in the set that has no ancestor in the set)
158 # base = revision in the set that has no ancestor in the set)
159 tostrip = set(striplist)
159 tostrip = set(striplist)
160 saveheads = set(saverevs)
160 saveheads = set(saverevs)
161 for r in cl.revs(start=striprev + 1):
161 for r in cl.revs(start=striprev + 1):
162 if any(p in tostrip for p in cl.parentrevs(r)):
162 if any(p in tostrip for p in cl.parentrevs(r)):
163 tostrip.add(r)
163 tostrip.add(r)
164
164
165 if r not in tostrip:
165 if r not in tostrip:
166 saverevs.add(r)
166 saverevs.add(r)
167 saveheads.difference_update(cl.parentrevs(r))
167 saveheads.difference_update(cl.parentrevs(r))
168 saveheads.add(r)
168 saveheads.add(r)
169 saveheads = [cl.node(r) for r in saveheads]
169 saveheads = [cl.node(r) for r in saveheads]
170
170
171 # compute base nodes
171 # compute base nodes
172 if saverevs:
172 if saverevs:
173 descendants = set(cl.descendants(saverevs))
173 descendants = set(cl.descendants(saverevs))
174 saverevs.difference_update(descendants)
174 saverevs.difference_update(descendants)
175 savebases = [cl.node(r) for r in saverevs]
175 savebases = [cl.node(r) for r in saverevs]
176 stripbases = [cl.node(r) for r in tostrip]
176 stripbases = [cl.node(r) for r in tostrip]
177
177
178 stripobsidx = obsmarkers = ()
178 stripobsidx = obsmarkers = ()
179 if repo.ui.configbool(b'devel', b'strip-obsmarkers'):
179 if repo.ui.configbool(b'devel', b'strip-obsmarkers'):
180 obsmarkers = obsutil.exclusivemarkers(repo, stripbases)
180 obsmarkers = obsutil.exclusivemarkers(repo, stripbases)
181 if obsmarkers:
181 if obsmarkers:
182 stripobsidx = [
182 stripobsidx = [
183 i for i, m in enumerate(repo.obsstore) if m in obsmarkers
183 i for i, m in enumerate(repo.obsstore) if m in obsmarkers
184 ]
184 ]
185
185
186 newbmtarget, updatebm = _bookmarkmovements(repo, tostrip)
186 newbmtarget, updatebm = _bookmarkmovements(repo, tostrip)
187
187
188 backupfile = None
188 backupfile = None
189 node = nodelist[-1]
189 node = nodelist[-1]
190 if backup:
190 if backup:
191 backupfile = _createstripbackup(repo, stripbases, node, topic)
191 backupfile = _createstripbackup(repo, stripbases, node, topic)
192 # create a changegroup for all the branches we need to keep
192 # create a changegroup for all the branches we need to keep
193 tmpbundlefile = None
193 tmpbundlefile = None
194 if saveheads:
194 if saveheads:
195 # do not compress temporary bundle if we remove it from disk later
195 # do not compress temporary bundle if we remove it from disk later
196 #
196 #
197 # We do not include obsolescence, it might re-introduce prune markers
197 # We do not include obsolescence, it might re-introduce prune markers
198 # we are trying to strip. This is harmless since the stripped markers
198 # we are trying to strip. This is harmless since the stripped markers
199 # are already backed up and we did not touch the markers for the
199 # are already backed up and we did not touch the markers for the
200 # saved changesets.
200 # saved changesets.
201 tmpbundlefile = backupbundle(
201 tmpbundlefile = backupbundle(
202 repo,
202 repo,
203 savebases,
203 savebases,
204 saveheads,
204 saveheads,
205 node,
205 node,
206 b'temp',
206 b'temp',
207 compress=False,
207 compress=False,
208 obsolescence=False,
208 obsolescence=False,
209 tmp_backup=True,
209 tmp_backup=True,
210 )
210 )
211
211
212 with ui.uninterruptible():
212 with ui.uninterruptible():
213 try:
213 try:
214 with repo.transaction(b"strip") as tr:
214 with repo.transaction(b"strip") as tr:
215 # TODO this code violates the interface abstraction of the
215 # TODO this code violates the interface abstraction of the
216 # transaction and makes assumptions that file storage is
216 # transaction and makes assumptions that file storage is
217 # using append-only files. We'll need some kind of storage
217 # using append-only files. We'll need some kind of storage
218 # API to handle stripping for us.
218 # API to handle stripping for us.
219 oldfiles = set(tr._offsetmap.keys())
219 oldfiles = set(tr._offsetmap.keys())
220 oldfiles.update(tr._newfiles)
220 oldfiles.update(tr._newfiles)
221
221
222 tr.startgroup()
222 tr.startgroup()
223 cl.strip(striprev, tr)
223 cl.strip(striprev, tr)
224 stripmanifest(repo, striprev, tr, files)
224 stripmanifest(repo, striprev, tr, files)
225
225
226 for fn in files:
226 for fn in files:
227 repo.file(fn).strip(striprev, tr)
227 repo.file(fn).strip(striprev, tr)
228 tr.endgroup()
228 tr.endgroup()
229
229
230 entries = tr.readjournal()
230 entries = tr.readjournal()
231
231
232 for file, troffset in entries:
232 for file, troffset in entries:
233 if file in oldfiles:
233 if file in oldfiles:
234 continue
234 continue
235 with repo.svfs(file, b'a', checkambig=True) as fp:
235 with repo.svfs(file, b'a', checkambig=True) as fp:
236 fp.truncate(troffset)
236 fp.truncate(troffset)
237 if troffset == 0:
237 if troffset == 0:
238 repo.store.markremoved(file)
238 repo.store.markremoved(file)
239
239
240 deleteobsmarkers(repo.obsstore, stripobsidx)
240 deleteobsmarkers(repo.obsstore, stripobsidx)
241 del repo.obsstore
241 del repo.obsstore
242 repo.invalidatevolatilesets()
242 repo.invalidatevolatilesets()
243 repo._phasecache.filterunknown(repo)
243 repo._phasecache.filterunknown(repo)
244
244
245 if tmpbundlefile:
245 if tmpbundlefile:
246 ui.note(_(b"adding branch\n"))
246 ui.note(_(b"adding branch\n"))
247 f = vfs.open(tmpbundlefile, b"rb")
247 f = vfs.open(tmpbundlefile, b"rb")
248 gen = exchange.readbundle(ui, f, tmpbundlefile, vfs)
248 gen = exchange.readbundle(ui, f, tmpbundlefile, vfs)
249 # silence internal shuffling chatter
249 # silence internal shuffling chatter
250 maybe_silent = (
250 maybe_silent = (
251 repo.ui.silent()
251 repo.ui.silent()
252 if not repo.ui.verbose
252 if not repo.ui.verbose
253 else util.nullcontextmanager()
253 else util.nullcontextmanager()
254 )
254 )
255 with maybe_silent:
255 with maybe_silent:
256 tmpbundleurl = b'bundle:' + vfs.join(tmpbundlefile)
256 tmpbundleurl = b'bundle:' + vfs.join(tmpbundlefile)
257 txnname = b'strip'
257 txnname = b'strip'
258 if not isinstance(gen, bundle2.unbundle20):
258 if not isinstance(gen, bundle2.unbundle20):
259 txnname = b"strip\n%s" % urlutil.hidepassword(
259 txnname = b"strip\n%s" % urlutil.hidepassword(
260 tmpbundleurl
260 tmpbundleurl
261 )
261 )
262 with repo.transaction(txnname) as tr:
262 with repo.transaction(txnname) as tr:
263 bundle2.applybundle(
263 bundle2.applybundle(
264 repo, gen, tr, source=b'strip', url=tmpbundleurl
264 repo, gen, tr, source=b'strip', url=tmpbundleurl
265 )
265 )
266 f.close()
266 f.close()
267
267
268 with repo.transaction(b'repair') as tr:
268 with repo.transaction(b'repair') as tr:
269 bmchanges = [(m, repo[newbmtarget].node()) for m in updatebm]
269 bmchanges = [(m, repo[newbmtarget].node()) for m in updatebm]
270 repo._bookmarks.applychanges(repo, tr, bmchanges)
270 repo._bookmarks.applychanges(repo, tr, bmchanges)
271
271
272 transaction.cleanup_undo_files(repo.ui.warn, repo.vfs_map)
272 transaction.cleanup_undo_files(repo.ui.warn, repo.vfs_map)
273
273
274 except: # re-raises
274 except: # re-raises
275 if backupfile:
275 if backupfile:
276 ui.warn(
276 ui.warn(
277 _(b"strip failed, backup bundle stored in '%s'\n")
277 _(b"strip failed, backup bundle stored in '%s'\n")
278 % vfs.join(backupfile)
278 % vfs.join(backupfile)
279 )
279 )
280 if tmpbundlefile:
280 if tmpbundlefile:
281 ui.warn(
281 ui.warn(
282 _(b"strip failed, unrecovered changes stored in '%s'\n")
282 _(b"strip failed, unrecovered changes stored in '%s'\n")
283 % vfs.join(tmpbundlefile)
283 % vfs.join(tmpbundlefile)
284 )
284 )
285 ui.warn(
285 ui.warn(
286 _(
286 _(
287 b"(fix the problem, then recover the changesets with "
287 b"(fix the problem, then recover the changesets with "
288 b"\"hg unbundle '%s'\")\n"
288 b"\"hg unbundle '%s'\")\n"
289 )
289 )
290 % vfs.join(tmpbundlefile)
290 % vfs.join(tmpbundlefile)
291 )
291 )
292 raise
292 raise
293 else:
293 else:
294 if tmpbundlefile:
294 if tmpbundlefile:
295 # Remove temporary bundle only if there were no exceptions
295 # Remove temporary bundle only if there were no exceptions
296 vfs.unlink(tmpbundlefile)
296 vfs.unlink(tmpbundlefile)
297
297
298 repo.destroyed()
298 repo.destroyed()
299 # return the backup file path (or None if 'backup' was False) so
299 # return the backup file path (or None if 'backup' was False) so
300 # extensions can use it
300 # extensions can use it
301 return backupfile
301 return backupfile
302
302
303
303
304 def softstrip(ui, repo, nodelist, backup=True, topic=b'backup'):
304 def softstrip(ui, repo, nodelist, backup=True, topic=b'backup'):
305 """perform a "soft" strip using the archived phase"""
305 """perform a "soft" strip using the archived phase"""
306 tostrip = [c.node() for c in repo.set(b'sort(%ln::)', nodelist)]
306 tostrip = [c.node() for c in repo.set(b'sort(%ln::)', nodelist)]
307 if not tostrip:
307 if not tostrip:
308 return None
308 return None
309
309
310 backupfile = None
310 backupfile = None
311 if backup:
311 if backup:
312 node = tostrip[0]
312 node = tostrip[0]
313 backupfile = _createstripbackup(repo, tostrip, node, topic)
313 backupfile = _createstripbackup(repo, tostrip, node, topic)
314
314
315 newbmtarget, updatebm = _bookmarkmovements(repo, tostrip)
315 newbmtarget, updatebm = _bookmarkmovements(repo, tostrip)
316 with repo.transaction(b'strip') as tr:
316 with repo.transaction(b'strip') as tr:
317 phases.retractboundary(repo, tr, phases.archived, tostrip)
317 phases.retractboundary(repo, tr, phases.archived, tostrip)
318 bmchanges = [(m, repo[newbmtarget].node()) for m in updatebm]
318 bmchanges = [(m, repo[newbmtarget].node()) for m in updatebm]
319 repo._bookmarks.applychanges(repo, tr, bmchanges)
319 repo._bookmarks.applychanges(repo, tr, bmchanges)
320 return backupfile
320 return backupfile
321
321
322
322
323 def _bookmarkmovements(repo, tostrip):
323 def _bookmarkmovements(repo, tostrip):
324 # compute necessary bookmark movement
324 # compute necessary bookmark movement
325 bm = repo._bookmarks
325 bm = repo._bookmarks
326 updatebm = []
326 updatebm = []
327 for m in bm:
327 for m in bm:
328 rev = repo[bm[m]].rev()
328 rev = repo[bm[m]].rev()
329 if rev in tostrip:
329 if rev in tostrip:
330 updatebm.append(m)
330 updatebm.append(m)
331 newbmtarget = None
331 newbmtarget = None
332 # If we need to move bookmarks, compute bookmark
332 # If we need to move bookmarks, compute bookmark
333 # targets. Otherwise we can skip doing this logic.
333 # targets. Otherwise we can skip doing this logic.
334 if updatebm:
334 if updatebm:
335 # For a set s, max(parents(s) - s) is the same as max(heads(::s - s)),
335 # For a set s, max(parents(s) - s) is the same as max(heads(::s - s)),
336 # but is much faster
336 # but is much faster
337 newbmtarget = repo.revs(b'max(parents(%ld) - (%ld))', tostrip, tostrip)
337 newbmtarget = repo.revs(b'max(parents(%ld) - (%ld))', tostrip, tostrip)
338 if newbmtarget:
338 if newbmtarget:
339 newbmtarget = repo[newbmtarget.first()].node()
339 newbmtarget = repo[newbmtarget.first()].node()
340 else:
340 else:
341 newbmtarget = b'.'
341 newbmtarget = b'.'
342 return newbmtarget, updatebm
342 return newbmtarget, updatebm
343
343
344
344
345 def _createstripbackup(repo, stripbases, node, topic):
345 def _createstripbackup(repo, stripbases, node, topic):
346 # backup the changeset we are about to strip
346 # backup the changeset we are about to strip
347 vfs = repo.vfs
347 vfs = repo.vfs
348 unfi = repo.unfiltered()
348 unfi = repo.unfiltered()
349 to_node = unfi.changelog.node
349 to_node = unfi.changelog.node
350 # Internal changesets are implementation details that should not leave the
350 # Internal changesets are implementation details that should not leave the
351 # repository or be exposed to users. In addition, features using them are
351 # repository or be exposed to users. In addition, features using them are
352 # required to be resistant to strip. See the test case for more
352 # required to be resistant to strip. See the test case for more
353 # details.
353 # details.
354 all_backup = unfi.revs(
354 all_backup = unfi.revs(
355 b"(%ln)::(%ld) and not _internal()",
355 b"(%ln)::(%ld) and not _internal()",
356 stripbases,
356 stripbases,
357 unfi.changelog.headrevs(),
357 unfi.changelog.headrevs(),
358 )
358 )
359 if not all_backup:
359 if not all_backup:
360 return None
360 return None
361
361
362 def to_nodes(revs):
362 def to_nodes(revs):
363 return [to_node(r) for r in revs]
363 return [to_node(r) for r in revs]
364
364
365 bases = to_nodes(unfi.revs("roots(%ld)", all_backup))
365 bases = to_nodes(unfi.revs("roots(%ld)", all_backup))
366 heads = to_nodes(unfi.revs("heads(%ld)", all_backup))
366 heads = to_nodes(unfi.revs("heads(%ld)", all_backup))
367 backupfile = backupbundle(repo, bases, heads, node, topic)
367 backupfile = backupbundle(repo, bases, heads, node, topic)
368 repo.ui.status(_(b"saved backup bundle to %s\n") % vfs.join(backupfile))
368 repo.ui.status(_(b"saved backup bundle to %s\n") % vfs.join(backupfile))
369 repo.ui.log(
369 repo.ui.log(
370 b"backupbundle", b"saved backup bundle to %s\n", vfs.join(backupfile)
370 b"backupbundle", b"saved backup bundle to %s\n", vfs.join(backupfile)
371 )
371 )
372 return backupfile
372 return backupfile
373
373
374
374
375 def safestriproots(ui, repo, nodes):
375 def safestriproots(ui, repo, nodes):
376 """return list of roots of nodes where descendants are covered by nodes"""
376 """return list of roots of nodes where descendants are covered by nodes"""
377 torev = repo.unfiltered().changelog.rev
377 torev = repo.unfiltered().changelog.rev
378 revs = {torev(n) for n in nodes}
378 revs = {torev(n) for n in nodes}
379 # tostrip = wanted - unsafe = wanted - ancestors(orphaned)
379 # tostrip = wanted - unsafe = wanted - ancestors(orphaned)
380 # orphaned = affected - wanted
380 # orphaned = affected - wanted
381 # affected = descendants(roots(wanted))
381 # affected = descendants(roots(wanted))
382 # wanted = revs
382 # wanted = revs
383 revset = b'%ld - ( ::( (roots(%ld):: and not _phase(%s)) -%ld) )'
383 revset = b'%ld - ( ::( (roots(%ld):: and not _phase(%s)) -%ld) )'
384 tostrip = set(repo.revs(revset, revs, revs, phases.internal, revs))
384 tostrip = set(repo.revs(revset, revs, revs, phases.internal, revs))
385 notstrip = revs - tostrip
385 notstrip = revs - tostrip
386 if notstrip:
386 if notstrip:
387 nodestr = b', '.join(sorted(short(repo[n].node()) for n in notstrip))
387 nodestr = b', '.join(sorted(short(repo[n].node()) for n in notstrip))
388 ui.warn(
388 ui.warn(
389 _(b'warning: orphaned descendants detected, not stripping %s\n')
389 _(b'warning: orphaned descendants detected, not stripping %s\n')
390 % nodestr
390 % nodestr
391 )
391 )
392 return [c.node() for c in repo.set(b'roots(%ld)', tostrip)]
392 return [c.node() for c in repo.set(b'roots(%ld)', tostrip)]
393
393
394
394
395 class stripcallback:
395 class stripcallback:
396 """used as a transaction postclose callback"""
396 """used as a transaction postclose callback"""
397
397
398 def __init__(self, ui, repo, backup, topic):
398 def __init__(self, ui, repo, backup, topic):
399 self.ui = ui
399 self.ui = ui
400 self.repo = repo
400 self.repo = repo
401 self.backup = backup
401 self.backup = backup
402 self.topic = topic or b'backup'
402 self.topic = topic or b'backup'
403 self.nodelist = []
403 self.nodelist = []
404
404
405 def addnodes(self, nodes):
405 def addnodes(self, nodes):
406 self.nodelist.extend(nodes)
406 self.nodelist.extend(nodes)
407
407
408 def __call__(self, tr):
408 def __call__(self, tr):
409 roots = safestriproots(self.ui, self.repo, self.nodelist)
409 roots = safestriproots(self.ui, self.repo, self.nodelist)
410 if roots:
410 if roots:
411 strip(self.ui, self.repo, roots, self.backup, self.topic)
411 strip(self.ui, self.repo, roots, self.backup, self.topic)
412
412
413
413
414 def delayedstrip(ui, repo, nodelist, topic=None, backup=True):
414 def delayedstrip(ui, repo, nodelist, topic=None, backup=True):
415 """like strip, but works inside transaction and won't strip irreverent revs
415 """like strip, but works inside transaction and won't strip irreverent revs
416
416
417 nodelist must explicitly contain all descendants. Otherwise a warning will
417 nodelist must explicitly contain all descendants. Otherwise a warning will
418 be printed that some nodes are not stripped.
418 be printed that some nodes are not stripped.
419
419
420 Will do a backup if `backup` is True. The last non-None "topic" will be
420 Will do a backup if `backup` is True. The last non-None "topic" will be
421 used as the backup topic name. The default backup topic name is "backup".
421 used as the backup topic name. The default backup topic name is "backup".
422 """
422 """
423 tr = repo.currenttransaction()
423 tr = repo.currenttransaction()
424 if not tr:
424 if not tr:
425 nodes = safestriproots(ui, repo, nodelist)
425 nodes = safestriproots(ui, repo, nodelist)
426 return strip(ui, repo, nodes, backup=backup, topic=topic)
426 return strip(ui, repo, nodes, backup=backup, topic=topic)
427 # transaction postclose callbacks are called in alphabetical order.
427 # transaction postclose callbacks are called in alphabetical order.
428 # use '\xff' as prefix so we are likely to be called last.
428 # use '\xff' as prefix so we are likely to be called last.
429 callback = tr.getpostclose(b'\xffstrip')
429 callback = tr.getpostclose(b'\xffstrip')
430 if callback is None:
430 if callback is None:
431 callback = stripcallback(ui, repo, backup=backup, topic=topic)
431 callback = stripcallback(ui, repo, backup=backup, topic=topic)
432 tr.addpostclose(b'\xffstrip', callback)
432 tr.addpostclose(b'\xffstrip', callback)
433 if topic:
433 if topic:
434 callback.topic = topic
434 callback.topic = topic
435 callback.addnodes(nodelist)
435 callback.addnodes(nodelist)
436
436
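delayedstrip() above is the variant extensions call when a transaction may already be open; the actual strip then happens from the b'\xffstrip' postclose callback. A hedged usage sketch (the extension function name and topic are hypothetical):

from mercurial import repair


def cleanup_rewritten(ui, repo, obsolete_nodes):
    # safe whether or not a transaction is open: with one open, the strip is
    # queued on the b'\xffstrip' postclose callback; without one, it runs
    # immediately through safestriproots() and strip()
    repair.delayedstrip(ui, repo, obsolete_nodes, topic=b'myext')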
437
437
438 def stripmanifest(repo, striprev, tr, files):
438 def stripmanifest(repo, striprev, tr, files):
439 for revlog in manifestrevlogs(repo):
439 for revlog in manifestrevlogs(repo):
440 revlog.strip(striprev, tr)
440 revlog.strip(striprev, tr)
441
441
442
442
443 def manifestrevlogs(repo):
443 def manifestrevlogs(repo):
444 yield repo.manifestlog.getstorage(b'')
444 yield repo.manifestlog.getstorage(b'')
445 if scmutil.istreemanifest(repo):
445 if scmutil.istreemanifest(repo):
446 # This logic is safe if treemanifest isn't enabled, but also
446 # This logic is safe if treemanifest isn't enabled, but also
447 # pointless, so we skip it if treemanifest isn't enabled.
447 # pointless, so we skip it if treemanifest isn't enabled.
448 for entry in repo.store.datafiles():
448 for entry in repo.store.datafiles():
449 if not entry.is_revlog:
449 if not entry.is_revlog:
450 continue
450 continue
451 if not entry.revlog_type == store.FILEFLAGS_MANIFESTLOG:
451 if entry.revlog_type == store.FILEFLAGS_MANIFESTLOG:
452 continue
453 if entry.is_revlog_main:
454 yield repo.manifestlog.getstorage(entry.target_id)
452 yield repo.manifestlog.getstorage(entry.target_id)
455
453
456
454
457 def rebuildfncache(ui, repo, only_data=False):
455 def rebuildfncache(ui, repo, only_data=False):
458 """Rebuilds the fncache file from repo history.
456 """Rebuilds the fncache file from repo history.
459
457
460 Missing entries will be added. Extra entries will be removed.
458 Missing entries will be added. Extra entries will be removed.
461 """
459 """
462 repo = repo.unfiltered()
460 repo = repo.unfiltered()
463
461
464 if requirements.FNCACHE_REQUIREMENT not in repo.requirements:
462 if requirements.FNCACHE_REQUIREMENT not in repo.requirements:
465 ui.warn(
463 ui.warn(
466 _(
464 _(
467 b'(not rebuilding fncache because repository does not '
465 b'(not rebuilding fncache because repository does not '
468 b'support fncache)\n'
466 b'support fncache)\n'
469 )
467 )
470 )
468 )
471 return
469 return
472
470
473 with repo.lock():
471 with repo.lock():
474 fnc = repo.store.fncache
472 fnc = repo.store.fncache
475 fnc.ensureloaded(warn=ui.warn)
473 fnc.ensureloaded(warn=ui.warn)
476
474
477 oldentries = set(fnc.entries)
475 oldentries = set(fnc.entries)
478 newentries = set()
476 newentries = set()
479 seenfiles = set()
477 seenfiles = set()
480
478
481 if only_data:
479 if only_data:
482 # Trust the listing of .i from the fncache, but not the .d. This is
480 # Trust the listing of .i from the fncache, but not the .d. This is
483 # much faster, because we only need to stat every possible .d file,
481 # much faster, because we only need to stat every possible .d file,
484 # instead of reading the full changelog
482 # instead of reading the full changelog
485 for f in fnc:
483 for f in fnc:
486 if f[:5] == b'data/' and f[-2:] == b'.i':
484 if f[:5] == b'data/' and f[-2:] == b'.i':
487 seenfiles.add(f[5:-2])
485 seenfiles.add(f[5:-2])
488 newentries.add(f)
486 newentries.add(f)
489 dataf = f[:-2] + b'.d'
487 dataf = f[:-2] + b'.d'
490 if repo.store._exists(dataf):
488 if repo.store._exists(dataf):
491 newentries.add(dataf)
489 newentries.add(dataf)
492 else:
490 else:
493 progress = ui.makeprogress(
491 progress = ui.makeprogress(
494 _(b'rebuilding'), unit=_(b'changesets'), total=len(repo)
492 _(b'rebuilding'), unit=_(b'changesets'), total=len(repo)
495 )
493 )
496 for rev in repo:
494 for rev in repo:
497 progress.update(rev)
495 progress.update(rev)
498
496
499 ctx = repo[rev]
497 ctx = repo[rev]
500 for f in ctx.files():
498 for f in ctx.files():
501 # This is to minimize I/O.
499 # This is to minimize I/O.
502 if f in seenfiles:
500 if f in seenfiles:
503 continue
501 continue
504 seenfiles.add(f)
502 seenfiles.add(f)
505
503
506 i = b'data/%s.i' % f
504 i = b'data/%s.i' % f
507 d = b'data/%s.d' % f
505 d = b'data/%s.d' % f
508
506
509 if repo.store._exists(i):
507 if repo.store._exists(i):
510 newentries.add(i)
508 newentries.add(i)
511 if repo.store._exists(d):
509 if repo.store._exists(d):
512 newentries.add(d)
510 newentries.add(d)
513
511
514 progress.complete()
512 progress.complete()
515
513
516 if requirements.TREEMANIFEST_REQUIREMENT in repo.requirements:
514 if requirements.TREEMANIFEST_REQUIREMENT in repo.requirements:
517 # This logic is safe if treemanifest isn't enabled, but also
515 # This logic is safe if treemanifest isn't enabled, but also
518 # pointless, so we skip it if treemanifest isn't enabled.
516 # pointless, so we skip it if treemanifest isn't enabled.
519 for dir in pathutil.dirs(seenfiles):
517 for dir in pathutil.dirs(seenfiles):
520 i = b'meta/%s/00manifest.i' % dir
518 i = b'meta/%s/00manifest.i' % dir
521 d = b'meta/%s/00manifest.d' % dir
519 d = b'meta/%s/00manifest.d' % dir
522
520
523 if repo.store._exists(i):
521 if repo.store._exists(i):
524 newentries.add(i)
522 newentries.add(i)
525 if repo.store._exists(d):
523 if repo.store._exists(d):
526 newentries.add(d)
524 newentries.add(d)
527
525
528 addcount = len(newentries - oldentries)
526 addcount = len(newentries - oldentries)
529 removecount = len(oldentries - newentries)
527 removecount = len(oldentries - newentries)
530 for p in sorted(oldentries - newentries):
528 for p in sorted(oldentries - newentries):
531 ui.write(_(b'removing %s\n') % p)
529 ui.write(_(b'removing %s\n') % p)
532 for p in sorted(newentries - oldentries):
530 for p in sorted(newentries - oldentries):
533 ui.write(_(b'adding %s\n') % p)
531 ui.write(_(b'adding %s\n') % p)
534
532
535 if addcount or removecount:
533 if addcount or removecount:
536 ui.write(
534 ui.write(
537 _(b'%d items added, %d removed from fncache\n')
535 _(b'%d items added, %d removed from fncache\n')
538 % (addcount, removecount)
536 % (addcount, removecount)
539 )
537 )
540 fnc.entries = newentries
538 fnc.entries = newentries
541 fnc._dirty = True
539 fnc._dirty = True
542
540
543 with repo.transaction(b'fncache') as tr:
541 with repo.transaction(b'fncache') as tr:
544 fnc.write(tr)
542 fnc.write(tr)
545 else:
543 else:
546 ui.write(_(b'fncache already up to date\n'))
544 ui.write(_(b'fncache already up to date\n'))
547
545
548
546
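rebuildfncache() above is the helper used by the `hg debugrebuildfncache` debug command. A hedged usage sketch (the wrapper name is hypothetical); only_data=True takes the faster path that trusts the existing `.i` listing and only re-checks `.d` files:

from mercurial import repair


def refresh_fncache(ui, repo, fast=False):
    # fast=True corresponds to the only_data branch documented above
    repair.rebuildfncache(ui, repo, only_data=fast)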
549 def deleteobsmarkers(obsstore, indices):
547 def deleteobsmarkers(obsstore, indices):
550 """Delete some obsmarkers from obsstore and return how many were deleted
548 """Delete some obsmarkers from obsstore and return how many were deleted
551
549
552 'indices' is a list of ints which are the indices
550 'indices' is a list of ints which are the indices
553 of the markers to be deleted.
551 of the markers to be deleted.
554
552
555 Every invocation of this function completely rewrites the obsstore file,
553 Every invocation of this function completely rewrites the obsstore file,
556 skipping the markers we want removed. A new temporary file is
554 skipping the markers we want removed. A new temporary file is
557 created, remaining markers are written there and on .close() this file
555 created, remaining markers are written there and on .close() this file
558 gets atomically renamed to obsstore, thus guaranteeing consistency."""
556 gets atomically renamed to obsstore, thus guaranteeing consistency."""
559 if not indices:
557 if not indices:
560 # we don't want to rewrite the obsstore with the same content
558 # we don't want to rewrite the obsstore with the same content
561 return
559 return
562
560
563 left = []
561 left = []
564 current = obsstore._all
562 current = obsstore._all
565 n = 0
563 n = 0
566 for i, m in enumerate(current):
564 for i, m in enumerate(current):
567 if i in indices:
565 if i in indices:
568 n += 1
566 n += 1
569 continue
567 continue
570 left.append(m)
568 left.append(m)
571
569
572 newobsstorefile = obsstore.svfs(b'obsstore', b'w', atomictemp=True)
570 newobsstorefile = obsstore.svfs(b'obsstore', b'w', atomictemp=True)
573 for bytes in obsolete.encodemarkers(left, True, obsstore._version):
571 for bytes in obsolete.encodemarkers(left, True, obsstore._version):
574 newobsstorefile.write(bytes)
572 newobsstorefile.write(bytes)
575 newobsstorefile.close()
573 newobsstorefile.close()
576 return n
574 return n
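deleteobsmarkers() above leans on the vfs atomictemp mode: the surviving markers are written to a temporary file which replaces 'obsstore' atomically when closed. A minimal sketch of that pattern, assuming the svfs(name, mode, atomictemp=True) call shown in the function (the helper itself is hypothetical):

def rewrite_file_atomically(svfs, name, chunks):
    # writes land in a temporary file next to `name`
    fp = svfs(name, b'w', atomictemp=True)
    for chunk in chunks:
        fp.write(chunk)
    # close() atomically renames the temporary file over `name`, so readers
    # never see a half-written file
    fp.close()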
@@ -1,887 +1,885
1 # censor code related to censoring revision
1 # censor code related to censoring revision
2 # coding: utf8
2 # coding: utf8
3 #
3 #
4 # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
4 # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
5 # Copyright 2015 Google, Inc <martinvonz@google.com>
5 # Copyright 2015 Google, Inc <martinvonz@google.com>
6 #
6 #
7 # This software may be used and distributed according to the terms of the
7 # This software may be used and distributed according to the terms of the
8 # GNU General Public License version 2 or any later version.
8 # GNU General Public License version 2 or any later version.
9
9
10 import binascii
10 import binascii
11 import contextlib
11 import contextlib
12 import os
12 import os
13 import struct
13 import struct
14
14
15 from ..node import (
15 from ..node import (
16 nullrev,
16 nullrev,
17 )
17 )
18 from .constants import (
18 from .constants import (
19 COMP_MODE_PLAIN,
19 COMP_MODE_PLAIN,
20 ENTRY_DATA_COMPRESSED_LENGTH,
20 ENTRY_DATA_COMPRESSED_LENGTH,
21 ENTRY_DATA_COMPRESSION_MODE,
21 ENTRY_DATA_COMPRESSION_MODE,
22 ENTRY_DATA_OFFSET,
22 ENTRY_DATA_OFFSET,
23 ENTRY_DATA_UNCOMPRESSED_LENGTH,
23 ENTRY_DATA_UNCOMPRESSED_LENGTH,
24 ENTRY_DELTA_BASE,
24 ENTRY_DELTA_BASE,
25 ENTRY_LINK_REV,
25 ENTRY_LINK_REV,
26 ENTRY_NODE_ID,
26 ENTRY_NODE_ID,
27 ENTRY_PARENT_1,
27 ENTRY_PARENT_1,
28 ENTRY_PARENT_2,
28 ENTRY_PARENT_2,
29 ENTRY_SIDEDATA_COMPRESSED_LENGTH,
29 ENTRY_SIDEDATA_COMPRESSED_LENGTH,
30 ENTRY_SIDEDATA_COMPRESSION_MODE,
30 ENTRY_SIDEDATA_COMPRESSION_MODE,
31 ENTRY_SIDEDATA_OFFSET,
31 ENTRY_SIDEDATA_OFFSET,
32 REVIDX_ISCENSORED,
32 REVIDX_ISCENSORED,
33 REVLOGV0,
33 REVLOGV0,
34 REVLOGV1,
34 REVLOGV1,
35 )
35 )
36 from ..i18n import _
36 from ..i18n import _
37
37
38 from .. import (
38 from .. import (
39 error,
39 error,
40 mdiff,
40 mdiff,
41 pycompat,
41 pycompat,
42 revlogutils,
42 revlogutils,
43 util,
43 util,
44 )
44 )
45 from ..utils import (
45 from ..utils import (
46 storageutil,
46 storageutil,
47 )
47 )
48 from . import (
48 from . import (
49 constants,
49 constants,
50 deltas,
50 deltas,
51 )
51 )
52
52
53
53
54 def v1_censor(rl, tr, censornode, tombstone=b''):
54 def v1_censor(rl, tr, censornode, tombstone=b''):
55 """censors a revision in a "version 1" revlog"""
55 """censors a revision in a "version 1" revlog"""
56 assert rl._format_version == constants.REVLOGV1, rl._format_version
56 assert rl._format_version == constants.REVLOGV1, rl._format_version
57
57
58 # avoid cycle
58 # avoid cycle
59 from .. import revlog
59 from .. import revlog
60
60
61 censorrev = rl.rev(censornode)
61 censorrev = rl.rev(censornode)
62 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
62 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
63
63
64 # Rewriting the revlog in place is hard. Our strategy for censoring is
64 # Rewriting the revlog in place is hard. Our strategy for censoring is
65 # to create a new revlog, copy all revisions to it, then replace the
65 # to create a new revlog, copy all revisions to it, then replace the
66 # revlogs on transaction close.
66 # revlogs on transaction close.
67 #
67 #
68 # This is a bit dangerous. We could easily have a mismatch of state.
68 # This is a bit dangerous. We could easily have a mismatch of state.
69 newrl = revlog.revlog(
69 newrl = revlog.revlog(
70 rl.opener,
70 rl.opener,
71 target=rl.target,
71 target=rl.target,
72 radix=rl.radix,
72 radix=rl.radix,
73 postfix=b'tmpcensored',
73 postfix=b'tmpcensored',
74 censorable=True,
74 censorable=True,
75 )
75 )
76 newrl._format_version = rl._format_version
76 newrl._format_version = rl._format_version
77 newrl._format_flags = rl._format_flags
77 newrl._format_flags = rl._format_flags
78 newrl._generaldelta = rl._generaldelta
78 newrl._generaldelta = rl._generaldelta
79 newrl._parse_index = rl._parse_index
79 newrl._parse_index = rl._parse_index
80
80
81 for rev in rl.revs():
81 for rev in rl.revs():
82 node = rl.node(rev)
82 node = rl.node(rev)
83 p1, p2 = rl.parents(node)
83 p1, p2 = rl.parents(node)
84
84
85 if rev == censorrev:
85 if rev == censorrev:
86 newrl.addrawrevision(
86 newrl.addrawrevision(
87 tombstone,
87 tombstone,
88 tr,
88 tr,
89 rl.linkrev(censorrev),
89 rl.linkrev(censorrev),
90 p1,
90 p1,
91 p2,
91 p2,
92 censornode,
92 censornode,
93 constants.REVIDX_ISCENSORED,
93 constants.REVIDX_ISCENSORED,
94 )
94 )
95
95
96 if newrl.deltaparent(rev) != nullrev:
96 if newrl.deltaparent(rev) != nullrev:
97 m = _(b'censored revision stored as delta; cannot censor')
97 m = _(b'censored revision stored as delta; cannot censor')
98 h = _(
98 h = _(
99 b'censoring of revlogs is not fully implemented;'
99 b'censoring of revlogs is not fully implemented;'
100 b' please report this bug'
100 b' please report this bug'
101 )
101 )
102 raise error.Abort(m, hint=h)
102 raise error.Abort(m, hint=h)
103 continue
103 continue
104
104
105 if rl.iscensored(rev):
105 if rl.iscensored(rev):
106 if rl.deltaparent(rev) != nullrev:
106 if rl.deltaparent(rev) != nullrev:
107 m = _(
107 m = _(
108 b'cannot censor due to censored '
108 b'cannot censor due to censored '
109 b'revision having delta stored'
109 b'revision having delta stored'
110 )
110 )
111 raise error.Abort(m)
111 raise error.Abort(m)
112 rawtext = rl._chunk(rev)
112 rawtext = rl._chunk(rev)
113 else:
113 else:
114 rawtext = rl.rawdata(rev)
114 rawtext = rl.rawdata(rev)
115
115
116 newrl.addrawrevision(
116 newrl.addrawrevision(
117 rawtext, tr, rl.linkrev(rev), p1, p2, node, rl.flags(rev)
117 rawtext, tr, rl.linkrev(rev), p1, p2, node, rl.flags(rev)
118 )
118 )
119
119
120 tr.addbackup(rl._indexfile, location=b'store')
120 tr.addbackup(rl._indexfile, location=b'store')
121 if not rl._inline:
121 if not rl._inline:
122 tr.addbackup(rl._datafile, location=b'store')
122 tr.addbackup(rl._datafile, location=b'store')
123
123
124 rl.opener.rename(newrl._indexfile, rl._indexfile)
124 rl.opener.rename(newrl._indexfile, rl._indexfile)
125 if not rl._inline:
125 if not rl._inline:
126 rl.opener.rename(newrl._datafile, rl._datafile)
126 rl.opener.rename(newrl._datafile, rl._datafile)
127
127
128 rl.clearcaches()
128 rl.clearcaches()
129 rl._loadindex()
129 rl._loadindex()
130
130
131
131
132 def v2_censor(revlog, tr, censornode, tombstone=b''):
132 def v2_censor(revlog, tr, censornode, tombstone=b''):
133 """censors a revision in a "version 2" revlog"""
133 """censors a revision in a "version 2" revlog"""
134 assert revlog._format_version != REVLOGV0, revlog._format_version
134 assert revlog._format_version != REVLOGV0, revlog._format_version
135 assert revlog._format_version != REVLOGV1, revlog._format_version
135 assert revlog._format_version != REVLOGV1, revlog._format_version
136
136
137 censor_revs = {revlog.rev(censornode)}
137 censor_revs = {revlog.rev(censornode)}
138 _rewrite_v2(revlog, tr, censor_revs, tombstone)
138 _rewrite_v2(revlog, tr, censor_revs, tombstone)
139
139
140
140
141 def _rewrite_v2(revlog, tr, censor_revs, tombstone=b''):
141 def _rewrite_v2(revlog, tr, censor_revs, tombstone=b''):
142 """rewrite a revlog to censor some of its content
142 """rewrite a revlog to censor some of its content
143
143
144 General principle
144 General principle
145
145
146 We create new revlog files (index/data/sidedata) to copy the content of
146 We create new revlog files (index/data/sidedata) to copy the content of
147 the existing data without the censored data.
147 the existing data without the censored data.
148
148
149 We need to recompute new delta for any revision that used the censored
149 We need to recompute new delta for any revision that used the censored
150 revision as delta base. As the cumulative size of the new delta may be
150 revision as delta base. As the cumulative size of the new delta may be
151 large, we store them in a temporary file until they are stored in their
151 large, we store them in a temporary file until they are stored in their
152 final destination.
152 final destination.
153
153
154 All data before the censored data can be blindly copied. The rest needs
154 All data before the censored data can be blindly copied. The rest needs
155 to be copied as we go and the associated index entry needs adjustment.
155 to be copied as we go and the associated index entry needs adjustment.
156 """
156 """
157 assert revlog._format_version != REVLOGV0, revlog._format_version
157 assert revlog._format_version != REVLOGV0, revlog._format_version
158 assert revlog._format_version != REVLOGV1, revlog._format_version
158 assert revlog._format_version != REVLOGV1, revlog._format_version
159
159
160 old_index = revlog.index
160 old_index = revlog.index
161 docket = revlog._docket
161 docket = revlog._docket
162
162
163 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
163 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
164
164
165 first_excl_rev = min(censor_revs)
165 first_excl_rev = min(censor_revs)
166
166
167 first_excl_entry = revlog.index[first_excl_rev]
167 first_excl_entry = revlog.index[first_excl_rev]
168 index_cutoff = revlog.index.entry_size * first_excl_rev
168 index_cutoff = revlog.index.entry_size * first_excl_rev
169 data_cutoff = first_excl_entry[ENTRY_DATA_OFFSET] >> 16
169 data_cutoff = first_excl_entry[ENTRY_DATA_OFFSET] >> 16
170 sidedata_cutoff = revlog.sidedata_cut_off(first_excl_rev)
170 sidedata_cutoff = revlog.sidedata_cut_off(first_excl_rev)
171
171
172 with pycompat.unnamedtempfile(mode=b"w+b") as tmp_storage:
172 with pycompat.unnamedtempfile(mode=b"w+b") as tmp_storage:
173 # rev → (new_base, data_start, data_end, compression_mode)
173 # rev → (new_base, data_start, data_end, compression_mode)
174 rewritten_entries = _precompute_rewritten_delta(
174 rewritten_entries = _precompute_rewritten_delta(
175 revlog,
175 revlog,
176 old_index,
176 old_index,
177 censor_revs,
177 censor_revs,
178 tmp_storage,
178 tmp_storage,
179 )
179 )
180
180
181 all_files = _setup_new_files(
181 all_files = _setup_new_files(
182 revlog,
182 revlog,
183 index_cutoff,
183 index_cutoff,
184 data_cutoff,
184 data_cutoff,
185 sidedata_cutoff,
185 sidedata_cutoff,
186 )
186 )
187
187
188 # we don't need to open the old index file since its content already
188 # we don't need to open the old index file since its content already
189 # exists in a usable form in `old_index`.
189 # exists in a usable form in `old_index`.
190 with all_files() as open_files:
190 with all_files() as open_files:
191 (
191 (
192 old_data_file,
192 old_data_file,
193 old_sidedata_file,
193 old_sidedata_file,
194 new_index_file,
194 new_index_file,
195 new_data_file,
195 new_data_file,
196 new_sidedata_file,
196 new_sidedata_file,
197 ) = open_files
197 ) = open_files
198
198
199 # writing the censored revision
199 # writing the censored revision
200
200
201 # Writing all subsequent revisions
201 # Writing all subsequent revisions
202 for rev in range(first_excl_rev, len(old_index)):
202 for rev in range(first_excl_rev, len(old_index)):
203 if rev in censor_revs:
203 if rev in censor_revs:
204 _rewrite_censor(
204 _rewrite_censor(
205 revlog,
205 revlog,
206 old_index,
206 old_index,
207 open_files,
207 open_files,
208 rev,
208 rev,
209 tombstone,
209 tombstone,
210 )
210 )
211 else:
211 else:
212 _rewrite_simple(
212 _rewrite_simple(
213 revlog,
213 revlog,
214 old_index,
214 old_index,
215 open_files,
215 open_files,
216 rev,
216 rev,
217 rewritten_entries,
217 rewritten_entries,
218 tmp_storage,
218 tmp_storage,
219 )
219 )
220 docket.write(transaction=None, stripping=True)
220 docket.write(transaction=None, stripping=True)
221
221
222
222
223 def _precompute_rewritten_delta(
223 def _precompute_rewritten_delta(
224 revlog,
224 revlog,
225 old_index,
225 old_index,
226 excluded_revs,
226 excluded_revs,
227 tmp_storage,
227 tmp_storage,
228 ):
228 ):
229 """Compute new delta for revisions whose delta is based on revision that
229 """Compute new delta for revisions whose delta is based on revision that
230 will not survive as is.
230 will not survive as is.
231
231
232 Return a mapping: {rev → (new_base, data_start, data_end, compression_mode)}
232 Return a mapping: {rev → (new_base, data_start, data_end, compression_mode)}
233 """
233 """
234 dc = deltas.deltacomputer(revlog)
234 dc = deltas.deltacomputer(revlog)
235 rewritten_entries = {}
235 rewritten_entries = {}
236 first_excl_rev = min(excluded_revs)
236 first_excl_rev = min(excluded_revs)
237 with revlog._segmentfile._open_read() as dfh:
237 with revlog._segmentfile._open_read() as dfh:
238 for rev in range(first_excl_rev, len(old_index)):
238 for rev in range(first_excl_rev, len(old_index)):
239 if rev in excluded_revs:
239 if rev in excluded_revs:
240 # this revision will be preserved as is, so we don't need to
240 # this revision will be preserved as is, so we don't need to
241 # consider recomputing a delta.
241 # consider recomputing a delta.
242 continue
242 continue
243 entry = old_index[rev]
243 entry = old_index[rev]
244 if entry[ENTRY_DELTA_BASE] not in excluded_revs:
244 if entry[ENTRY_DELTA_BASE] not in excluded_revs:
245 continue
245 continue
246 # This is a revision that uses the censored revision as the base
246 # This is a revision that uses the censored revision as the base
247 # for its delta. We need to compute a new delta for it.
247 # for its delta. We need to compute a new delta for it.
248 if entry[ENTRY_DATA_UNCOMPRESSED_LENGTH] == 0:
248 if entry[ENTRY_DATA_UNCOMPRESSED_LENGTH] == 0:
249 # this revision is empty, we can delta against nullrev
249 # this revision is empty, we can delta against nullrev
250 rewritten_entries[rev] = (nullrev, 0, 0, COMP_MODE_PLAIN)
250 rewritten_entries[rev] = (nullrev, 0, 0, COMP_MODE_PLAIN)
251 else:
251 else:
252
252
253 text = revlog.rawdata(rev, _df=dfh)
253 text = revlog.rawdata(rev, _df=dfh)
254 info = revlogutils.revisioninfo(
254 info = revlogutils.revisioninfo(
255 node=entry[ENTRY_NODE_ID],
255 node=entry[ENTRY_NODE_ID],
256 p1=revlog.node(entry[ENTRY_PARENT_1]),
256 p1=revlog.node(entry[ENTRY_PARENT_1]),
257 p2=revlog.node(entry[ENTRY_PARENT_2]),
257 p2=revlog.node(entry[ENTRY_PARENT_2]),
258 btext=[text],
258 btext=[text],
259 textlen=len(text),
259 textlen=len(text),
260 cachedelta=None,
260 cachedelta=None,
261 flags=entry[ENTRY_DATA_OFFSET] & 0xFFFF,
261 flags=entry[ENTRY_DATA_OFFSET] & 0xFFFF,
262 )
262 )
263 d = dc.finddeltainfo(
263 d = dc.finddeltainfo(
264 info, dfh, excluded_bases=excluded_revs, target_rev=rev
264 info, dfh, excluded_bases=excluded_revs, target_rev=rev
265 )
265 )
266 default_comp = revlog._docket.default_compression_header
266 default_comp = revlog._docket.default_compression_header
267 comp_mode, d = deltas.delta_compression(default_comp, d)
267 comp_mode, d = deltas.delta_compression(default_comp, d)
268 # using `tell` is a bit lazy, but we are not here for speed
268 # using `tell` is a bit lazy, but we are not here for speed
269 start = tmp_storage.tell()
269 start = tmp_storage.tell()
270 tmp_storage.write(d.data[1])
270 tmp_storage.write(d.data[1])
271 end = tmp_storage.tell()
271 end = tmp_storage.tell()
272 rewritten_entries[rev] = (d.base, start, end, comp_mode)
272 rewritten_entries[rev] = (d.base, start, end, comp_mode)
273 return rewritten_entries
273 return rewritten_entries
274
274
275
275
276 def _setup_new_files(
276 def _setup_new_files(
277 revlog,
277 revlog,
278 index_cutoff,
278 index_cutoff,
279 data_cutoff,
279 data_cutoff,
280 sidedata_cutoff,
280 sidedata_cutoff,
281 ):
281 ):
282 """
282 """
283
283
284 return a context manager to open all the relevant files:
284 return a context manager to open all the relevant files:
285 - old_data_file,
285 - old_data_file,
286 - old_sidedata_file,
286 - old_sidedata_file,
287 - new_index_file,
287 - new_index_file,
288 - new_data_file,
288 - new_data_file,
289 - new_sidedata_file,
289 - new_sidedata_file,
290
290
291 The old_index_file is not here because it is accessed through the
291 The old_index_file is not here because it is accessed through the
292 `old_index` object of the calling function.
292 `old_index` object of the calling function.
293 """
293 """
294 docket = revlog._docket
294 docket = revlog._docket
295 old_index_filepath = revlog.opener.join(docket.index_filepath())
295 old_index_filepath = revlog.opener.join(docket.index_filepath())
296 old_data_filepath = revlog.opener.join(docket.data_filepath())
296 old_data_filepath = revlog.opener.join(docket.data_filepath())
297 old_sidedata_filepath = revlog.opener.join(docket.sidedata_filepath())
297 old_sidedata_filepath = revlog.opener.join(docket.sidedata_filepath())
298
298
299 new_index_filepath = revlog.opener.join(docket.new_index_file())
299 new_index_filepath = revlog.opener.join(docket.new_index_file())
300 new_data_filepath = revlog.opener.join(docket.new_data_file())
300 new_data_filepath = revlog.opener.join(docket.new_data_file())
301 new_sidedata_filepath = revlog.opener.join(docket.new_sidedata_file())
301 new_sidedata_filepath = revlog.opener.join(docket.new_sidedata_file())
302
302
303 util.copyfile(old_index_filepath, new_index_filepath, nb_bytes=index_cutoff)
303 util.copyfile(old_index_filepath, new_index_filepath, nb_bytes=index_cutoff)
304 util.copyfile(old_data_filepath, new_data_filepath, nb_bytes=data_cutoff)
304 util.copyfile(old_data_filepath, new_data_filepath, nb_bytes=data_cutoff)
305 util.copyfile(
305 util.copyfile(
306 old_sidedata_filepath,
306 old_sidedata_filepath,
307 new_sidedata_filepath,
307 new_sidedata_filepath,
308 nb_bytes=sidedata_cutoff,
308 nb_bytes=sidedata_cutoff,
309 )
309 )
310 revlog.opener.register_file(docket.index_filepath())
310 revlog.opener.register_file(docket.index_filepath())
311 revlog.opener.register_file(docket.data_filepath())
311 revlog.opener.register_file(docket.data_filepath())
312 revlog.opener.register_file(docket.sidedata_filepath())
312 revlog.opener.register_file(docket.sidedata_filepath())
313
313
314 docket.index_end = index_cutoff
314 docket.index_end = index_cutoff
315 docket.data_end = data_cutoff
315 docket.data_end = data_cutoff
316 docket.sidedata_end = sidedata_cutoff
316 docket.sidedata_end = sidedata_cutoff
317
317
318 # reload the revlog internal information
318 # reload the revlog internal information
319 revlog.clearcaches()
319 revlog.clearcaches()
320 revlog._loadindex(docket=docket)
320 revlog._loadindex(docket=docket)
321
321
322 @contextlib.contextmanager
322 @contextlib.contextmanager
323 def all_files_opener():
323 def all_files_opener():
324 # hide the opening in a helper function to please check-code, black
324 # hide the opening in a helper function to please check-code, black
325 # and various Python versions at the same time
325 # and various Python versions at the same time
326 with open(old_data_filepath, 'rb') as old_data_file:
326 with open(old_data_filepath, 'rb') as old_data_file:
327 with open(old_sidedata_filepath, 'rb') as old_sidedata_file:
327 with open(old_sidedata_filepath, 'rb') as old_sidedata_file:
328 with open(new_index_filepath, 'r+b') as new_index_file:
328 with open(new_index_filepath, 'r+b') as new_index_file:
329 with open(new_data_filepath, 'r+b') as new_data_file:
329 with open(new_data_filepath, 'r+b') as new_data_file:
330 with open(
330 with open(
331 new_sidedata_filepath, 'r+b'
331 new_sidedata_filepath, 'r+b'
332 ) as new_sidedata_file:
332 ) as new_sidedata_file:
333 new_index_file.seek(0, os.SEEK_END)
333 new_index_file.seek(0, os.SEEK_END)
334 assert new_index_file.tell() == index_cutoff
334 assert new_index_file.tell() == index_cutoff
335 new_data_file.seek(0, os.SEEK_END)
335 new_data_file.seek(0, os.SEEK_END)
336 assert new_data_file.tell() == data_cutoff
336 assert new_data_file.tell() == data_cutoff
337 new_sidedata_file.seek(0, os.SEEK_END)
337 new_sidedata_file.seek(0, os.SEEK_END)
338 assert new_sidedata_file.tell() == sidedata_cutoff
338 assert new_sidedata_file.tell() == sidedata_cutoff
339 yield (
339 yield (
340 old_data_file,
340 old_data_file,
341 old_sidedata_file,
341 old_sidedata_file,
342 new_index_file,
342 new_index_file,
343 new_data_file,
343 new_data_file,
344 new_sidedata_file,
344 new_sidedata_file,
345 )
345 )
346
346
347 return all_files_opener
347 return all_files_opener
348
348
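_setup_new_files() above returns a context manager rather than raw file handles; _rewrite_v2() consumes it as sketched here, unpacking the five files in the documented order (the rewrite_one callback is a hypothetical stand-in for _rewrite_simple/_rewrite_censor):

def rewrite_all(revlog, first_rev, index_cutoff, data_cutoff, sidedata_cutoff, rewrite_one):
    all_files = _setup_new_files(revlog, index_cutoff, data_cutoff, sidedata_cutoff)
    with all_files() as open_files:
        # open_files == (old_data_file, old_sidedata_file,
        #                new_index_file, new_data_file, new_sidedata_file)
        for rev in range(first_rev, len(revlog.index)):
            rewrite_one(revlog, open_files, rev)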
349
349
350 def _rewrite_simple(
350 def _rewrite_simple(
351 revlog,
351 revlog,
352 old_index,
352 old_index,
353 all_files,
353 all_files,
354 rev,
354 rev,
355 rewritten_entries,
355 rewritten_entries,
356 tmp_storage,
356 tmp_storage,
357 ):
357 ):
358 """append a normal revision to the index after the rewritten one(s)"""
358 """append a normal revision to the index after the rewritten one(s)"""
359 (
359 (
360 old_data_file,
360 old_data_file,
361 old_sidedata_file,
361 old_sidedata_file,
362 new_index_file,
362 new_index_file,
363 new_data_file,
363 new_data_file,
364 new_sidedata_file,
364 new_sidedata_file,
365 ) = all_files
365 ) = all_files
366 entry = old_index[rev]
366 entry = old_index[rev]
367 flags = entry[ENTRY_DATA_OFFSET] & 0xFFFF
367 flags = entry[ENTRY_DATA_OFFSET] & 0xFFFF
368 old_data_offset = entry[ENTRY_DATA_OFFSET] >> 16
368 old_data_offset = entry[ENTRY_DATA_OFFSET] >> 16
369
369
370 if rev not in rewritten_entries:
370 if rev not in rewritten_entries:
371 old_data_file.seek(old_data_offset)
371 old_data_file.seek(old_data_offset)
372 new_data_size = entry[ENTRY_DATA_COMPRESSED_LENGTH]
372 new_data_size = entry[ENTRY_DATA_COMPRESSED_LENGTH]
373 new_data = old_data_file.read(new_data_size)
373 new_data = old_data_file.read(new_data_size)
374 data_delta_base = entry[ENTRY_DELTA_BASE]
374 data_delta_base = entry[ENTRY_DELTA_BASE]
375 d_comp_mode = entry[ENTRY_DATA_COMPRESSION_MODE]
375 d_comp_mode = entry[ENTRY_DATA_COMPRESSION_MODE]
376 else:
376 else:
377 (
377 (
378 data_delta_base,
378 data_delta_base,
379 start,
379 start,
380 end,
380 end,
381 d_comp_mode,
381 d_comp_mode,
382 ) = rewritten_entries[rev]
382 ) = rewritten_entries[rev]
383 new_data_size = end - start
383 new_data_size = end - start
384 tmp_storage.seek(start)
384 tmp_storage.seek(start)
385 new_data = tmp_storage.read(new_data_size)
385 new_data = tmp_storage.read(new_data_size)
386
386
387 # It might be faster to group continuous read/write operations,
387 # It might be faster to group continuous read/write operations,
388 # however, this is censor, an operation that is not focused
388 # however, this is censor, an operation that is not focused
389 # on stellar performance. So I have not written this
389 # on stellar performance. So I have not written this
390 # optimisation yet.
390 # optimisation yet.
391 new_data_offset = new_data_file.tell()
391 new_data_offset = new_data_file.tell()
392 new_data_file.write(new_data)
392 new_data_file.write(new_data)
393
393
394 sidedata_size = entry[ENTRY_SIDEDATA_COMPRESSED_LENGTH]
394 sidedata_size = entry[ENTRY_SIDEDATA_COMPRESSED_LENGTH]
395 new_sidedata_offset = new_sidedata_file.tell()
395 new_sidedata_offset = new_sidedata_file.tell()
396 if 0 < sidedata_size:
396 if 0 < sidedata_size:
397 old_sidedata_offset = entry[ENTRY_SIDEDATA_OFFSET]
397 old_sidedata_offset = entry[ENTRY_SIDEDATA_OFFSET]
398 old_sidedata_file.seek(old_sidedata_offset)
398 old_sidedata_file.seek(old_sidedata_offset)
399 new_sidedata = old_sidedata_file.read(sidedata_size)
399 new_sidedata = old_sidedata_file.read(sidedata_size)
400 new_sidedata_file.write(new_sidedata)
400 new_sidedata_file.write(new_sidedata)
401
401
402 data_uncompressed_length = entry[ENTRY_DATA_UNCOMPRESSED_LENGTH]
402 data_uncompressed_length = entry[ENTRY_DATA_UNCOMPRESSED_LENGTH]
403 sd_com_mode = entry[ENTRY_SIDEDATA_COMPRESSION_MODE]
403 sd_com_mode = entry[ENTRY_SIDEDATA_COMPRESSION_MODE]
404 assert data_delta_base <= rev, (data_delta_base, rev)
404 assert data_delta_base <= rev, (data_delta_base, rev)
405
405
406 new_entry = revlogutils.entry(
406 new_entry = revlogutils.entry(
407 flags=flags,
407 flags=flags,
408 data_offset=new_data_offset,
408 data_offset=new_data_offset,
409 data_compressed_length=new_data_size,
409 data_compressed_length=new_data_size,
410 data_uncompressed_length=data_uncompressed_length,
410 data_uncompressed_length=data_uncompressed_length,
411 data_delta_base=data_delta_base,
411 data_delta_base=data_delta_base,
412 link_rev=entry[ENTRY_LINK_REV],
412 link_rev=entry[ENTRY_LINK_REV],
413 parent_rev_1=entry[ENTRY_PARENT_1],
413 parent_rev_1=entry[ENTRY_PARENT_1],
414 parent_rev_2=entry[ENTRY_PARENT_2],
414 parent_rev_2=entry[ENTRY_PARENT_2],
415 node_id=entry[ENTRY_NODE_ID],
415 node_id=entry[ENTRY_NODE_ID],
416 sidedata_offset=new_sidedata_offset,
416 sidedata_offset=new_sidedata_offset,
417 sidedata_compressed_length=sidedata_size,
417 sidedata_compressed_length=sidedata_size,
418 data_compression_mode=d_comp_mode,
418 data_compression_mode=d_comp_mode,
419 sidedata_compression_mode=sd_com_mode,
419 sidedata_compression_mode=sd_com_mode,
420 )
420 )
421 revlog.index.append(new_entry)
421 revlog.index.append(new_entry)
422 entry_bin = revlog.index.entry_binary(rev)
422 entry_bin = revlog.index.entry_binary(rev)
423 new_index_file.write(entry_bin)
423 new_index_file.write(entry_bin)
424
424
425 revlog._docket.index_end = new_index_file.tell()
425 revlog._docket.index_end = new_index_file.tell()
426 revlog._docket.data_end = new_data_file.tell()
426 revlog._docket.data_end = new_data_file.tell()
427 revlog._docket.sidedata_end = new_sidedata_file.tell()
427 revlog._docket.sidedata_end = new_sidedata_file.tell()
428
428
429
429
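The first index field read above packs two values into one integer: the revision flags sit in the low 16 bits and the data offset in the remaining high bits. A minimal standalone sketch of that packing (not Mercurial API):

    def unpack_offset_flags(field):
        # mirrors `entry[ENTRY_DATA_OFFSET] >> 16` and `& 0xFFFF` above
        return field >> 16, field & 0xFFFF

    def pack_offset_flags(data_offset, flags):
        return (data_offset << 16) | (flags & 0xFFFF)

    assert unpack_offset_flags(pack_offset_flags(1024, 0x1)) == (1024, 0x1)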
430 def _rewrite_censor(
430 def _rewrite_censor(
431 revlog,
431 revlog,
432 old_index,
432 old_index,
433 all_files,
433 all_files,
434 rev,
434 rev,
435 tombstone,
435 tombstone,
436 ):
436 ):
437 """rewrite and append a censored revision"""
437 """rewrite and append a censored revision"""
438 (
438 (
439 old_data_file,
439 old_data_file,
440 old_sidedata_file,
440 old_sidedata_file,
441 new_index_file,
441 new_index_file,
442 new_data_file,
442 new_data_file,
443 new_sidedata_file,
443 new_sidedata_file,
444 ) = all_files
444 ) = all_files
445 entry = old_index[rev]
445 entry = old_index[rev]
446
446
447 # XXX consider trying the default compression too
447 # XXX consider trying the default compression too
448 new_data_size = len(tombstone)
448 new_data_size = len(tombstone)
449 new_data_offset = new_data_file.tell()
449 new_data_offset = new_data_file.tell()
450 new_data_file.write(tombstone)
450 new_data_file.write(tombstone)
451
451
452 # we are not adding any sidedata as they might leak info about the censored version
452 # we are not adding any sidedata as they might leak info about the censored version
453
453
454 link_rev = entry[ENTRY_LINK_REV]
454 link_rev = entry[ENTRY_LINK_REV]
455
455
456 p1 = entry[ENTRY_PARENT_1]
456 p1 = entry[ENTRY_PARENT_1]
457 p2 = entry[ENTRY_PARENT_2]
457 p2 = entry[ENTRY_PARENT_2]
458
458
459 new_entry = revlogutils.entry(
459 new_entry = revlogutils.entry(
460 flags=constants.REVIDX_ISCENSORED,
460 flags=constants.REVIDX_ISCENSORED,
461 data_offset=new_data_offset,
461 data_offset=new_data_offset,
462 data_compressed_length=new_data_size,
462 data_compressed_length=new_data_size,
463 data_uncompressed_length=new_data_size,
463 data_uncompressed_length=new_data_size,
464 data_delta_base=rev,
464 data_delta_base=rev,
465 link_rev=link_rev,
465 link_rev=link_rev,
466 parent_rev_1=p1,
466 parent_rev_1=p1,
467 parent_rev_2=p2,
467 parent_rev_2=p2,
468 node_id=entry[ENTRY_NODE_ID],
468 node_id=entry[ENTRY_NODE_ID],
469 sidedata_offset=0,
469 sidedata_offset=0,
470 sidedata_compressed_length=0,
470 sidedata_compressed_length=0,
471 data_compression_mode=COMP_MODE_PLAIN,
471 data_compression_mode=COMP_MODE_PLAIN,
472 sidedata_compression_mode=COMP_MODE_PLAIN,
472 sidedata_compression_mode=COMP_MODE_PLAIN,
473 )
473 )
474 revlog.index.append(new_entry)
474 revlog.index.append(new_entry)
475 entry_bin = revlog.index.entry_binary(rev)
475 entry_bin = revlog.index.entry_binary(rev)
476 new_index_file.write(entry_bin)
476 new_index_file.write(entry_bin)
477 revlog._docket.index_end = new_index_file.tell()
477 revlog._docket.index_end = new_index_file.tell()
478 revlog._docket.data_end = new_data_file.tell()
478 revlog._docket.data_end = new_data_file.tell()
479
479
480
480
481 def _get_filename_from_filelog_index(path):
481 def _get_filename_from_filelog_index(path):
482 # Drop the extension and the `data/` prefix
482 # Drop the extension and the `data/` prefix
483 path_part = path.rsplit(b'.', 1)[0].split(b'/', 1)
483 path_part = path.rsplit(b'.', 1)[0].split(b'/', 1)
484 if len(path_part) < 2:
484 if len(path_part) < 2:
485 msg = _(b"cannot recognize filelog from filename: '%s'")
485 msg = _(b"cannot recognize filelog from filename: '%s'")
486 msg %= path
486 msg %= path
487 raise error.Abort(msg)
487 raise error.Abort(msg)
488
488
489 return path_part[1]
489 return path_part[1]
490
490
491
491
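As an illustration of the path handling above, the same recovery can be written as a standalone helper (the name is hypothetical, not part of Mercurial's API):

    def filename_from_filelog_index(path):
        # b'data/some/dir/file.txt.i' -> b'some/dir/file.txt'
        parts = path.rsplit(b'.', 1)[0].split(b'/', 1)
        if len(parts) < 2:
            raise ValueError(b"cannot recognize filelog from filename: '%s'" % path)
        return parts[1]

    assert filename_from_filelog_index(b'data/some/dir/file.txt.i') == b'some/dir/file.txt'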
492 def _filelog_from_filename(repo, path):
492 def _filelog_from_filename(repo, path):
493 """Returns the filelog for the given `path`. Stolen from `engine.py`"""
493 """Returns the filelog for the given `path`. Stolen from `engine.py`"""
494
494
495 from .. import filelog # avoid cycle
495 from .. import filelog # avoid cycle
496
496
497 fl = filelog.filelog(repo.svfs, path)
497 fl = filelog.filelog(repo.svfs, path)
498 return fl
498 return fl
499
499
500
500
501 def _write_swapped_parents(repo, rl, rev, offset, fp):
501 def _write_swapped_parents(repo, rl, rev, offset, fp):
502 """Swaps p1 and p2 and overwrites the revlog entry for `rev` in `fp`"""
502 """Swaps p1 and p2 and overwrites the revlog entry for `rev` in `fp`"""
503 from ..pure import parsers # avoid cycle
503 from ..pure import parsers # avoid cycle
504
504
505 if repo._currentlock(repo._lockref) is None:
505 if repo._currentlock(repo._lockref) is None:
506 # Let's be paranoid about it
506 # Let's be paranoid about it
507 msg = "repo needs to be locked to rewrite parents"
507 msg = "repo needs to be locked to rewrite parents"
508 raise error.ProgrammingError(msg)
508 raise error.ProgrammingError(msg)
509
509
510 index_format = parsers.IndexObject.index_format
510 index_format = parsers.IndexObject.index_format
511 entry = rl.index[rev]
511 entry = rl.index[rev]
512 new_entry = list(entry)
512 new_entry = list(entry)
513 new_entry[5], new_entry[6] = entry[6], entry[5]
513 new_entry[5], new_entry[6] = entry[6], entry[5]
514 packed = index_format.pack(*new_entry[:8])
514 packed = index_format.pack(*new_entry[:8])
515 fp.seek(offset)
515 fp.seek(offset)
516 fp.write(packed)
516 fp.write(packed)
517
517
518
518
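For readers unfamiliar with the index layout, here is a standalone sketch of the swap performed above. The struct format is an assumption of the sketch (the classic 64-byte RevlogNG entry); the authoritative definition is the `parsers.IndexObject.index_format` object used by the real code:

    import struct

    INDEX_ENTRY = struct.Struct(">Qiiiiii20s12x")  # assumed RevlogNG layout

    def swap_parents_at(fp, offset, entry):
        # entry fields: (offset_flags, comp_len, uncomp_len, base_rev,
        #                link_rev, p1_rev, p2_rev, node)
        fields = list(entry)
        fields[5], fields[6] = entry[6], entry[5]  # swap p1 and p2
        fp.seek(offset)
        fp.write(INDEX_ENTRY.pack(*fields[:8]))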
519 def _reorder_filelog_parents(repo, fl, to_fix):
519 def _reorder_filelog_parents(repo, fl, to_fix):
520 """
520 """
521 Swaps p1 and p2 for all `to_fix` revisions of filelog `fl` and writes the
521 Swaps p1 and p2 for all `to_fix` revisions of filelog `fl` and writes the
522 new version to disk, overwriting the old one with a rename.
522 new version to disk, overwriting the old one with a rename.
523 """
523 """
524 from ..pure import parsers # avoid cycle
524 from ..pure import parsers # avoid cycle
525
525
526 ui = repo.ui
526 ui = repo.ui
527 assert len(to_fix) > 0
527 assert len(to_fix) > 0
528 rl = fl._revlog
528 rl = fl._revlog
529 if rl._format_version != constants.REVLOGV1:
529 if rl._format_version != constants.REVLOGV1:
530 msg = "expected version 1 revlog, got version '%d'" % rl._format_version
530 msg = "expected version 1 revlog, got version '%d'" % rl._format_version
531 raise error.ProgrammingError(msg)
531 raise error.ProgrammingError(msg)
532
532
533 index_file = rl._indexfile
533 index_file = rl._indexfile
534 new_file_path = index_file + b'.tmp-parents-fix'
534 new_file_path = index_file + b'.tmp-parents-fix'
535 repaired_msg = _(b"repaired revision %d of 'filelog %s'\n")
535 repaired_msg = _(b"repaired revision %d of 'filelog %s'\n")
536
536
537 with ui.uninterruptible():
537 with ui.uninterruptible():
538 try:
538 try:
539 util.copyfile(
539 util.copyfile(
540 rl.opener.join(index_file),
540 rl.opener.join(index_file),
541 rl.opener.join(new_file_path),
541 rl.opener.join(new_file_path),
542 checkambig=rl._checkambig,
542 checkambig=rl._checkambig,
543 )
543 )
544
544
545 with rl.opener(new_file_path, mode=b"r+") as fp:
545 with rl.opener(new_file_path, mode=b"r+") as fp:
546 if rl._inline:
546 if rl._inline:
547 index = parsers.InlinedIndexObject(fp.read())
547 index = parsers.InlinedIndexObject(fp.read())
548 for rev in fl.revs():
548 for rev in fl.revs():
549 if rev in to_fix:
549 if rev in to_fix:
550 offset = index._calculate_index(rev)
550 offset = index._calculate_index(rev)
551 _write_swapped_parents(repo, rl, rev, offset, fp)
551 _write_swapped_parents(repo, rl, rev, offset, fp)
552 ui.write(repaired_msg % (rev, index_file))
552 ui.write(repaired_msg % (rev, index_file))
553 else:
553 else:
554 index_format = parsers.IndexObject.index_format
554 index_format = parsers.IndexObject.index_format
555 for rev in to_fix:
555 for rev in to_fix:
556 offset = rev * index_format.size
556 offset = rev * index_format.size
557 _write_swapped_parents(repo, rl, rev, offset, fp)
557 _write_swapped_parents(repo, rl, rev, offset, fp)
558 ui.write(repaired_msg % (rev, index_file))
558 ui.write(repaired_msg % (rev, index_file))
559
559
560 rl.opener.rename(new_file_path, index_file)
560 rl.opener.rename(new_file_path, index_file)
561 rl.clearcaches()
561 rl.clearcaches()
562 rl._loadindex()
562 rl._loadindex()
563 finally:
563 finally:
564 util.tryunlink(new_file_path)
564 util.tryunlink(new_file_path)
565
565
566
566
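The function above follows a common copy, patch in place, then rename pattern. A generic standard-library sketch of that pattern (simplified; it leaves out the ambiguity checks and locking handled by the real code):

    import os
    import shutil

    def rewrite_file(path, patcher):
        tmp = path + '.tmp-rewrite'
        try:
            shutil.copyfile(path, tmp)
            with open(tmp, 'r+b') as fp:
                patcher(fp)        # seek()/write() only the bytes that change
            os.replace(tmp, path)  # swap the repaired copy in
        finally:
            if os.path.exists(tmp):
                os.unlink(tmp)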
567 def _is_revision_affected(fl, filerev, metadata_cache=None):
567 def _is_revision_affected(fl, filerev, metadata_cache=None):
568 full_text = lambda: fl._revlog.rawdata(filerev)
568 full_text = lambda: fl._revlog.rawdata(filerev)
569 parent_revs = lambda: fl._revlog.parentrevs(filerev)
569 parent_revs = lambda: fl._revlog.parentrevs(filerev)
570 return _is_revision_affected_inner(
570 return _is_revision_affected_inner(
571 full_text, parent_revs, filerev, metadata_cache
571 full_text, parent_revs, filerev, metadata_cache
572 )
572 )
573
573
574
574
575 def _is_revision_affected_inner(
575 def _is_revision_affected_inner(
576 full_text,
576 full_text,
577 parents_revs,
577 parents_revs,
578 filerev,
578 filerev,
579 metadata_cache=None,
579 metadata_cache=None,
580 ):
580 ):
581 """Mercurial currently (5.9rc0) uses `p1 == nullrev and p2 != nullrev` as a
581 """Mercurial currently (5.9rc0) uses `p1 == nullrev and p2 != nullrev` as a
582 special meaning compared to the reverse in the context of filelog-based
582 special meaning compared to the reverse in the context of filelog-based
583 copytracing. issue6528 exists because new code assumed that parent ordering
583 copytracing. issue6528 exists because new code assumed that parent ordering
584 didn't matter, so this detects if the revision contains metadata (since
584 didn't matter, so this detects if the revision contains metadata (since
585 it's only used for filelog-based copytracing) and its parents are in the
585 it's only used for filelog-based copytracing) and its parents are in the
586 "wrong" order."""
586 "wrong" order."""
587 try:
587 try:
588 raw_text = full_text()
588 raw_text = full_text()
589 except error.CensoredNodeError:
589 except error.CensoredNodeError:
590 # We don't care about censored nodes as they never carry metadata
590 # We don't care about censored nodes as they never carry metadata
591 return False
591 return False
592
592
593 # raw text can be a `memoryview`, which doesn't implement `startswith`
593 # raw text can be a `memoryview`, which doesn't implement `startswith`
594 has_meta = bytes(raw_text[:2]) == b'\x01\n'
594 has_meta = bytes(raw_text[:2]) == b'\x01\n'
595 if metadata_cache is not None:
595 if metadata_cache is not None:
596 metadata_cache[filerev] = has_meta
596 metadata_cache[filerev] = has_meta
597 if has_meta:
597 if has_meta:
598 (p1, p2) = parents_revs()
598 (p1, p2) = parents_revs()
599 if p1 != nullrev and p2 == nullrev:
599 if p1 != nullrev and p2 == nullrev:
600 return True
600 return True
601 return False
601 return False
602
602
603
603
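The detection above reduces to two cheap checks; a standalone predicate for illustration:

    def looks_affected(raw_text, p1, p2, nullrev=-1):
        # filelog copy metadata is introduced by a b'\x01\n' marker
        has_meta = bytes(raw_text[:2]) == b'\x01\n'
        # issue6528 only matters when the parents are in the "wrong" order
        return has_meta and p1 != nullrev and p2 == nullrev

    assert looks_affected(b'\x01\ncopy: a\n\x01\nfile body', 3, -1)
    assert not looks_affected(b'file body', 3, -1)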
604 def _is_revision_affected_fast(repo, fl, filerev, metadata_cache):
604 def _is_revision_affected_fast(repo, fl, filerev, metadata_cache):
605 rl = fl._revlog
605 rl = fl._revlog
606 is_censored = lambda: rl.iscensored(filerev)
606 is_censored = lambda: rl.iscensored(filerev)
607 delta_base = lambda: rl.deltaparent(filerev)
607 delta_base = lambda: rl.deltaparent(filerev)
608 delta = lambda: rl._chunk(filerev)
608 delta = lambda: rl._chunk(filerev)
609 full_text = lambda: rl.rawdata(filerev)
609 full_text = lambda: rl.rawdata(filerev)
610 parent_revs = lambda: rl.parentrevs(filerev)
610 parent_revs = lambda: rl.parentrevs(filerev)
611 return _is_revision_affected_fast_inner(
611 return _is_revision_affected_fast_inner(
612 is_censored,
612 is_censored,
613 delta_base,
613 delta_base,
614 delta,
614 delta,
615 full_text,
615 full_text,
616 parent_revs,
616 parent_revs,
617 filerev,
617 filerev,
618 metadata_cache,
618 metadata_cache,
619 )
619 )
620
620
621
621
622 def _is_revision_affected_fast_inner(
622 def _is_revision_affected_fast_inner(
623 is_censored,
623 is_censored,
624 delta_base,
624 delta_base,
625 delta,
625 delta,
626 full_text,
626 full_text,
627 parent_revs,
627 parent_revs,
628 filerev,
628 filerev,
629 metadata_cache,
629 metadata_cache,
630 ):
630 ):
631 """Optimization fast-path for `_is_revision_affected`.
631 """Optimization fast-path for `_is_revision_affected`.
632
632
633 `metadata_cache` is a dict of `{rev: has_metadata}` which allows any
633 `metadata_cache` is a dict of `{rev: has_metadata}` which allows any
634 revision to check if its base has metadata, saving computation of the full
634 revision to check if its base has metadata, saving computation of the full
635 text, instead looking at the current delta.
635 text, instead looking at the current delta.
636
636
637 This optimization only works if the revisions are looked at in order."""
637 This optimization only works if the revisions are looked at in order."""
638
638
639 if is_censored():
639 if is_censored():
640 # Censored revisions don't contain metadata, so they cannot be affected
640 # Censored revisions don't contain metadata, so they cannot be affected
641 metadata_cache[filerev] = False
641 metadata_cache[filerev] = False
642 return False
642 return False
643
643
644 p1, p2 = parent_revs()
644 p1, p2 = parent_revs()
645 if p1 == nullrev or p2 != nullrev:
645 if p1 == nullrev or p2 != nullrev:
646 return False
646 return False
647
647
648 delta_parent = delta_base()
648 delta_parent = delta_base()
649 parent_has_metadata = metadata_cache.get(delta_parent)
649 parent_has_metadata = metadata_cache.get(delta_parent)
650 if parent_has_metadata is None:
650 if parent_has_metadata is None:
651 return _is_revision_affected_inner(
651 return _is_revision_affected_inner(
652 full_text,
652 full_text,
653 parent_revs,
653 parent_revs,
654 filerev,
654 filerev,
655 metadata_cache,
655 metadata_cache,
656 )
656 )
657
657
658 chunk = delta()
658 chunk = delta()
659 if not len(chunk):
659 if not len(chunk):
660 # No diff for this revision
660 # No diff for this revision
661 return parent_has_metadata
661 return parent_has_metadata
662
662
663 header_length = 12
663 header_length = 12
664 if len(chunk) < header_length:
664 if len(chunk) < header_length:
665 raise error.Abort(_(b"patch cannot be decoded"))
665 raise error.Abort(_(b"patch cannot be decoded"))
666
666
667 start, _end, _length = struct.unpack(b">lll", chunk[:header_length])
667 start, _end, _length = struct.unpack(b">lll", chunk[:header_length])
668
668
669 if start < 2: # len(b'\x01\n') == 2
669 if start < 2: # len(b'\x01\n') == 2
670 # This delta does *something* to the metadata marker (if any).
670 # This delta does *something* to the metadata marker (if any).
671 # Check it the slow way
671 # Check it the slow way
672 is_affected = _is_revision_affected_inner(
672 is_affected = _is_revision_affected_inner(
673 full_text,
673 full_text,
674 parent_revs,
674 parent_revs,
675 filerev,
675 filerev,
676 metadata_cache,
676 metadata_cache,
677 )
677 )
678 return is_affected
678 return is_affected
679
679
680 # The diff did not remove or add the metadata header, it's then in the same
680 # The diff did not remove or add the metadata header, it's then in the same
681 # situation as its parent
681 # situation as its parent
682 metadata_cache[filerev] = parent_has_metadata
682 metadata_cache[filerev] = parent_has_metadata
683 return parent_has_metadata
683 return parent_has_metadata
684
684
685
685
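The fast path above only needs the first hunk header of a binary delta to decide whether the metadata marker could have changed. That check in isolation (illustrative, not Mercurial API):

    import struct

    def delta_may_touch_metadata(chunk):
        if not chunk:
            return False            # empty delta: same state as its base
        if len(chunk) < 12:
            raise ValueError("patch cannot be decoded")
        start, _end, _length = struct.unpack(">lll", chunk[:12])
        return start < 2            # len(b'\x01\n') == 2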
686 def _from_report(ui, repo, context, from_report, dry_run):
686 def _from_report(ui, repo, context, from_report, dry_run):
687 """
687 """
688 Fix the revisions given in the `from_report` file, but still checks if the
688 Fix the revisions given in the `from_report` file, but still checks if the
689 revisions are indeed affected to prevent an unfortunate cyclic situation
689 revisions are indeed affected to prevent an unfortunate cyclic situation
690 where we'd swap well-ordered parents again.
690 where we'd swap well-ordered parents again.
691
691
692 See the doc for `debug_fix_issue6528` for the format documentation.
692 See the doc for `debug_fix_issue6528` for the format documentation.
693 """
693 """
694 ui.write(_(b"loading report file '%s'\n") % from_report)
694 ui.write(_(b"loading report file '%s'\n") % from_report)
695
695
696 with context(), open(from_report, mode='rb') as f:
696 with context(), open(from_report, mode='rb') as f:
697 for line in f.read().split(b'\n'):
697 for line in f.read().split(b'\n'):
698 if not line:
698 if not line:
699 continue
699 continue
700 filenodes, filename = line.split(b' ', 1)
700 filenodes, filename = line.split(b' ', 1)
701 fl = _filelog_from_filename(repo, filename)
701 fl = _filelog_from_filename(repo, filename)
702 to_fix = set(
702 to_fix = set(
703 fl.rev(binascii.unhexlify(n)) for n in filenodes.split(b',')
703 fl.rev(binascii.unhexlify(n)) for n in filenodes.split(b',')
704 )
704 )
705 excluded = set()
705 excluded = set()
706
706
707 for filerev in to_fix:
707 for filerev in to_fix:
708 if _is_revision_affected(fl, filerev):
708 if _is_revision_affected(fl, filerev):
709 msg = b"found affected revision %d for filelog '%s'\n"
709 msg = b"found affected revision %d for filelog '%s'\n"
710 ui.warn(msg % (filerev, filename))
710 ui.warn(msg % (filerev, filename))
711 else:
711 else:
712 msg = _(b"revision %s of file '%s' is not affected\n")
712 msg = _(b"revision %s of file '%s' is not affected\n")
713 msg %= (binascii.hexlify(fl.node(filerev)), filename)
713 msg %= (binascii.hexlify(fl.node(filerev)), filename)
714 ui.warn(msg)
714 ui.warn(msg)
715 excluded.add(filerev)
715 excluded.add(filerev)
716
716
717 to_fix = to_fix - excluded
717 to_fix = to_fix - excluded
718 if not to_fix:
718 if not to_fix:
719 msg = _(b"no affected revisions were found for '%s'\n")
719 msg = _(b"no affected revisions were found for '%s'\n")
720 ui.write(msg % filename)
720 ui.write(msg % filename)
721 continue
721 continue
722 if not dry_run:
722 if not dry_run:
723 _reorder_filelog_parents(repo, fl, sorted(to_fix))
723 _reorder_filelog_parents(repo, fl, sorted(to_fix))
724
724
725
725
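For reference, the report file consumed above is plain text: one filelog per line, a comma-separated list of hex filenodes, a space, then the tracked path. A minimal parsing sketch (hypothetical helper):

    def parse_report_line(line):
        filenodes, filename = line.split(b' ', 1)
        return filename, filenodes.split(b',')

    path, nodes = parse_report_line(b'deadbeef,cafebabe path/to/file.txt')
    assert path == b'path/to/file.txt'
    assert nodes == [b'deadbeef', b'cafebabe']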
726 def filter_delta_issue6528(revlog, deltas_iter):
726 def filter_delta_issue6528(revlog, deltas_iter):
727 """filter incomind deltas to repaire issue 6528 on the fly"""
727 """filter incomind deltas to repaire issue 6528 on the fly"""
728 metadata_cache = {}
728 metadata_cache = {}
729
729
730 deltacomputer = deltas.deltacomputer(revlog)
730 deltacomputer = deltas.deltacomputer(revlog)
731
731
732 for rev, d in enumerate(deltas_iter, len(revlog)):
732 for rev, d in enumerate(deltas_iter, len(revlog)):
733 (
733 (
734 node,
734 node,
735 p1_node,
735 p1_node,
736 p2_node,
736 p2_node,
737 linknode,
737 linknode,
738 deltabase,
738 deltabase,
739 delta,
739 delta,
740 flags,
740 flags,
741 sidedata,
741 sidedata,
742 ) = d
742 ) = d
743
743
744 if not revlog.index.has_node(deltabase):
744 if not revlog.index.has_node(deltabase):
745 raise error.LookupError(
745 raise error.LookupError(
746 deltabase, revlog.radix, _(b'unknown parent')
746 deltabase, revlog.radix, _(b'unknown parent')
747 )
747 )
748 base_rev = revlog.rev(deltabase)
748 base_rev = revlog.rev(deltabase)
749 if not revlog.index.has_node(p1_node):
749 if not revlog.index.has_node(p1_node):
750 raise error.LookupError(p1_node, revlog.radix, _(b'unknown parent'))
750 raise error.LookupError(p1_node, revlog.radix, _(b'unknown parent'))
751 p1_rev = revlog.rev(p1_node)
751 p1_rev = revlog.rev(p1_node)
752 if not revlog.index.has_node(p2_node):
752 if not revlog.index.has_node(p2_node):
753 raise error.LookupError(p2_node, revlog.radix, _(b'unknown parent'))
753 raise error.LookupError(p2_node, revlog.radix, _(b'unknown parent'))
754 p2_rev = revlog.rev(p2_node)
754 p2_rev = revlog.rev(p2_node)
755
755
756 is_censored = lambda: bool(flags & REVIDX_ISCENSORED)
756 is_censored = lambda: bool(flags & REVIDX_ISCENSORED)
757 delta_base = lambda: revlog.rev(delta_base)
757 delta_base = lambda: revlog.rev(delta_base)
758 delta_base = lambda: base_rev
758 delta_base = lambda: base_rev
759 parent_revs = lambda: (p1_rev, p2_rev)
759 parent_revs = lambda: (p1_rev, p2_rev)
760
760
761 def full_text():
761 def full_text():
762 # note: being able to reuse the full text computation in the
762 # note: being able to reuse the full text computation in the
763 # underlying addrevision would be useful, however this is a bit too
763 # underlying addrevision would be useful, however this is a bit too
764 # intrusive for the "quick" issue6528 fix we are writing before the
764 # intrusive for the "quick" issue6528 fix we are writing before the
765 # 5.8 release
765 # 5.8 release
766 textlen = mdiff.patchedsize(revlog.size(base_rev), delta)
766 textlen = mdiff.patchedsize(revlog.size(base_rev), delta)
767
767
768 revinfo = revlogutils.revisioninfo(
768 revinfo = revlogutils.revisioninfo(
769 node,
769 node,
770 p1_node,
770 p1_node,
771 p2_node,
771 p2_node,
772 [None],
772 [None],
773 textlen,
773 textlen,
774 (base_rev, delta),
774 (base_rev, delta),
775 flags,
775 flags,
776 )
776 )
777 # cached by the global "writing" context
777 # cached by the global "writing" context
778 assert revlog._writinghandles is not None
778 assert revlog._writinghandles is not None
779 if revlog._inline:
779 if revlog._inline:
780 fh = revlog._writinghandles[0]
780 fh = revlog._writinghandles[0]
781 else:
781 else:
782 fh = revlog._writinghandles[1]
782 fh = revlog._writinghandles[1]
783 return deltacomputer.buildtext(revinfo, fh)
783 return deltacomputer.buildtext(revinfo, fh)
784
784
785 is_affected = _is_revision_affected_fast_inner(
785 is_affected = _is_revision_affected_fast_inner(
786 is_censored,
786 is_censored,
787 delta_base,
787 delta_base,
788 lambda: delta,
788 lambda: delta,
789 full_text,
789 full_text,
790 parent_revs,
790 parent_revs,
791 rev,
791 rev,
792 metadata_cache,
792 metadata_cache,
793 )
793 )
794 if is_affected:
794 if is_affected:
795 d = (
795 d = (
796 node,
796 node,
797 p2_node,
797 p2_node,
798 p1_node,
798 p1_node,
799 linknode,
799 linknode,
800 deltabase,
800 deltabase,
801 delta,
801 delta,
802 flags,
802 flags,
803 sidedata,
803 sidedata,
804 )
804 )
805 yield d
805 yield d
806
806
807
807
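Stripped of the revlog bookkeeping, the filter above is a generator that swaps the two parent nodes of any affected incoming delta and passes everything else through unchanged. A reduced sketch, with `is_affected` standing in for the metadata/parent-order test:

    def swap_affected_parents(deltas, is_affected):
        for d in deltas:
            node, p1, p2, linknode, deltabase, delta, flags, sidedata = d
            if is_affected(d):
                d = (node, p2, p1, linknode, deltabase, delta, flags, sidedata)
            yield d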
808 def repair_issue6528(
808 def repair_issue6528(
809 ui, repo, dry_run=False, to_report=None, from_report=None, paranoid=False
809 ui, repo, dry_run=False, to_report=None, from_report=None, paranoid=False
810 ):
810 ):
811 from .. import store # avoid cycle
811 from .. import store # avoid cycle
812
812
813 @contextlib.contextmanager
813 @contextlib.contextmanager
814 def context():
814 def context():
815 if dry_run or to_report: # No need for locking
815 if dry_run or to_report: # No need for locking
816 yield
816 yield
817 else:
817 else:
818 with repo.wlock(), repo.lock():
818 with repo.wlock(), repo.lock():
819 yield
819 yield
820
820
821 if from_report:
821 if from_report:
822 return _from_report(ui, repo, context, from_report, dry_run)
822 return _from_report(ui, repo, context, from_report, dry_run)
823
823
824 report_entries = []
824 report_entries = []
825
825
826 with context():
826 with context():
827 files = list(
827 files = list(
828 entry
828 entry
829 for entry in repo.store.datafiles()
829 for entry in repo.store.datafiles()
830 if (
830 if (
831 entry.is_revlog
831 entry.is_revlog and entry.revlog_type == store.FILEFLAGS_FILELOG
832 and entry.is_revlog_main
833 and entry.revlog_type == store.FILEFLAGS_FILELOG
834 )
832 )
835 )
833 )
836
834
837 progress = ui.makeprogress(
835 progress = ui.makeprogress(
838 _(b"looking for affected revisions"),
836 _(b"looking for affected revisions"),
839 unit=_(b"filelogs"),
837 unit=_(b"filelogs"),
840 total=len(files),
838 total=len(files),
841 )
839 )
842 found_nothing = True
840 found_nothing = True
843
841
844 for entry in files:
842 for entry in files:
845 progress.increment()
843 progress.increment()
846 filename = entry.target_id
844 filename = entry.target_id
847 fl = _filelog_from_filename(repo, entry.target_id)
845 fl = _filelog_from_filename(repo, entry.target_id)
848
846
849 # Set of filerevs (or hex filenodes if `to_report`) that need fixing
847 # Set of filerevs (or hex filenodes if `to_report`) that need fixing
850 to_fix = set()
848 to_fix = set()
851 metadata_cache = {}
849 metadata_cache = {}
852 for filerev in fl.revs():
850 for filerev in fl.revs():
853 affected = _is_revision_affected_fast(
851 affected = _is_revision_affected_fast(
854 repo, fl, filerev, metadata_cache
852 repo, fl, filerev, metadata_cache
855 )
853 )
856 if paranoid:
854 if paranoid:
857 slow = _is_revision_affected(fl, filerev)
855 slow = _is_revision_affected(fl, filerev)
858 if slow != affected:
856 if slow != affected:
859 msg = _(b"paranoid check failed for '%s' at node %s")
857 msg = _(b"paranoid check failed for '%s' at node %s")
860 node = binascii.hexlify(fl.node(filerev))
858 node = binascii.hexlify(fl.node(filerev))
861 raise error.Abort(msg % (filename, node))
859 raise error.Abort(msg % (filename, node))
862 if affected:
860 if affected:
863 msg = b"found affected revision %d for file '%s'\n"
861 msg = b"found affected revision %d for file '%s'\n"
864 ui.warn(msg % (filerev, filename))
862 ui.warn(msg % (filerev, filename))
865 found_nothing = False
863 found_nothing = False
866 if not dry_run:
864 if not dry_run:
867 if to_report:
865 if to_report:
868 to_fix.add(binascii.hexlify(fl.node(filerev)))
866 to_fix.add(binascii.hexlify(fl.node(filerev)))
869 else:
867 else:
870 to_fix.add(filerev)
868 to_fix.add(filerev)
871
869
872 if to_fix:
870 if to_fix:
873 to_fix = sorted(to_fix)
871 to_fix = sorted(to_fix)
874 if to_report:
872 if to_report:
875 report_entries.append((filename, to_fix))
873 report_entries.append((filename, to_fix))
876 else:
874 else:
877 _reorder_filelog_parents(repo, fl, to_fix)
875 _reorder_filelog_parents(repo, fl, to_fix)
878
876
879 if found_nothing:
877 if found_nothing:
880 ui.write(_(b"no affected revisions were found\n"))
878 ui.write(_(b"no affected revisions were found\n"))
881
879
882 if to_report and report_entries:
880 if to_report and report_entries:
883 with open(to_report, mode="wb") as f:
881 with open(to_report, mode="wb") as f:
884 for path, to_fix in report_entries:
882 for path, to_fix in report_entries:
885 f.write(b"%s %s\n" % (b",".join(to_fix), path))
883 f.write(b"%s %s\n" % (b",".join(to_fix), path))
886
884
887 progress.complete()
885 progress.complete()
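One detail worth noting in `repair_issue6528`: read-only invocations (dry runs and report generation) skip the repository locks entirely. The pattern in isolation, with a generic lock object standing in for `repo.wlock()`/`repo.lock()`:

    import contextlib

    @contextlib.contextmanager
    def maybe_locked(lock, read_only):
        if read_only:
            yield              # nothing will be written, no need to lock
        else:
            with lock:
                yield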
@@ -1,1056 +1,1067
1 # store.py - repository store handling for Mercurial
1 # store.py - repository store handling for Mercurial
2 #
2 #
3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 import collections
8 import collections
9 import functools
9 import functools
10 import os
10 import os
11 import re
11 import re
12 import stat
12 import stat
13 from typing import Generator
13 from typing import Generator
14
14
15 from .i18n import _
15 from .i18n import _
16 from .pycompat import getattr
16 from .pycompat import getattr
17 from .thirdparty import attr
17 from .thirdparty import attr
18 from .node import hex
18 from .node import hex
19 from . import (
19 from . import (
20 changelog,
20 changelog,
21 error,
21 error,
22 manifest,
22 manifest,
23 policy,
23 policy,
24 pycompat,
24 pycompat,
25 util,
25 util,
26 vfs as vfsmod,
26 vfs as vfsmod,
27 )
27 )
28 from .utils import hashutil
28 from .utils import hashutil
29
29
30 parsers = policy.importmod('parsers')
30 parsers = policy.importmod('parsers')
31 # how many bytes should be read from fncache in one read
31 # how many bytes should be read from fncache in one read
32 # It is done to prevent loading large fncache files into memory
32 # It is done to prevent loading large fncache files into memory
33 fncache_chunksize = 10 ** 6
33 fncache_chunksize = 10 ** 6
34
34
35
35
36 def _match_tracked_entry(entry, matcher):
36 def _match_tracked_entry(entry, matcher):
37 """parses a fncache entry and returns whether the entry is tracking a path
37 """parses a fncache entry and returns whether the entry is tracking a path
38 matched by matcher or not.
38 matched by matcher or not.
39
39
40 If matcher is None, returns True"""
40 If matcher is None, returns True"""
41
41
42 if matcher is None:
42 if matcher is None:
43 return True
43 return True
44 if entry.revlog_type == FILEFLAGS_FILELOG:
44 if entry.revlog_type == FILEFLAGS_FILELOG:
45 return matcher(entry.target_id)
45 return matcher(entry.target_id)
46 elif entry.revlog_type == FILEFLAGS_MANIFESTLOG:
46 elif entry.revlog_type == FILEFLAGS_MANIFESTLOG:
47 return matcher.visitdir(entry.target_id.rstrip(b'/'))
47 return matcher.visitdir(entry.target_id.rstrip(b'/'))
48 raise error.ProgrammingError(b"cannot process entry %r" % entry)
48 raise error.ProgrammingError(b"cannot process entry %r" % entry)
49
49
50
50
51 # This avoids a collision between a file named foo and a dir named
51 # This avoids a collision between a file named foo and a dir named
52 # foo.i or foo.d
52 # foo.i or foo.d
53 def _encodedir(path):
53 def _encodedir(path):
54 """
54 """
55 >>> _encodedir(b'data/foo.i')
55 >>> _encodedir(b'data/foo.i')
56 'data/foo.i'
56 'data/foo.i'
57 >>> _encodedir(b'data/foo.i/bla.i')
57 >>> _encodedir(b'data/foo.i/bla.i')
58 'data/foo.i.hg/bla.i'
58 'data/foo.i.hg/bla.i'
59 >>> _encodedir(b'data/foo.i.hg/bla.i')
59 >>> _encodedir(b'data/foo.i.hg/bla.i')
60 'data/foo.i.hg.hg/bla.i'
60 'data/foo.i.hg.hg/bla.i'
61 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
61 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
62 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
62 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
63 """
63 """
64 return (
64 return (
65 path.replace(b".hg/", b".hg.hg/")
65 path.replace(b".hg/", b".hg.hg/")
66 .replace(b".i/", b".i.hg/")
66 .replace(b".i/", b".i.hg/")
67 .replace(b".d/", b".d.hg/")
67 .replace(b".d/", b".d.hg/")
68 )
68 )
69
69
70
70
71 encodedir = getattr(parsers, 'encodedir', _encodedir)
71 encodedir = getattr(parsers, 'encodedir', _encodedir)
72
72
73
73
74 def decodedir(path):
74 def decodedir(path):
75 """
75 """
76 >>> decodedir(b'data/foo.i')
76 >>> decodedir(b'data/foo.i')
77 'data/foo.i'
77 'data/foo.i'
78 >>> decodedir(b'data/foo.i.hg/bla.i')
78 >>> decodedir(b'data/foo.i.hg/bla.i')
79 'data/foo.i/bla.i'
79 'data/foo.i/bla.i'
80 >>> decodedir(b'data/foo.i.hg.hg/bla.i')
80 >>> decodedir(b'data/foo.i.hg.hg/bla.i')
81 'data/foo.i.hg/bla.i'
81 'data/foo.i.hg/bla.i'
82 """
82 """
83 if b".hg/" not in path:
83 if b".hg/" not in path:
84 return path
84 return path
85 return (
85 return (
86 path.replace(b".d.hg/", b".d/")
86 path.replace(b".d.hg/", b".d/")
87 .replace(b".i.hg/", b".i/")
87 .replace(b".i.hg/", b".i/")
88 .replace(b".hg.hg/", b".hg/")
88 .replace(b".hg.hg/", b".hg/")
89 )
89 )
90
90
91
91
92 def _reserved():
92 def _reserved():
93 """characters that are problematic for filesystems
93 """characters that are problematic for filesystems
94
94
95 * ascii escapes (0..31)
95 * ascii escapes (0..31)
96 * ascii hi (126..255)
96 * ascii hi (126..255)
97 * windows specials
97 * windows specials
98
98
99 these characters will be escaped by encodefunctions
99 these characters will be escaped by encodefunctions
100 """
100 """
101 winreserved = [ord(x) for x in u'\\:*?"<>|']
101 winreserved = [ord(x) for x in u'\\:*?"<>|']
102 for x in range(32):
102 for x in range(32):
103 yield x
103 yield x
104 for x in range(126, 256):
104 for x in range(126, 256):
105 yield x
105 yield x
106 for x in winreserved:
106 for x in winreserved:
107 yield x
107 yield x
108
108
109
109
110 def _buildencodefun():
110 def _buildencodefun():
111 """
111 """
112 >>> enc, dec = _buildencodefun()
112 >>> enc, dec = _buildencodefun()
113
113
114 >>> enc(b'nothing/special.txt')
114 >>> enc(b'nothing/special.txt')
115 'nothing/special.txt'
115 'nothing/special.txt'
116 >>> dec(b'nothing/special.txt')
116 >>> dec(b'nothing/special.txt')
117 'nothing/special.txt'
117 'nothing/special.txt'
118
118
119 >>> enc(b'HELLO')
119 >>> enc(b'HELLO')
120 '_h_e_l_l_o'
120 '_h_e_l_l_o'
121 >>> dec(b'_h_e_l_l_o')
121 >>> dec(b'_h_e_l_l_o')
122 'HELLO'
122 'HELLO'
123
123
124 >>> enc(b'hello:world?')
124 >>> enc(b'hello:world?')
125 'hello~3aworld~3f'
125 'hello~3aworld~3f'
126 >>> dec(b'hello~3aworld~3f')
126 >>> dec(b'hello~3aworld~3f')
127 'hello:world?'
127 'hello:world?'
128
128
129 >>> enc(b'the\\x07quick\\xADshot')
129 >>> enc(b'the\\x07quick\\xADshot')
130 'the~07quick~adshot'
130 'the~07quick~adshot'
131 >>> dec(b'the~07quick~adshot')
131 >>> dec(b'the~07quick~adshot')
132 'the\\x07quick\\xadshot'
132 'the\\x07quick\\xadshot'
133 """
133 """
134 e = b'_'
134 e = b'_'
135 xchr = pycompat.bytechr
135 xchr = pycompat.bytechr
136 asciistr = list(map(xchr, range(127)))
136 asciistr = list(map(xchr, range(127)))
137 capitals = list(range(ord(b"A"), ord(b"Z") + 1))
137 capitals = list(range(ord(b"A"), ord(b"Z") + 1))
138
138
139 cmap = {x: x for x in asciistr}
139 cmap = {x: x for x in asciistr}
140 for x in _reserved():
140 for x in _reserved():
141 cmap[xchr(x)] = b"~%02x" % x
141 cmap[xchr(x)] = b"~%02x" % x
142 for x in capitals + [ord(e)]:
142 for x in capitals + [ord(e)]:
143 cmap[xchr(x)] = e + xchr(x).lower()
143 cmap[xchr(x)] = e + xchr(x).lower()
144
144
145 dmap = {}
145 dmap = {}
146 for k, v in cmap.items():
146 for k, v in cmap.items():
147 dmap[v] = k
147 dmap[v] = k
148
148
149 def decode(s):
149 def decode(s):
150 i = 0
150 i = 0
151 while i < len(s):
151 while i < len(s):
152 for l in range(1, 4):
152 for l in range(1, 4):
153 try:
153 try:
154 yield dmap[s[i : i + l]]
154 yield dmap[s[i : i + l]]
155 i += l
155 i += l
156 break
156 break
157 except KeyError:
157 except KeyError:
158 pass
158 pass
159 else:
159 else:
160 raise KeyError
160 raise KeyError
161
161
162 return (
162 return (
163 lambda s: b''.join([cmap[s[c : c + 1]] for c in range(len(s))]),
163 lambda s: b''.join([cmap[s[c : c + 1]] for c in range(len(s))]),
164 lambda s: b''.join(list(decode(s))),
164 lambda s: b''.join(list(decode(s))),
165 )
165 )
166
166
167
167
168 _encodefname, _decodefname = _buildencodefun()
168 _encodefname, _decodefname = _buildencodefun()
169
169
170
170
171 def encodefilename(s):
171 def encodefilename(s):
172 """
172 """
173 >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
173 >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
174 'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
174 'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
175 """
175 """
176 return _encodefname(encodedir(s))
176 return _encodefname(encodedir(s))
177
177
178
178
179 def decodefilename(s):
179 def decodefilename(s):
180 """
180 """
181 >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
181 >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
182 'foo.i/bar.d/bla.hg/hi:world?/HELLO'
182 'foo.i/bar.d/bla.hg/hi:world?/HELLO'
183 """
183 """
184 return decodedir(_decodefname(s))
184 return decodedir(_decodefname(s))
185
185
186
186
187 def _buildlowerencodefun():
187 def _buildlowerencodefun():
188 """
188 """
189 >>> f = _buildlowerencodefun()
189 >>> f = _buildlowerencodefun()
190 >>> f(b'nothing/special.txt')
190 >>> f(b'nothing/special.txt')
191 'nothing/special.txt'
191 'nothing/special.txt'
192 >>> f(b'HELLO')
192 >>> f(b'HELLO')
193 'hello'
193 'hello'
194 >>> f(b'hello:world?')
194 >>> f(b'hello:world?')
195 'hello~3aworld~3f'
195 'hello~3aworld~3f'
196 >>> f(b'the\\x07quick\\xADshot')
196 >>> f(b'the\\x07quick\\xADshot')
197 'the~07quick~adshot'
197 'the~07quick~adshot'
198 """
198 """
199 xchr = pycompat.bytechr
199 xchr = pycompat.bytechr
200 cmap = {xchr(x): xchr(x) for x in range(127)}
200 cmap = {xchr(x): xchr(x) for x in range(127)}
201 for x in _reserved():
201 for x in _reserved():
202 cmap[xchr(x)] = b"~%02x" % x
202 cmap[xchr(x)] = b"~%02x" % x
203 for x in range(ord(b"A"), ord(b"Z") + 1):
203 for x in range(ord(b"A"), ord(b"Z") + 1):
204 cmap[xchr(x)] = xchr(x).lower()
204 cmap[xchr(x)] = xchr(x).lower()
205
205
206 def lowerencode(s):
206 def lowerencode(s):
207 return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])
207 return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])
208
208
209 return lowerencode
209 return lowerencode
210
210
211
211
212 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
212 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
213
213
214 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
214 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
215 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
215 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
216 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
216 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
217
217
218
218
219 def _auxencode(path, dotencode):
219 def _auxencode(path, dotencode):
220 """
220 """
221 Encodes filenames containing names reserved by Windows or which end in
221 Encodes filenames containing names reserved by Windows or which end in
222 period or space. Does not touch other single reserved characters c.
222 period or space. Does not touch other single reserved characters c.
223 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
223 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
224 Additionally encodes space or period at the beginning, if dotencode is
224 Additionally encodes space or period at the beginning, if dotencode is
225 True. Parameter path is assumed to be all lowercase.
225 True. Parameter path is assumed to be all lowercase.
226 A segment only needs encoding if a reserved name appears as a
226 A segment only needs encoding if a reserved name appears as a
227 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
227 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
228 doesn't need encoding.
228 doesn't need encoding.
229
229
230 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
230 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
231 >>> _auxencode(s.split(b'/'), True)
231 >>> _auxencode(s.split(b'/'), True)
232 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
232 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
233 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
233 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
234 >>> _auxencode(s.split(b'/'), False)
234 >>> _auxencode(s.split(b'/'), False)
235 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
235 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
236 >>> _auxencode([b'foo. '], True)
236 >>> _auxencode([b'foo. '], True)
237 ['foo.~20']
237 ['foo.~20']
238 >>> _auxencode([b' .foo'], True)
238 >>> _auxencode([b' .foo'], True)
239 ['~20.foo']
239 ['~20.foo']
240 """
240 """
241 for i, n in enumerate(path):
241 for i, n in enumerate(path):
242 if not n:
242 if not n:
243 continue
243 continue
244 if dotencode and n[0] in b'. ':
244 if dotencode and n[0] in b'. ':
245 n = b"~%02x" % ord(n[0:1]) + n[1:]
245 n = b"~%02x" % ord(n[0:1]) + n[1:]
246 path[i] = n
246 path[i] = n
247 else:
247 else:
248 l = n.find(b'.')
248 l = n.find(b'.')
249 if l == -1:
249 if l == -1:
250 l = len(n)
250 l = len(n)
251 if (l == 3 and n[:3] in _winres3) or (
251 if (l == 3 and n[:3] in _winres3) or (
252 l == 4
252 l == 4
253 and n[3:4] <= b'9'
253 and n[3:4] <= b'9'
254 and n[3:4] >= b'1'
254 and n[3:4] >= b'1'
255 and n[:3] in _winres4
255 and n[:3] in _winres4
256 ):
256 ):
257 # encode third letter ('aux' -> 'au~78')
257 # encode third letter ('aux' -> 'au~78')
258 ec = b"~%02x" % ord(n[2:3])
258 ec = b"~%02x" % ord(n[2:3])
259 n = n[0:2] + ec + n[3:]
259 n = n[0:2] + ec + n[3:]
260 path[i] = n
260 path[i] = n
261 if n[-1] in b'. ':
261 if n[-1] in b'. ':
262 # encode last period or space ('foo...' -> 'foo..~2e')
262 # encode last period or space ('foo...' -> 'foo..~2e')
263 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
263 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
264 return path
264 return path
265
265
266
266
267 _maxstorepathlen = 120
267 _maxstorepathlen = 120
268 _dirprefixlen = 8
268 _dirprefixlen = 8
269 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
269 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
270
270
271
271
272 def _hashencode(path, dotencode):
272 def _hashencode(path, dotencode):
273 digest = hex(hashutil.sha1(path).digest())
273 digest = hex(hashutil.sha1(path).digest())
274 le = lowerencode(path[5:]).split(b'/') # skips prefix 'data/' or 'meta/'
274 le = lowerencode(path[5:]).split(b'/') # skips prefix 'data/' or 'meta/'
275 parts = _auxencode(le, dotencode)
275 parts = _auxencode(le, dotencode)
276 basename = parts[-1]
276 basename = parts[-1]
277 _root, ext = os.path.splitext(basename)
277 _root, ext = os.path.splitext(basename)
278 sdirs = []
278 sdirs = []
279 sdirslen = 0
279 sdirslen = 0
280 for p in parts[:-1]:
280 for p in parts[:-1]:
281 d = p[:_dirprefixlen]
281 d = p[:_dirprefixlen]
282 if d[-1] in b'. ':
282 if d[-1] in b'. ':
283 # Windows can't access dirs ending in period or space
283 # Windows can't access dirs ending in period or space
284 d = d[:-1] + b'_'
284 d = d[:-1] + b'_'
285 if sdirslen == 0:
285 if sdirslen == 0:
286 t = len(d)
286 t = len(d)
287 else:
287 else:
288 t = sdirslen + 1 + len(d)
288 t = sdirslen + 1 + len(d)
289 if t > _maxshortdirslen:
289 if t > _maxshortdirslen:
290 break
290 break
291 sdirs.append(d)
291 sdirs.append(d)
292 sdirslen = t
292 sdirslen = t
293 dirs = b'/'.join(sdirs)
293 dirs = b'/'.join(sdirs)
294 if len(dirs) > 0:
294 if len(dirs) > 0:
295 dirs += b'/'
295 dirs += b'/'
296 res = b'dh/' + dirs + digest + ext
296 res = b'dh/' + dirs + digest + ext
297 spaceleft = _maxstorepathlen - len(res)
297 spaceleft = _maxstorepathlen - len(res)
298 if spaceleft > 0:
298 if spaceleft > 0:
299 filler = basename[:spaceleft]
299 filler = basename[:spaceleft]
300 res = b'dh/' + dirs + filler + digest + ext
300 res = b'dh/' + dirs + filler + digest + ext
301 return res
301 return res
302
302
303
303
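A simplified standalone model of the hashed fallback above: most of the path is replaced by a sha1 digest while short directory prefixes, a filler taken from the basename, and the original extension are kept within the length budget. The constants mirror `_maxstorepathlen` and `_dirprefixlen`; the `_maxshortdirslen` cap and other edge cases are deliberately left out:

    import hashlib
    import os

    def hashed_path(path, maxlen=120, dirprefix=8):
        digest = hashlib.sha1(path).hexdigest().encode('ascii')
        parts = path.split(b'/')[1:]      # drop the 'data/' or 'meta/' prefix
        basename = parts[-1]
        ext = os.path.splitext(basename)[1]
        dirs = b'/'.join(p[:dirprefix] for p in parts[:-1])
        if dirs:
            dirs += b'/'
        res = b'dh/' + dirs + digest + ext
        spaceleft = maxlen - len(res)
        if spaceleft > 0:
            res = b'dh/' + dirs + basename[:spaceleft] + digest + ext
        return res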
304 def _hybridencode(path, dotencode):
304 def _hybridencode(path, dotencode):
305 """encodes path with a length limit
305 """encodes path with a length limit
306
306
307 Encodes all paths that begin with 'data/', according to the following.
307 Encodes all paths that begin with 'data/', according to the following.
308
308
309 Default encoding (reversible):
309 Default encoding (reversible):
310
310
311 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
311 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
312 characters are encoded as '~xx', where xx is the two digit hex code
312 characters are encoded as '~xx', where xx is the two digit hex code
313 of the character (see encodefilename).
313 of the character (see encodefilename).
314 Relevant path components consisting of Windows reserved filenames are
314 Relevant path components consisting of Windows reserved filenames are
315 masked by encoding the third character ('aux' -> 'au~78', see _auxencode).
315 masked by encoding the third character ('aux' -> 'au~78', see _auxencode).
316
316
317 Hashed encoding (not reversible):
317 Hashed encoding (not reversible):
318
318
319 If the default-encoded path is longer than _maxstorepathlen, a
319 If the default-encoded path is longer than _maxstorepathlen, a
320 non-reversible hybrid hashing of the path is done instead.
320 non-reversible hybrid hashing of the path is done instead.
321 This encoding uses up to _dirprefixlen characters of all directory
321 This encoding uses up to _dirprefixlen characters of all directory
322 levels of the lowerencoded path, but not more levels than can fit into
322 levels of the lowerencoded path, but not more levels than can fit into
323 _maxshortdirslen.
323 _maxshortdirslen.
324 Then follows the filler followed by the sha digest of the full path.
324 Then follows the filler followed by the sha digest of the full path.
325 The filler is the beginning of the basename of the lowerencoded path
325 The filler is the beginning of the basename of the lowerencoded path
326 (the basename is everything after the last path separator). The filler
326 (the basename is everything after the last path separator). The filler
327 is as long as possible, filling in characters from the basename until
327 is as long as possible, filling in characters from the basename until
328 the encoded path has _maxstorepathlen characters (or all chars of the
328 the encoded path has _maxstorepathlen characters (or all chars of the
329 basename have been taken).
329 basename have been taken).
330 The extension (e.g. '.i' or '.d') is preserved.
330 The extension (e.g. '.i' or '.d') is preserved.
331
331
332 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
332 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
333 encoding was used.
333 encoding was used.
334 """
334 """
335 path = encodedir(path)
335 path = encodedir(path)
336 ef = _encodefname(path).split(b'/')
336 ef = _encodefname(path).split(b'/')
337 res = b'/'.join(_auxencode(ef, dotencode))
337 res = b'/'.join(_auxencode(ef, dotencode))
338 if len(res) > _maxstorepathlen:
338 if len(res) > _maxstorepathlen:
339 res = _hashencode(path, dotencode)
339 res = _hashencode(path, dotencode)
340 return res
340 return res
341
341
342
342
343 def _pathencode(path):
343 def _pathencode(path):
344 de = encodedir(path)
344 de = encodedir(path)
345 if len(path) > _maxstorepathlen:
345 if len(path) > _maxstorepathlen:
346 return _hashencode(de, True)
346 return _hashencode(de, True)
347 ef = _encodefname(de).split(b'/')
347 ef = _encodefname(de).split(b'/')
348 res = b'/'.join(_auxencode(ef, True))
348 res = b'/'.join(_auxencode(ef, True))
349 if len(res) > _maxstorepathlen:
349 if len(res) > _maxstorepathlen:
350 return _hashencode(de, True)
350 return _hashencode(de, True)
351 return res
351 return res
352
352
353
353
354 _pathencode = getattr(parsers, 'pathencode', _pathencode)
354 _pathencode = getattr(parsers, 'pathencode', _pathencode)
355
355
356
356
357 def _plainhybridencode(f):
357 def _plainhybridencode(f):
358 return _hybridencode(f, False)
358 return _hybridencode(f, False)
359
359
360
360
361 def _calcmode(vfs):
361 def _calcmode(vfs):
362 try:
362 try:
363 # files in .hg/ will be created using this mode
363 # files in .hg/ will be created using this mode
364 mode = vfs.stat().st_mode
364 mode = vfs.stat().st_mode
365 # avoid some useless chmods
365 # avoid some useless chmods
366 if (0o777 & ~util.umask) == (0o777 & mode):
366 if (0o777 & ~util.umask) == (0o777 & mode):
367 mode = None
367 mode = None
368 except OSError:
368 except OSError:
369 mode = None
369 mode = None
370 return mode
370 return mode
371
371
372
372
373 _data = [
373 _data = [
374 b'bookmarks',
374 b'bookmarks',
375 b'narrowspec',
375 b'narrowspec',
376 b'data',
376 b'data',
377 b'meta',
377 b'meta',
378 b'00manifest.d',
378 b'00manifest.d',
379 b'00manifest.i',
379 b'00manifest.i',
380 b'00changelog.d',
380 b'00changelog.d',
381 b'00changelog.i',
381 b'00changelog.i',
382 b'phaseroots',
382 b'phaseroots',
383 b'obsstore',
383 b'obsstore',
384 b'requires',
384 b'requires',
385 ]
385 ]
386
386
387 REVLOG_FILES_MAIN_EXT = (b'.i',)
387 REVLOG_FILES_MAIN_EXT = (b'.i',)
388 REVLOG_FILES_OTHER_EXT = (
388 REVLOG_FILES_OTHER_EXT = (
389 b'.idx',
389 b'.idx',
390 b'.d',
390 b'.d',
391 b'.dat',
391 b'.dat',
392 b'.n',
392 b'.n',
393 b'.nd',
393 b'.nd',
394 b'.sda',
394 b'.sda',
395 )
395 )
396 # file extensions that also use a `-SOMELONGIDHASH.ext` form
396 # file extensions that also use a `-SOMELONGIDHASH.ext` form
397 REVLOG_FILES_LONG_EXT = (
397 REVLOG_FILES_LONG_EXT = (
398 b'.nd',
398 b'.nd',
399 b'.idx',
399 b'.idx',
400 b'.dat',
400 b'.dat',
401 b'.sda',
401 b'.sda',
402 )
402 )
403 # files that are "volatile" and might change between listing and streaming
403 # files that are "volatile" and might change between listing and streaming
404 #
404 #
405 # note: the ".nd" files are nodemap data and won't "change" but they might be
405 # note: the ".nd" files are nodemap data and won't "change" but they might be
406 # deleted.
406 # deleted.
407 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')
407 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')
408
408
409 # some exception to the above matching
409 # some exception to the above matching
410 #
410 #
411 # XXX This is currently not in use because of issue6542
411 # XXX This is currently not in use because of issue6542
412 EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
412 EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
413
413
414
414
415 def is_revlog(f, kind, st):
415 def is_revlog(f, kind, st):
416 if kind != stat.S_IFREG:
416 if kind != stat.S_IFREG:
417 return None
417 return None
418 return revlog_type(f)
418 return revlog_type(f)
419
419
420
420
421 def revlog_type(f):
421 def revlog_type(f):
422 # XXX we need to filter `undo.` created by the transaction here, however
422 # XXX we need to filter `undo.` created by the transaction here, however
423 # being naive about it also filters revlogs for `undo.*` files, leading to
423 # being naive about it also filters revlogs for `undo.*` files, leading to
424 # issue6542. So we no longer use EXCLUDED.
424 # issue6542. So we no longer use EXCLUDED.
425 if f.endswith(REVLOG_FILES_MAIN_EXT):
425 if f.endswith(REVLOG_FILES_MAIN_EXT):
426 return FILEFLAGS_REVLOG_MAIN
426 return FILEFLAGS_REVLOG_MAIN
427 elif f.endswith(REVLOG_FILES_OTHER_EXT):
427 elif f.endswith(REVLOG_FILES_OTHER_EXT):
428 t = FILETYPE_FILELOG_OTHER
428 t = FILETYPE_FILELOG_OTHER
429 if f.endswith(REVLOG_FILES_VOLATILE_EXT):
429 if f.endswith(REVLOG_FILES_VOLATILE_EXT):
430 t |= FILEFLAGS_VOLATILE
430 t |= FILEFLAGS_VOLATILE
431 return t
431 return t
432 return None
432 return None
433
433
434
434
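The classification above is purely extension driven. A self-contained sketch using the same extension sets (string labels stand in for the flag constants defined below):

    MAIN_EXT = (b'.i',)
    OTHER_EXT = (b'.idx', b'.d', b'.dat', b'.n', b'.nd', b'.sda')

    def classify(path):
        if path.endswith(MAIN_EXT):
            return 'main'
        if path.endswith(OTHER_EXT):
            return 'other'
        return None

    assert classify(b'data/foo.txt.i') == 'main'
    assert classify(b'data/foo.txt.d') == 'other'
    assert classify(b'data/foo.txt') is None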
435 # the file is part of changelog data
435 # the file is part of changelog data
436 FILEFLAGS_CHANGELOG = 1 << 13
436 FILEFLAGS_CHANGELOG = 1 << 13
437 # the file is part of manifest data
437 # the file is part of manifest data
438 FILEFLAGS_MANIFESTLOG = 1 << 12
438 FILEFLAGS_MANIFESTLOG = 1 << 12
439 # the file is part of filelog data
439 # the file is part of filelog data
440 FILEFLAGS_FILELOG = 1 << 11
440 FILEFLAGS_FILELOG = 1 << 11
441 # files that are not directly part of a revlog
441 # files that are not directly part of a revlog
442 FILEFLAGS_OTHER = 1 << 10
442 FILEFLAGS_OTHER = 1 << 10
443
443
444 # the main entry point for a revlog
444 # the main entry point for a revlog
445 FILEFLAGS_REVLOG_MAIN = 1 << 1
445 FILEFLAGS_REVLOG_MAIN = 1 << 1
446 # a secondary file for a revlog
446 # a secondary file for a revlog
447 FILEFLAGS_REVLOG_OTHER = 1 << 0
447 FILEFLAGS_REVLOG_OTHER = 1 << 0
448
448
449 # files that are "volatile" and might change between listing and streaming
449 # files that are "volatile" and might change between listing and streaming
450 FILEFLAGS_VOLATILE = 1 << 20
450 FILEFLAGS_VOLATILE = 1 << 20
451
451
452 FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
452 FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
453 FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
453 FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
454 FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
454 FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
455 FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
455 FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
456 FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
456 FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
457 FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
457 FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
458 FILETYPE_OTHER = FILEFLAGS_OTHER
458 FILETYPE_OTHER = FILEFLAGS_OTHER
459
459
460
460
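The FILETYPE_* values above are plain bitwise compositions, so any aspect of a file can be tested with a mask. A small sketch reusing the same bit positions:

    FILEFLAGS_FILELOG = 1 << 11
    FILEFLAGS_REVLOG_MAIN = 1 << 1
    FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN

    def is_main_filelog(filetype):
        mask = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
        return (filetype & mask) == mask

    assert is_main_filelog(FILETYPE_FILELOG_MAIN)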
461 @attr.s(slots=True, init=False)
461 @attr.s(slots=True, init=False)
462 class BaseStoreEntry:
462 class BaseStoreEntry:
463 """An entry in the store
463 """An entry in the store
464
464
465 This is returned by `store.walk` and represents some data in the store."""
465 This is returned by `store.walk` and represents some data in the store."""
466
466
467
468 @attr.s(slots=True, init=False)
469 class SimpleStoreEntry(BaseStoreEntry):
470 """A generic entry in the store"""
471
472 is_revlog = False
473
467 _entry_path = attr.ib()
474 _entry_path = attr.ib()
468 _is_volatile = attr.ib(default=False)
475 _is_volatile = attr.ib(default=False)
469 _file_size = attr.ib(default=None)
476 _file_size = attr.ib(default=None)
470
477
471 def __init__(
478 def __init__(
472 self,
479 self,
473 entry_path,
480 entry_path,
474 is_volatile=False,
481 is_volatile=False,
475 file_size=None,
482 file_size=None,
476 ):
483 ):
484 super().__init__()
477 self._entry_path = entry_path
485 self._entry_path = entry_path
478 self._is_volatile = is_volatile
486 self._is_volatile = is_volatile
479 self._file_size = file_size
487 self._file_size = file_size
480
488
481 def files(self):
489 def files(self):
482 return [
490 return [
483 StoreFile(
491 StoreFile(
484 unencoded_path=self._entry_path,
492 unencoded_path=self._entry_path,
485 file_size=self._file_size,
493 file_size=self._file_size,
486 is_volatile=self._is_volatile,
494 is_volatile=self._is_volatile,
487 )
495 )
488 ]
496 ]
489
497
490
498
491 @attr.s(slots=True, init=False)
499 @attr.s(slots=True, init=False)
492 class SimpleStoreEntry(BaseStoreEntry):
493 """A generic entry in the store"""
494
495 is_revlog = False
496
497
498 @attr.s(slots=True, init=False)
499 class RevlogStoreEntry(BaseStoreEntry):
500 class RevlogStoreEntry(BaseStoreEntry):
500 """A revlog entry in the store"""
501 """A revlog entry in the store"""
501
502
502 is_revlog = True
503 is_revlog = True
504
503 revlog_type = attr.ib(default=None)
505 revlog_type = attr.ib(default=None)
504 target_id = attr.ib(default=None)
506 target_id = attr.ib(default=None)
505 is_revlog_main = attr.ib(default=None)
507 _path_prefix = attr.ib(default=None)
508 _details = attr.ib(default=None)
506
509
507 def __init__(
510 def __init__(
508 self,
511 self,
509 entry_path,
510 revlog_type,
512 revlog_type,
513 path_prefix,
511 target_id,
514 target_id,
512 is_revlog_main=False,
515 details,
513 is_volatile=False,
514 file_size=None,
515 ):
516 ):
516 super().__init__(
517 super().__init__()
517 entry_path=entry_path,
518 is_volatile=is_volatile,
519 file_size=file_size,
520 )
521 self.revlog_type = revlog_type
518 self.revlog_type = revlog_type
522 self.target_id = target_id
519 self.target_id = target_id
523 self.is_revlog_main = is_revlog_main
520 self._path_prefix = path_prefix
521 assert b'.i' in details, (path_prefix, details)
522 self._details = details
524
523
525 def main_file_path(self):
524 def main_file_path(self):
526 """unencoded path of the main revlog file"""
525 """unencoded path of the main revlog file"""
527 return self._entry_path
526 return self._path_prefix + b'.i'
527
528 def files(self):
529 files = []
530 for ext in sorted(self._details, key=_ext_key):
531 path = self._path_prefix + ext
532 data = self._details[ext]
533 files.append(StoreFile(unencoded_path=path, **data))
534 return files
528
535
529
536
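After this change a RevlogStoreEntry carries a single path prefix plus a per-extension `details` mapping, and files() expands those back into concrete paths. A rough standalone sketch of that expansion on toy data (plain dicts stand in for StoreFile, a plain sorted() stands in for the _ext_key ordering, and the names and sizes are invented):

# toy expansion of (path_prefix, details) into per-file records
path_prefix = b'data/foo.txt'
details = {
    b'.i': {'is_volatile': False, 'file_size': 64},
    b'.d': {'is_volatile': False, 'file_size': 4096},
}
files = [dict(unencoded_path=path_prefix + ext, **data)
         for ext, data in sorted(details.items())]
main_path = path_prefix + b'.i'   # what main_file_path() now returns
assert files[0]['unencoded_path'] == b'data/foo.txt.d'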
530 @attr.s(slots=True)
537 @attr.s(slots=True)
531 class StoreFile:
538 class StoreFile:
532 """a file matching an entry"""
539 """a file matching an entry"""
533
540
534 unencoded_path = attr.ib()
541 unencoded_path = attr.ib()
535 _file_size = attr.ib(default=False)
542 _file_size = attr.ib(default=None)
536 is_volatile = attr.ib(default=False)
543 is_volatile = attr.ib(default=False)
537
544
538 def file_size(self, vfs):
545 def file_size(self, vfs):
539 if self._file_size is not None:
546 if self._file_size is not None:
540 return self._file_size
547 return self._file_size
541 try:
548 try:
542 return vfs.stat(self.unencoded_path).st_size
549 return vfs.stat(self.unencoded_path).st_size
543 except FileNotFoundError:
550 except FileNotFoundError:
544 return 0
551 return 0
545
552
546
553
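StoreFile defers the stat until file_size() is called with a vfs and prefers a size recorded at walk time. A small self-contained sketch of that lazy pattern, with os.stat standing in for the vfs (the class and names here are illustrative, not Mercurial API):

import os

class LazySizedFile:
    # mimics StoreFile.file_size: use the cached size, stat otherwise
    def __init__(self, path, size=None):
        self.path = path
        self._size = size

    def file_size(self):
        if self._size is not None:
            return self._size
        try:
            return os.stat(self.path).st_size
        except FileNotFoundError:
            return 0  # volatile files may vanish between listing and reading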
547 def _gather_revlog(files_data):
554 def _gather_revlog(files_data):
548 """group files per revlog prefix
555 """group files per revlog prefix
549
556
550 This returns a two-level nested dict. The top-level key is the revlog prefix
557 This returns a two-level nested dict. The top-level key is the revlog prefix
551 without extension; the second level maps each file "suffix" (extension) seen
558 without extension; the second level maps each file "suffix" (extension) seen
552 for this revlog to arbitrary file data.
559 for this revlog to arbitrary file data.
553 """
560 """
554 revlogs = collections.defaultdict(dict)
561 revlogs = collections.defaultdict(dict)
555 for u, value in files_data:
562 for u, value in files_data:
556 name, ext = _split_revlog_ext(u)
563 name, ext = _split_revlog_ext(u)
557 revlogs[name][ext] = value
564 revlogs[name][ext] = value
558 return sorted(revlogs.items())
565 return sorted(revlogs.items())
559
566
560
567
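The grouping itself is a defaultdict keyed on the path prefix. A runnable sketch with toy input, splitting only on the last '.' (the real _split_revlog_ext below also handles the long '-…' extensions):

import collections

files_data = [
    (b'data/foo.txt.i', ('idx', 64)),
    (b'data/foo.txt.d', ('dat', 4096)),
    (b'data/bar.txt.i', ('idx', 32)),
]
revlogs = collections.defaultdict(dict)
for path, value in files_data:
    idx = path.rfind(b'.')
    revlogs[path[:idx]][path[idx:]] = value
assert sorted(revlogs) == [b'data/bar.txt', b'data/foo.txt']
assert set(revlogs[b'data/foo.txt']) == {b'.i', b'.d'}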
561 def _split_revlog_ext(filename):
568 def _split_revlog_ext(filename):
562 """split the revlog file prefix from the variable extension"""
569 """split the revlog file prefix from the variable extension"""
563 if filename.endswith(REVLOG_FILES_LONG_EXT):
570 if filename.endswith(REVLOG_FILES_LONG_EXT):
564 char = b'-'
571 char = b'-'
565 else:
572 else:
566 char = b'.'
573 char = b'.'
567 idx = filename.rfind(char)
574 idx = filename.rfind(char)
568 return filename[:idx], filename[idx:]
575 return filename[:idx], filename[idx:]
569
576
570
577
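For illustration, the split keeps the extension (including its separator) on the suffix side; the docket-style long extensions are assumed here to look like b'-<hex>.nd', in which case the cut happens at the last '-' instead:

# shape of the split for a short and an assumed long extension
short = b'data/foo.txt.i'
i = short.rfind(b'.')
assert (short[:i], short[i:]) == (b'data/foo.txt', b'.i')

long_name = b'00changelog-b1a2c3d4.nd'   # assumed docket-style name
i = long_name.rfind(b'-')
assert (long_name[:i], long_name[i:]) == (b'00changelog', b'-b1a2c3d4.nd')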
571 def _ext_key(ext):
578 def _ext_key(ext):
572 """a key to order revlog suffix
579 """a key to order revlog suffix
573
580
574 important to issue .i after other entries."""
581 important to issue .i after other entries."""
575 # the only important part of this order is to keep the `.i` last.
582 # the only important part of this order is to keep the `.i` last.
576 if ext.endswith(b'.n'):
583 if ext.endswith(b'.n'):
577 return (0, ext)
584 return (0, ext)
578 elif ext.endswith(b'.nd'):
585 elif ext.endswith(b'.nd'):
579 return (10, ext)
586 return (10, ext)
580 elif ext.endswith(b'.d'):
587 elif ext.endswith(b'.d'):
581 return (20, ext)
588 return (20, ext)
582 elif ext.endswith(b'.i'):
589 elif ext.endswith(b'.i'):
583 return (50, ext)
590 return (50, ext)
584 else:
591 else:
585 return (40, ext)
592 return (40, ext)
586
593
587
594
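A quick runnable check of that ordering, with the key copied verbatim from above, confirms that '.i' always sorts last:

def ext_key(ext):
    # verbatim copy of _ext_key for a standalone check
    if ext.endswith(b'.n'):
        return (0, ext)
    elif ext.endswith(b'.nd'):
        return (10, ext)
    elif ext.endswith(b'.d'):
        return (20, ext)
    elif ext.endswith(b'.i'):
        return (50, ext)
    else:
        return (40, ext)

assert sorted([b'.i', b'.d', b'.nd', b'.n'], key=ext_key) == [b'.n', b'.nd', b'.d', b'.i']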
588 class basicstore:
595 class basicstore:
589 '''base class for local repository stores'''
596 '''base class for local repository stores'''
590
597
591 def __init__(self, path, vfstype):
598 def __init__(self, path, vfstype):
592 vfs = vfstype(path)
599 vfs = vfstype(path)
593 self.path = vfs.base
600 self.path = vfs.base
594 self.createmode = _calcmode(vfs)
601 self.createmode = _calcmode(vfs)
595 vfs.createmode = self.createmode
602 vfs.createmode = self.createmode
596 self.rawvfs = vfs
603 self.rawvfs = vfs
597 self.vfs = vfsmod.filtervfs(vfs, encodedir)
604 self.vfs = vfsmod.filtervfs(vfs, encodedir)
598 self.opener = self.vfs
605 self.opener = self.vfs
599
606
600 def join(self, f):
607 def join(self, f):
601 return self.path + b'/' + encodedir(f)
608 return self.path + b'/' + encodedir(f)
602
609
603 def _walk(self, relpath, recurse, undecodable=None):
610 def _walk(self, relpath, recurse, undecodable=None):
604 '''returns a sorted list of (unencoded, (revlog_type, size))'''
611 '''returns a sorted list of (unencoded, (revlog_type, size))'''
605 path = self.path
612 path = self.path
606 if relpath:
613 if relpath:
607 path += b'/' + relpath
614 path += b'/' + relpath
608 striplen = len(self.path) + 1
615 striplen = len(self.path) + 1
609 l = []
616 l = []
610 if self.rawvfs.isdir(path):
617 if self.rawvfs.isdir(path):
611 visit = [path]
618 visit = [path]
612 readdir = self.rawvfs.readdir
619 readdir = self.rawvfs.readdir
613 while visit:
620 while visit:
614 p = visit.pop()
621 p = visit.pop()
615 for f, kind, st in readdir(p, stat=True):
622 for f, kind, st in readdir(p, stat=True):
616 fp = p + b'/' + f
623 fp = p + b'/' + f
617 rl_type = is_revlog(f, kind, st)
624 rl_type = is_revlog(f, kind, st)
618 if rl_type is not None:
625 if rl_type is not None:
619 n = util.pconvert(fp[striplen:])
626 n = util.pconvert(fp[striplen:])
620 l.append((decodedir(n), (rl_type, st.st_size)))
627 l.append((decodedir(n), (rl_type, st.st_size)))
621 elif kind == stat.S_IFDIR and recurse:
628 elif kind == stat.S_IFDIR and recurse:
622 visit.append(fp)
629 visit.append(fp)
623
630
624 l.sort()
631 l.sort()
625 return l
632 return l
626
633
627 def changelog(self, trypending, concurrencychecker=None):
634 def changelog(self, trypending, concurrencychecker=None):
628 return changelog.changelog(
635 return changelog.changelog(
629 self.vfs,
636 self.vfs,
630 trypending=trypending,
637 trypending=trypending,
631 concurrencychecker=concurrencychecker,
638 concurrencychecker=concurrencychecker,
632 )
639 )
633
640
634 def manifestlog(self, repo, storenarrowmatch):
641 def manifestlog(self, repo, storenarrowmatch):
635 rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
642 rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
636 return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
643 return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
637
644
638 def datafiles(
645 def datafiles(
639 self, matcher=None, undecodable=None
646 self, matcher=None, undecodable=None
640 ) -> Generator[BaseStoreEntry, None, None]:
647 ) -> Generator[BaseStoreEntry, None, None]:
641 """Like walk, but excluding the changelog and root manifest.
648 """Like walk, but excluding the changelog and root manifest.
642
649
643 When [undecodable] is None, revlog names that can't be
650 When [undecodable] is None, revlog names that can't be
644 decoded cause an exception. When it is provided, it should
651 decoded cause an exception. When it is provided, it should
645 be a list and the filenames that can't be decoded are added
652 be a list and the filenames that can't be decoded are added
646 to it instead. This is very rarely needed."""
653 to it instead. This is very rarely needed."""
647 dirs = [
654 dirs = [
648 (b'data', FILEFLAGS_FILELOG),
655 (b'data', FILEFLAGS_FILELOG),
649 (b'meta', FILEFLAGS_MANIFESTLOG),
656 (b'meta', FILEFLAGS_MANIFESTLOG),
650 ]
657 ]
651 for base_dir, rl_type in dirs:
658 for base_dir, rl_type in dirs:
652 files = self._walk(base_dir, True, undecodable=undecodable)
659 files = self._walk(base_dir, True, undecodable=undecodable)
653 files = (f for f in files if f[1][0] is not None)
660 files = (f for f in files if f[1][0] is not None)
654 for revlog, details in _gather_revlog(files):
661 for revlog, details in _gather_revlog(files):
662 file_details = {}
663 revlog_target_id = revlog.split(b'/', 1)[1]
655 for ext, (t, s) in sorted(details.items()):
664 for ext, (t, s) in sorted(details.items()):
656 u = revlog + ext
665 file_details[ext] = {
657 revlog_target_id = revlog.split(b'/', 1)[1]
666 'is_volatile': bool(t & FILEFLAGS_VOLATILE),
658 yield RevlogStoreEntry(
667 'file_size': s,
659 entry_path=u,
668 }
660 revlog_type=rl_type,
669 yield RevlogStoreEntry(
661 target_id=revlog_target_id,
670 path_prefix=revlog,
662 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
671 revlog_type=rl_type,
663 is_volatile=bool(t & FILEFLAGS_VOLATILE),
672 target_id=revlog_target_id,
664 file_size=s,
673 details=file_details,
665 )
674 )
666
675
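The net effect of the new datafiles() loop is that one revlog group becomes a single entry whose details map each extension to its volatility and size. A standalone sketch of that transformation on toy data (only the FILEFLAGS_VOLATILE value is taken from above; the extensions and sizes are invented):

FILEFLAGS_VOLATILE = 1 << 20
# toy (ext -> (type_flags, size)) mapping as produced by _gather_revlog
details = {b'.i': (0, 64), b'.d': (0, 4096), b'.n': (FILEFLAGS_VOLATILE, 62)}
file_details = {
    ext: {'is_volatile': bool(t & FILEFLAGS_VOLATILE), 'file_size': s}
    for ext, (t, s) in sorted(details.items())
}
assert file_details[b'.n'] == {'is_volatile': True, 'file_size': 62}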
667 def topfiles(self) -> Generator[BaseStoreEntry, None, None]:
676 def topfiles(self) -> Generator[BaseStoreEntry, None, None]:
668 files = reversed(self._walk(b'', False))
677 files = reversed(self._walk(b'', False))
669
678
670 changelogs = collections.defaultdict(dict)
679 changelogs = collections.defaultdict(dict)
671 manifestlogs = collections.defaultdict(dict)
680 manifestlogs = collections.defaultdict(dict)
672
681
673 for u, (t, s) in files:
682 for u, (t, s) in files:
674 if u.startswith(b'00changelog'):
683 if u.startswith(b'00changelog'):
675 name, ext = _split_revlog_ext(u)
684 name, ext = _split_revlog_ext(u)
676 changelogs[name][ext] = (t, s)
685 changelogs[name][ext] = (t, s)
677 elif u.startswith(b'00manifest'):
686 elif u.startswith(b'00manifest'):
678 name, ext = _split_revlog_ext(u)
687 name, ext = _split_revlog_ext(u)
679 manifestlogs[name][ext] = (t, s)
688 manifestlogs[name][ext] = (t, s)
680 else:
689 else:
681 yield SimpleStoreEntry(
690 yield SimpleStoreEntry(
682 entry_path=u,
691 entry_path=u,
683 is_volatile=bool(t & FILEFLAGS_VOLATILE),
692 is_volatile=bool(t & FILEFLAGS_VOLATILE),
684 file_size=s,
693 file_size=s,
685 )
694 )
686 # yield manifest before changelog
695 # yield manifest before changelog
687 top_rl = [
696 top_rl = [
688 (manifestlogs, FILEFLAGS_MANIFESTLOG),
697 (manifestlogs, FILEFLAGS_MANIFESTLOG),
689 (changelogs, FILEFLAGS_CHANGELOG),
698 (changelogs, FILEFLAGS_CHANGELOG),
690 ]
699 ]
691 assert len(manifestlogs) <= 1
700 assert len(manifestlogs) <= 1
692 assert len(changelogs) <= 1
701 assert len(changelogs) <= 1
693 for data, revlog_type in top_rl:
702 for data, revlog_type in top_rl:
694 for revlog, details in sorted(data.items()):
703 for revlog, details in sorted(data.items()):
695 # (keeping ordering so we get 00changelog.i last)
704 file_details = {}
696 key = lambda x: _ext_key(x[0])
705 for ext, (t, s) in details.items():
697 for ext, (t, s) in sorted(details.items(), key=key):
706 file_details[ext] = {
698 u = revlog + ext
707 'is_volatile': bool(t & FILEFLAGS_VOLATILE),
699 yield RevlogStoreEntry(
708 'file_size': s,
700 entry_path=u,
709 }
701 revlog_type=revlog_type,
710 yield RevlogStoreEntry(
702 target_id=b'',
711 path_prefix=revlog,
703 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
712 revlog_type=revlog_type,
704 is_volatile=bool(t & FILEFLAGS_VOLATILE),
713 target_id=b'',
705 file_size=s,
714 details=file_details,
706 )
715 )
707
716
708 def walk(self, matcher=None) -> Generator[BaseStoreEntry, None, None]:
717 def walk(self, matcher=None) -> Generator[BaseStoreEntry, None, None]:
709 """return files related to data storage (ie: revlogs)
718 """return files related to data storage (ie: revlogs)
710
719
711 yields BaseStoreEntry objects describing the store content
720 yields BaseStoreEntry objects describing the store content
712
721
713 if a matcher is passed, only storage files for tracked paths matching
722 if a matcher is passed, only storage files for tracked paths matching
714 the matcher are yielded
723 the matcher are yielded
715 """
724 """
716 # yield data files first
725 # yield data files first
717 for x in self.datafiles(matcher):
726 for x in self.datafiles(matcher):
718 yield x
727 yield x
719 for x in self.topfiles():
728 for x in self.topfiles():
720 yield x
729 yield x
721
730
722 def copylist(self):
731 def copylist(self):
723 return _data
732 return _data
724
733
725 def write(self, tr):
734 def write(self, tr):
726 pass
735 pass
727
736
728 def invalidatecaches(self):
737 def invalidatecaches(self):
729 pass
738 pass
730
739
731 def markremoved(self, fn):
740 def markremoved(self, fn):
732 pass
741 pass
733
742
734 def __contains__(self, path):
743 def __contains__(self, path):
735 '''Checks if the store contains path'''
744 '''Checks if the store contains path'''
736 path = b"/".join((b"data", path))
745 path = b"/".join((b"data", path))
737 # file?
746 # file?
738 if self.vfs.exists(path + b".i"):
747 if self.vfs.exists(path + b".i"):
739 return True
748 return True
740 # dir?
749 # dir?
741 if not path.endswith(b"/"):
750 if not path.endswith(b"/"):
742 path = path + b"/"
751 path = path + b"/"
743 return self.vfs.exists(path)
752 return self.vfs.exists(path)
744
753
745
754
746 class encodedstore(basicstore):
755 class encodedstore(basicstore):
747 def __init__(self, path, vfstype):
756 def __init__(self, path, vfstype):
748 vfs = vfstype(path + b'/store')
757 vfs = vfstype(path + b'/store')
749 self.path = vfs.base
758 self.path = vfs.base
750 self.createmode = _calcmode(vfs)
759 self.createmode = _calcmode(vfs)
751 vfs.createmode = self.createmode
760 vfs.createmode = self.createmode
752 self.rawvfs = vfs
761 self.rawvfs = vfs
753 self.vfs = vfsmod.filtervfs(vfs, encodefilename)
762 self.vfs = vfsmod.filtervfs(vfs, encodefilename)
754 self.opener = self.vfs
763 self.opener = self.vfs
755
764
756 def _walk(self, relpath, recurse, undecodable=None):
765 def _walk(self, relpath, recurse, undecodable=None):
757 old = super()._walk(relpath, recurse)
766 old = super()._walk(relpath, recurse)
758 new = []
767 new = []
759 for f1, value in old:
768 for f1, value in old:
760 try:
769 try:
761 f2 = decodefilename(f1)
770 f2 = decodefilename(f1)
762 except KeyError:
771 except KeyError:
763 if undecodable is None:
772 if undecodable is None:
764 msg = _(b'undecodable revlog name %s') % f1
773 msg = _(b'undecodable revlog name %s') % f1
765 raise error.StorageError(msg)
774 raise error.StorageError(msg)
766 else:
775 else:
767 undecodable.append(f1)
776 undecodable.append(f1)
768 continue
777 continue
769 new.append((f2, value))
778 new.append((f2, value))
770 return new
779 return new
771
780
772 def datafiles(
781 def datafiles(
773 self, matcher=None, undecodable=None
782 self, matcher=None, undecodable=None
774 ) -> Generator[BaseStoreEntry, None, None]:
783 ) -> Generator[BaseStoreEntry, None, None]:
775 entries = super(encodedstore, self).datafiles(undecodable=undecodable)
784 entries = super(encodedstore, self).datafiles(undecodable=undecodable)
776 for entry in entries:
785 for entry in entries:
777 if _match_tracked_entry(entry, matcher):
786 if _match_tracked_entry(entry, matcher):
778 yield entry
787 yield entry
779
788
780 def join(self, f):
789 def join(self, f):
781 return self.path + b'/' + encodefilename(f)
790 return self.path + b'/' + encodefilename(f)
782
791
783 def copylist(self):
792 def copylist(self):
784 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
793 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
785
794
786
795
787 class fncache:
796 class fncache:
788 # the filename used to be partially encoded
797 # the filename used to be partially encoded
789 # hence the encodedir/decodedir dance
798 # hence the encodedir/decodedir dance
790 def __init__(self, vfs):
799 def __init__(self, vfs):
791 self.vfs = vfs
800 self.vfs = vfs
792 self._ignores = set()
801 self._ignores = set()
793 self.entries = None
802 self.entries = None
794 self._dirty = False
803 self._dirty = False
795 # set of new additions to fncache
804 # set of new additions to fncache
796 self.addls = set()
805 self.addls = set()
797
806
798 def ensureloaded(self, warn=None):
807 def ensureloaded(self, warn=None):
799 """read the fncache file if not already read.
808 """read the fncache file if not already read.
800
809
801 If the file on disk is corrupted, raise. If warn is provided,
810 If the file on disk is corrupted, raise. If warn is provided,
802 warn and keep going instead."""
811 warn and keep going instead."""
803 if self.entries is None:
812 if self.entries is None:
804 self._load(warn)
813 self._load(warn)
805
814
806 def _load(self, warn=None):
815 def _load(self, warn=None):
807 '''fill the entries from the fncache file'''
816 '''fill the entries from the fncache file'''
808 self._dirty = False
817 self._dirty = False
809 try:
818 try:
810 fp = self.vfs(b'fncache', mode=b'rb')
819 fp = self.vfs(b'fncache', mode=b'rb')
811 except IOError:
820 except IOError:
812 # skip nonexistent file
821 # skip nonexistent file
813 self.entries = set()
822 self.entries = set()
814 return
823 return
815
824
816 self.entries = set()
825 self.entries = set()
817 chunk = b''
826 chunk = b''
818 for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
827 for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
819 chunk += c
828 chunk += c
820 try:
829 try:
821 p = chunk.rindex(b'\n')
830 p = chunk.rindex(b'\n')
822 self.entries.update(decodedir(chunk[: p + 1]).splitlines())
831 self.entries.update(decodedir(chunk[: p + 1]).splitlines())
823 chunk = chunk[p + 1 :]
832 chunk = chunk[p + 1 :]
824 except ValueError:
833 except ValueError:
825 # substring '\n' not found, maybe the entry is bigger than the
834 # substring '\n' not found, maybe the entry is bigger than the
826 # chunksize, so let's keep iterating
835 # chunksize, so let's keep iterating
827 pass
836 pass
828
837
829 if chunk:
838 if chunk:
830 msg = _(b"fncache does not end with a newline")
839 msg = _(b"fncache does not end with a newline")
831 if warn:
840 if warn:
832 warn(msg + b'\n')
841 warn(msg + b'\n')
833 else:
842 else:
834 raise error.Abort(
843 raise error.Abort(
835 msg,
844 msg,
836 hint=_(
845 hint=_(
837 b"use 'hg debugrebuildfncache' to "
846 b"use 'hg debugrebuildfncache' to "
838 b"rebuild the fncache"
847 b"rebuild the fncache"
839 ),
848 ),
840 )
849 )
841 self._checkentries(fp, warn)
850 self._checkentries(fp, warn)
842 fp.close()
851 fp.close()
843
852
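The `iter(functools.partial(fp.read, chunksize), b'')` idiom above reads the file in fixed-size chunks and only splits complete lines out of each chunk. The same pattern on an in-memory file, with a tiny chunk size and an rfind() test instead of the rindex/except dance (illustrative only):

import functools, io

fp = io.BytesIO(b'data/a.i\ndata/b.i\ndata/long-name.i\n')
entries, chunk = set(), b''
for c in iter(functools.partial(fp.read, 8), b''):   # 8-byte chunks for demo
    chunk += c
    p = chunk.rfind(b'\n')
    if p != -1:
        entries.update(chunk[: p + 1].splitlines())
        chunk = chunk[p + 1 :]
assert entries == {b'data/a.i', b'data/b.i', b'data/long-name.i'}
assert chunk == b''   # a well-formed fncache ends with a newline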
844 def _checkentries(self, fp, warn):
853 def _checkentries(self, fp, warn):
845 """make sure there is no empty string in entries"""
854 """make sure there is no empty string in entries"""
846 if b'' in self.entries:
855 if b'' in self.entries:
847 fp.seek(0)
856 fp.seek(0)
848 for n, line in enumerate(fp):
857 for n, line in enumerate(fp):
849 if not line.rstrip(b'\n'):
858 if not line.rstrip(b'\n'):
850 t = _(b'invalid entry in fncache, line %d') % (n + 1)
859 t = _(b'invalid entry in fncache, line %d') % (n + 1)
851 if warn:
860 if warn:
852 warn(t + b'\n')
861 warn(t + b'\n')
853 else:
862 else:
854 raise error.Abort(t)
863 raise error.Abort(t)
855
864
856 def write(self, tr):
865 def write(self, tr):
857 if self._dirty:
866 if self._dirty:
858 assert self.entries is not None
867 assert self.entries is not None
859 self.entries = self.entries | self.addls
868 self.entries = self.entries | self.addls
860 self.addls = set()
869 self.addls = set()
861 tr.addbackup(b'fncache')
870 tr.addbackup(b'fncache')
862 fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
871 fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
863 if self.entries:
872 if self.entries:
864 fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
873 fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
865 fp.close()
874 fp.close()
866 self._dirty = False
875 self._dirty = False
867 if self.addls:
876 if self.addls:
868 # if we have just new entries, let's append them to the fncache
877 # if we have just new entries, let's append them to the fncache
869 tr.addbackup(b'fncache')
878 tr.addbackup(b'fncache')
870 fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
879 fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
871 if self.addls:
880 if self.addls:
872 fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
881 fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
873 fp.close()
882 fp.close()
874 self.entries = None
883 self.entries = None
875 self.addls = set()
884 self.addls = set()
876
885
877 def addignore(self, fn):
886 def addignore(self, fn):
878 self._ignores.add(fn)
887 self._ignores.add(fn)
879
888
880 def add(self, fn):
889 def add(self, fn):
881 if fn in self._ignores:
890 if fn in self._ignores:
882 return
891 return
883 if self.entries is None:
892 if self.entries is None:
884 self._load()
893 self._load()
885 if fn not in self.entries:
894 if fn not in self.entries:
886 self.addls.add(fn)
895 self.addls.add(fn)
887
896
888 def remove(self, fn):
897 def remove(self, fn):
889 if self.entries is None:
898 if self.entries is None:
890 self._load()
899 self._load()
891 if fn in self.addls:
900 if fn in self.addls:
892 self.addls.remove(fn)
901 self.addls.remove(fn)
893 return
902 return
894 try:
903 try:
895 self.entries.remove(fn)
904 self.entries.remove(fn)
896 self._dirty = True
905 self._dirty = True
897 except KeyError:
906 except KeyError:
898 pass
907 pass
899
908
900 def __contains__(self, fn):
909 def __contains__(self, fn):
901 if fn in self.addls:
910 if fn in self.addls:
902 return True
911 return True
903 if self.entries is None:
912 if self.entries is None:
904 self._load()
913 self._load()
905 return fn in self.entries
914 return fn in self.entries
906
915
907 def __iter__(self):
916 def __iter__(self):
908 if self.entries is None:
917 if self.entries is None:
909 self._load()
918 self._load()
910 return iter(self.entries | self.addls)
919 return iter(self.entries | self.addls)
911
920
912
921
913 class _fncachevfs(vfsmod.proxyvfs):
922 class _fncachevfs(vfsmod.proxyvfs):
914 def __init__(self, vfs, fnc, encode):
923 def __init__(self, vfs, fnc, encode):
915 vfsmod.proxyvfs.__init__(self, vfs)
924 vfsmod.proxyvfs.__init__(self, vfs)
916 self.fncache = fnc
925 self.fncache = fnc
917 self.encode = encode
926 self.encode = encode
918
927
919 def __call__(self, path, mode=b'r', *args, **kw):
928 def __call__(self, path, mode=b'r', *args, **kw):
920 encoded = self.encode(path)
929 encoded = self.encode(path)
921 if (
930 if (
922 mode not in (b'r', b'rb')
931 mode not in (b'r', b'rb')
923 and (path.startswith(b'data/') or path.startswith(b'meta/'))
932 and (path.startswith(b'data/') or path.startswith(b'meta/'))
924 and revlog_type(path) is not None
933 and revlog_type(path) is not None
925 ):
934 ):
926 # do not trigger a fncache load when adding a file that already is
935 # do not trigger a fncache load when adding a file that already is
927 # known to exist.
936 # known to exist.
928 notload = self.fncache.entries is None and self.vfs.exists(encoded)
937 notload = self.fncache.entries is None and self.vfs.exists(encoded)
929 if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
938 if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
930 # when appending to an existing file, if the file has size zero,
939 # when appending to an existing file, if the file has size zero,
931 # it should be considered as missing. Such zero-size files are
940 # it should be considered as missing. Such zero-size files are
932 # the result of truncation when a transaction is aborted.
941 # the result of truncation when a transaction is aborted.
933 notload = False
942 notload = False
934 if not notload:
943 if not notload:
935 self.fncache.add(path)
944 self.fncache.add(path)
936 return self.vfs(encoded, mode, *args, **kw)
945 return self.vfs(encoded, mode, *args, **kw)
937
946
938 def join(self, path):
947 def join(self, path):
939 if path:
948 if path:
940 return self.vfs.join(self.encode(path))
949 return self.vfs.join(self.encode(path))
941 else:
950 else:
942 return self.vfs.join(path)
951 return self.vfs.join(path)
943
952
944 def register_file(self, path):
953 def register_file(self, path):
945 """generic hook point to lets fncache steer its stew"""
954 """generic hook point to lets fncache steer its stew"""
946 if path.startswith(b'data/') or path.startswith(b'meta/'):
955 if path.startswith(b'data/') or path.startswith(b'meta/'):
947 self.fncache.add(path)
956 self.fncache.add(path)
948
957
949
958
950 class fncachestore(basicstore):
959 class fncachestore(basicstore):
951 def __init__(self, path, vfstype, dotencode):
960 def __init__(self, path, vfstype, dotencode):
952 if dotencode:
961 if dotencode:
953 encode = _pathencode
962 encode = _pathencode
954 else:
963 else:
955 encode = _plainhybridencode
964 encode = _plainhybridencode
956 self.encode = encode
965 self.encode = encode
957 vfs = vfstype(path + b'/store')
966 vfs = vfstype(path + b'/store')
958 self.path = vfs.base
967 self.path = vfs.base
959 self.pathsep = self.path + b'/'
968 self.pathsep = self.path + b'/'
960 self.createmode = _calcmode(vfs)
969 self.createmode = _calcmode(vfs)
961 vfs.createmode = self.createmode
970 vfs.createmode = self.createmode
962 self.rawvfs = vfs
971 self.rawvfs = vfs
963 fnc = fncache(vfs)
972 fnc = fncache(vfs)
964 self.fncache = fnc
973 self.fncache = fnc
965 self.vfs = _fncachevfs(vfs, fnc, encode)
974 self.vfs = _fncachevfs(vfs, fnc, encode)
966 self.opener = self.vfs
975 self.opener = self.vfs
967
976
968 def join(self, f):
977 def join(self, f):
969 return self.pathsep + self.encode(f)
978 return self.pathsep + self.encode(f)
970
979
971 def getsize(self, path):
980 def getsize(self, path):
972 return self.rawvfs.stat(path).st_size
981 return self.rawvfs.stat(path).st_size
973
982
974 def datafiles(
983 def datafiles(
975 self, matcher=None, undecodable=None
984 self, matcher=None, undecodable=None
976 ) -> Generator[BaseStoreEntry, None, None]:
985 ) -> Generator[BaseStoreEntry, None, None]:
977 files = ((f, revlog_type(f)) for f in self.fncache)
986 files = ((f, revlog_type(f)) for f in self.fncache)
978 # Note: all files in fncache should be revlog related; however, the
987 # Note: all files in fncache should be revlog related; however, the
979 # fncache might contain such files added by previous versions of
988 # fncache might contain such files added by previous versions of
980 # Mercurial.
989 # Mercurial.
981 files = (f for f in files if f[1] is not None)
990 files = (f for f in files if f[1] is not None)
982 by_revlog = _gather_revlog(files)
991 by_revlog = _gather_revlog(files)
983 for revlog, details in by_revlog:
992 for revlog, details in by_revlog:
993 file_details = {}
984 if revlog.startswith(b'data/'):
994 if revlog.startswith(b'data/'):
985 rl_type = FILEFLAGS_FILELOG
995 rl_type = FILEFLAGS_FILELOG
986 revlog_target_id = revlog.split(b'/', 1)[1]
996 revlog_target_id = revlog.split(b'/', 1)[1]
987 elif revlog.startswith(b'meta/'):
997 elif revlog.startswith(b'meta/'):
988 rl_type = FILEFLAGS_MANIFESTLOG
998 rl_type = FILEFLAGS_MANIFESTLOG
989 # drop the initial directory and the `00manifest` file part
999 # drop the initial directory and the `00manifest` file part
990 tmp = revlog.split(b'/', 1)[1]
1000 tmp = revlog.split(b'/', 1)[1]
991 revlog_target_id = tmp.rsplit(b'/', 1)[0] + b'/'
1001 revlog_target_id = tmp.rsplit(b'/', 1)[0] + b'/'
992 else:
1002 else:
993 # unreachable
1003 # unreachable
994 assert False, revlog
1004 assert False, revlog
995 for ext, t in sorted(details.items()):
1005 for ext, t in details.items():
996 f = revlog + ext
1006 file_details[ext] = {
997 entry = RevlogStoreEntry(
1007 'is_volatile': bool(t & FILEFLAGS_VOLATILE),
998 entry_path=f,
1008 }
999 revlog_type=rl_type,
1009 entry = RevlogStoreEntry(
1000 target_id=revlog_target_id,
1010 path_prefix=revlog,
1001 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
1011 revlog_type=rl_type,
1002 is_volatile=bool(t & FILEFLAGS_VOLATILE),
1012 target_id=revlog_target_id,
1003 )
1013 details=file_details,
1004 if _match_tracked_entry(entry, matcher):
1014 )
1005 yield entry
1015 if _match_tracked_entry(entry, matcher):
1016 yield entry
1006
1017
1007 def copylist(self):
1018 def copylist(self):
1008 d = (
1019 d = (
1009 b'bookmarks',
1020 b'bookmarks',
1010 b'narrowspec',
1021 b'narrowspec',
1011 b'data',
1022 b'data',
1012 b'meta',
1023 b'meta',
1013 b'dh',
1024 b'dh',
1014 b'fncache',
1025 b'fncache',
1015 b'phaseroots',
1026 b'phaseroots',
1016 b'obsstore',
1027 b'obsstore',
1017 b'00manifest.d',
1028 b'00manifest.d',
1018 b'00manifest.i',
1029 b'00manifest.i',
1019 b'00changelog.d',
1030 b'00changelog.d',
1020 b'00changelog.i',
1031 b'00changelog.i',
1021 b'requires',
1032 b'requires',
1022 )
1033 )
1023 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]
1034 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]
1024
1035
1025 def write(self, tr):
1036 def write(self, tr):
1026 self.fncache.write(tr)
1037 self.fncache.write(tr)
1027
1038
1028 def invalidatecaches(self):
1039 def invalidatecaches(self):
1029 self.fncache.entries = None
1040 self.fncache.entries = None
1030 self.fncache.addls = set()
1041 self.fncache.addls = set()
1031
1042
1032 def markremoved(self, fn):
1043 def markremoved(self, fn):
1033 self.fncache.remove(fn)
1044 self.fncache.remove(fn)
1034
1045
1035 def _exists(self, f):
1046 def _exists(self, f):
1036 ef = self.encode(f)
1047 ef = self.encode(f)
1037 try:
1048 try:
1038 self.getsize(ef)
1049 self.getsize(ef)
1039 return True
1050 return True
1040 except FileNotFoundError:
1051 except FileNotFoundError:
1041 return False
1052 return False
1042
1053
1043 def __contains__(self, path):
1054 def __contains__(self, path):
1044 '''Checks if the store contains path'''
1055 '''Checks if the store contains path'''
1045 path = b"/".join((b"data", path))
1056 path = b"/".join((b"data", path))
1046 # check for files (exact match)
1057 # check for files (exact match)
1047 e = path + b'.i'
1058 e = path + b'.i'
1048 if e in self.fncache and self._exists(e):
1059 if e in self.fncache and self._exists(e):
1049 return True
1060 return True
1050 # now check for directories (prefix match)
1061 # now check for directories (prefix match)
1051 if not path.endswith(b'/'):
1062 if not path.endswith(b'/'):
1052 path += b'/'
1063 path += b'/'
1053 for e in self.fncache:
1064 for e in self.fncache:
1054 if e.startswith(path) and self._exists(e):
1065 if e.startswith(path) and self._exists(e):
1055 return True
1066 return True
1056 return False
1067 return False
@@ -1,668 +1,668
1 # upgrade.py - functions for in place upgrade of Mercurial repository
1 # upgrade.py - functions for in place upgrade of Mercurial repository
2 #
2 #
3 # Copyright (c) 2016-present, Gregory Szorc
3 # Copyright (c) 2016-present, Gregory Szorc
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8
8
9 import stat
9 import stat
10
10
11 from ..i18n import _
11 from ..i18n import _
12 from ..pycompat import getattr
12 from ..pycompat import getattr
13 from .. import (
13 from .. import (
14 changelog,
14 changelog,
15 error,
15 error,
16 filelog,
16 filelog,
17 manifest,
17 manifest,
18 metadata,
18 metadata,
19 pycompat,
19 pycompat,
20 requirements,
20 requirements,
21 scmutil,
21 scmutil,
22 store,
22 store,
23 util,
23 util,
24 vfs as vfsmod,
24 vfs as vfsmod,
25 )
25 )
26 from ..revlogutils import (
26 from ..revlogutils import (
27 constants as revlogconst,
27 constants as revlogconst,
28 flagutil,
28 flagutil,
29 nodemap,
29 nodemap,
30 sidedata as sidedatamod,
30 sidedata as sidedatamod,
31 )
31 )
32 from . import actions as upgrade_actions
32 from . import actions as upgrade_actions
33
33
34
34
35 def get_sidedata_helpers(srcrepo, dstrepo):
35 def get_sidedata_helpers(srcrepo, dstrepo):
36 use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
36 use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
37 sequential = pycompat.iswindows or not use_w
37 sequential = pycompat.iswindows or not use_w
38 if not sequential:
38 if not sequential:
39 srcrepo.register_sidedata_computer(
39 srcrepo.register_sidedata_computer(
40 revlogconst.KIND_CHANGELOG,
40 revlogconst.KIND_CHANGELOG,
41 sidedatamod.SD_FILES,
41 sidedatamod.SD_FILES,
42 (sidedatamod.SD_FILES,),
42 (sidedatamod.SD_FILES,),
43 metadata._get_worker_sidedata_adder(srcrepo, dstrepo),
43 metadata._get_worker_sidedata_adder(srcrepo, dstrepo),
44 flagutil.REVIDX_HASCOPIESINFO,
44 flagutil.REVIDX_HASCOPIESINFO,
45 replace=True,
45 replace=True,
46 )
46 )
47 return sidedatamod.get_sidedata_helpers(srcrepo, dstrepo._wanted_sidedata)
47 return sidedatamod.get_sidedata_helpers(srcrepo, dstrepo._wanted_sidedata)
48
48
49
49
50 def _revlog_from_store_entry(repo, entry):
50 def _revlog_from_store_entry(repo, entry):
51 """Obtain a revlog from a repo store entry.
51 """Obtain a revlog from a repo store entry.
52
52
53 An instance of the appropriate class is returned.
53 An instance of the appropriate class is returned.
54 """
54 """
55 if entry.revlog_type == store.FILEFLAGS_CHANGELOG:
55 if entry.revlog_type == store.FILEFLAGS_CHANGELOG:
56 return changelog.changelog(repo.svfs)
56 return changelog.changelog(repo.svfs)
57 elif entry.revlog_type == store.FILEFLAGS_MANIFESTLOG:
57 elif entry.revlog_type == store.FILEFLAGS_MANIFESTLOG:
58 mandir = entry.target_id.rstrip(b'/')
58 mandir = entry.target_id.rstrip(b'/')
59 return manifest.manifestrevlog(
59 return manifest.manifestrevlog(
60 repo.nodeconstants, repo.svfs, tree=mandir
60 repo.nodeconstants, repo.svfs, tree=mandir
61 )
61 )
62 else:
62 else:
63 return filelog.filelog(repo.svfs, entry.target_id)
63 return filelog.filelog(repo.svfs, entry.target_id)
64
64
65
65
66 def _copyrevlog(tr, destrepo, oldrl, entry):
66 def _copyrevlog(tr, destrepo, oldrl, entry):
67 """copy all relevant files for `oldrl` into `destrepo` store
67 """copy all relevant files for `oldrl` into `destrepo` store
68
68
69 Files are copied "as is" without any transformation. The copy is performed
69 Files are copied "as is" without any transformation. The copy is performed
70 without extra checks. Callers are responsible for making sure the copied
70 without extra checks. Callers are responsible for making sure the copied
71 content is compatible with the format of the destination repository.
71 content is compatible with the format of the destination repository.
72 """
72 """
73 oldrl = getattr(oldrl, '_revlog', oldrl)
73 oldrl = getattr(oldrl, '_revlog', oldrl)
74 newrl = _revlog_from_store_entry(destrepo, entry)
74 newrl = _revlog_from_store_entry(destrepo, entry)
75 newrl = getattr(newrl, '_revlog', newrl)
75 newrl = getattr(newrl, '_revlog', newrl)
76
76
77 oldvfs = oldrl.opener
77 oldvfs = oldrl.opener
78 newvfs = newrl.opener
78 newvfs = newrl.opener
79 oldindex = oldvfs.join(oldrl._indexfile)
79 oldindex = oldvfs.join(oldrl._indexfile)
80 newindex = newvfs.join(newrl._indexfile)
80 newindex = newvfs.join(newrl._indexfile)
81 olddata = oldvfs.join(oldrl._datafile)
81 olddata = oldvfs.join(oldrl._datafile)
82 newdata = newvfs.join(newrl._datafile)
82 newdata = newvfs.join(newrl._datafile)
83
83
84 with newvfs(newrl._indexfile, b'w'):
84 with newvfs(newrl._indexfile, b'w'):
85 pass # create all the directories
85 pass # create all the directories
86
86
87 util.copyfile(oldindex, newindex)
87 util.copyfile(oldindex, newindex)
88 copydata = oldrl.opener.exists(oldrl._datafile)
88 copydata = oldrl.opener.exists(oldrl._datafile)
89 if copydata:
89 if copydata:
90 util.copyfile(olddata, newdata)
90 util.copyfile(olddata, newdata)
91
91
92 if entry.revlog_type & store.FILEFLAGS_FILELOG:
92 if entry.revlog_type & store.FILEFLAGS_FILELOG:
93 unencodedname = entry.main_file_path()
93 unencodedname = entry.main_file_path()
94 destrepo.svfs.fncache.add(unencodedname)
94 destrepo.svfs.fncache.add(unencodedname)
95 if copydata:
95 if copydata:
96 destrepo.svfs.fncache.add(unencodedname[:-2] + b'.d')
96 destrepo.svfs.fncache.add(unencodedname[:-2] + b'.d')
97
97
98
98
99 UPGRADE_CHANGELOG = b"changelog"
99 UPGRADE_CHANGELOG = b"changelog"
100 UPGRADE_MANIFEST = b"manifest"
100 UPGRADE_MANIFEST = b"manifest"
101 UPGRADE_FILELOGS = b"all-filelogs"
101 UPGRADE_FILELOGS = b"all-filelogs"
102
102
103 UPGRADE_ALL_REVLOGS = frozenset(
103 UPGRADE_ALL_REVLOGS = frozenset(
104 [UPGRADE_CHANGELOG, UPGRADE_MANIFEST, UPGRADE_FILELOGS]
104 [UPGRADE_CHANGELOG, UPGRADE_MANIFEST, UPGRADE_FILELOGS]
105 )
105 )
106
106
107
107
108 def matchrevlog(revlogfilter, rl_type):
108 def matchrevlog(revlogfilter, rl_type):
109 """check if a revlog is selected for cloning.
109 """check if a revlog is selected for cloning.
110
110
111 In other words, does this revlog need any updates, or can it be
111 In other words, does this revlog need any updates, or can it be
112 blindly copied?
112 blindly copied?
113
113
114 The store entry is checked against the passed filter"""
114 The store entry is checked against the passed filter"""
115 if rl_type & store.FILEFLAGS_CHANGELOG:
115 if rl_type & store.FILEFLAGS_CHANGELOG:
116 return UPGRADE_CHANGELOG in revlogfilter
116 return UPGRADE_CHANGELOG in revlogfilter
117 elif rl_type & store.FILEFLAGS_MANIFESTLOG:
117 elif rl_type & store.FILEFLAGS_MANIFESTLOG:
118 return UPGRADE_MANIFEST in revlogfilter
118 return UPGRADE_MANIFEST in revlogfilter
119 assert rl_type & store.FILEFLAGS_FILELOG
119 assert rl_type & store.FILEFLAGS_FILELOG
120 return UPGRADE_FILELOGS in revlogfilter
120 return UPGRADE_FILELOGS in revlogfilter
121
121
122
122
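A compact, self-contained illustration of how the filter set interacts with the flag bits; the CHANGELOG/MANIFESTLOG flag values below are assumptions (only FILELOG's value appears in the store.py hunk above), so treat the numbers as placeholders:

# placeholder flag values; see mercurial/store.py for the real ones
FILEFLAGS_CHANGELOG = 1 << 13     # assumed
FILEFLAGS_MANIFESTLOG = 1 << 12   # assumed
FILEFLAGS_FILELOG = 1 << 11

def matchrevlog(revlogfilter, rl_type):
    # same selection logic as above, using the placeholder flags
    if rl_type & FILEFLAGS_CHANGELOG:
        return b"changelog" in revlogfilter
    elif rl_type & FILEFLAGS_MANIFESTLOG:
        return b"manifest" in revlogfilter
    return b"all-filelogs" in revlogfilter

only_filelogs = frozenset([b"all-filelogs"])
assert matchrevlog(only_filelogs, FILEFLAGS_FILELOG)        # re-clone
assert not matchrevlog(only_filelogs, FILEFLAGS_CHANGELOG)  # blind copy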
123 def _perform_clone(
123 def _perform_clone(
124 ui,
124 ui,
125 dstrepo,
125 dstrepo,
126 tr,
126 tr,
127 old_revlog,
127 old_revlog,
128 entry,
128 entry,
129 upgrade_op,
129 upgrade_op,
130 sidedata_helpers,
130 sidedata_helpers,
131 oncopiedrevision,
131 oncopiedrevision,
132 ):
132 ):
133 """returns the new revlog object created"""
133 """returns the new revlog object created"""
134 newrl = None
134 newrl = None
135 revlog_path = entry.main_file_path()
135 revlog_path = entry.main_file_path()
136 if matchrevlog(upgrade_op.revlogs_to_process, entry.revlog_type):
136 if matchrevlog(upgrade_op.revlogs_to_process, entry.revlog_type):
137 ui.note(
137 ui.note(
138 _(b'cloning %d revisions from %s\n')
138 _(b'cloning %d revisions from %s\n')
139 % (len(old_revlog), revlog_path)
139 % (len(old_revlog), revlog_path)
140 )
140 )
141 newrl = _revlog_from_store_entry(dstrepo, entry)
141 newrl = _revlog_from_store_entry(dstrepo, entry)
142 old_revlog.clone(
142 old_revlog.clone(
143 tr,
143 tr,
144 newrl,
144 newrl,
145 addrevisioncb=oncopiedrevision,
145 addrevisioncb=oncopiedrevision,
146 deltareuse=upgrade_op.delta_reuse_mode,
146 deltareuse=upgrade_op.delta_reuse_mode,
147 forcedeltabothparents=upgrade_op.force_re_delta_both_parents,
147 forcedeltabothparents=upgrade_op.force_re_delta_both_parents,
148 sidedata_helpers=sidedata_helpers,
148 sidedata_helpers=sidedata_helpers,
149 )
149 )
150 else:
150 else:
151 msg = _(b'blindly copying %s containing %i revisions\n')
151 msg = _(b'blindly copying %s containing %i revisions\n')
152 ui.note(msg % (revlog_path, len(old_revlog)))
152 ui.note(msg % (revlog_path, len(old_revlog)))
153 _copyrevlog(tr, dstrepo, old_revlog, entry)
153 _copyrevlog(tr, dstrepo, old_revlog, entry)
154
154
155 newrl = _revlog_from_store_entry(dstrepo, entry)
155 newrl = _revlog_from_store_entry(dstrepo, entry)
156 return newrl
156 return newrl
157
157
158
158
159 def _clonerevlogs(
159 def _clonerevlogs(
160 ui,
160 ui,
161 srcrepo,
161 srcrepo,
162 dstrepo,
162 dstrepo,
163 tr,
163 tr,
164 upgrade_op,
164 upgrade_op,
165 ):
165 ):
166 """Copy revlogs between 2 repos."""
166 """Copy revlogs between 2 repos."""
167 revcount = 0
167 revcount = 0
168 srcsize = 0
168 srcsize = 0
169 srcrawsize = 0
169 srcrawsize = 0
170 dstsize = 0
170 dstsize = 0
171 fcount = 0
171 fcount = 0
172 frevcount = 0
172 frevcount = 0
173 fsrcsize = 0
173 fsrcsize = 0
174 frawsize = 0
174 frawsize = 0
175 fdstsize = 0
175 fdstsize = 0
176 mcount = 0
176 mcount = 0
177 mrevcount = 0
177 mrevcount = 0
178 msrcsize = 0
178 msrcsize = 0
179 mrawsize = 0
179 mrawsize = 0
180 mdstsize = 0
180 mdstsize = 0
181 crevcount = 0
181 crevcount = 0
182 csrcsize = 0
182 csrcsize = 0
183 crawsize = 0
183 crawsize = 0
184 cdstsize = 0
184 cdstsize = 0
185
185
186 alldatafiles = list(srcrepo.store.walk())
186 alldatafiles = list(srcrepo.store.walk())
187 # mapping of data files which need to be cloned
187 # mapping of data files which need to be cloned
188 # key is unencoded filename
188 # key is unencoded filename
189 # value is revlog_object_from_srcrepo
189 # value is revlog_object_from_srcrepo
190 manifests = {}
190 manifests = {}
191 changelogs = {}
191 changelogs = {}
192 filelogs = {}
192 filelogs = {}
193
193
194 # Perform a pass to collect metadata. This validates we can open all
194 # Perform a pass to collect metadata. This validates we can open all
195 # source files and allows a unified progress bar to be displayed.
195 # source files and allows a unified progress bar to be displayed.
196 for entry in alldatafiles:
196 for entry in alldatafiles:
197 if not (entry.is_revlog and entry.is_revlog_main):
197 if not entry.is_revlog:
198 continue
198 continue
199
199
200 rl = _revlog_from_store_entry(srcrepo, entry)
200 rl = _revlog_from_store_entry(srcrepo, entry)
201
201
202 info = rl.storageinfo(
202 info = rl.storageinfo(
203 exclusivefiles=True,
203 exclusivefiles=True,
204 revisionscount=True,
204 revisionscount=True,
205 trackedsize=True,
205 trackedsize=True,
206 storedsize=True,
206 storedsize=True,
207 )
207 )
208
208
209 revcount += info[b'revisionscount'] or 0
209 revcount += info[b'revisionscount'] or 0
210 datasize = info[b'storedsize'] or 0
210 datasize = info[b'storedsize'] or 0
211 rawsize = info[b'trackedsize'] or 0
211 rawsize = info[b'trackedsize'] or 0
212
212
213 srcsize += datasize
213 srcsize += datasize
214 srcrawsize += rawsize
214 srcrawsize += rawsize
215
215
216 # This is for the separate progress bars.
216 # This is for the separate progress bars.
217 if entry.revlog_type & store.FILEFLAGS_CHANGELOG:
217 if entry.revlog_type & store.FILEFLAGS_CHANGELOG:
218 changelogs[entry.target_id] = entry
218 changelogs[entry.target_id] = entry
219 crevcount += len(rl)
219 crevcount += len(rl)
220 csrcsize += datasize
220 csrcsize += datasize
221 crawsize += rawsize
221 crawsize += rawsize
222 elif entry.revlog_type & store.FILEFLAGS_MANIFESTLOG:
222 elif entry.revlog_type & store.FILEFLAGS_MANIFESTLOG:
223 manifests[entry.target_id] = entry
223 manifests[entry.target_id] = entry
224 mcount += 1
224 mcount += 1
225 mrevcount += len(rl)
225 mrevcount += len(rl)
226 msrcsize += datasize
226 msrcsize += datasize
227 mrawsize += rawsize
227 mrawsize += rawsize
228 elif entry.revlog_type & store.FILEFLAGS_FILELOG:
228 elif entry.revlog_type & store.FILEFLAGS_FILELOG:
229 filelogs[entry.target_id] = entry
229 filelogs[entry.target_id] = entry
230 fcount += 1
230 fcount += 1
231 frevcount += len(rl)
231 frevcount += len(rl)
232 fsrcsize += datasize
232 fsrcsize += datasize
233 frawsize += rawsize
233 frawsize += rawsize
234 else:
234 else:
235 raise error.ProgrammingError(b'unknown revlog type')
235 raise error.ProgrammingError(b'unknown revlog type')
236
236
237 if not revcount:
237 if not revcount:
238 return
238 return
239
239
240 ui.status(
240 ui.status(
241 _(
241 _(
242 b'migrating %d total revisions (%d in filelogs, %d in manifests, '
242 b'migrating %d total revisions (%d in filelogs, %d in manifests, '
243 b'%d in changelog)\n'
243 b'%d in changelog)\n'
244 )
244 )
245 % (revcount, frevcount, mrevcount, crevcount)
245 % (revcount, frevcount, mrevcount, crevcount)
246 )
246 )
247 ui.status(
247 ui.status(
248 _(b'migrating %s in store; %s tracked data\n')
248 _(b'migrating %s in store; %s tracked data\n')
249 % ((util.bytecount(srcsize), util.bytecount(srcrawsize)))
249 % ((util.bytecount(srcsize), util.bytecount(srcrawsize)))
250 )
250 )
251
251
252 # Used to keep track of progress.
252 # Used to keep track of progress.
253 progress = None
253 progress = None
254
254
255 def oncopiedrevision(rl, rev, node):
255 def oncopiedrevision(rl, rev, node):
256 progress.increment()
256 progress.increment()
257
257
258 sidedata_helpers = get_sidedata_helpers(srcrepo, dstrepo)
258 sidedata_helpers = get_sidedata_helpers(srcrepo, dstrepo)
259
259
260 # Migrating filelogs
260 # Migrating filelogs
261 ui.status(
261 ui.status(
262 _(
262 _(
263 b'migrating %d filelogs containing %d revisions '
263 b'migrating %d filelogs containing %d revisions '
264 b'(%s in store; %s tracked data)\n'
264 b'(%s in store; %s tracked data)\n'
265 )
265 )
266 % (
266 % (
267 fcount,
267 fcount,
268 frevcount,
268 frevcount,
269 util.bytecount(fsrcsize),
269 util.bytecount(fsrcsize),
270 util.bytecount(frawsize),
270 util.bytecount(frawsize),
271 )
271 )
272 )
272 )
273 progress = srcrepo.ui.makeprogress(_(b'file revisions'), total=frevcount)
273 progress = srcrepo.ui.makeprogress(_(b'file revisions'), total=frevcount)
274 for target_id, entry in sorted(filelogs.items()):
274 for target_id, entry in sorted(filelogs.items()):
275 oldrl = _revlog_from_store_entry(srcrepo, entry)
275 oldrl = _revlog_from_store_entry(srcrepo, entry)
276
276
277 newrl = _perform_clone(
277 newrl = _perform_clone(
278 ui,
278 ui,
279 dstrepo,
279 dstrepo,
280 tr,
280 tr,
281 oldrl,
281 oldrl,
282 entry,
282 entry,
283 upgrade_op,
283 upgrade_op,
284 sidedata_helpers,
284 sidedata_helpers,
285 oncopiedrevision,
285 oncopiedrevision,
286 )
286 )
287 info = newrl.storageinfo(storedsize=True)
287 info = newrl.storageinfo(storedsize=True)
288 fdstsize += info[b'storedsize'] or 0
288 fdstsize += info[b'storedsize'] or 0
289 ui.status(
289 ui.status(
290 _(
290 _(
291 b'finished migrating %d filelog revisions across %d '
291 b'finished migrating %d filelog revisions across %d '
292 b'filelogs; change in size: %s\n'
292 b'filelogs; change in size: %s\n'
293 )
293 )
294 % (frevcount, fcount, util.bytecount(fdstsize - fsrcsize))
294 % (frevcount, fcount, util.bytecount(fdstsize - fsrcsize))
295 )
295 )
296
296
297 # Migrating manifests
297 # Migrating manifests
298 ui.status(
298 ui.status(
299 _(
299 _(
300 b'migrating %d manifests containing %d revisions '
300 b'migrating %d manifests containing %d revisions '
301 b'(%s in store; %s tracked data)\n'
301 b'(%s in store; %s tracked data)\n'
302 )
302 )
303 % (
303 % (
304 mcount,
304 mcount,
305 mrevcount,
305 mrevcount,
306 util.bytecount(msrcsize),
306 util.bytecount(msrcsize),
307 util.bytecount(mrawsize),
307 util.bytecount(mrawsize),
308 )
308 )
309 )
309 )
310 if progress:
310 if progress:
311 progress.complete()
311 progress.complete()
312 progress = srcrepo.ui.makeprogress(
312 progress = srcrepo.ui.makeprogress(
313 _(b'manifest revisions'), total=mrevcount
313 _(b'manifest revisions'), total=mrevcount
314 )
314 )
315 for target_id, entry in sorted(manifests.items()):
315 for target_id, entry in sorted(manifests.items()):
316 oldrl = _revlog_from_store_entry(srcrepo, entry)
316 oldrl = _revlog_from_store_entry(srcrepo, entry)
317 newrl = _perform_clone(
317 newrl = _perform_clone(
318 ui,
318 ui,
319 dstrepo,
319 dstrepo,
320 tr,
320 tr,
321 oldrl,
321 oldrl,
322 entry,
322 entry,
323 upgrade_op,
323 upgrade_op,
324 sidedata_helpers,
324 sidedata_helpers,
325 oncopiedrevision,
325 oncopiedrevision,
326 )
326 )
327 info = newrl.storageinfo(storedsize=True)
327 info = newrl.storageinfo(storedsize=True)
328 mdstsize += info[b'storedsize'] or 0
328 mdstsize += info[b'storedsize'] or 0
329 ui.status(
329 ui.status(
330 _(
330 _(
331 b'finished migrating %d manifest revisions across %d '
331 b'finished migrating %d manifest revisions across %d '
332 b'manifests; change in size: %s\n'
332 b'manifests; change in size: %s\n'
333 )
333 )
334 % (mrevcount, mcount, util.bytecount(mdstsize - msrcsize))
334 % (mrevcount, mcount, util.bytecount(mdstsize - msrcsize))
335 )
335 )
336
336
337 # Migrating changelog
337 # Migrating changelog
338 ui.status(
338 ui.status(
339 _(
339 _(
340 b'migrating changelog containing %d revisions '
340 b'migrating changelog containing %d revisions '
341 b'(%s in store; %s tracked data)\n'
341 b'(%s in store; %s tracked data)\n'
342 )
342 )
343 % (
343 % (
344 crevcount,
344 crevcount,
345 util.bytecount(csrcsize),
345 util.bytecount(csrcsize),
346 util.bytecount(crawsize),
346 util.bytecount(crawsize),
347 )
347 )
348 )
348 )
349 if progress:
349 if progress:
350 progress.complete()
350 progress.complete()
351 progress = srcrepo.ui.makeprogress(
351 progress = srcrepo.ui.makeprogress(
352 _(b'changelog revisions'), total=crevcount
352 _(b'changelog revisions'), total=crevcount
353 )
353 )
354 for target_id, entry in sorted(changelogs.items()):
354 for target_id, entry in sorted(changelogs.items()):
355 oldrl = _revlog_from_store_entry(srcrepo, entry)
355 oldrl = _revlog_from_store_entry(srcrepo, entry)
356 newrl = _perform_clone(
356 newrl = _perform_clone(
357 ui,
357 ui,
358 dstrepo,
358 dstrepo,
359 tr,
359 tr,
360 oldrl,
360 oldrl,
361 entry,
361 entry,
362 upgrade_op,
362 upgrade_op,
363 sidedata_helpers,
363 sidedata_helpers,
364 oncopiedrevision,
364 oncopiedrevision,
365 )
365 )
366 info = newrl.storageinfo(storedsize=True)
366 info = newrl.storageinfo(storedsize=True)
367 cdstsize += info[b'storedsize'] or 0
367 cdstsize += info[b'storedsize'] or 0
368 progress.complete()
368 progress.complete()
369 ui.status(
369 ui.status(
370 _(
370 _(
371 b'finished migrating %d changelog revisions; change in size: '
371 b'finished migrating %d changelog revisions; change in size: '
372 b'%s\n'
372 b'%s\n'
373 )
373 )
374 % (crevcount, util.bytecount(cdstsize - csrcsize))
374 % (crevcount, util.bytecount(cdstsize - csrcsize))
375 )
375 )
376
376
377 dstsize = fdstsize + mdstsize + cdstsize
377 dstsize = fdstsize + mdstsize + cdstsize
378 ui.status(
378 ui.status(
379 _(
379 _(
380 b'finished migrating %d total revisions; total change in store '
380 b'finished migrating %d total revisions; total change in store '
381 b'size: %s\n'
381 b'size: %s\n'
382 )
382 )
383 % (revcount, util.bytecount(dstsize - srcsize))
383 % (revcount, util.bytecount(dstsize - srcsize))
384 )
384 )
385
385
386
386
387 def _files_to_copy_post_revlog_clone(srcrepo):
387 def _files_to_copy_post_revlog_clone(srcrepo):
388 """yields files which should be copied to destination after revlogs
388 """yields files which should be copied to destination after revlogs
389 are cloned"""
389 are cloned"""
390 for path, kind, st in sorted(srcrepo.store.vfs.readdir(b'', stat=True)):
390 for path, kind, st in sorted(srcrepo.store.vfs.readdir(b'', stat=True)):
391 # don't copy revlogs as they are already cloned
391 # don't copy revlogs as they are already cloned
392 if store.revlog_type(path) is not None:
392 if store.revlog_type(path) is not None:
393 continue
393 continue
394 # Skip transaction related files.
394 # Skip transaction related files.
395 if path.startswith(b'undo'):
395 if path.startswith(b'undo'):
396 continue
396 continue
397 # Only copy regular files.
397 # Only copy regular files.
398 if kind != stat.S_IFREG:
398 if kind != stat.S_IFREG:
399 continue
399 continue
400 # Skip other skipped files.
400 # Skip other skipped files.
401 if path in (b'lock', b'fncache'):
401 if path in (b'lock', b'fncache'):
402 continue
402 continue
403 # TODO: should we skip cache too?
403 # TODO: should we skip cache too?
404
404
405 yield path
405 yield path
406
406
407
407
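A rough standalone equivalent of that walk using os.listdir, with the revlog_type() test approximated by a suffix check (the suffix list and helper name are illustrative, not the real store API):

import os, stat

REVLOG_SUFFIXES = ('.i', '.d', '.n', '.nd', '.idx')  # rough approximation

def files_to_copy(store_path):
    for name in sorted(os.listdir(store_path)):
        if name.endswith(REVLOG_SUFFIXES):   # revlogs are already cloned
            continue
        if name.startswith('undo'):          # transaction leftovers
            continue
        if name in ('lock', 'fncache'):      # handled separately
            continue
        st = os.stat(os.path.join(store_path, name))
        if not stat.S_ISREG(st.st_mode):     # only regular files
            continue
        yield name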
408 def _replacestores(currentrepo, upgradedrepo, backupvfs, upgrade_op):
408 def _replacestores(currentrepo, upgradedrepo, backupvfs, upgrade_op):
409 """Replace the stores after current repository is upgraded
409 """Replace the stores after current repository is upgraded
410
410
411 Creates a backup of current repository store at backup path
411 Creates a backup of current repository store at backup path
412 Replaces store files in the current repo with those from the upgraded one
412 Replaces store files in the current repo with those from the upgraded one
413
413
414 Arguments:
414 Arguments:
415 currentrepo: repo object of current repository
415 currentrepo: repo object of current repository
416 upgradedrepo: repo object of the upgraded data
416 upgradedrepo: repo object of the upgraded data
417 backupvfs: vfs object for the backup path
417 backupvfs: vfs object for the backup path
418 upgrade_op: upgrade operation object
418 upgrade_op: upgrade operation object
419 to be used to decide what is upgraded
419 to be used to decide what is upgraded
420 """
420 """
421 # TODO: don't blindly rename everything in store
421 # TODO: don't blindly rename everything in store
422 # There can be upgrades where store is not touched at all
422 # There can be upgrades where store is not touched at all
423 if upgrade_op.backup_store:
423 if upgrade_op.backup_store:
424 util.rename(currentrepo.spath, backupvfs.join(b'store'))
424 util.rename(currentrepo.spath, backupvfs.join(b'store'))
425 else:
425 else:
426 currentrepo.vfs.rmtree(b'store', forcibly=True)
426 currentrepo.vfs.rmtree(b'store', forcibly=True)
427 util.rename(upgradedrepo.spath, currentrepo.spath)
427 util.rename(upgradedrepo.spath, currentrepo.spath)
428
428
429
429
430 def finishdatamigration(ui, srcrepo, dstrepo, requirements):
430 def finishdatamigration(ui, srcrepo, dstrepo, requirements):
431 """Hook point for extensions to perform additional actions during upgrade.
431 """Hook point for extensions to perform additional actions during upgrade.
432
432
433 This function is called after revlogs and store files have been copied but
433 This function is called after revlogs and store files have been copied but
434 before the new store is swapped into the original location.
434 before the new store is swapped into the original location.
435 """
435 """
436
436
437
437
438 def upgrade(ui, srcrepo, dstrepo, upgrade_op):
438 def upgrade(ui, srcrepo, dstrepo, upgrade_op):
439 """Do the low-level work of upgrading a repository.
439 """Do the low-level work of upgrading a repository.
440
440
441 The upgrade is effectively performed as a copy between a source
441 The upgrade is effectively performed as a copy between a source
442 repository and a temporary destination repository.
442 repository and a temporary destination repository.
443
443
444 The source repository is unmodified for as long as possible so the
444 The source repository is unmodified for as long as possible so the
445 upgrade can abort at any time without causing loss of service for
445 upgrade can abort at any time without causing loss of service for
446 readers and without corrupting the source repository.
446 readers and without corrupting the source repository.
447 """
447 """
448 assert srcrepo.currentwlock()
448 assert srcrepo.currentwlock()
449 assert dstrepo.currentwlock()
449 assert dstrepo.currentwlock()
450 backuppath = None
450 backuppath = None
451 backupvfs = None
451 backupvfs = None
452
452
453 ui.status(
453 ui.status(
454 _(
454 _(
455 b'(it is safe to interrupt this process any time before '
455 b'(it is safe to interrupt this process any time before '
456 b'data migration completes)\n'
456 b'data migration completes)\n'
457 )
457 )
458 )
458 )
459
459
    if upgrade_actions.dirstatev2 in upgrade_op.upgrade_actions:
        ui.status(_(b'upgrading to dirstate-v2 from v1\n'))
        upgrade_dirstate(ui, srcrepo, upgrade_op, b'v1', b'v2')
        upgrade_op.upgrade_actions.remove(upgrade_actions.dirstatev2)

    if upgrade_actions.dirstatev2 in upgrade_op.removed_actions:
        ui.status(_(b'downgrading from dirstate-v2 to v1\n'))
        upgrade_dirstate(ui, srcrepo, upgrade_op, b'v2', b'v1')
        upgrade_op.removed_actions.remove(upgrade_actions.dirstatev2)

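    # The dirstate-tracked-hint file is likewise handled here, without going
    # through the store cloning machinery.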
    if upgrade_actions.dirstatetrackedkey in upgrade_op.upgrade_actions:
        ui.status(_(b'create dirstate-tracked-hint file\n'))
        upgrade_tracked_hint(ui, srcrepo, upgrade_op, add=True)
        upgrade_op.upgrade_actions.remove(upgrade_actions.dirstatetrackedkey)
    elif upgrade_actions.dirstatetrackedkey in upgrade_op.removed_actions:
        ui.status(_(b'remove dirstate-tracked-hint file\n'))
        upgrade_tracked_hint(ui, srcrepo, upgrade_op, add=False)
        upgrade_op.removed_actions.remove(upgrade_actions.dirstatetrackedkey)

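    # If the dirstate-only actions above were the whole upgrade, there is
    # nothing left to do.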
    if not (upgrade_op.upgrade_actions or upgrade_op.removed_actions):
        return

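    # A requirements-only upgrade never touches the store; rewriting the
    # requires file is sufficient.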
    if upgrade_op.requirements_only:
        ui.status(_(b'upgrading repository requirements\n'))
        scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
    # if there is only one action and that is persistent nodemap upgrade
    # directly write the nodemap file and update requirements instead of going
    # through the whole cloning process
    elif (
        len(upgrade_op.upgrade_actions) == 1
        and b'persistent-nodemap' in upgrade_op.upgrade_actions_names
        and not upgrade_op.removed_actions
    ):
        ui.status(
            _(b'upgrading repository to use persistent nodemap feature\n')
        )
        with srcrepo.transaction(b'upgrade') as tr:
            unfi = srcrepo.unfiltered()
            cl = unfi.changelog
            nodemap.persist_nodemap(tr, cl, force=True)
            # we want to directly operate on the underlying revlog to force
            # create a nodemap file. This is fine since this is upgrade code
            # and it heavily relies on repository being revlog based
            # hence accessing private attributes can be justified
            nodemap.persist_nodemap(
                tr, unfi.manifestlog._rootstore._revlog, force=True
            )
        scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
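    # symmetric fast path: dropping the persistent nodemap only requires
    # deleting the nodemap files and rewriting the requirements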
    elif (
        len(upgrade_op.removed_actions) == 1
        and [
            x
            for x in upgrade_op.removed_actions
            if x.name == b'persistent-nodemap'
        ]
        and not upgrade_op.upgrade_actions
    ):
        ui.status(
            _(b'downgrading repository to not use persistent nodemap feature\n')
        )
        with srcrepo.transaction(b'upgrade') as tr:
            unfi = srcrepo.unfiltered()
            cl = unfi.changelog
            nodemap.delete_nodemap(tr, srcrepo, cl)
            # check comment 20 lines above for accessing private attributes
            nodemap.delete_nodemap(
                tr, srcrepo, unfi.manifestlog._rootstore._revlog
            )
        scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
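    # general case: clone every revlog into the temporary destination
    # repository, then swap its store with the source repository's store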
    else:
        with dstrepo.transaction(b'upgrade') as tr:
            _clonerevlogs(
                ui,
                srcrepo,
                dstrepo,
                tr,
                upgrade_op,
            )

        # Now copy other files in the store directory.
        for p in _files_to_copy_post_revlog_clone(srcrepo):
            srcrepo.ui.status(_(b'copying %s\n') % p)
            src = srcrepo.store.rawvfs.join(p)
            dst = dstrepo.store.rawvfs.join(p)
            util.copyfile(src, dst, copystat=True)

        finishdatamigration(ui, srcrepo, dstrepo, requirements)

        ui.status(_(b'data fully upgraded in a temporary repository\n'))

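        # Before touching the source store, set up the backup directory that
        # replaced files will be preserved in.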
        if upgrade_op.backup_store:
            backuppath = pycompat.mkdtemp(
                prefix=b'upgradebackup.', dir=srcrepo.path
            )
            backupvfs = vfsmod.vfs(backuppath)

            # Make a backup of requires file first, as it is the first to be modified.
            util.copyfile(
                srcrepo.vfs.join(b'requires'), backupvfs.join(b'requires')
            )

        # We install an arbitrary requirement that clients must not support
        # as a mechanism to lock out new clients during the data swap. This is
        # better than allowing a client to continue while the repository is in
        # an inconsistent state.
        ui.status(
            _(
                b'marking source repository as being upgraded; clients will be '
                b'unable to read from repository\n'
            )
        )
        scmutil.writereporequirements(
            srcrepo, srcrepo.requirements | {b'upgradeinprogress'}
        )

        ui.status(_(b'starting in-place swap of repository data\n'))
        if upgrade_op.backup_store:
            ui.status(
                _(b'replaced files will be backed up at %s\n') % backuppath
            )

        # Now swap in the new store directory. Doing it as a rename should make
        # the operation nearly instantaneous and atomic (at least in well-behaved
        # environments).
        ui.status(_(b'replacing store...\n'))
        tstart = util.timer()
        _replacestores(srcrepo, dstrepo, backupvfs, upgrade_op)
        elapsed = util.timer() - tstart
        ui.status(
            _(
                b'store replacement complete; repository was inconsistent for '
                b'%0.1fs\n'
            )
            % elapsed
        )

        # We first write the requirements file. Any new requirements will lock
        # out legacy clients.
        ui.status(
            _(
                b'finalizing requirements file and making repository readable '
                b'again\n'
            )
        )
        scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)

        if upgrade_op.backup_store:
            # The lock file from the old store won't be removed because nothing has a
            # reference to its new location. So clean it up manually. Alternatively, we
            # could update srcrepo.svfs and other variables to point to the new
            # location. This is simpler.
            assert backupvfs is not None  # help pytype
            backupvfs.unlink(b'store/lock')

    return backuppath


def upgrade_dirstate(ui, srcrepo, upgrade_op, old, new):
    if upgrade_op.backup_store:
        backuppath = pycompat.mkdtemp(
            prefix=b'upgradebackup.', dir=srcrepo.path
        )
        ui.status(_(b'replaced files will be backed up at %s\n') % backuppath)
        backupvfs = vfsmod.vfs(backuppath)
        util.copyfile(
            srcrepo.vfs.join(b'requires'), backupvfs.join(b'requires')
        )
        try:
            util.copyfile(
                srcrepo.vfs.join(b'dirstate'), backupvfs.join(b'dirstate')
            )
        except FileNotFoundError:
            # The dirstate does not exist on an empty repo or a repo with no
            # revision checked out
            pass

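    # The caller passes the current and the target format; the on-disk
    # dirstate must match the `old` format before anything is flipped.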
    assert srcrepo.dirstate._use_dirstate_v2 == (old == b'v2')
    use_v2 = new == b'v2'
    if use_v2:
        # Write the requirements *before* upgrading
        scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)

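    # Load the whole dirstate into memory, switch the in-memory format flag,
    # drop the old on-disk file, and write it back out in the new format.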
    srcrepo.dirstate._map.preload()
    srcrepo.dirstate._use_dirstate_v2 = use_v2
    srcrepo.dirstate._map._use_dirstate_v2 = use_v2
    srcrepo.dirstate._dirty = True
    try:
        srcrepo.vfs.unlink(b'dirstate')
    except FileNotFoundError:
        # The dirstate does not exist on an empty repo or a repo with no
        # revision checked out
        pass

    srcrepo.dirstate.write(None)
    if not use_v2:
        # Remove the v2 requirement *after* downgrading
        scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)


def upgrade_tracked_hint(ui, srcrepo, upgrade_op, add):
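    # When adding the hint, flag the dirstate dirty with a tracked-set change
    # so the next write also emits the tracked-hint file; when removing it,
    # just delete the hint file.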
    if add:
        srcrepo.dirstate._use_tracked_hint = True
        srcrepo.dirstate._dirty = True
        srcrepo.dirstate._dirty_tracked_set = True
        srcrepo.dirstate.write(None)
    if not add:
        srcrepo.dirstate.delete_tracked_hint()

    scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)