store: use a StoreEntry object instead of tuple for store files...

Author: marmoute
Changeset: r51364:521fec11 (branch: default)
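The first hunk adapts hgext/largefiles/lfutil.py to the new store API: repo.store.datafiles() now yields StoreEntry objects instead of bare tuples, so islfilesrepo() reads entry.unencoded_path where it previously indexed f[1]. Below is a minimal sketch of that calling convention, using a simplified stand-in for the real entry class; the dataclass, the assumed tuple layout, and the sample path are illustrative only, not Mercurial's actual implementation.

from dataclasses import dataclass

shortnameslash = b'.hglf/'


@dataclass
class StoreEntry:
    """Simplified model of a store entry: only the attribute used here."""

    unencoded_path: bytes


def datafiles_old():
    # old API (shape assumed for illustration): tuples whose index 1
    # is the unencoded path
    return [(b'data/.hglf/big.bin.i', b'data/.hglf/big.bin.i', 0)]


def datafiles_new():
    # new API: entry objects exposing the path as a named attribute
    return [StoreEntry(unencoded_path=b'data/.hglf/big.bin.i')]


# before this changeset: positional access into the tuple
assert any(shortnameslash in f[1] for f in datafiles_old())

# after this changeset: the attribute access used by islfilesrepo()
assert any(
    shortnameslash in entry.unencoded_path for entry in datafiles_new()
)

The attribute-based form keeps call sites readable and leaves room for the store to carry more per-file metadata later without breaking positional unpacking in callers such as this extension.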
--- a/hgext/largefiles/lfutil.py
+++ b/hgext/largefiles/lfutil.py
@@ -1,823 +1,824 @@
1 # Copyright 2009-2010 Gregory P. Ward
1 # Copyright 2009-2010 Gregory P. Ward
2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
3 # Copyright 2010-2011 Fog Creek Software
3 # Copyright 2010-2011 Fog Creek Software
4 # Copyright 2010-2011 Unity Technologies
4 # Copyright 2010-2011 Unity Technologies
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 '''largefiles utility code: must not import other modules in this package.'''
9 '''largefiles utility code: must not import other modules in this package.'''
10
10
11 import contextlib
11 import contextlib
12 import copy
12 import copy
13 import os
13 import os
14 import stat
14 import stat
15
15
16 from mercurial.i18n import _
16 from mercurial.i18n import _
17 from mercurial.node import hex
17 from mercurial.node import hex
18 from mercurial.pycompat import open
18 from mercurial.pycompat import open
19
19
20 from mercurial import (
20 from mercurial import (
21 dirstate,
21 dirstate,
22 encoding,
22 encoding,
23 error,
23 error,
24 httpconnection,
24 httpconnection,
25 match as matchmod,
25 match as matchmod,
26 pycompat,
26 pycompat,
27 requirements,
27 requirements,
28 scmutil,
28 scmutil,
29 sparse,
29 sparse,
30 util,
30 util,
31 vfs as vfsmod,
31 vfs as vfsmod,
32 )
32 )
33 from mercurial.utils import hashutil
33 from mercurial.utils import hashutil
34 from mercurial.dirstateutils import timestamp
34 from mercurial.dirstateutils import timestamp
35
35
36 shortname = b'.hglf'
36 shortname = b'.hglf'
37 shortnameslash = shortname + b'/'
37 shortnameslash = shortname + b'/'
38 longname = b'largefiles'
38 longname = b'largefiles'
39
39
40 # -- Private worker functions ------------------------------------------
40 # -- Private worker functions ------------------------------------------
41
41
42
42
43 @contextlib.contextmanager
43 @contextlib.contextmanager
44 def lfstatus(repo, value=True):
44 def lfstatus(repo, value=True):
45 oldvalue = getattr(repo, 'lfstatus', False)
45 oldvalue = getattr(repo, 'lfstatus', False)
46 repo.lfstatus = value
46 repo.lfstatus = value
47 try:
47 try:
48 yield
48 yield
49 finally:
49 finally:
50 repo.lfstatus = oldvalue
50 repo.lfstatus = oldvalue
51
51
52
52
53 def getminsize(ui, assumelfiles, opt, default=10):
53 def getminsize(ui, assumelfiles, opt, default=10):
54 lfsize = opt
54 lfsize = opt
55 if not lfsize and assumelfiles:
55 if not lfsize and assumelfiles:
56 lfsize = ui.config(longname, b'minsize', default=default)
56 lfsize = ui.config(longname, b'minsize', default=default)
57 if lfsize:
57 if lfsize:
58 try:
58 try:
59 lfsize = float(lfsize)
59 lfsize = float(lfsize)
60 except ValueError:
60 except ValueError:
61 raise error.Abort(
61 raise error.Abort(
62 _(b'largefiles: size must be number (not %s)\n') % lfsize
62 _(b'largefiles: size must be number (not %s)\n') % lfsize
63 )
63 )
64 if lfsize is None:
64 if lfsize is None:
65 raise error.Abort(_(b'minimum size for largefiles must be specified'))
65 raise error.Abort(_(b'minimum size for largefiles must be specified'))
66 return lfsize
66 return lfsize
67
67
68
68
69 def link(src, dest):
69 def link(src, dest):
70 """Try to create hardlink - if that fails, efficiently make a copy."""
70 """Try to create hardlink - if that fails, efficiently make a copy."""
71 util.makedirs(os.path.dirname(dest))
71 util.makedirs(os.path.dirname(dest))
72 try:
72 try:
73 util.oslink(src, dest)
73 util.oslink(src, dest)
74 except OSError:
74 except OSError:
75 # if hardlinks fail, fallback on atomic copy
75 # if hardlinks fail, fallback on atomic copy
76 with open(src, b'rb') as srcf, util.atomictempfile(dest) as dstf:
76 with open(src, b'rb') as srcf, util.atomictempfile(dest) as dstf:
77 for chunk in util.filechunkiter(srcf):
77 for chunk in util.filechunkiter(srcf):
78 dstf.write(chunk)
78 dstf.write(chunk)
79 os.chmod(dest, os.stat(src).st_mode)
79 os.chmod(dest, os.stat(src).st_mode)
80
80
81
81
82 def usercachepath(ui, hash):
82 def usercachepath(ui, hash):
83 """Return the correct location in the "global" largefiles cache for a file
83 """Return the correct location in the "global" largefiles cache for a file
84 with the given hash.
84 with the given hash.
85 This cache is used for sharing of largefiles across repositories - both
85 This cache is used for sharing of largefiles across repositories - both
86 to preserve download bandwidth and storage space."""
86 to preserve download bandwidth and storage space."""
87 return os.path.join(_usercachedir(ui), hash)
87 return os.path.join(_usercachedir(ui), hash)
88
88
89
89
90 def _usercachedir(ui, name=longname):
90 def _usercachedir(ui, name=longname):
91 '''Return the location of the "global" largefiles cache.'''
91 '''Return the location of the "global" largefiles cache.'''
92 path = ui.configpath(name, b'usercache')
92 path = ui.configpath(name, b'usercache')
93 if path:
93 if path:
94 return path
94 return path
95
95
96 hint = None
96 hint = None
97
97
98 if pycompat.iswindows:
98 if pycompat.iswindows:
99 appdata = encoding.environ.get(
99 appdata = encoding.environ.get(
100 b'LOCALAPPDATA', encoding.environ.get(b'APPDATA')
100 b'LOCALAPPDATA', encoding.environ.get(b'APPDATA')
101 )
101 )
102 if appdata:
102 if appdata:
103 return os.path.join(appdata, name)
103 return os.path.join(appdata, name)
104
104
105 hint = _(b"define %s or %s in the environment, or set %s.usercache") % (
105 hint = _(b"define %s or %s in the environment, or set %s.usercache") % (
106 b"LOCALAPPDATA",
106 b"LOCALAPPDATA",
107 b"APPDATA",
107 b"APPDATA",
108 name,
108 name,
109 )
109 )
110 elif pycompat.isdarwin:
110 elif pycompat.isdarwin:
111 home = encoding.environ.get(b'HOME')
111 home = encoding.environ.get(b'HOME')
112 if home:
112 if home:
113 return os.path.join(home, b'Library', b'Caches', name)
113 return os.path.join(home, b'Library', b'Caches', name)
114
114
115 hint = _(b"define %s in the environment, or set %s.usercache") % (
115 hint = _(b"define %s in the environment, or set %s.usercache") % (
116 b"HOME",
116 b"HOME",
117 name,
117 name,
118 )
118 )
119 elif pycompat.isposix:
119 elif pycompat.isposix:
120 path = encoding.environ.get(b'XDG_CACHE_HOME')
120 path = encoding.environ.get(b'XDG_CACHE_HOME')
121 if path:
121 if path:
122 return os.path.join(path, name)
122 return os.path.join(path, name)
123 home = encoding.environ.get(b'HOME')
123 home = encoding.environ.get(b'HOME')
124 if home:
124 if home:
125 return os.path.join(home, b'.cache', name)
125 return os.path.join(home, b'.cache', name)
126
126
127 hint = _(b"define %s or %s in the environment, or set %s.usercache") % (
127 hint = _(b"define %s or %s in the environment, or set %s.usercache") % (
128 b"XDG_CACHE_HOME",
128 b"XDG_CACHE_HOME",
129 b"HOME",
129 b"HOME",
130 name,
130 name,
131 )
131 )
132 else:
132 else:
133 raise error.Abort(
133 raise error.Abort(
134 _(b'unknown operating system: %s\n') % pycompat.osname
134 _(b'unknown operating system: %s\n') % pycompat.osname
135 )
135 )
136
136
137 raise error.Abort(_(b'unknown %s usercache location') % name, hint=hint)
137 raise error.Abort(_(b'unknown %s usercache location') % name, hint=hint)
138
138
139
139
140 def inusercache(ui, hash):
140 def inusercache(ui, hash):
141 path = usercachepath(ui, hash)
141 path = usercachepath(ui, hash)
142 return os.path.exists(path)
142 return os.path.exists(path)
143
143
144
144
145 def findfile(repo, hash):
145 def findfile(repo, hash):
146 """Return store path of the largefile with the specified hash.
146 """Return store path of the largefile with the specified hash.
147 As a side effect, the file might be linked from user cache.
147 As a side effect, the file might be linked from user cache.
148 Return None if the file can't be found locally."""
148 Return None if the file can't be found locally."""
149 path, exists = findstorepath(repo, hash)
149 path, exists = findstorepath(repo, hash)
150 if exists:
150 if exists:
151 repo.ui.note(_(b'found %s in store\n') % hash)
151 repo.ui.note(_(b'found %s in store\n') % hash)
152 return path
152 return path
153 elif inusercache(repo.ui, hash):
153 elif inusercache(repo.ui, hash):
154 repo.ui.note(_(b'found %s in system cache\n') % hash)
154 repo.ui.note(_(b'found %s in system cache\n') % hash)
155 path = storepath(repo, hash)
155 path = storepath(repo, hash)
156 link(usercachepath(repo.ui, hash), path)
156 link(usercachepath(repo.ui, hash), path)
157 return path
157 return path
158 return None
158 return None
159
159
160
160
161 class largefilesdirstate(dirstate.dirstate):
161 class largefilesdirstate(dirstate.dirstate):
162 _large_file_dirstate = True
162 _large_file_dirstate = True
163 _tr_key_suffix = b'-large-files'
163 _tr_key_suffix = b'-large-files'
164
164
165 def __getitem__(self, key):
165 def __getitem__(self, key):
166 return super(largefilesdirstate, self).__getitem__(unixpath(key))
166 return super(largefilesdirstate, self).__getitem__(unixpath(key))
167
167
168 def set_tracked(self, f):
168 def set_tracked(self, f):
169 return super(largefilesdirstate, self).set_tracked(unixpath(f))
169 return super(largefilesdirstate, self).set_tracked(unixpath(f))
170
170
171 def set_untracked(self, f):
171 def set_untracked(self, f):
172 return super(largefilesdirstate, self).set_untracked(unixpath(f))
172 return super(largefilesdirstate, self).set_untracked(unixpath(f))
173
173
174 def normal(self, f, parentfiledata=None):
174 def normal(self, f, parentfiledata=None):
175 # not sure if we should pass the `parentfiledata` down or throw it
175 # not sure if we should pass the `parentfiledata` down or throw it
176 # away. So throwing it away to stay on the safe side.
176 # away. So throwing it away to stay on the safe side.
177 return super(largefilesdirstate, self).normal(unixpath(f))
177 return super(largefilesdirstate, self).normal(unixpath(f))
178
178
179 def remove(self, f):
179 def remove(self, f):
180 return super(largefilesdirstate, self).remove(unixpath(f))
180 return super(largefilesdirstate, self).remove(unixpath(f))
181
181
182 def add(self, f):
182 def add(self, f):
183 return super(largefilesdirstate, self).add(unixpath(f))
183 return super(largefilesdirstate, self).add(unixpath(f))
184
184
185 def drop(self, f):
185 def drop(self, f):
186 return super(largefilesdirstate, self).drop(unixpath(f))
186 return super(largefilesdirstate, self).drop(unixpath(f))
187
187
188 def forget(self, f):
188 def forget(self, f):
189 return super(largefilesdirstate, self).forget(unixpath(f))
189 return super(largefilesdirstate, self).forget(unixpath(f))
190
190
191 def normallookup(self, f):
191 def normallookup(self, f):
192 return super(largefilesdirstate, self).normallookup(unixpath(f))
192 return super(largefilesdirstate, self).normallookup(unixpath(f))
193
193
194 def _ignore(self, f):
194 def _ignore(self, f):
195 return False
195 return False
196
196
197 def write(self, tr):
197 def write(self, tr):
198 # (1) disable PENDING mode always
198 # (1) disable PENDING mode always
199 # (lfdirstate isn't yet managed as a part of the transaction)
199 # (lfdirstate isn't yet managed as a part of the transaction)
200 # (2) avoid develwarn 'use dirstate.write with ....'
200 # (2) avoid develwarn 'use dirstate.write with ....'
201 if tr:
201 if tr:
202 tr.addbackup(b'largefiles/dirstate', location=b'plain')
202 tr.addbackup(b'largefiles/dirstate', location=b'plain')
203 super(largefilesdirstate, self).write(None)
203 super(largefilesdirstate, self).write(None)
204
204
205
205
206 def openlfdirstate(ui, repo, create=True):
206 def openlfdirstate(ui, repo, create=True):
207 """
207 """
208 Return a dirstate object that tracks largefiles: i.e. its root is
208 Return a dirstate object that tracks largefiles: i.e. its root is
209 the repo root, but it is saved in .hg/largefiles/dirstate.
209 the repo root, but it is saved in .hg/largefiles/dirstate.
210
210
211 If a dirstate object already exists and is being used for a 'changing_*'
211 If a dirstate object already exists and is being used for a 'changing_*'
212 context, it will be returned.
212 context, it will be returned.
213 """
213 """
214 sub_dirstate = getattr(repo.dirstate, '_sub_dirstate', None)
214 sub_dirstate = getattr(repo.dirstate, '_sub_dirstate', None)
215 if sub_dirstate is not None:
215 if sub_dirstate is not None:
216 return sub_dirstate
216 return sub_dirstate
217 vfs = repo.vfs
217 vfs = repo.vfs
218 lfstoredir = longname
218 lfstoredir = longname
219 opener = vfsmod.vfs(vfs.join(lfstoredir))
219 opener = vfsmod.vfs(vfs.join(lfstoredir))
220 use_dirstate_v2 = requirements.DIRSTATE_V2_REQUIREMENT in repo.requirements
220 use_dirstate_v2 = requirements.DIRSTATE_V2_REQUIREMENT in repo.requirements
221 lfdirstate = largefilesdirstate(
221 lfdirstate = largefilesdirstate(
222 opener,
222 opener,
223 ui,
223 ui,
224 repo.root,
224 repo.root,
225 repo.dirstate._validate,
225 repo.dirstate._validate,
226 lambda: sparse.matcher(repo),
226 lambda: sparse.matcher(repo),
227 repo.nodeconstants,
227 repo.nodeconstants,
228 use_dirstate_v2,
228 use_dirstate_v2,
229 )
229 )
230
230
231 # If the largefiles dirstate does not exist, populate and create
231 # If the largefiles dirstate does not exist, populate and create
232 # it. This ensures that we create it on the first meaningful
232 # it. This ensures that we create it on the first meaningful
233 # largefiles operation in a new clone.
233 # largefiles operation in a new clone.
234 if create and not vfs.exists(vfs.join(lfstoredir, b'dirstate')):
234 if create and not vfs.exists(vfs.join(lfstoredir, b'dirstate')):
235 try:
235 try:
236 with repo.wlock(wait=False), lfdirstate.changing_files(repo):
236 with repo.wlock(wait=False), lfdirstate.changing_files(repo):
237 matcher = getstandinmatcher(repo)
237 matcher = getstandinmatcher(repo)
238 standins = repo.dirstate.walk(
238 standins = repo.dirstate.walk(
239 matcher, subrepos=[], unknown=False, ignored=False
239 matcher, subrepos=[], unknown=False, ignored=False
240 )
240 )
241
241
242 if len(standins) > 0:
242 if len(standins) > 0:
243 vfs.makedirs(lfstoredir)
243 vfs.makedirs(lfstoredir)
244
244
245 for standin in standins:
245 for standin in standins:
246 lfile = splitstandin(standin)
246 lfile = splitstandin(standin)
247 lfdirstate.hacky_extension_update_file(
247 lfdirstate.hacky_extension_update_file(
248 lfile,
248 lfile,
249 p1_tracked=True,
249 p1_tracked=True,
250 wc_tracked=True,
250 wc_tracked=True,
251 possibly_dirty=True,
251 possibly_dirty=True,
252 )
252 )
253 except error.LockError:
253 except error.LockError:
254 # Assume that whatever was holding the lock was important.
254 # Assume that whatever was holding the lock was important.
255 # If we were doing something important, we would already have
255 # If we were doing something important, we would already have
256 # either the lock or a largefile dirstate.
256 # either the lock or a largefile dirstate.
257 pass
257 pass
258 return lfdirstate
258 return lfdirstate
259
259
260
260
261 def lfdirstatestatus(lfdirstate, repo):
261 def lfdirstatestatus(lfdirstate, repo):
262 pctx = repo[b'.']
262 pctx = repo[b'.']
263 match = matchmod.always()
263 match = matchmod.always()
264 unsure, s, mtime_boundary = lfdirstate.status(
264 unsure, s, mtime_boundary = lfdirstate.status(
265 match, subrepos=[], ignored=False, clean=False, unknown=False
265 match, subrepos=[], ignored=False, clean=False, unknown=False
266 )
266 )
267 modified, clean = s.modified, s.clean
267 modified, clean = s.modified, s.clean
268 wctx = repo[None]
268 wctx = repo[None]
269 for lfile in unsure:
269 for lfile in unsure:
270 try:
270 try:
271 fctx = pctx[standin(lfile)]
271 fctx = pctx[standin(lfile)]
272 except LookupError:
272 except LookupError:
273 fctx = None
273 fctx = None
274 if not fctx or readasstandin(fctx) != hashfile(repo.wjoin(lfile)):
274 if not fctx or readasstandin(fctx) != hashfile(repo.wjoin(lfile)):
275 modified.append(lfile)
275 modified.append(lfile)
276 else:
276 else:
277 clean.append(lfile)
277 clean.append(lfile)
278 st = wctx[lfile].lstat()
278 st = wctx[lfile].lstat()
279 mode = st.st_mode
279 mode = st.st_mode
280 size = st.st_size
280 size = st.st_size
281 mtime = timestamp.reliable_mtime_of(st, mtime_boundary)
281 mtime = timestamp.reliable_mtime_of(st, mtime_boundary)
282 if mtime is not None:
282 if mtime is not None:
283 cache_data = (mode, size, mtime)
283 cache_data = (mode, size, mtime)
284 lfdirstate.set_clean(lfile, cache_data)
284 lfdirstate.set_clean(lfile, cache_data)
285 return s
285 return s
286
286
287
287
288 def listlfiles(repo, rev=None, matcher=None):
288 def listlfiles(repo, rev=None, matcher=None):
289 """return a list of largefiles in the working copy or the
289 """return a list of largefiles in the working copy or the
290 specified changeset"""
290 specified changeset"""
291
291
292 if matcher is None:
292 if matcher is None:
293 matcher = getstandinmatcher(repo)
293 matcher = getstandinmatcher(repo)
294
294
295 # ignore unknown files in working directory
295 # ignore unknown files in working directory
296 return [
296 return [
297 splitstandin(f)
297 splitstandin(f)
298 for f in repo[rev].walk(matcher)
298 for f in repo[rev].walk(matcher)
299 if rev is not None or repo.dirstate.get_entry(f).any_tracked
299 if rev is not None or repo.dirstate.get_entry(f).any_tracked
300 ]
300 ]
301
301
302
302
303 def instore(repo, hash, forcelocal=False):
303 def instore(repo, hash, forcelocal=False):
304 '''Return true if a largefile with the given hash exists in the store'''
304 '''Return true if a largefile with the given hash exists in the store'''
305 return os.path.exists(storepath(repo, hash, forcelocal))
305 return os.path.exists(storepath(repo, hash, forcelocal))
306
306
307
307
308 def storepath(repo, hash, forcelocal=False):
308 def storepath(repo, hash, forcelocal=False):
309 """Return the correct location in the repository largefiles store for a
309 """Return the correct location in the repository largefiles store for a
310 file with the given hash."""
310 file with the given hash."""
311 if not forcelocal and repo.shared():
311 if not forcelocal and repo.shared():
312 return repo.vfs.reljoin(repo.sharedpath, longname, hash)
312 return repo.vfs.reljoin(repo.sharedpath, longname, hash)
313 return repo.vfs.join(longname, hash)
313 return repo.vfs.join(longname, hash)
314
314
315
315
316 def findstorepath(repo, hash):
316 def findstorepath(repo, hash):
317 """Search through the local store path(s) to find the file for the given
317 """Search through the local store path(s) to find the file for the given
318 hash. If the file is not found, its path in the primary store is returned.
318 hash. If the file is not found, its path in the primary store is returned.
319 The return value is a tuple of (path, exists(path)).
319 The return value is a tuple of (path, exists(path)).
320 """
320 """
321 # For shared repos, the primary store is in the share source. But for
321 # For shared repos, the primary store is in the share source. But for
322 # backward compatibility, force a lookup in the local store if it wasn't
322 # backward compatibility, force a lookup in the local store if it wasn't
323 # found in the share source.
323 # found in the share source.
324 path = storepath(repo, hash, False)
324 path = storepath(repo, hash, False)
325
325
326 if instore(repo, hash):
326 if instore(repo, hash):
327 return (path, True)
327 return (path, True)
328 elif repo.shared() and instore(repo, hash, True):
328 elif repo.shared() and instore(repo, hash, True):
329 return storepath(repo, hash, True), True
329 return storepath(repo, hash, True), True
330
330
331 return (path, False)
331 return (path, False)
332
332
333
333
334 def copyfromcache(repo, hash, filename):
334 def copyfromcache(repo, hash, filename):
335 """Copy the specified largefile from the repo or system cache to
335 """Copy the specified largefile from the repo or system cache to
336 filename in the repository. Return true on success or false if the
336 filename in the repository. Return true on success or false if the
337 file was not found in either cache (which should not happen:
337 file was not found in either cache (which should not happen:
338 this is meant to be called only after ensuring that the needed
338 this is meant to be called only after ensuring that the needed
339 largefile exists in the cache)."""
339 largefile exists in the cache)."""
340 wvfs = repo.wvfs
340 wvfs = repo.wvfs
341 path = findfile(repo, hash)
341 path = findfile(repo, hash)
342 if path is None:
342 if path is None:
343 return False
343 return False
344 wvfs.makedirs(wvfs.dirname(wvfs.join(filename)))
344 wvfs.makedirs(wvfs.dirname(wvfs.join(filename)))
345 # The write may fail before the file is fully written, but we
345 # The write may fail before the file is fully written, but we
346 # don't use atomic writes in the working copy.
346 # don't use atomic writes in the working copy.
347 with open(path, b'rb') as srcfd, wvfs(filename, b'wb') as destfd:
347 with open(path, b'rb') as srcfd, wvfs(filename, b'wb') as destfd:
348 gothash = copyandhash(util.filechunkiter(srcfd), destfd)
348 gothash = copyandhash(util.filechunkiter(srcfd), destfd)
349 if gothash != hash:
349 if gothash != hash:
350 repo.ui.warn(
350 repo.ui.warn(
351 _(b'%s: data corruption in %s with hash %s\n')
351 _(b'%s: data corruption in %s with hash %s\n')
352 % (filename, path, gothash)
352 % (filename, path, gothash)
353 )
353 )
354 wvfs.unlink(filename)
354 wvfs.unlink(filename)
355 return False
355 return False
356 return True
356 return True
357
357
358
358
359 def copytostore(repo, ctx, file, fstandin):
359 def copytostore(repo, ctx, file, fstandin):
360 wvfs = repo.wvfs
360 wvfs = repo.wvfs
361 hash = readasstandin(ctx[fstandin])
361 hash = readasstandin(ctx[fstandin])
362 if instore(repo, hash):
362 if instore(repo, hash):
363 return
363 return
364 if wvfs.exists(file):
364 if wvfs.exists(file):
365 copytostoreabsolute(repo, wvfs.join(file), hash)
365 copytostoreabsolute(repo, wvfs.join(file), hash)
366 else:
366 else:
367 repo.ui.warn(
367 repo.ui.warn(
368 _(b"%s: largefile %s not available from local store\n")
368 _(b"%s: largefile %s not available from local store\n")
369 % (file, hash)
369 % (file, hash)
370 )
370 )
371
371
372
372
373 def copyalltostore(repo, node):
373 def copyalltostore(repo, node):
374 '''Copy all largefiles in a given revision to the store'''
374 '''Copy all largefiles in a given revision to the store'''
375
375
376 ctx = repo[node]
376 ctx = repo[node]
377 for filename in ctx.files():
377 for filename in ctx.files():
378 realfile = splitstandin(filename)
378 realfile = splitstandin(filename)
379 if realfile is not None and filename in ctx.manifest():
379 if realfile is not None and filename in ctx.manifest():
380 copytostore(repo, ctx, realfile, filename)
380 copytostore(repo, ctx, realfile, filename)
381
381
382
382
383 def copytostoreabsolute(repo, file, hash):
383 def copytostoreabsolute(repo, file, hash):
384 if inusercache(repo.ui, hash):
384 if inusercache(repo.ui, hash):
385 link(usercachepath(repo.ui, hash), storepath(repo, hash))
385 link(usercachepath(repo.ui, hash), storepath(repo, hash))
386 else:
386 else:
387 util.makedirs(os.path.dirname(storepath(repo, hash)))
387 util.makedirs(os.path.dirname(storepath(repo, hash)))
388 with open(file, b'rb') as srcf:
388 with open(file, b'rb') as srcf:
389 with util.atomictempfile(
389 with util.atomictempfile(
390 storepath(repo, hash), createmode=repo.store.createmode
390 storepath(repo, hash), createmode=repo.store.createmode
391 ) as dstf:
391 ) as dstf:
392 for chunk in util.filechunkiter(srcf):
392 for chunk in util.filechunkiter(srcf):
393 dstf.write(chunk)
393 dstf.write(chunk)
394 linktousercache(repo, hash)
394 linktousercache(repo, hash)
395
395
396
396
397 def linktousercache(repo, hash):
397 def linktousercache(repo, hash):
398 """Link / copy the largefile with the specified hash from the store
398 """Link / copy the largefile with the specified hash from the store
399 to the cache."""
399 to the cache."""
400 path = usercachepath(repo.ui, hash)
400 path = usercachepath(repo.ui, hash)
401 link(storepath(repo, hash), path)
401 link(storepath(repo, hash), path)
402
402
403
403
404 def getstandinmatcher(repo, rmatcher=None):
404 def getstandinmatcher(repo, rmatcher=None):
405 '''Return a match object that applies rmatcher to the standin directory'''
405 '''Return a match object that applies rmatcher to the standin directory'''
406 wvfs = repo.wvfs
406 wvfs = repo.wvfs
407 standindir = shortname
407 standindir = shortname
408
408
409 # no warnings about missing files or directories
409 # no warnings about missing files or directories
410 badfn = lambda f, msg: None
410 badfn = lambda f, msg: None
411
411
412 if rmatcher and not rmatcher.always():
412 if rmatcher and not rmatcher.always():
413 pats = [wvfs.join(standindir, pat) for pat in rmatcher.files()]
413 pats = [wvfs.join(standindir, pat) for pat in rmatcher.files()]
414 if not pats:
414 if not pats:
415 pats = [wvfs.join(standindir)]
415 pats = [wvfs.join(standindir)]
416 match = scmutil.match(repo[None], pats, badfn=badfn)
416 match = scmutil.match(repo[None], pats, badfn=badfn)
417 else:
417 else:
418 # no patterns: relative to repo root
418 # no patterns: relative to repo root
419 match = scmutil.match(repo[None], [wvfs.join(standindir)], badfn=badfn)
419 match = scmutil.match(repo[None], [wvfs.join(standindir)], badfn=badfn)
420 return match
420 return match
421
421
422
422
423 def composestandinmatcher(repo, rmatcher):
423 def composestandinmatcher(repo, rmatcher):
424 """Return a matcher that accepts standins corresponding to the
424 """Return a matcher that accepts standins corresponding to the
425 files accepted by rmatcher. Pass the list of files in the matcher
425 files accepted by rmatcher. Pass the list of files in the matcher
426 as the paths specified by the user."""
426 as the paths specified by the user."""
427 smatcher = getstandinmatcher(repo, rmatcher)
427 smatcher = getstandinmatcher(repo, rmatcher)
428 isstandin = smatcher.matchfn
428 isstandin = smatcher.matchfn
429
429
430 def composedmatchfn(f):
430 def composedmatchfn(f):
431 return isstandin(f) and rmatcher.matchfn(splitstandin(f))
431 return isstandin(f) and rmatcher.matchfn(splitstandin(f))
432
432
433 smatcher.matchfn = composedmatchfn
433 smatcher.matchfn = composedmatchfn
434
434
435 return smatcher
435 return smatcher
436
436
437
437
438 def standin(filename):
438 def standin(filename):
439 """Return the repo-relative path to the standin for the specified big
439 """Return the repo-relative path to the standin for the specified big
440 file."""
440 file."""
441 # Notes:
441 # Notes:
442 # 1) Some callers want an absolute path, but for instance addlargefiles
442 # 1) Some callers want an absolute path, but for instance addlargefiles
443 # needs it repo-relative so it can be passed to repo[None].add(). So
443 # needs it repo-relative so it can be passed to repo[None].add(). So
444 # leave it up to the caller to use repo.wjoin() to get an absolute path.
444 # leave it up to the caller to use repo.wjoin() to get an absolute path.
445 # 2) Join with '/' because that's what dirstate always uses, even on
445 # 2) Join with '/' because that's what dirstate always uses, even on
446 # Windows. Change existing separator to '/' first in case we are
446 # Windows. Change existing separator to '/' first in case we are
447 # passed filenames from an external source (like the command line).
447 # passed filenames from an external source (like the command line).
448 return shortnameslash + util.pconvert(filename)
448 return shortnameslash + util.pconvert(filename)
449
449
450
450
451 def isstandin(filename):
451 def isstandin(filename):
452 """Return true if filename is a big file standin. filename must be
452 """Return true if filename is a big file standin. filename must be
453 in Mercurial's internal form (slash-separated)."""
453 in Mercurial's internal form (slash-separated)."""
454 return filename.startswith(shortnameslash)
454 return filename.startswith(shortnameslash)
455
455
456
456
457 def splitstandin(filename):
457 def splitstandin(filename):
458 # Split on / because that's what dirstate always uses, even on Windows.
458 # Split on / because that's what dirstate always uses, even on Windows.
459 # Change local separator to / first just in case we are passed filenames
459 # Change local separator to / first just in case we are passed filenames
460 # from an external source (like the command line).
460 # from an external source (like the command line).
461 bits = util.pconvert(filename).split(b'/', 1)
461 bits = util.pconvert(filename).split(b'/', 1)
462 if len(bits) == 2 and bits[0] == shortname:
462 if len(bits) == 2 and bits[0] == shortname:
463 return bits[1]
463 return bits[1]
464 else:
464 else:
465 return None
465 return None
466
466
467
467
468 def updatestandin(repo, lfile, standin):
468 def updatestandin(repo, lfile, standin):
469 """Re-calculate hash value of lfile and write it into standin
469 """Re-calculate hash value of lfile and write it into standin
470
470
471 This assumes that "lfutil.standin(lfile) == standin", for efficiency.
471 This assumes that "lfutil.standin(lfile) == standin", for efficiency.
472 """
472 """
473 file = repo.wjoin(lfile)
473 file = repo.wjoin(lfile)
474 if repo.wvfs.exists(lfile):
474 if repo.wvfs.exists(lfile):
475 hash = hashfile(file)
475 hash = hashfile(file)
476 executable = getexecutable(file)
476 executable = getexecutable(file)
477 writestandin(repo, standin, hash, executable)
477 writestandin(repo, standin, hash, executable)
478 else:
478 else:
479 raise error.Abort(_(b'%s: file not found!') % lfile)
479 raise error.Abort(_(b'%s: file not found!') % lfile)
480
480
481
481
482 def readasstandin(fctx):
482 def readasstandin(fctx):
483 """read hex hash from given filectx of standin file
483 """read hex hash from given filectx of standin file
484
484
485 This encapsulates how "standin" data is stored into storage layer."""
485 This encapsulates how "standin" data is stored into storage layer."""
486 return fctx.data().strip()
486 return fctx.data().strip()
487
487
488
488
489 def writestandin(repo, standin, hash, executable):
489 def writestandin(repo, standin, hash, executable):
490 '''write hash to <repo.root>/<standin>'''
490 '''write hash to <repo.root>/<standin>'''
491 repo.wwrite(standin, hash + b'\n', executable and b'x' or b'')
491 repo.wwrite(standin, hash + b'\n', executable and b'x' or b'')
492
492
493
493
494 def copyandhash(instream, outfile):
494 def copyandhash(instream, outfile):
495 """Read bytes from instream (iterable) and write them to outfile,
495 """Read bytes from instream (iterable) and write them to outfile,
496 computing the SHA-1 hash of the data along the way. Return the hash."""
496 computing the SHA-1 hash of the data along the way. Return the hash."""
497 hasher = hashutil.sha1(b'')
497 hasher = hashutil.sha1(b'')
498 for data in instream:
498 for data in instream:
499 hasher.update(data)
499 hasher.update(data)
500 outfile.write(data)
500 outfile.write(data)
501 return hex(hasher.digest())
501 return hex(hasher.digest())
502
502
503
503
504 def hashfile(file):
504 def hashfile(file):
505 if not os.path.exists(file):
505 if not os.path.exists(file):
506 return b''
506 return b''
507 with open(file, b'rb') as fd:
507 with open(file, b'rb') as fd:
508 return hexsha1(fd)
508 return hexsha1(fd)
509
509
510
510
511 def getexecutable(filename):
511 def getexecutable(filename):
512 mode = os.stat(filename).st_mode
512 mode = os.stat(filename).st_mode
513 return (
513 return (
514 (mode & stat.S_IXUSR)
514 (mode & stat.S_IXUSR)
515 and (mode & stat.S_IXGRP)
515 and (mode & stat.S_IXGRP)
516 and (mode & stat.S_IXOTH)
516 and (mode & stat.S_IXOTH)
517 )
517 )
518
518
519
519
520 def urljoin(first, second, *arg):
520 def urljoin(first, second, *arg):
521 def join(left, right):
521 def join(left, right):
522 if not left.endswith(b'/'):
522 if not left.endswith(b'/'):
523 left += b'/'
523 left += b'/'
524 if right.startswith(b'/'):
524 if right.startswith(b'/'):
525 right = right[1:]
525 right = right[1:]
526 return left + right
526 return left + right
527
527
528 url = join(first, second)
528 url = join(first, second)
529 for a in arg:
529 for a in arg:
530 url = join(url, a)
530 url = join(url, a)
531 return url
531 return url
532
532
533
533
534 def hexsha1(fileobj):
534 def hexsha1(fileobj):
535 """hexsha1 returns the hex-encoded sha1 sum of the data in the file-like
535 """hexsha1 returns the hex-encoded sha1 sum of the data in the file-like
536 object data"""
536 object data"""
537 h = hashutil.sha1()
537 h = hashutil.sha1()
538 for chunk in util.filechunkiter(fileobj):
538 for chunk in util.filechunkiter(fileobj):
539 h.update(chunk)
539 h.update(chunk)
540 return hex(h.digest())
540 return hex(h.digest())
541
541
542
542
543 def httpsendfile(ui, filename):
543 def httpsendfile(ui, filename):
544 return httpconnection.httpsendfile(ui, filename, b'rb')
544 return httpconnection.httpsendfile(ui, filename, b'rb')
545
545
546
546
547 def unixpath(path):
547 def unixpath(path):
548 '''Return a version of path normalized for use with the lfdirstate.'''
548 '''Return a version of path normalized for use with the lfdirstate.'''
549 return util.pconvert(os.path.normpath(path))
549 return util.pconvert(os.path.normpath(path))
550
550
551
551
552 def islfilesrepo(repo):
552 def islfilesrepo(repo):
553 '''Return true if the repo is a largefile repo.'''
553 '''Return true if the repo is a largefile repo.'''
554 if b'largefiles' in repo.requirements and any(
555 -   shortnameslash in f[1] for f in repo.store.datafiles()
555 +   shortnameslash in entry.unencoded_path
556 +   for entry in repo.store.datafiles()
557 ):
558 return True
559
560 return any(openlfdirstate(repo.ui, repo, False))
560
561
561
562
562 class storeprotonotcapable(Exception):
563 class storeprotonotcapable(Exception):
563 def __init__(self, storetypes):
564 def __init__(self, storetypes):
564 self.storetypes = storetypes
565 self.storetypes = storetypes
565
566
566
567
567 def getstandinsstate(repo):
568 def getstandinsstate(repo):
568 standins = []
569 standins = []
569 matcher = getstandinmatcher(repo)
570 matcher = getstandinmatcher(repo)
570 wctx = repo[None]
571 wctx = repo[None]
571 for standin in repo.dirstate.walk(
572 for standin in repo.dirstate.walk(
572 matcher, subrepos=[], unknown=False, ignored=False
573 matcher, subrepos=[], unknown=False, ignored=False
573 ):
574 ):
574 lfile = splitstandin(standin)
575 lfile = splitstandin(standin)
575 try:
576 try:
576 hash = readasstandin(wctx[standin])
577 hash = readasstandin(wctx[standin])
577 except IOError:
578 except IOError:
578 hash = None
579 hash = None
579 standins.append((lfile, hash))
580 standins.append((lfile, hash))
580 return standins
581 return standins
581
582
582
583
583 def synclfdirstate(repo, lfdirstate, lfile, normallookup):
584 def synclfdirstate(repo, lfdirstate, lfile, normallookup):
584 lfstandin = standin(lfile)
585 lfstandin = standin(lfile)
585 if lfstandin not in repo.dirstate:
586 if lfstandin not in repo.dirstate:
586 lfdirstate.hacky_extension_update_file(
587 lfdirstate.hacky_extension_update_file(
587 lfile,
588 lfile,
588 p1_tracked=False,
589 p1_tracked=False,
589 wc_tracked=False,
590 wc_tracked=False,
590 )
591 )
591 else:
592 else:
592 entry = repo.dirstate.get_entry(lfstandin)
593 entry = repo.dirstate.get_entry(lfstandin)
593 lfdirstate.hacky_extension_update_file(
594 lfdirstate.hacky_extension_update_file(
594 lfile,
595 lfile,
595 wc_tracked=entry.tracked,
596 wc_tracked=entry.tracked,
596 p1_tracked=entry.p1_tracked,
597 p1_tracked=entry.p1_tracked,
597 p2_info=entry.p2_info,
598 p2_info=entry.p2_info,
598 possibly_dirty=True,
599 possibly_dirty=True,
599 )
600 )
600
601
601
602
602 def markcommitted(orig, ctx, node):
603 def markcommitted(orig, ctx, node):
603 repo = ctx.repo()
604 repo = ctx.repo()
604
605
605 with repo.dirstate.changing_parents(repo):
606 with repo.dirstate.changing_parents(repo):
606 orig(node)
607 orig(node)
607
608
608 # ATTENTION: "ctx.files()" may differ from "repo[node].files()"
609 # ATTENTION: "ctx.files()" may differ from "repo[node].files()"
609 # because files coming from the 2nd parent are omitted in the latter.
610 # because files coming from the 2nd parent are omitted in the latter.
610 #
611 #
611 # The former should be used to get targets of "synclfdirstate",
612 # The former should be used to get targets of "synclfdirstate",
612 # because such files:
613 # because such files:
613 # - are marked as "a" by "patch.patch()" (e.g. via transplant), and
614 # - are marked as "a" by "patch.patch()" (e.g. via transplant), and
614 # - have to be marked as "n" after commit, but
615 # - have to be marked as "n" after commit, but
615 # - aren't listed in "repo[node].files()"
616 # - aren't listed in "repo[node].files()"
616
617
617 lfdirstate = openlfdirstate(repo.ui, repo)
618 lfdirstate = openlfdirstate(repo.ui, repo)
618 for f in ctx.files():
619 for f in ctx.files():
619 lfile = splitstandin(f)
620 lfile = splitstandin(f)
620 if lfile is not None:
621 if lfile is not None:
621 synclfdirstate(repo, lfdirstate, lfile, False)
622 synclfdirstate(repo, lfdirstate, lfile, False)
622
623
623 # As part of committing, copy all of the largefiles into the cache.
624 # As part of committing, copy all of the largefiles into the cache.
624 #
625 #
625 # Using "node" instead of "ctx" implies additional "repo[node]"
626 # Using "node" instead of "ctx" implies additional "repo[node]"
626 # lookup while copyalltostore(), but can omit redundant check for
627 # lookup while copyalltostore(), but can omit redundant check for
627 # files coming from the 2nd parent, which should exist in store
628 # files coming from the 2nd parent, which should exist in store
628 # at merging.
629 # at merging.
629 copyalltostore(repo, node)
630 copyalltostore(repo, node)
630
631
631
632
632 def getlfilestoupdate(oldstandins, newstandins):
633 def getlfilestoupdate(oldstandins, newstandins):
633 changedstandins = set(oldstandins).symmetric_difference(set(newstandins))
634 changedstandins = set(oldstandins).symmetric_difference(set(newstandins))
634 filelist = []
635 filelist = []
635 for f in changedstandins:
636 for f in changedstandins:
636 if f[0] not in filelist:
637 if f[0] not in filelist:
637 filelist.append(f[0])
638 filelist.append(f[0])
638 return filelist
639 return filelist
639
640
640
641
641 def getlfilestoupload(repo, missing, addfunc):
642 def getlfilestoupload(repo, missing, addfunc):
642 makeprogress = repo.ui.makeprogress
643 makeprogress = repo.ui.makeprogress
643 with makeprogress(
644 with makeprogress(
644 _(b'finding outgoing largefiles'),
645 _(b'finding outgoing largefiles'),
645 unit=_(b'revisions'),
646 unit=_(b'revisions'),
646 total=len(missing),
647 total=len(missing),
647 ) as progress:
648 ) as progress:
648 for i, n in enumerate(missing):
649 for i, n in enumerate(missing):
649 progress.update(i)
650 progress.update(i)
650 parents = [p for p in repo[n].parents() if p != repo.nullid]
651 parents = [p for p in repo[n].parents() if p != repo.nullid]
651
652
652 with lfstatus(repo, value=False):
653 with lfstatus(repo, value=False):
653 ctx = repo[n]
654 ctx = repo[n]
654
655
655 files = set(ctx.files())
656 files = set(ctx.files())
656 if len(parents) == 2:
657 if len(parents) == 2:
657 mc = ctx.manifest()
658 mc = ctx.manifest()
658 mp1 = ctx.p1().manifest()
659 mp1 = ctx.p1().manifest()
659 mp2 = ctx.p2().manifest()
660 mp2 = ctx.p2().manifest()
660 for f in mp1:
661 for f in mp1:
661 if f not in mc:
662 if f not in mc:
662 files.add(f)
663 files.add(f)
663 for f in mp2:
664 for f in mp2:
664 if f not in mc:
665 if f not in mc:
665 files.add(f)
666 files.add(f)
666 for f in mc:
667 for f in mc:
667 if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
668 if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
668 files.add(f)
669 files.add(f)
669 for fn in files:
670 for fn in files:
670 if isstandin(fn) and fn in ctx:
671 if isstandin(fn) and fn in ctx:
671 addfunc(fn, readasstandin(ctx[fn]))
672 addfunc(fn, readasstandin(ctx[fn]))
672
673
673
674
674 def updatestandinsbymatch(repo, match):
675 def updatestandinsbymatch(repo, match):
675 """Update standins in the working directory according to specified match
676 """Update standins in the working directory according to specified match
676
677
677 This returns (possibly modified) ``match`` object to be used for
678 This returns (possibly modified) ``match`` object to be used for
678 subsequent commit process.
679 subsequent commit process.
679 """
680 """
680
681
681 ui = repo.ui
682 ui = repo.ui
682
683
683 # Case 1: user calls commit with no specific files or
684 # Case 1: user calls commit with no specific files or
684 # include/exclude patterns: refresh and commit all files that
685 # include/exclude patterns: refresh and commit all files that
685 # are "dirty".
686 # are "dirty".
686 if match is None or match.always():
687 if match is None or match.always():
687 # Spend a bit of time here to get a list of files we know
688 # Spend a bit of time here to get a list of files we know
688 # are modified so we can compare only against those.
689 # are modified so we can compare only against those.
689 # It can cost a lot of time (several seconds)
690 # It can cost a lot of time (several seconds)
690 # otherwise to update all standins if the largefiles are
691 # otherwise to update all standins if the largefiles are
691 # large.
692 # large.
692 dirtymatch = matchmod.always()
693 dirtymatch = matchmod.always()
693 with repo.dirstate.running_status(repo):
694 with repo.dirstate.running_status(repo):
694 lfdirstate = openlfdirstate(ui, repo)
695 lfdirstate = openlfdirstate(ui, repo)
695 unsure, s, mtime_boundary = lfdirstate.status(
696 unsure, s, mtime_boundary = lfdirstate.status(
696 dirtymatch,
697 dirtymatch,
697 subrepos=[],
698 subrepos=[],
698 ignored=False,
699 ignored=False,
699 clean=False,
700 clean=False,
700 unknown=False,
701 unknown=False,
701 )
702 )
702 modifiedfiles = unsure + s.modified + s.added + s.removed
703 modifiedfiles = unsure + s.modified + s.added + s.removed
703 lfiles = listlfiles(repo)
704 lfiles = listlfiles(repo)
704 # this only loops through largefiles that exist (not
705 # this only loops through largefiles that exist (not
705 # removed/renamed)
706 # removed/renamed)
706 for lfile in lfiles:
707 for lfile in lfiles:
707 if lfile in modifiedfiles:
708 if lfile in modifiedfiles:
708 fstandin = standin(lfile)
709 fstandin = standin(lfile)
709 if repo.wvfs.exists(fstandin):
710 if repo.wvfs.exists(fstandin):
710 # this handles the case where a rebase is being
711 # this handles the case where a rebase is being
711 # performed and the working copy is not updated
712 # performed and the working copy is not updated
712 # yet.
713 # yet.
713 if repo.wvfs.exists(lfile):
714 if repo.wvfs.exists(lfile):
714 updatestandin(repo, lfile, fstandin)
715 updatestandin(repo, lfile, fstandin)
715
716
716 return match
717 return match
717
718
718 lfiles = listlfiles(repo)
719 lfiles = listlfiles(repo)
719 match._files = repo._subdirlfs(match.files(), lfiles)
720 match._files = repo._subdirlfs(match.files(), lfiles)
720
721
721 # Case 2: user calls commit with specified patterns: refresh
722 # Case 2: user calls commit with specified patterns: refresh
722 # any matching big files.
723 # any matching big files.
723 smatcher = composestandinmatcher(repo, match)
724 smatcher = composestandinmatcher(repo, match)
724 standins = repo.dirstate.walk(
725 standins = repo.dirstate.walk(
725 smatcher, subrepos=[], unknown=False, ignored=False
726 smatcher, subrepos=[], unknown=False, ignored=False
726 )
727 )
727
728
728 # No matching big files: get out of the way and pass control to
729 # No matching big files: get out of the way and pass control to
729 # the usual commit() method.
730 # the usual commit() method.
730 if not standins:
731 if not standins:
731 return match
732 return match
732
733
733 # Refresh all matching big files. It's possible that the
734 # Refresh all matching big files. It's possible that the
734 # commit will end up failing, in which case the big files will
735 # commit will end up failing, in which case the big files will
735 # stay refreshed. No harm done: the user modified them and
736 # stay refreshed. No harm done: the user modified them and
736 # asked to commit them, so sooner or later we're going to
737 # asked to commit them, so sooner or later we're going to
737 # refresh the standins. Might as well leave them refreshed.
738 # refresh the standins. Might as well leave them refreshed.
738 lfdirstate = openlfdirstate(ui, repo)
739 lfdirstate = openlfdirstate(ui, repo)
739 for fstandin in standins:
740 for fstandin in standins:
740 lfile = splitstandin(fstandin)
741 lfile = splitstandin(fstandin)
741 if lfdirstate.get_entry(lfile).tracked:
742 if lfdirstate.get_entry(lfile).tracked:
742 updatestandin(repo, lfile, fstandin)
743 updatestandin(repo, lfile, fstandin)
743
744
744 # Cook up a new matcher that only matches regular files or
745 # Cook up a new matcher that only matches regular files or
745 # standins corresponding to the big files requested by the
746 # standins corresponding to the big files requested by the
746 # user. Have to modify _files to prevent commit() from
747 # user. Have to modify _files to prevent commit() from
747 # complaining "not tracked" for big files.
748 # complaining "not tracked" for big files.
748 match = copy.copy(match)
749 match = copy.copy(match)
749 origmatchfn = match.matchfn
750 origmatchfn = match.matchfn
750
751
751 # Check both the list of largefiles and the list of
752 # Check both the list of largefiles and the list of
752 # standins because if a largefile was removed, it
753 # standins because if a largefile was removed, it
753 # won't be in the list of largefiles at this point
754 # won't be in the list of largefiles at this point
754 match._files += sorted(standins)
755 match._files += sorted(standins)
755
756
756 actualfiles = []
757 actualfiles = []
757 for f in match._files:
758 for f in match._files:
758 fstandin = standin(f)
759 fstandin = standin(f)
759
760
760 # For largefiles, only one of the normal and standin should be
761 # For largefiles, only one of the normal and standin should be
761 # committed (except if one of them is a remove). In the case of a
762 # committed (except if one of them is a remove). In the case of a
762 # standin removal, drop the normal file if it is unknown to dirstate.
763 # standin removal, drop the normal file if it is unknown to dirstate.
763 # Thus, skip plain largefile names but keep the standin.
764 # Thus, skip plain largefile names but keep the standin.
764 if f in lfiles or fstandin in standins:
765 if f in lfiles or fstandin in standins:
765 if not repo.dirstate.get_entry(fstandin).removed:
766 if not repo.dirstate.get_entry(fstandin).removed:
766 if not repo.dirstate.get_entry(f).removed:
767 if not repo.dirstate.get_entry(f).removed:
767 continue
768 continue
768 elif not repo.dirstate.get_entry(f).any_tracked:
769 elif not repo.dirstate.get_entry(f).any_tracked:
769 continue
770 continue
770
771
771 actualfiles.append(f)
772 actualfiles.append(f)
772 match._files = actualfiles
773 match._files = actualfiles
773
774
774 def matchfn(f):
775 def matchfn(f):
775 if origmatchfn(f):
776 if origmatchfn(f):
776 return f not in lfiles
777 return f not in lfiles
777 else:
778 else:
778 return f in standins
779 return f in standins
779
780
780 match.matchfn = matchfn
781 match.matchfn = matchfn
781
782
782 return match
783 return match
783
784
784
785
785 class automatedcommithook:
786 class automatedcommithook:
786 """Stateful hook to update standins at the 1st commit of resuming
787 """Stateful hook to update standins at the 1st commit of resuming
787
788
788 For efficiency, updating standins in the working directory should
789 For efficiency, updating standins in the working directory should
789 be avoided while automated committing (like rebase, transplant and
790 be avoided while automated committing (like rebase, transplant and
790 so on), because they should be updated before committing.
791 so on), because they should be updated before committing.
791
792
792 But the 1st commit of resuming automated committing (e.g. ``rebase
793 But the 1st commit of resuming automated committing (e.g. ``rebase
793 --continue``) should update them, because largefiles may be
794 --continue``) should update them, because largefiles may be
794 modified manually.
795 modified manually.
795 """
796 """
796
797
797 def __init__(self, resuming):
798 def __init__(self, resuming):
798 self.resuming = resuming
799 self.resuming = resuming
799
800
800 def __call__(self, repo, match):
801 def __call__(self, repo, match):
801 if self.resuming:
802 if self.resuming:
802 self.resuming = False # avoids updating at subsequent commits
803 self.resuming = False # avoids updating at subsequent commits
803 return updatestandinsbymatch(repo, match)
804 return updatestandinsbymatch(repo, match)
804 else:
805 else:
805 return match
806 return match
806
807
807
808
808 def getstatuswriter(ui, repo, forcibly=None):
809 def getstatuswriter(ui, repo, forcibly=None):
809 """Return the function to write largefiles specific status out
810 """Return the function to write largefiles specific status out
810
811
811 If ``forcibly`` is ``None``, this returns the last element of
812 If ``forcibly`` is ``None``, this returns the last element of
812 ``repo._lfstatuswriters`` as "default" writer function.
813 ``repo._lfstatuswriters`` as "default" writer function.
813
814
814 Otherwise, this returns the function to always write out (or
815 Otherwise, this returns the function to always write out (or
815 ignore if ``not forcibly``) status.
816 ignore if ``not forcibly``) status.
816 """
817 """
817 if forcibly is None and util.safehasattr(repo, b'_largefilesenabled'):
818 if forcibly is None and util.safehasattr(repo, b'_largefilesenabled'):
818 return repo._lfstatuswriters[-1]
819 return repo._lfstatuswriters[-1]
819 else:
820 else:
820 if forcibly:
821 if forcibly:
821 return ui.status # forcibly WRITE OUT
822 return ui.status # forcibly WRITE OUT
822 else:
823 else:
823 return lambda *msg, **opts: None # forcibly IGNORE
824 return lambda *msg, **opts: None # forcibly IGNORE
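For context on the standin scheme used throughout the file above: each largefile is represented in history by a small placeholder under .hglf/ whose content is the largefile's SHA-1 hex digest, and standin()/splitstandin() convert between the two path forms. The following is a self-contained re-implementation of that round trip, written here purely for illustration; it mirrors the helpers above but deliberately avoids importing mercurial.

shortname = b'.hglf'
shortnameslash = shortname + b'/'


def standin(filename: bytes) -> bytes:
    # join with '/' because that is what dirstate uses, even on Windows
    return shortnameslash + filename.replace(b'\\', b'/')


def splitstandin(filename: bytes):
    # inverse operation: strip '.hglf/', or return None for a normal file
    bits = filename.replace(b'\\', b'/').split(b'/', 1)
    if len(bits) == 2 and bits[0] == shortname:
        return bits[1]
    return None


assert standin(b'assets/video.bin') == b'.hglf/assets/video.bin'
assert splitstandin(standin(b'assets/video.bin')) == b'assets/video.bin'
assert splitstandin(b'README') is None

The reposetup hunk that follows builds on exactly this mapping when it wraps ctx.files() and manifest lookups so that callers see largefile names rather than their .hglf/ standins.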
--- a/hgext/largefiles/reposetup.py
+++ b/hgext/largefiles/reposetup.py
@@ -1,469 +1,470 @@
1 # Copyright 2009-2010 Gregory P. Ward
1 # Copyright 2009-2010 Gregory P. Ward
2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
3 # Copyright 2010-2011 Fog Creek Software
3 # Copyright 2010-2011 Fog Creek Software
4 # Copyright 2010-2011 Unity Technologies
4 # Copyright 2010-2011 Unity Technologies
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 '''setup for largefiles repositories: reposetup'''
9 '''setup for largefiles repositories: reposetup'''
10
10
11 import copy
11 import copy
12
12
13 from mercurial.i18n import _
13 from mercurial.i18n import _
14
14
15 from mercurial import (
15 from mercurial import (
16 error,
16 error,
17 extensions,
17 extensions,
18 localrepo,
18 localrepo,
19 match as matchmod,
19 match as matchmod,
20 scmutil,
20 scmutil,
21 util,
21 util,
22 )
22 )
23
23
24 from mercurial.dirstateutils import timestamp
24 from mercurial.dirstateutils import timestamp
25
25
26 from . import (
26 from . import (
27 lfcommands,
27 lfcommands,
28 lfutil,
28 lfutil,
29 )
29 )
30
30
31
31
32 def reposetup(ui, repo):
32 def reposetup(ui, repo):
33 # wire repositories should be given new wireproto functions
33 # wire repositories should be given new wireproto functions
34 # by "proto.wirereposetup()" via "hg.wirepeersetupfuncs"
34 # by "proto.wirereposetup()" via "hg.wirepeersetupfuncs"
35 if not repo.local():
35 if not repo.local():
36 return
36 return
37
37
38 class lfilesrepo(repo.__class__):
38 class lfilesrepo(repo.__class__):
39 # the mark to examine whether "repo" object enables largefiles or not
39 # the mark to examine whether "repo" object enables largefiles or not
40 _largefilesenabled = True
40 _largefilesenabled = True
41
41
42 lfstatus = False
42 lfstatus = False
43
43
44 # When lfstatus is set, return a context that gives the names
44 # When lfstatus is set, return a context that gives the names
45 # of largefiles instead of their corresponding standins and
45 # of largefiles instead of their corresponding standins and
46 # identifies the largefiles as always binary, regardless of
46 # identifies the largefiles as always binary, regardless of
47 # their actual contents.
47 # their actual contents.
48 def __getitem__(self, changeid):
48 def __getitem__(self, changeid):
49 ctx = super(lfilesrepo, self).__getitem__(changeid)
49 ctx = super(lfilesrepo, self).__getitem__(changeid)
50 if self.lfstatus:
50 if self.lfstatus:
51
51
52 def files(orig):
52 def files(orig):
53 filenames = orig()
53 filenames = orig()
54 return [lfutil.splitstandin(f) or f for f in filenames]
54 return [lfutil.splitstandin(f) or f for f in filenames]
55
55
56 extensions.wrapfunction(ctx, 'files', files)
56 extensions.wrapfunction(ctx, 'files', files)
57
57
58 def manifest(orig):
58 def manifest(orig):
59 man1 = orig()
59 man1 = orig()
60
60
61 class lfilesmanifest(man1.__class__):
61 class lfilesmanifest(man1.__class__):
62 def __contains__(self, filename):
62 def __contains__(self, filename):
63 orig = super(lfilesmanifest, self).__contains__
63 orig = super(lfilesmanifest, self).__contains__
64 return orig(filename) or orig(
64 return orig(filename) or orig(
65 lfutil.standin(filename)
65 lfutil.standin(filename)
66 )
66 )
67
67
68 man1.__class__ = lfilesmanifest
68 man1.__class__ = lfilesmanifest
69 return man1
69 return man1
70
70
71 extensions.wrapfunction(ctx, 'manifest', manifest)
71 extensions.wrapfunction(ctx, 'manifest', manifest)
72
72
73 def filectx(orig, path, fileid=None, filelog=None):
73 def filectx(orig, path, fileid=None, filelog=None):
74 try:
74 try:
75 if filelog is not None:
75 if filelog is not None:
76 result = orig(path, fileid, filelog)
76 result = orig(path, fileid, filelog)
77 else:
77 else:
78 result = orig(path, fileid)
78 result = orig(path, fileid)
79 except error.LookupError:
79 except error.LookupError:
80 # Adding a null character will cause Mercurial to
80 # Adding a null character will cause Mercurial to
81 # identify this as a binary file.
81 # identify this as a binary file.
82 if filelog is not None:
82 if filelog is not None:
83 result = orig(lfutil.standin(path), fileid, filelog)
83 result = orig(lfutil.standin(path), fileid, filelog)
84 else:
84 else:
85 result = orig(lfutil.standin(path), fileid)
85 result = orig(lfutil.standin(path), fileid)
86 olddata = result.data
86 olddata = result.data
87 result.data = lambda: olddata() + b'\0'
87 result.data = lambda: olddata() + b'\0'
88 return result
88 return result
89
89
90 extensions.wrapfunction(ctx, 'filectx', filectx)
90 extensions.wrapfunction(ctx, 'filectx', filectx)
91
91
92 return ctx
92 return ctx
93
93
94 # Figure out the status of big files and insert them into the
94 # Figure out the status of big files and insert them into the
95 # appropriate list in the result. Also removes standin files
95 # appropriate list in the result. Also removes standin files
96 # from the listing. Revert to the original status if
96 # from the listing. Revert to the original status if
97 # self.lfstatus is False.
97 # self.lfstatus is False.
98 # XXX large file status is buggy when used on repo proxy.
98 # XXX large file status is buggy when used on repo proxy.
99 # XXX this needs to be investigated.
99 # XXX this needs to be investigated.
100 @localrepo.unfilteredmethod
100 @localrepo.unfilteredmethod
101 def status(
101 def status(
102 self,
102 self,
103 node1=b'.',
103 node1=b'.',
104 node2=None,
104 node2=None,
105 match=None,
105 match=None,
106 ignored=False,
106 ignored=False,
107 clean=False,
107 clean=False,
108 unknown=False,
108 unknown=False,
109 listsubrepos=False,
109 listsubrepos=False,
110 ):
110 ):
111 listignored, listclean, listunknown = ignored, clean, unknown
111 listignored, listclean, listunknown = ignored, clean, unknown
112 orig = super(lfilesrepo, self).status
112 orig = super(lfilesrepo, self).status
113 if not self.lfstatus:
113 if not self.lfstatus:
114 return orig(
114 return orig(
115 node1,
115 node1,
116 node2,
116 node2,
117 match,
117 match,
118 listignored,
118 listignored,
119 listclean,
119 listclean,
120 listunknown,
120 listunknown,
121 listsubrepos,
121 listsubrepos,
122 )
122 )
123
123
124 # some calls in this function rely on the old version of status
124 # some calls in this function rely on the old version of status
125 self.lfstatus = False
125 self.lfstatus = False
126 ctx1 = self[node1]
126 ctx1 = self[node1]
127 ctx2 = self[node2]
127 ctx2 = self[node2]
128 working = ctx2.rev() is None
128 working = ctx2.rev() is None
129 parentworking = working and ctx1 == self[b'.']
129 parentworking = working and ctx1 == self[b'.']
130
130
131 if match is None:
131 if match is None:
132 match = matchmod.always()
132 match = matchmod.always()
133
133
134 try:
134 try:
135 # updating the dirstate is optional
135 # updating the dirstate is optional
136 # so we don't wait on the lock
136 # so we don't wait on the lock
137 wlock = self.wlock(False)
137 wlock = self.wlock(False)
138 gotlock = True
138 gotlock = True
139 except error.LockError:
139 except error.LockError:
140 wlock = util.nullcontextmanager()
140 wlock = util.nullcontextmanager()
141 gotlock = False
141 gotlock = False
142 with wlock, self.dirstate.running_status(self):
142 with wlock, self.dirstate.running_status(self):
143
143
144 # First check if paths or patterns were specified on the
144 # First check if paths or patterns were specified on the
145 # command line. If there were, and they don't match any
145 # command line. If there were, and they don't match any
146 # largefiles, we should just bail here and let super
146 # largefiles, we should just bail here and let super
147 # handle it -- thus gaining a big performance boost.
147 # handle it -- thus gaining a big performance boost.
148 lfdirstate = lfutil.openlfdirstate(ui, self)
148 lfdirstate = lfutil.openlfdirstate(ui, self)
149 if not match.always():
149 if not match.always():
150 for f in lfdirstate:
150 for f in lfdirstate:
151 if match(f):
151 if match(f):
152 break
152 break
153 else:
153 else:
154 return orig(
154 return orig(
155 node1,
155 node1,
156 node2,
156 node2,
157 match,
157 match,
158 listignored,
158 listignored,
159 listclean,
159 listclean,
160 listunknown,
160 listunknown,
161 listsubrepos,
161 listsubrepos,
162 )
162 )
163
163
164 # Create a copy of match that matches standins instead
164 # Create a copy of match that matches standins instead
165 # of largefiles.
165 # of largefiles.
166 def tostandins(files):
166 def tostandins(files):
167 if not working:
167 if not working:
168 return files
168 return files
169 newfiles = []
169 newfiles = []
170 dirstate = self.dirstate
170 dirstate = self.dirstate
171 for f in files:
171 for f in files:
172 sf = lfutil.standin(f)
172 sf = lfutil.standin(f)
173 if sf in dirstate:
173 if sf in dirstate:
174 newfiles.append(sf)
174 newfiles.append(sf)
175 elif dirstate.hasdir(sf):
175 elif dirstate.hasdir(sf):
176 # Directory entries could be regular or
176 # Directory entries could be regular or
177 # standin, check both
177 # standin, check both
178 newfiles.extend((f, sf))
178 newfiles.extend((f, sf))
179 else:
179 else:
180 newfiles.append(f)
180 newfiles.append(f)
181 return newfiles
181 return newfiles
182
182
183 m = copy.copy(match)
183 m = copy.copy(match)
184 m._files = tostandins(m._files)
184 m._files = tostandins(m._files)
185
185
186 result = orig(
186 result = orig(
187 node1, node2, m, ignored, clean, unknown, listsubrepos
187 node1, node2, m, ignored, clean, unknown, listsubrepos
188 )
188 )
189 if working:
189 if working:
190
190
191 def sfindirstate(f):
191 def sfindirstate(f):
192 sf = lfutil.standin(f)
192 sf = lfutil.standin(f)
193 dirstate = self.dirstate
193 dirstate = self.dirstate
194 return sf in dirstate or dirstate.hasdir(sf)
194 return sf in dirstate or dirstate.hasdir(sf)
195
195
196 match._files = [f for f in match._files if sfindirstate(f)]
196 match._files = [f for f in match._files if sfindirstate(f)]
197 # Don't waste time getting the ignored and unknown
197 # Don't waste time getting the ignored and unknown
198 # files from lfdirstate
198 # files from lfdirstate
199 unsure, s, mtime_boundary = lfdirstate.status(
199 unsure, s, mtime_boundary = lfdirstate.status(
200 match,
200 match,
201 subrepos=[],
201 subrepos=[],
202 ignored=False,
202 ignored=False,
203 clean=listclean,
203 clean=listclean,
204 unknown=False,
204 unknown=False,
205 )
205 )
206 (modified, added, removed, deleted, clean) = (
206 (modified, added, removed, deleted, clean) = (
207 s.modified,
207 s.modified,
208 s.added,
208 s.added,
209 s.removed,
209 s.removed,
210 s.deleted,
210 s.deleted,
211 s.clean,
211 s.clean,
212 )
212 )
213 if parentworking:
213 if parentworking:
214 wctx = repo[None]
214 wctx = repo[None]
215 for lfile in unsure:
215 for lfile in unsure:
216 standin = lfutil.standin(lfile)
216 standin = lfutil.standin(lfile)
217 if standin not in ctx1:
217 if standin not in ctx1:
218 # from second parent
218 # from second parent
219 modified.append(lfile)
219 modified.append(lfile)
220 elif lfutil.readasstandin(
220 elif lfutil.readasstandin(
221 ctx1[standin]
221 ctx1[standin]
222 ) != lfutil.hashfile(self.wjoin(lfile)):
222 ) != lfutil.hashfile(self.wjoin(lfile)):
223 modified.append(lfile)
223 modified.append(lfile)
224 else:
224 else:
225 if listclean:
225 if listclean:
226 clean.append(lfile)
226 clean.append(lfile)
227 s = wctx[lfile].lstat()
227 s = wctx[lfile].lstat()
228 mode = s.st_mode
228 mode = s.st_mode
229 size = s.st_size
229 size = s.st_size
230 mtime = timestamp.reliable_mtime_of(
230 mtime = timestamp.reliable_mtime_of(
231 s, mtime_boundary
231 s, mtime_boundary
232 )
232 )
233 if mtime is not None:
233 if mtime is not None:
234 cache_data = (mode, size, mtime)
234 cache_data = (mode, size, mtime)
235 lfdirstate.set_clean(lfile, cache_data)
235 lfdirstate.set_clean(lfile, cache_data)
236 else:
236 else:
237 tocheck = unsure + modified + added + clean
237 tocheck = unsure + modified + added + clean
238 modified, added, clean = [], [], []
238 modified, added, clean = [], [], []
239 checkexec = self.dirstate._checkexec
239 checkexec = self.dirstate._checkexec
240
240
241 for lfile in tocheck:
241 for lfile in tocheck:
242 standin = lfutil.standin(lfile)
242 standin = lfutil.standin(lfile)
243 if standin in ctx1:
243 if standin in ctx1:
244 abslfile = self.wjoin(lfile)
244 abslfile = self.wjoin(lfile)
245 if (
245 if (
246 lfutil.readasstandin(ctx1[standin])
246 lfutil.readasstandin(ctx1[standin])
247 != lfutil.hashfile(abslfile)
247 != lfutil.hashfile(abslfile)
248 ) or (
248 ) or (
249 checkexec
249 checkexec
250 and (b'x' in ctx1.flags(standin))
250 and (b'x' in ctx1.flags(standin))
251 != bool(lfutil.getexecutable(abslfile))
251 != bool(lfutil.getexecutable(abslfile))
252 ):
252 ):
253 modified.append(lfile)
253 modified.append(lfile)
254 elif listclean:
254 elif listclean:
255 clean.append(lfile)
255 clean.append(lfile)
256 else:
256 else:
257 added.append(lfile)
257 added.append(lfile)
258
258
259 # At this point, 'removed' contains largefiles
259 # At this point, 'removed' contains largefiles
260 # marked as 'R' in the working context. Largefiles
260 # marked as 'R' in the working context. Largefiles
261 # that are not managed in the target context either
261 # that are not managed in the target context either
262 # should now be excluded from 'removed'.
262 # should now be excluded from 'removed'.
263 removed = [
263 removed = [
264 lfile
264 lfile
265 for lfile in removed
265 for lfile in removed
266 if lfutil.standin(lfile) in ctx1
266 if lfutil.standin(lfile) in ctx1
267 ]
267 ]
268
268
269 # Standins no longer found in lfdirstate have been deleted
269 # Standins no longer found in lfdirstate have been deleted
270 for standin in ctx1.walk(lfutil.getstandinmatcher(self)):
270 for standin in ctx1.walk(lfutil.getstandinmatcher(self)):
271 lfile = lfutil.splitstandin(standin)
271 lfile = lfutil.splitstandin(standin)
272 if not match(lfile):
272 if not match(lfile):
273 continue
273 continue
274 if lfile not in lfdirstate:
274 if lfile not in lfdirstate:
275 deleted.append(lfile)
275 deleted.append(lfile)
276 # Sync "largefile has been removed" back to the
276 # Sync "largefile has been removed" back to the
277 # standin. Removing a file as a side effect of
277 # standin. Removing a file as a side effect of
278 # running status is gross, but the alternatives (if
278 # running status is gross, but the alternatives (if
279 # any) are worse.
279 # any) are worse.
280 self.wvfs.unlinkpath(standin, ignoremissing=True)
280 self.wvfs.unlinkpath(standin, ignoremissing=True)
281
281
282 # Filter result lists
282 # Filter result lists
283 result = list(result)
283 result = list(result)
284
284
285 # Largefiles are not really removed when they're
285 # Largefiles are not really removed when they're
286 # still in the normal dirstate. Likewise, normal
286 # still in the normal dirstate. Likewise, normal
287 # files are not really removed if they are still in
287 # files are not really removed if they are still in
288 # lfdirstate. This happens in merges where files
288 # lfdirstate. This happens in merges where files
289 # change type.
289 # change type.
290 removed = [f for f in removed if f not in self.dirstate]
290 removed = [f for f in removed if f not in self.dirstate]
291 result[2] = [f for f in result[2] if f not in lfdirstate]
291 result[2] = [f for f in result[2] if f not in lfdirstate]
292
292
293 lfiles = set(lfdirstate)
293 lfiles = set(lfdirstate)
294 # Unknown files
294 # Unknown files
295 result[4] = set(result[4]).difference(lfiles)
295 result[4] = set(result[4]).difference(lfiles)
296 # Ignored files
296 # Ignored files
297 result[5] = set(result[5]).difference(lfiles)
297 result[5] = set(result[5]).difference(lfiles)
298 # combine normal files and largefiles
298 # combine normal files and largefiles
299 normals = [
299 normals = [
300 [fn for fn in filelist if not lfutil.isstandin(fn)]
300 [fn for fn in filelist if not lfutil.isstandin(fn)]
301 for filelist in result
301 for filelist in result
302 ]
302 ]
303 lfstatus = (
303 lfstatus = (
304 modified,
304 modified,
305 added,
305 added,
306 removed,
306 removed,
307 deleted,
307 deleted,
308 [],
308 [],
309 [],
309 [],
310 clean,
310 clean,
311 )
311 )
312 result = [
312 result = [
313 sorted(list1 + list2)
313 sorted(list1 + list2)
314 for (list1, list2) in zip(normals, lfstatus)
314 for (list1, list2) in zip(normals, lfstatus)
315 ]
315 ]
316 else: # not against working directory
316 else: # not against working directory
317 result = [
317 result = [
318 [lfutil.splitstandin(f) or f for f in items]
318 [lfutil.splitstandin(f) or f for f in items]
319 for items in result
319 for items in result
320 ]
320 ]
321
321
322 if gotlock:
322 if gotlock:
323 lfdirstate.write(self.currenttransaction())
323 lfdirstate.write(self.currenttransaction())
324 else:
324 else:
325 lfdirstate.invalidate()
325 lfdirstate.invalidate()
326
326
327 self.lfstatus = True
327 self.lfstatus = True
328 return scmutil.status(*result)
328 return scmutil.status(*result)
329
329
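# Illustrative sketch of the final merge step in status() above: standin
# entries are filtered out of each plain-status list and the corresponding
# largefile list is folded back in. Plain lists of bytes stand in for the
# scmutil.status fields here; the exact field order is not modelled.
def merge_status(normal_lists, largefile_lists, is_standin):
    normals = [[f for f in lst if not is_standin(f)] for lst in normal_lists]
    return [sorted(a + b) for a, b in zip(normals, largefile_lists)]

normal = [[b'.hglf/big.iso', b'src/main.c'], [], [], [], [], [], []]
large = [[b'big.iso'], [], [], [], [], [], []]
merged = merge_status(normal, large, lambda f: f.startswith(b'.hglf/'))
assert merged[0] == [b'big.iso', b'src/main.c']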
330 def commitctx(self, ctx, *args, **kwargs):
330 def commitctx(self, ctx, *args, **kwargs):
331 node = super(lfilesrepo, self).commitctx(ctx, *args, **kwargs)
331 node = super(lfilesrepo, self).commitctx(ctx, *args, **kwargs)
332
332
333 class lfilesctx(ctx.__class__):
333 class lfilesctx(ctx.__class__):
334 def markcommitted(self, node):
334 def markcommitted(self, node):
335 orig = super(lfilesctx, self).markcommitted
335 orig = super(lfilesctx, self).markcommitted
336 return lfutil.markcommitted(orig, self, node)
336 return lfutil.markcommitted(orig, self, node)
337
337
338 ctx.__class__ = lfilesctx
338 ctx.__class__ = lfilesctx
339 return node
339 return node
340
340
341 # Before commit, largefile standins have not had their
341 # Before commit, largefile standins have not had their
342 # contents updated to reflect the hash of their largefile.
342 # contents updated to reflect the hash of their largefile.
343 # Do that here.
343 # Do that here.
344 def commit(
344 def commit(
345 self,
345 self,
346 text=b"",
346 text=b"",
347 user=None,
347 user=None,
348 date=None,
348 date=None,
349 match=None,
349 match=None,
350 force=False,
350 force=False,
351 editor=False,
351 editor=False,
352 extra=None,
352 extra=None,
353 ):
353 ):
354 if extra is None:
354 if extra is None:
355 extra = {}
355 extra = {}
356 orig = super(lfilesrepo, self).commit
356 orig = super(lfilesrepo, self).commit
357
357
358 with self.wlock():
358 with self.wlock():
359 lfcommithook = self._lfcommithooks[-1]
359 lfcommithook = self._lfcommithooks[-1]
360 match = lfcommithook(self, match)
360 match = lfcommithook(self, match)
361 result = orig(
361 result = orig(
362 text=text,
362 text=text,
363 user=user,
363 user=user,
364 date=date,
364 date=date,
365 match=match,
365 match=match,
366 force=force,
366 force=force,
367 editor=editor,
367 editor=editor,
368 extra=extra,
368 extra=extra,
369 )
369 )
370 return result
370 return result
371
371
372 # TODO: _subdirlfs should be moved into "lfutil.py", because
372 # TODO: _subdirlfs should be moved into "lfutil.py", because
373 # it is referenced only by "lfutil.updatestandinsbymatch"
373 # it is referenced only by "lfutil.updatestandinsbymatch"
374 def _subdirlfs(self, files, lfiles):
374 def _subdirlfs(self, files, lfiles):
375 """
375 """
376 Adjust matched file list
376 Adjust matched file list
377 If we pass a directory to commit whose only committable files
377 If we pass a directory to commit whose only committable files
378 are largefiles, the core commit code aborts before finding
378 are largefiles, the core commit code aborts before finding
379 the largefiles.
379 the largefiles.
380 So we do the following:
380 So we do the following:
381 For directories that only have largefiles as matches,
381 For directories that only have largefiles as matches,
382 we explicitly add the largefiles to the match list and remove
382 we explicitly add the largefiles to the match list and remove
383 the directory.
383 the directory.
384 In other cases, we leave the match list unmodified.
384 In other cases, we leave the match list unmodified.
385 """
385 """
386 actualfiles = []
386 actualfiles = []
387 dirs = []
387 dirs = []
388 regulars = []
388 regulars = []
389
389
390 for f in files:
390 for f in files:
391 if lfutil.isstandin(f + b'/'):
391 if lfutil.isstandin(f + b'/'):
392 raise error.Abort(
392 raise error.Abort(
393 _(b'file "%s" is a largefile standin') % f,
393 _(b'file "%s" is a largefile standin') % f,
394 hint=b'commit the largefile itself instead',
394 hint=b'commit the largefile itself instead',
395 )
395 )
396 # Scan directories
396 # Scan directories
397 if self.wvfs.isdir(f):
397 if self.wvfs.isdir(f):
398 dirs.append(f)
398 dirs.append(f)
399 else:
399 else:
400 regulars.append(f)
400 regulars.append(f)
401
401
402 for f in dirs:
402 for f in dirs:
403 matcheddir = False
403 matcheddir = False
404 d = self.dirstate.normalize(f) + b'/'
404 d = self.dirstate.normalize(f) + b'/'
405 # Check for matched normal files
405 # Check for matched normal files
406 for mf in regulars:
406 for mf in regulars:
407 if self.dirstate.normalize(mf).startswith(d):
407 if self.dirstate.normalize(mf).startswith(d):
408 actualfiles.append(f)
408 actualfiles.append(f)
409 matcheddir = True
409 matcheddir = True
410 break
410 break
411 if not matcheddir:
411 if not matcheddir:
412 # If no normal match, manually append
412 # If no normal match, manually append
413 # any matching largefiles
413 # any matching largefiles
414 for lf in lfiles:
414 for lf in lfiles:
415 if self.dirstate.normalize(lf).startswith(d):
415 if self.dirstate.normalize(lf).startswith(d):
416 actualfiles.append(lf)
416 actualfiles.append(lf)
417 if not matcheddir:
417 if not matcheddir:
418 # There may still be normal files in the dir, so
418 # There may still be normal files in the dir, so
419 # add a directory to the list, which
419 # add a directory to the list, which
420 # forces status/dirstate to walk all files and
420 # forces status/dirstate to walk all files and
421 # call the match function on the matcher, even
421 # call the match function on the matcher, even
422 # on case sensitive filesystems.
422 # on case sensitive filesystems.
423 actualfiles.append(b'.')
423 actualfiles.append(b'.')
424 matcheddir = True
424 matcheddir = True
425 # Nothing in dir, so re-add it
425 # Nothing in dir, so re-add it
426 # and let commit reject it
426 # and let commit reject it
427 if not matcheddir:
427 if not matcheddir:
428 actualfiles.append(f)
428 actualfiles.append(f)
429
429
430 # Always add normal files
430 # Always add normal files
431 actualfiles += regulars
431 actualfiles += regulars
432 return actualfiles
432 return actualfiles
433
433
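# Illustrative example of the adjustment _subdirlfs() above performs: a
# directory whose only matches are largefiles is replaced by those largefiles,
# so the core commit code does not abort before finding them. This is a
# simplified model; the real method also normalizes paths, rejects standins,
# and falls back to adding b'.' when normal files may still be present.
def expand_largefile_only_dirs(dirs, largefiles, regulars):
    out = []
    for d in dirs:
        prefix = d + b'/'
        if any(r.startswith(prefix) for r in regulars):
            out.append(d)  # matched normal files: keep the directory itself
        else:
            out.extend(lf for lf in largefiles if lf.startswith(prefix))
    return out + regulars  # normal files are always added back

assert expand_largefile_only_dirs(
    [b'assets'], [b'assets/video.mp4'], []
) == [b'assets/video.mp4']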
434 repo.__class__ = lfilesrepo
434 repo.__class__ = lfilesrepo
435
435
436 # stack of hooks being executed before committing.
436 # stack of hooks being executed before committing.
437 # only last element ("_lfcommithooks[-1]") is used for each committing.
437 # only last element ("_lfcommithooks[-1]") is used for each committing.
438 repo._lfcommithooks = [lfutil.updatestandinsbymatch]
438 repo._lfcommithooks = [lfutil.updatestandinsbymatch]
439
439
440 # Stack of status writer functions taking "*msg, **opts" arguments
440 # Stack of status writer functions taking "*msg, **opts" arguments
441 # like "ui.status()". Only last element ("_lfstatuswriters[-1]")
441 # like "ui.status()". Only last element ("_lfstatuswriters[-1]")
442 # is used to write status out.
442 # is used to write status out.
443 repo._lfstatuswriters = [ui.status]
443 repo._lfstatuswriters = [ui.status]
444
444
445 def prepushoutgoinghook(pushop):
445 def prepushoutgoinghook(pushop):
446 """Push largefiles for pushop before pushing revisions."""
446 """Push largefiles for pushop before pushing revisions."""
447 lfrevs = pushop.lfrevs
447 lfrevs = pushop.lfrevs
448 if lfrevs is None:
448 if lfrevs is None:
449 lfrevs = pushop.outgoing.missing
449 lfrevs = pushop.outgoing.missing
450 if lfrevs:
450 if lfrevs:
451 toupload = set()
451 toupload = set()
452 addfunc = lambda fn, lfhash: toupload.add(lfhash)
452 addfunc = lambda fn, lfhash: toupload.add(lfhash)
453 lfutil.getlfilestoupload(pushop.repo, lfrevs, addfunc)
453 lfutil.getlfilestoupload(pushop.repo, lfrevs, addfunc)
454 lfcommands.uploadlfiles(ui, pushop.repo, pushop.remote, toupload)
454 lfcommands.uploadlfiles(ui, pushop.repo, pushop.remote, toupload)
455
455
456 repo.prepushoutgoinghooks.add(b"largefiles", prepushoutgoinghook)
456 repo.prepushoutgoinghooks.add(b"largefiles", prepushoutgoinghook)
457
457
458 def checkrequireslfiles(ui, repo, **kwargs):
458 def checkrequireslfiles(ui, repo, **kwargs):
459 with repo.lock():
459 with repo.lock():
460 if b'largefiles' not in repo.requirements and any(
460 if b'largefiles' not in repo.requirements and any(
461 - lfutil.shortname + b'/' in f[1] for f in repo.store.datafiles()
461 + lfutil.shortname + b'/' in entry.unencoded_path
462 + for entry in repo.store.datafiles()
462 ):
463 ):
463 repo.requirements.add(b'largefiles')
464 repo.requirements.add(b'largefiles')
464 scmutil.writereporequirements(repo)
465 scmutil.writereporequirements(repo)
465
466
466 ui.setconfig(
467 ui.setconfig(
467 b'hooks', b'changegroup.lfiles', checkrequireslfiles, b'largefiles'
468 b'hooks', b'changegroup.lfiles', checkrequireslfiles, b'largefiles'
468 )
469 )
469 ui.setconfig(b'hooks', b'commit.lfiles', checkrequireslfiles, b'largefiles')
470 ui.setconfig(b'hooks', b'commit.lfiles', checkrequireslfiles, b'largefiles')
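# Illustrative sketch of the store API change this patch applies: callers of
# repo.store.datafiles() now receive entry objects exposing an unencoded_path
# attribute instead of unpacking tuples of roughly (type, unencoded_path,
# size). The tiny class below is only a stand-in for the real StoreEntry in
# mercurial/store.py; the example paths are made up.
class FakeStoreEntry:
    def __init__(self, unencoded_path):
        self.unencoded_path = unencoded_path

def fake_datafiles():
    yield FakeStoreEntry(b'data/.hglf/big.iso.i')
    yield FakeStoreEntry(b'data/src/main.c.i')

# old style: any(lfutil.shortname + b'/' in f[1] for f in store.datafiles())
# new style, as in checkrequireslfiles() above:
assert any(b'.hglf/' in e.unencoded_path for e in fake_datafiles())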
@@ -1,695 +1,696 b''
1 # narrowcommands.py - command modifications for narrowhg extension
1 # narrowcommands.py - command modifications for narrowhg extension
2 #
2 #
3 # Copyright 2017 Google, Inc.
3 # Copyright 2017 Google, Inc.
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 import itertools
8 import itertools
9 import os
9 import os
10
10
11 from mercurial.i18n import _
11 from mercurial.i18n import _
12 from mercurial.node import (
12 from mercurial.node import (
13 hex,
13 hex,
14 short,
14 short,
15 )
15 )
16 from mercurial import (
16 from mercurial import (
17 bundle2,
17 bundle2,
18 cmdutil,
18 cmdutil,
19 commands,
19 commands,
20 discovery,
20 discovery,
21 encoding,
21 encoding,
22 error,
22 error,
23 exchange,
23 exchange,
24 extensions,
24 extensions,
25 hg,
25 hg,
26 narrowspec,
26 narrowspec,
27 pathutil,
27 pathutil,
28 pycompat,
28 pycompat,
29 registrar,
29 registrar,
30 repair,
30 repair,
31 repoview,
31 repoview,
32 requirements,
32 requirements,
33 sparse,
33 sparse,
34 util,
34 util,
35 wireprototypes,
35 wireprototypes,
36 )
36 )
37 from mercurial.utils import (
37 from mercurial.utils import (
38 urlutil,
38 urlutil,
39 )
39 )
40
40
41 table = {}
41 table = {}
42 command = registrar.command(table)
42 command = registrar.command(table)
43
43
44
44
45 def setup():
45 def setup():
46 """Wraps user-facing mercurial commands with narrow-aware versions."""
46 """Wraps user-facing mercurial commands with narrow-aware versions."""
47
47
48 entry = extensions.wrapcommand(commands.table, b'clone', clonenarrowcmd)
48 entry = extensions.wrapcommand(commands.table, b'clone', clonenarrowcmd)
49 entry[1].append(
49 entry[1].append(
50 (b'', b'narrow', None, _(b"create a narrow clone of select files"))
50 (b'', b'narrow', None, _(b"create a narrow clone of select files"))
51 )
51 )
52 entry[1].append(
52 entry[1].append(
53 (
53 (
54 b'',
54 b'',
55 b'depth',
55 b'depth',
56 b'',
56 b'',
57 _(b"limit the history fetched by distance from heads"),
57 _(b"limit the history fetched by distance from heads"),
58 )
58 )
59 )
59 )
60 entry[1].append((b'', b'narrowspec', b'', _(b"read narrowspecs from file")))
60 entry[1].append((b'', b'narrowspec', b'', _(b"read narrowspecs from file")))
61 # TODO(durin42): unify sparse/narrow --include/--exclude logic a bit
61 # TODO(durin42): unify sparse/narrow --include/--exclude logic a bit
62 if b'sparse' not in extensions.enabled():
62 if b'sparse' not in extensions.enabled():
63 entry[1].append(
63 entry[1].append(
64 (b'', b'include', [], _(b"specifically fetch this file/directory"))
64 (b'', b'include', [], _(b"specifically fetch this file/directory"))
65 )
65 )
66 entry[1].append(
66 entry[1].append(
67 (
67 (
68 b'',
68 b'',
69 b'exclude',
69 b'exclude',
70 [],
70 [],
71 _(b"do not fetch this file/directory, even if included"),
71 _(b"do not fetch this file/directory, even if included"),
72 )
72 )
73 )
73 )
74
74
75 entry = extensions.wrapcommand(commands.table, b'pull', pullnarrowcmd)
75 entry = extensions.wrapcommand(commands.table, b'pull', pullnarrowcmd)
76 entry[1].append(
76 entry[1].append(
77 (
77 (
78 b'',
78 b'',
79 b'depth',
79 b'depth',
80 b'',
80 b'',
81 _(b"limit the history fetched by distance from heads"),
81 _(b"limit the history fetched by distance from heads"),
82 )
82 )
83 )
83 )
84
84
85 extensions.wrapcommand(commands.table, b'archive', archivenarrowcmd)
85 extensions.wrapcommand(commands.table, b'archive', archivenarrowcmd)
86
86
87
87
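# Illustrative sketch of the wrapper convention used throughout this file:
# extensions.wrapcommand()/wrapfunction() arrange for the replacement to be
# called with the original callable as its first argument ("orig"), which is
# why clonenarrowcmd(), pullnarrowcmd() and friends take orig first. The
# functools.partial dispatch below only mimics that calling convention.
import functools

def original_clone(ui, source):
    return b'cloned ' + source

def narrow_aware_clone(orig, ui, source, narrow=False):
    result = orig(ui, source)
    return result + (b' (narrow)' if narrow else b'')

wrapped = functools.partial(narrow_aware_clone, original_clone)
assert wrapped(None, b'repo') == b'cloned repo'
assert wrapped(None, b'repo', narrow=True) == b'cloned repo (narrow)'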
88 def clonenarrowcmd(orig, ui, repo, *args, **opts):
88 def clonenarrowcmd(orig, ui, repo, *args, **opts):
89 """Wraps clone command, so 'hg clone' first wraps localrepo.clone()."""
89 """Wraps clone command, so 'hg clone' first wraps localrepo.clone()."""
90 opts = pycompat.byteskwargs(opts)
90 opts = pycompat.byteskwargs(opts)
91 wrappedextraprepare = util.nullcontextmanager()
91 wrappedextraprepare = util.nullcontextmanager()
92 narrowspecfile = opts[b'narrowspec']
92 narrowspecfile = opts[b'narrowspec']
93
93
94 if narrowspecfile:
94 if narrowspecfile:
95 filepath = os.path.join(encoding.getcwd(), narrowspecfile)
95 filepath = os.path.join(encoding.getcwd(), narrowspecfile)
96 ui.status(_(b"reading narrowspec from '%s'\n") % filepath)
96 ui.status(_(b"reading narrowspec from '%s'\n") % filepath)
97 try:
97 try:
98 fdata = util.readfile(filepath)
98 fdata = util.readfile(filepath)
99 except IOError as inst:
99 except IOError as inst:
100 raise error.Abort(
100 raise error.Abort(
101 _(b"cannot read narrowspecs from '%s': %s")
101 _(b"cannot read narrowspecs from '%s': %s")
102 % (filepath, encoding.strtolocal(inst.strerror))
102 % (filepath, encoding.strtolocal(inst.strerror))
103 )
103 )
104
104
105 includes, excludes, profiles = sparse.parseconfig(ui, fdata, b'narrow')
105 includes, excludes, profiles = sparse.parseconfig(ui, fdata, b'narrow')
106 if profiles:
106 if profiles:
107 raise error.ConfigError(
107 raise error.ConfigError(
108 _(
108 _(
109 b"cannot specify other files using '%include' in"
109 b"cannot specify other files using '%include' in"
110 b" narrowspec"
110 b" narrowspec"
111 )
111 )
112 )
112 )
113
113
114 narrowspec.validatepatterns(includes)
114 narrowspec.validatepatterns(includes)
115 narrowspec.validatepatterns(excludes)
115 narrowspec.validatepatterns(excludes)
116
116
117 # a narrowspec was passed, so assume that the user wants a narrow clone
117 # a narrowspec was passed, so assume that the user wants a narrow clone
118 opts[b'narrow'] = True
118 opts[b'narrow'] = True
119 opts[b'include'].extend(includes)
119 opts[b'include'].extend(includes)
120 opts[b'exclude'].extend(excludes)
120 opts[b'exclude'].extend(excludes)
121
121
122 if opts[b'narrow']:
122 if opts[b'narrow']:
123
123
124 def pullbundle2extraprepare_widen(orig, pullop, kwargs):
124 def pullbundle2extraprepare_widen(orig, pullop, kwargs):
125 orig(pullop, kwargs)
125 orig(pullop, kwargs)
126
126
127 if opts.get(b'depth'):
127 if opts.get(b'depth'):
128 kwargs[b'depth'] = opts[b'depth']
128 kwargs[b'depth'] = opts[b'depth']
129
129
130 wrappedextraprepare = extensions.wrappedfunction(
130 wrappedextraprepare = extensions.wrappedfunction(
131 exchange, b'_pullbundle2extraprepare', pullbundle2extraprepare_widen
131 exchange, b'_pullbundle2extraprepare', pullbundle2extraprepare_widen
132 )
132 )
133
133
134 with wrappedextraprepare:
134 with wrappedextraprepare:
135 return orig(ui, repo, *args, **pycompat.strkwargs(opts))
135 return orig(ui, repo, *args, **pycompat.strkwargs(opts))
136
136
137
137
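# Illustrative example of the narrowspec file consumed via --narrowspec above.
# sparse.parseconfig() understands [include]/[exclude] sections (profiles
# pulled in via '%include' are rejected by the caller, as shown above). The
# minimal hand-rolled parser below only makes the expected shape concrete and
# is not the real parser.
SPEC = b"""
[include]
path:src
path:docs/manual
[exclude]
path:src/generated
"""

def parse_minimal(data):
    includes, excludes, section = [], [], None
    for line in data.splitlines():
        line = line.strip()
        if not line:
            continue
        if line == b'[include]':
            section = includes
        elif line == b'[exclude]':
            section = excludes
        elif section is not None:
            section.append(line)
    return includes, excludes

inc, exc = parse_minimal(SPEC)
assert inc == [b'path:src', b'path:docs/manual']
assert exc == [b'path:src/generated']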
138 def pullnarrowcmd(orig, ui, repo, *args, **opts):
138 def pullnarrowcmd(orig, ui, repo, *args, **opts):
139 """Wraps pull command to allow modifying narrow spec."""
139 """Wraps pull command to allow modifying narrow spec."""
140 wrappedextraprepare = util.nullcontextmanager()
140 wrappedextraprepare = util.nullcontextmanager()
141 if requirements.NARROW_REQUIREMENT in repo.requirements:
141 if requirements.NARROW_REQUIREMENT in repo.requirements:
142
142
143 def pullbundle2extraprepare_widen(orig, pullop, kwargs):
143 def pullbundle2extraprepare_widen(orig, pullop, kwargs):
144 orig(pullop, kwargs)
144 orig(pullop, kwargs)
145 if opts.get('depth'):
145 if opts.get('depth'):
146 kwargs[b'depth'] = opts['depth']
146 kwargs[b'depth'] = opts['depth']
147
147
148 wrappedextraprepare = extensions.wrappedfunction(
148 wrappedextraprepare = extensions.wrappedfunction(
149 exchange, b'_pullbundle2extraprepare', pullbundle2extraprepare_widen
149 exchange, b'_pullbundle2extraprepare', pullbundle2extraprepare_widen
150 )
150 )
151
151
152 with wrappedextraprepare:
152 with wrappedextraprepare:
153 return orig(ui, repo, *args, **opts)
153 return orig(ui, repo, *args, **opts)
154
154
155
155
156 def archivenarrowcmd(orig, ui, repo, *args, **opts):
156 def archivenarrowcmd(orig, ui, repo, *args, **opts):
157 """Wraps archive command to narrow the default includes."""
157 """Wraps archive command to narrow the default includes."""
158 if requirements.NARROW_REQUIREMENT in repo.requirements:
158 if requirements.NARROW_REQUIREMENT in repo.requirements:
159 repo_includes, repo_excludes = repo.narrowpats
159 repo_includes, repo_excludes = repo.narrowpats
160 includes = set(opts.get('include', []))
160 includes = set(opts.get('include', []))
161 excludes = set(opts.get('exclude', []))
161 excludes = set(opts.get('exclude', []))
162 includes, excludes, unused_invalid = narrowspec.restrictpatterns(
162 includes, excludes, unused_invalid = narrowspec.restrictpatterns(
163 includes, excludes, repo_includes, repo_excludes
163 includes, excludes, repo_includes, repo_excludes
164 )
164 )
165 if includes:
165 if includes:
166 opts['include'] = includes
166 opts['include'] = includes
167 if excludes:
167 if excludes:
168 opts['exclude'] = excludes
168 opts['exclude'] = excludes
169 return orig(ui, repo, *args, **opts)
169 return orig(ui, repo, *args, **opts)
170
170
171
171
172 def pullbundle2extraprepare(orig, pullop, kwargs):
172 def pullbundle2extraprepare(orig, pullop, kwargs):
173 repo = pullop.repo
173 repo = pullop.repo
174 if requirements.NARROW_REQUIREMENT not in repo.requirements:
174 if requirements.NARROW_REQUIREMENT not in repo.requirements:
175 return orig(pullop, kwargs)
175 return orig(pullop, kwargs)
176
176
177 if wireprototypes.NARROWCAP not in pullop.remote.capabilities():
177 if wireprototypes.NARROWCAP not in pullop.remote.capabilities():
178 raise error.Abort(_(b"server does not support narrow clones"))
178 raise error.Abort(_(b"server does not support narrow clones"))
179 orig(pullop, kwargs)
179 orig(pullop, kwargs)
180 kwargs[b'narrow'] = True
180 kwargs[b'narrow'] = True
181 include, exclude = repo.narrowpats
181 include, exclude = repo.narrowpats
182 kwargs[b'oldincludepats'] = include
182 kwargs[b'oldincludepats'] = include
183 kwargs[b'oldexcludepats'] = exclude
183 kwargs[b'oldexcludepats'] = exclude
184 if include:
184 if include:
185 kwargs[b'includepats'] = include
185 kwargs[b'includepats'] = include
186 if exclude:
186 if exclude:
187 kwargs[b'excludepats'] = exclude
187 kwargs[b'excludepats'] = exclude
188 # calculate known nodes only in ellipses cases because in non-ellipses cases
188 # calculate known nodes only in ellipses cases because in non-ellipses cases
189 # we have all the nodes
189 # we have all the nodes
190 if wireprototypes.ELLIPSESCAP1 in pullop.remote.capabilities():
190 if wireprototypes.ELLIPSESCAP1 in pullop.remote.capabilities():
191 kwargs[b'known'] = [
191 kwargs[b'known'] = [
192 hex(ctx.node())
192 hex(ctx.node())
193 for ctx in repo.set(b'::%ln', pullop.common)
193 for ctx in repo.set(b'::%ln', pullop.common)
194 if ctx.node() != repo.nullid
194 if ctx.node() != repo.nullid
195 ]
195 ]
196 if not kwargs[b'known']:
196 if not kwargs[b'known']:
197 # Mercurial serializes an empty list as '' and deserializes it as
197 # Mercurial serializes an empty list as '' and deserializes it as
198 # [''], so delete it instead to avoid handling the empty string on
198 # [''], so delete it instead to avoid handling the empty string on
199 # the server.
199 # the server.
200 del kwargs[b'known']
200 del kwargs[b'known']
201
201
202
202
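# Illustrative sketch of the wire-serialization quirk noted above: an empty
# list would round-trip as [''], so the 'known' argument is deleted instead of
# being sent empty. The join/split pair below only mimics how such list
# arguments travel; it is not the actual wire-protocol encoder.
def encode_list(nodes):
    return b','.join(nodes)

def decode_list(data):
    return data.split(b',')

assert decode_list(encode_list([])) == [b'']          # the surprising case
assert decode_list(encode_list([b'aa', b'bb'])) == [b'aa', b'bb']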
203 extensions.wrapfunction(
203 extensions.wrapfunction(
204 exchange, b'_pullbundle2extraprepare', pullbundle2extraprepare
204 exchange, b'_pullbundle2extraprepare', pullbundle2extraprepare
205 )
205 )
206
206
207
207
208 def _narrow(
208 def _narrow(
209 ui,
209 ui,
210 repo,
210 repo,
211 remote,
211 remote,
212 commoninc,
212 commoninc,
213 oldincludes,
213 oldincludes,
214 oldexcludes,
214 oldexcludes,
215 newincludes,
215 newincludes,
216 newexcludes,
216 newexcludes,
217 force,
217 force,
218 backup,
218 backup,
219 ):
219 ):
220 oldmatch = narrowspec.match(repo.root, oldincludes, oldexcludes)
220 oldmatch = narrowspec.match(repo.root, oldincludes, oldexcludes)
221 newmatch = narrowspec.match(repo.root, newincludes, newexcludes)
221 newmatch = narrowspec.match(repo.root, newincludes, newexcludes)
222
222
223 # This is essentially doing "hg outgoing" to find all local-only
223 # This is essentially doing "hg outgoing" to find all local-only
224 # commits. We will then check that the local-only commits don't
224 # commits. We will then check that the local-only commits don't
225 # have any changes to files that will be untracked.
225 # have any changes to files that will be untracked.
226 unfi = repo.unfiltered()
226 unfi = repo.unfiltered()
227 outgoing = discovery.findcommonoutgoing(unfi, remote, commoninc=commoninc)
227 outgoing = discovery.findcommonoutgoing(unfi, remote, commoninc=commoninc)
228 ui.status(_(b'looking for local changes to affected paths\n'))
228 ui.status(_(b'looking for local changes to affected paths\n'))
229 progress = ui.makeprogress(
229 progress = ui.makeprogress(
230 topic=_(b'changesets'),
230 topic=_(b'changesets'),
231 unit=_(b'changesets'),
231 unit=_(b'changesets'),
232 total=len(outgoing.missing) + len(outgoing.excluded),
232 total=len(outgoing.missing) + len(outgoing.excluded),
233 )
233 )
234 localnodes = []
234 localnodes = []
235 with progress:
235 with progress:
236 for n in itertools.chain(outgoing.missing, outgoing.excluded):
236 for n in itertools.chain(outgoing.missing, outgoing.excluded):
237 progress.increment()
237 progress.increment()
238 if any(oldmatch(f) and not newmatch(f) for f in unfi[n].files()):
238 if any(oldmatch(f) and not newmatch(f) for f in unfi[n].files()):
239 localnodes.append(n)
239 localnodes.append(n)
240 revstostrip = unfi.revs(b'descendants(%ln)', localnodes)
240 revstostrip = unfi.revs(b'descendants(%ln)', localnodes)
241 hiddenrevs = repoview.filterrevs(repo, b'visible')
241 hiddenrevs = repoview.filterrevs(repo, b'visible')
242 visibletostrip = list(
242 visibletostrip = list(
243 repo.changelog.node(r) for r in (revstostrip - hiddenrevs)
243 repo.changelog.node(r) for r in (revstostrip - hiddenrevs)
244 )
244 )
245 if visibletostrip:
245 if visibletostrip:
246 ui.status(
246 ui.status(
247 _(
247 _(
248 b'The following changeset(s) or their ancestors have '
248 b'The following changeset(s) or their ancestors have '
249 b'local changes not on the remote:\n'
249 b'local changes not on the remote:\n'
250 )
250 )
251 )
251 )
252 maxnodes = 10
252 maxnodes = 10
253 if ui.verbose or len(visibletostrip) <= maxnodes:
253 if ui.verbose or len(visibletostrip) <= maxnodes:
254 for n in visibletostrip:
254 for n in visibletostrip:
255 ui.status(b'%s\n' % short(n))
255 ui.status(b'%s\n' % short(n))
256 else:
256 else:
257 for n in visibletostrip[:maxnodes]:
257 for n in visibletostrip[:maxnodes]:
258 ui.status(b'%s\n' % short(n))
258 ui.status(b'%s\n' % short(n))
259 ui.status(
259 ui.status(
260 _(b'...and %d more, use --verbose to list all\n')
260 _(b'...and %d more, use --verbose to list all\n')
261 % (len(visibletostrip) - maxnodes)
261 % (len(visibletostrip) - maxnodes)
262 )
262 )
263 if not force:
263 if not force:
264 raise error.StateError(
264 raise error.StateError(
265 _(b'local changes found'),
265 _(b'local changes found'),
266 hint=_(b'use --force-delete-local-changes to ignore'),
266 hint=_(b'use --force-delete-local-changes to ignore'),
267 )
267 )
268
268
269 with ui.uninterruptible():
269 with ui.uninterruptible():
270 if revstostrip:
270 if revstostrip:
271 tostrip = [unfi.changelog.node(r) for r in revstostrip]
271 tostrip = [unfi.changelog.node(r) for r in revstostrip]
272 if repo[b'.'].node() in tostrip:
272 if repo[b'.'].node() in tostrip:
273 # stripping working copy, so move to a different commit first
273 # stripping working copy, so move to a different commit first
274 urev = max(
274 urev = max(
275 repo.revs(
275 repo.revs(
276 b'(::%n) - %ln + null',
276 b'(::%n) - %ln + null',
277 repo[b'.'].node(),
277 repo[b'.'].node(),
278 visibletostrip,
278 visibletostrip,
279 )
279 )
280 )
280 )
281 hg.clean(repo, urev)
281 hg.clean(repo, urev)
282 overrides = {(b'devel', b'strip-obsmarkers'): False}
282 overrides = {(b'devel', b'strip-obsmarkers'): False}
283 if backup:
283 if backup:
284 ui.status(_(b'moving unwanted changesets to backup\n'))
284 ui.status(_(b'moving unwanted changesets to backup\n'))
285 else:
285 else:
286 ui.status(_(b'deleting unwanted changesets\n'))
286 ui.status(_(b'deleting unwanted changesets\n'))
287 with ui.configoverride(overrides, b'narrow'):
287 with ui.configoverride(overrides, b'narrow'):
288 repair.strip(ui, unfi, tostrip, topic=b'narrow', backup=backup)
288 repair.strip(ui, unfi, tostrip, topic=b'narrow', backup=backup)
289
289
290 todelete = []
290 todelete = []
291 - for t, f, size in repo.store.datafiles():
291 + for entry in repo.store.datafiles():
292 + f = entry.unencoded_path
292 if f.startswith(b'data/'):
293 if f.startswith(b'data/'):
293 file = f[5:-2]
294 file = f[5:-2]
294 if not newmatch(file):
295 if not newmatch(file):
295 todelete.append(f)
296 todelete.append(f)
296 elif f.startswith(b'meta/'):
297 elif f.startswith(b'meta/'):
297 dir = f[5:-13]
298 dir = f[5:-13]
298 dirs = sorted(pathutil.dirs({dir})) + [dir]
299 dirs = sorted(pathutil.dirs({dir})) + [dir]
299 include = True
300 include = True
300 for d in dirs:
301 for d in dirs:
301 visit = newmatch.visitdir(d)
302 visit = newmatch.visitdir(d)
302 if not visit:
303 if not visit:
303 include = False
304 include = False
304 break
305 break
305 if visit == b'all':
306 if visit == b'all':
306 break
307 break
307 if not include:
308 if not include:
308 todelete.append(f)
309 todelete.append(f)
309
310
310 repo.destroying()
311 repo.destroying()
311
312
312 with repo.transaction(b'narrowing'):
313 with repo.transaction(b'narrowing'):
313 # Update narrowspec before removing revlogs, so repo won't be
314 # Update narrowspec before removing revlogs, so repo won't be
314 # corrupt in case of crash
315 # corrupt in case of crash
315 repo.setnarrowpats(newincludes, newexcludes)
316 repo.setnarrowpats(newincludes, newexcludes)
316
317
317 for f in todelete:
318 for f in todelete:
318 ui.status(_(b'deleting %s\n') % f)
319 ui.status(_(b'deleting %s\n') % f)
319 util.unlinkpath(repo.svfs.join(f))
320 util.unlinkpath(repo.svfs.join(f))
320 repo.store.markremoved(f)
321 repo.store.markremoved(f)
321
322
322 ui.status(_(b'deleting unwanted files from working copy\n'))
323 ui.status(_(b'deleting unwanted files from working copy\n'))
323 with repo.dirstate.changing_parents(repo):
324 with repo.dirstate.changing_parents(repo):
324 narrowspec.updateworkingcopy(repo, assumeclean=True)
325 narrowspec.updateworkingcopy(repo, assumeclean=True)
325 narrowspec.copytoworkingcopy(repo)
326 narrowspec.copytoworkingcopy(repo)
326
327
327 repo.destroyed()
328 repo.destroyed()
328
329
329
330
330 def _widen(
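# Illustrative sketch of the store-path handling in the todelete loop of
# _narrow() above: filelog revlogs live under b'data/<file>.i' (or .d) and
# tree-manifest revlogs under b'meta/<dir>/00manifest.i', so the slices
# f[5:-2] and f[5:-13] recover the tracked file and directory names. The
# example paths are made up and ignore any path encoding applied on disk.
filelog_path = b'data/src/main.c.i'
assert filelog_path[5:-2] == b'src/main.c'

treemanifest_path = b'meta/src/00manifest.i'
assert treemanifest_path[5:-13] == b'src'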
331 def _widen(
331 ui,
332 ui,
332 repo,
333 repo,
333 remote,
334 remote,
334 commoninc,
335 commoninc,
335 oldincludes,
336 oldincludes,
336 oldexcludes,
337 oldexcludes,
337 newincludes,
338 newincludes,
338 newexcludes,
339 newexcludes,
339 ):
340 ):
340 # For now we assume that if a server has ellipses enabled, we will be
341 # For now we assume that if a server has ellipses enabled, we will be
341 # exchanging ellipses nodes. In the future we should add ellipses as a
342 # exchanging ellipses nodes. In the future we should add ellipses as a
342 # client-side requirement (maybe) to indicate whether a client is shallow,
343 # client-side requirement (maybe) to indicate whether a client is shallow,
343 # and then tell the server whether or not we want ellipses.
344 # and then tell the server whether or not we want ellipses.
344 # Theoretically a non-ellipses repo should be able to use narrow
345 # Theoretically a non-ellipses repo should be able to use narrow
345 # functionality from an ellipses-enabled server.
346 # functionality from an ellipses-enabled server.
346 remotecap = remote.capabilities()
347 remotecap = remote.capabilities()
347 ellipsesremote = any(
348 ellipsesremote = any(
348 cap in remotecap for cap in wireprototypes.SUPPORTED_ELLIPSESCAP
349 cap in remotecap for cap in wireprototypes.SUPPORTED_ELLIPSESCAP
349 )
350 )
350
351
351 # check whether we are talking to a server which supports old version of
352 # check whether we are talking to a server which supports old version of
352 # ellipses capabilities
353 # ellipses capabilities
353 isoldellipses = (
354 isoldellipses = (
354 ellipsesremote
355 ellipsesremote
355 and wireprototypes.ELLIPSESCAP1 in remotecap
356 and wireprototypes.ELLIPSESCAP1 in remotecap
356 and wireprototypes.ELLIPSESCAP not in remotecap
357 and wireprototypes.ELLIPSESCAP not in remotecap
357 )
358 )
358
359
359 def pullbundle2extraprepare_widen(orig, pullop, kwargs):
360 def pullbundle2extraprepare_widen(orig, pullop, kwargs):
360 orig(pullop, kwargs)
361 orig(pullop, kwargs)
361 # The old{in,ex}cludepats have already been set by orig()
362 # The old{in,ex}cludepats have already been set by orig()
362 kwargs[b'includepats'] = newincludes
363 kwargs[b'includepats'] = newincludes
363 kwargs[b'excludepats'] = newexcludes
364 kwargs[b'excludepats'] = newexcludes
364
365
365 wrappedextraprepare = extensions.wrappedfunction(
366 wrappedextraprepare = extensions.wrappedfunction(
366 exchange, b'_pullbundle2extraprepare', pullbundle2extraprepare_widen
367 exchange, b'_pullbundle2extraprepare', pullbundle2extraprepare_widen
367 )
368 )
368
369
369 # define a function that narrowbundle2 can call after creating the
370 # define a function that narrowbundle2 can call after creating the
370 # backup bundle, but before applying the bundle from the server
371 # backup bundle, but before applying the bundle from the server
371 def setnewnarrowpats():
372 def setnewnarrowpats():
372 repo.setnarrowpats(newincludes, newexcludes)
373 repo.setnarrowpats(newincludes, newexcludes)
373
374
374 repo.setnewnarrowpats = setnewnarrowpats
375 repo.setnewnarrowpats = setnewnarrowpats
375 # silence the devel-warning of applying an empty changegroup
376 # silence the devel-warning of applying an empty changegroup
376 overrides = {(b'devel', b'all-warnings'): False}
377 overrides = {(b'devel', b'all-warnings'): False}
377
378
378 common = commoninc[0]
379 common = commoninc[0]
379 with ui.uninterruptible():
380 with ui.uninterruptible():
380 if ellipsesremote:
381 if ellipsesremote:
381 ds = repo.dirstate
382 ds = repo.dirstate
382 p1, p2 = ds.p1(), ds.p2()
383 p1, p2 = ds.p1(), ds.p2()
383 with ds.changing_parents(repo):
384 with ds.changing_parents(repo):
384 ds.setparents(repo.nullid, repo.nullid)
385 ds.setparents(repo.nullid, repo.nullid)
385 if isoldellipses:
386 if isoldellipses:
386 with wrappedextraprepare:
387 with wrappedextraprepare:
387 exchange.pull(repo, remote, heads=common)
388 exchange.pull(repo, remote, heads=common)
388 else:
389 else:
389 known = []
390 known = []
390 if ellipsesremote:
391 if ellipsesremote:
391 known = [
392 known = [
392 ctx.node()
393 ctx.node()
393 for ctx in repo.set(b'::%ln', common)
394 for ctx in repo.set(b'::%ln', common)
394 if ctx.node() != repo.nullid
395 if ctx.node() != repo.nullid
395 ]
396 ]
396 with remote.commandexecutor() as e:
397 with remote.commandexecutor() as e:
397 bundle = e.callcommand(
398 bundle = e.callcommand(
398 b'narrow_widen',
399 b'narrow_widen',
399 {
400 {
400 b'oldincludes': oldincludes,
401 b'oldincludes': oldincludes,
401 b'oldexcludes': oldexcludes,
402 b'oldexcludes': oldexcludes,
402 b'newincludes': newincludes,
403 b'newincludes': newincludes,
403 b'newexcludes': newexcludes,
404 b'newexcludes': newexcludes,
404 b'cgversion': b'03',
405 b'cgversion': b'03',
405 b'commonheads': common,
406 b'commonheads': common,
406 b'known': known,
407 b'known': known,
407 b'ellipses': ellipsesremote,
408 b'ellipses': ellipsesremote,
408 },
409 },
409 ).result()
410 ).result()
410
411
411 trmanager = exchange.transactionmanager(
412 trmanager = exchange.transactionmanager(
412 repo, b'widen', remote.url()
413 repo, b'widen', remote.url()
413 )
414 )
414 with trmanager, repo.ui.configoverride(overrides, b'widen'):
415 with trmanager, repo.ui.configoverride(overrides, b'widen'):
415 op = bundle2.bundleoperation(
416 op = bundle2.bundleoperation(
416 repo, trmanager.transaction, source=b'widen'
417 repo, trmanager.transaction, source=b'widen'
417 )
418 )
418 # TODO: we should catch error.Abort here
419 # TODO: we should catch error.Abort here
419 bundle2.processbundle(repo, bundle, op=op, remote=remote)
420 bundle2.processbundle(repo, bundle, op=op, remote=remote)
420
421
421 if ellipsesremote:
422 if ellipsesremote:
422 with ds.changing_parents(repo):
423 with ds.changing_parents(repo):
423 ds.setparents(p1, p2)
424 ds.setparents(p1, p2)
424
425
425 with repo.transaction(b'widening'), repo.dirstate.changing_parents(
426 with repo.transaction(b'widening'), repo.dirstate.changing_parents(
426 repo
427 repo
427 ):
428 ):
428 repo.setnewnarrowpats()
429 repo.setnewnarrowpats()
429 narrowspec.updateworkingcopy(repo)
430 narrowspec.updateworkingcopy(repo)
430 narrowspec.copytoworkingcopy(repo)
431 narrowspec.copytoworkingcopy(repo)
431
432
432
433
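# Illustrative sketch of the set arithmetic trackedcmd() below applies to the
# user-supplied options before deciding whether the operation widens or
# narrows the clone. The pattern strings are made up; the real code works on
# patterns parsed by narrowspec.parsepatterns().
oldincludes = {b'path:src', b'path:docs'}
oldexcludes = {b'path:src/generated'}

addedincludes = {b'path:tests', b'path:src'}   # --addinclude values
removedincludes = {b'path:docs'}               # --removeinclude values
addedexcludes = set()                          # --addexclude values
removedexcludes = set()                        # --removeexclude values

addedincludes -= oldincludes       # already-tracked includes are no-ops
removedincludes &= oldincludes     # can only remove what is currently tracked
addedexcludes -= oldexcludes
removedexcludes &= oldexcludes

widening = bool(addedincludes or removedexcludes)
narrowing = bool(removedincludes or addedexcludes)
assert widening and narrowing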
433 # TODO(rdamazio): Make new matcher format and update description
434 # TODO(rdamazio): Make new matcher format and update description
434 @command(
435 @command(
435 b'tracked',
436 b'tracked',
436 [
437 [
437 (b'', b'addinclude', [], _(b'new paths to include')),
438 (b'', b'addinclude', [], _(b'new paths to include')),
438 (b'', b'removeinclude', [], _(b'old paths to no longer include')),
439 (b'', b'removeinclude', [], _(b'old paths to no longer include')),
439 (
440 (
440 b'',
441 b'',
441 b'auto-remove-includes',
442 b'auto-remove-includes',
442 False,
443 False,
443 _(b'automatically choose unused includes to remove'),
444 _(b'automatically choose unused includes to remove'),
444 ),
445 ),
445 (b'', b'addexclude', [], _(b'new paths to exclude')),
446 (b'', b'addexclude', [], _(b'new paths to exclude')),
446 (b'', b'import-rules', b'', _(b'import narrowspecs from a file')),
447 (b'', b'import-rules', b'', _(b'import narrowspecs from a file')),
447 (b'', b'removeexclude', [], _(b'old paths to no longer exclude')),
448 (b'', b'removeexclude', [], _(b'old paths to no longer exclude')),
448 (
449 (
449 b'',
450 b'',
450 b'clear',
451 b'clear',
451 False,
452 False,
452 _(b'whether to replace the existing narrowspec'),
453 _(b'whether to replace the existing narrowspec'),
453 ),
454 ),
454 (
455 (
455 b'',
456 b'',
456 b'force-delete-local-changes',
457 b'force-delete-local-changes',
457 False,
458 False,
458 _(b'forces deletion of local changes when narrowing'),
459 _(b'forces deletion of local changes when narrowing'),
459 ),
460 ),
460 (
461 (
461 b'',
462 b'',
462 b'backup',
463 b'backup',
463 True,
464 True,
464 _(b'back up local changes when narrowing'),
465 _(b'back up local changes when narrowing'),
465 ),
466 ),
466 (
467 (
467 b'',
468 b'',
468 b'update-working-copy',
469 b'update-working-copy',
469 False,
470 False,
470 _(b'update working copy when the store has changed'),
471 _(b'update working copy when the store has changed'),
471 ),
472 ),
472 ]
473 ]
473 + commands.remoteopts,
474 + commands.remoteopts,
474 _(b'[OPTIONS]... [REMOTE]'),
475 _(b'[OPTIONS]... [REMOTE]'),
475 inferrepo=True,
476 inferrepo=True,
476 helpcategory=command.CATEGORY_MAINTENANCE,
477 helpcategory=command.CATEGORY_MAINTENANCE,
477 )
478 )
478 def trackedcmd(ui, repo, remotepath=None, *pats, **opts):
479 def trackedcmd(ui, repo, remotepath=None, *pats, **opts):
479 """show or change the current narrowspec
480 """show or change the current narrowspec
480
481
481 With no argument, shows the current narrowspec entries, one per line. Each
482 With no argument, shows the current narrowspec entries, one per line. Each
482 line will be prefixed with 'I' or 'X' for included or excluded patterns,
483 line will be prefixed with 'I' or 'X' for included or excluded patterns,
483 respectively.
484 respectively.
484
485
485 The narrowspec is composed of expressions to match remote files and/or
486 The narrowspec is composed of expressions to match remote files and/or
486 directories that should be pulled into your client.
487 directories that should be pulled into your client.
487 The narrowspec has *include* and *exclude* expressions, with excludes always
488 The narrowspec has *include* and *exclude* expressions, with excludes always
488 trumping includes: that is, if a file matches an exclude expression, it will
489 trumping includes: that is, if a file matches an exclude expression, it will
489 be excluded even if it also matches an include expression.
490 be excluded even if it also matches an include expression.
490 Excluding files that were never included has no effect.
491 Excluding files that were never included has no effect.
491
492
492 Each included or excluded entry is in the format described by
493 Each included or excluded entry is in the format described by
493 'hg help patterns'.
494 'hg help patterns'.
494
495
495 The options allow you to add or remove included and excluded expressions.
496 The options allow you to add or remove included and excluded expressions.
496
497
497 If --clear is specified, then all previous includes and excludes are DROPPED
498 If --clear is specified, then all previous includes and excludes are DROPPED
498 and replaced by the new ones specified to --addinclude and --addexclude.
499 and replaced by the new ones specified to --addinclude and --addexclude.
499 If --clear is specified without any further options, the narrowspec will be
500 If --clear is specified without any further options, the narrowspec will be
500 empty and will not match any files.
501 empty and will not match any files.
501
502
502 If --auto-remove-includes is specified, then those includes that don't match
503 If --auto-remove-includes is specified, then those includes that don't match
503 any files modified by currently visible local commits (those not shared by
504 any files modified by currently visible local commits (those not shared by
504 the remote) will be added to the set of explicitly specified includes to
505 the remote) will be added to the set of explicitly specified includes to
505 remove.
506 remove.
506
507
507 --import-rules accepts a path to a file containing rules, allowing you to
508 --import-rules accepts a path to a file containing rules, allowing you to
508 add --addinclude, --addexclude rules in bulk. Like the other include and
509 add --addinclude, --addexclude rules in bulk. Like the other include and
509 exclude switches, the changes are applied immediately.
510 exclude switches, the changes are applied immediately.
510 """
511 """
511 opts = pycompat.byteskwargs(opts)
512 opts = pycompat.byteskwargs(opts)
512 if requirements.NARROW_REQUIREMENT not in repo.requirements:
513 if requirements.NARROW_REQUIREMENT not in repo.requirements:
513 raise error.InputError(
514 raise error.InputError(
514 _(
515 _(
515 b'the tracked command is only supported on '
516 b'the tracked command is only supported on '
516 b'repositories cloned with --narrow'
517 b'repositories cloned with --narrow'
517 )
518 )
518 )
519 )
519
520
520 # Before supporting it, decide whether "hg tracked --clear" should mean
521 # Before supporting it, decide whether "hg tracked --clear" should mean
521 # tracking no paths or all paths.
522 # tracking no paths or all paths.
522 if opts[b'clear']:
523 if opts[b'clear']:
523 raise error.InputError(_(b'the --clear option is not yet supported'))
524 raise error.InputError(_(b'the --clear option is not yet supported'))
524
525
525 # import rules from a file
526 # import rules from a file
526 newrules = opts.get(b'import_rules')
527 newrules = opts.get(b'import_rules')
527 if newrules:
528 if newrules:
528 try:
529 try:
529 filepath = os.path.join(encoding.getcwd(), newrules)
530 filepath = os.path.join(encoding.getcwd(), newrules)
530 fdata = util.readfile(filepath)
531 fdata = util.readfile(filepath)
531 except IOError as inst:
532 except IOError as inst:
532 raise error.StorageError(
533 raise error.StorageError(
533 _(b"cannot read narrowspecs from '%s': %s")
534 _(b"cannot read narrowspecs from '%s': %s")
534 % (filepath, encoding.strtolocal(inst.strerror))
535 % (filepath, encoding.strtolocal(inst.strerror))
535 )
536 )
536 includepats, excludepats, profiles = sparse.parseconfig(
537 includepats, excludepats, profiles = sparse.parseconfig(
537 ui, fdata, b'narrow'
538 ui, fdata, b'narrow'
538 )
539 )
539 if profiles:
540 if profiles:
540 raise error.InputError(
541 raise error.InputError(
541 _(
542 _(
542 b"including other spec files using '%include' "
543 b"including other spec files using '%include' "
543 b"is not supported in narrowspec"
544 b"is not supported in narrowspec"
544 )
545 )
545 )
546 )
546 opts[b'addinclude'].extend(includepats)
547 opts[b'addinclude'].extend(includepats)
547 opts[b'addexclude'].extend(excludepats)
548 opts[b'addexclude'].extend(excludepats)
548
549
549 addedincludes = narrowspec.parsepatterns(opts[b'addinclude'])
550 addedincludes = narrowspec.parsepatterns(opts[b'addinclude'])
550 removedincludes = narrowspec.parsepatterns(opts[b'removeinclude'])
551 removedincludes = narrowspec.parsepatterns(opts[b'removeinclude'])
551 addedexcludes = narrowspec.parsepatterns(opts[b'addexclude'])
552 addedexcludes = narrowspec.parsepatterns(opts[b'addexclude'])
552 removedexcludes = narrowspec.parsepatterns(opts[b'removeexclude'])
553 removedexcludes = narrowspec.parsepatterns(opts[b'removeexclude'])
553 autoremoveincludes = opts[b'auto_remove_includes']
554 autoremoveincludes = opts[b'auto_remove_includes']
554
555
555 update_working_copy = opts[b'update_working_copy']
556 update_working_copy = opts[b'update_working_copy']
556 only_show = not (
557 only_show = not (
557 addedincludes
558 addedincludes
558 or removedincludes
559 or removedincludes
559 or addedexcludes
560 or addedexcludes
560 or removedexcludes
561 or removedexcludes
561 or newrules
562 or newrules
562 or autoremoveincludes
563 or autoremoveincludes
563 or update_working_copy
564 or update_working_copy
564 )
565 )
565
566
566 # Only print the current narrowspec.
567 # Only print the current narrowspec.
567 if only_show:
568 if only_show:
568 oldincludes, oldexcludes = repo.narrowpats
569 oldincludes, oldexcludes = repo.narrowpats
569 ui.pager(b'tracked')
570 ui.pager(b'tracked')
570 fm = ui.formatter(b'narrow', opts)
571 fm = ui.formatter(b'narrow', opts)
571 for i in sorted(oldincludes):
572 for i in sorted(oldincludes):
572 fm.startitem()
573 fm.startitem()
573 fm.write(b'status', b'%s ', b'I', label=b'narrow.included')
574 fm.write(b'status', b'%s ', b'I', label=b'narrow.included')
574 fm.write(b'pat', b'%s\n', i, label=b'narrow.included')
575 fm.write(b'pat', b'%s\n', i, label=b'narrow.included')
575 for i in sorted(oldexcludes):
576 for i in sorted(oldexcludes):
576 fm.startitem()
577 fm.startitem()
577 fm.write(b'status', b'%s ', b'X', label=b'narrow.excluded')
578 fm.write(b'status', b'%s ', b'X', label=b'narrow.excluded')
578 fm.write(b'pat', b'%s\n', i, label=b'narrow.excluded')
579 fm.write(b'pat', b'%s\n', i, label=b'narrow.excluded')
579 fm.end()
580 fm.end()
580 return 0
581 return 0
581
582
582 with repo.wlock(), repo.lock():
583 with repo.wlock(), repo.lock():
583 oldincludes, oldexcludes = repo.narrowpats
584 oldincludes, oldexcludes = repo.narrowpats
584
585
585 # filter the user passed additions and deletions into actual additions and
586 # filter the user passed additions and deletions into actual additions and
586 # deletions of excludes and includes
587 # deletions of excludes and includes
587 addedincludes -= oldincludes
588 addedincludes -= oldincludes
588 removedincludes &= oldincludes
589 removedincludes &= oldincludes
589 addedexcludes -= oldexcludes
590 addedexcludes -= oldexcludes
590 removedexcludes &= oldexcludes
591 removedexcludes &= oldexcludes
591
592
592 widening = addedincludes or removedexcludes
593 widening = addedincludes or removedexcludes
593 narrowing = removedincludes or addedexcludes
594 narrowing = removedincludes or addedexcludes
594
595
595 if update_working_copy:
596 if update_working_copy:
596 with repo.transaction(b'narrow-wc'), repo.dirstate.changing_parents(
597 with repo.transaction(b'narrow-wc'), repo.dirstate.changing_parents(
597 repo
598 repo
598 ):
599 ):
599 narrowspec.updateworkingcopy(repo)
600 narrowspec.updateworkingcopy(repo)
600 narrowspec.copytoworkingcopy(repo)
601 narrowspec.copytoworkingcopy(repo)
601 return 0
602 return 0
602
603
603 if not (widening or narrowing or autoremoveincludes):
604 if not (widening or narrowing or autoremoveincludes):
604 ui.status(_(b"nothing to widen or narrow\n"))
605 ui.status(_(b"nothing to widen or narrow\n"))
605 return 0
606 return 0
606
607
607 cmdutil.bailifchanged(repo)
608 cmdutil.bailifchanged(repo)
608
609
609 # Find the revisions we have in common with the remote. These will
610 # Find the revisions we have in common with the remote. These will
610 # be used for finding local-only changes for narrowing. They will
611 # be used for finding local-only changes for narrowing. They will
611 # also define the set of revisions to update for widening.
612 # also define the set of revisions to update for widening.
612 path = urlutil.get_unique_pull_path_obj(b'tracked', ui, remotepath)
613 path = urlutil.get_unique_pull_path_obj(b'tracked', ui, remotepath)
613 ui.status(_(b'comparing with %s\n') % urlutil.hidepassword(path.loc))
614 ui.status(_(b'comparing with %s\n') % urlutil.hidepassword(path.loc))
614 remote = hg.peer(repo, opts, path)
615 remote = hg.peer(repo, opts, path)
615
616
616 try:
617 try:
617 # check narrow support before doing anything if widening needs to be
618 # check narrow support before doing anything if widening needs to be
618 # performed. In the future we should also abort if the client uses ellipses
619 # performed. In the future we should also abort if the client uses ellipses
619 # and the server does not support ellipses
620 # and the server does not support ellipses
620 if (
621 if (
621 widening
622 widening
622 and wireprototypes.NARROWCAP not in remote.capabilities()
623 and wireprototypes.NARROWCAP not in remote.capabilities()
623 ):
624 ):
624 raise error.Abort(_(b"server does not support narrow clones"))
625 raise error.Abort(_(b"server does not support narrow clones"))
625
626
626 commoninc = discovery.findcommonincoming(repo, remote)
627 commoninc = discovery.findcommonincoming(repo, remote)
627
628
628 if autoremoveincludes:
629 if autoremoveincludes:
629 outgoing = discovery.findcommonoutgoing(
630 outgoing = discovery.findcommonoutgoing(
630 repo, remote, commoninc=commoninc
631 repo, remote, commoninc=commoninc
631 )
632 )
632 ui.status(_(b'looking for unused includes to remove\n'))
633 ui.status(_(b'looking for unused includes to remove\n'))
633 localfiles = set()
634 localfiles = set()
634 for n in itertools.chain(outgoing.missing, outgoing.excluded):
635 for n in itertools.chain(outgoing.missing, outgoing.excluded):
635 localfiles.update(repo[n].files())
636 localfiles.update(repo[n].files())
636 suggestedremovals = []
637 suggestedremovals = []
637 for include in sorted(oldincludes):
638 for include in sorted(oldincludes):
638 match = narrowspec.match(repo.root, [include], oldexcludes)
639 match = narrowspec.match(repo.root, [include], oldexcludes)
639 if not any(match(f) for f in localfiles):
640 if not any(match(f) for f in localfiles):
640 suggestedremovals.append(include)
641 suggestedremovals.append(include)
641 if suggestedremovals:
642 if suggestedremovals:
642 for s in suggestedremovals:
643 for s in suggestedremovals:
643 ui.status(b'%s\n' % s)
644 ui.status(b'%s\n' % s)
644 if (
645 if (
645 ui.promptchoice(
646 ui.promptchoice(
646 _(
647 _(
647 b'remove these unused includes (yn)?'
648 b'remove these unused includes (yn)?'
648 b'$$ &Yes $$ &No'
649 b'$$ &Yes $$ &No'
649 )
650 )
650 )
651 )
651 == 0
652 == 0
652 ):
653 ):
653 removedincludes.update(suggestedremovals)
654 removedincludes.update(suggestedremovals)
654 narrowing = True
655 narrowing = True
655 else:
656 else:
656 ui.status(_(b'found no unused includes\n'))
657 ui.status(_(b'found no unused includes\n'))
657
658
658 if narrowing:
659 if narrowing:
659 newincludes = oldincludes - removedincludes
660 newincludes = oldincludes - removedincludes
660 newexcludes = oldexcludes | addedexcludes
661 newexcludes = oldexcludes | addedexcludes
661 _narrow(
662 _narrow(
662 ui,
663 ui,
663 repo,
664 repo,
664 remote,
665 remote,
665 commoninc,
666 commoninc,
666 oldincludes,
667 oldincludes,
667 oldexcludes,
668 oldexcludes,
668 newincludes,
669 newincludes,
669 newexcludes,
670 newexcludes,
670 opts[b'force_delete_local_changes'],
671 opts[b'force_delete_local_changes'],
671 opts[b'backup'],
672 opts[b'backup'],
672 )
673 )
673 # _narrow() updated the narrowspec and _widen() below needs to
674 # _narrow() updated the narrowspec and _widen() below needs to
674 # use the updated values as its base (otherwise removed includes
675 # use the updated values as its base (otherwise removed includes
675 # and addedexcludes will be lost in the resulting narrowspec)
676 # and addedexcludes will be lost in the resulting narrowspec)
676 oldincludes = newincludes
677 oldincludes = newincludes
677 oldexcludes = newexcludes
678 oldexcludes = newexcludes
678
679
679 if widening:
680 if widening:
680 newincludes = oldincludes | addedincludes
681 newincludes = oldincludes | addedincludes
681 newexcludes = oldexcludes - removedexcludes
682 newexcludes = oldexcludes - removedexcludes
682 _widen(
683 _widen(
683 ui,
684 ui,
684 repo,
685 repo,
685 remote,
686 remote,
686 commoninc,
687 commoninc,
687 oldincludes,
688 oldincludes,
688 oldexcludes,
689 oldexcludes,
689 newincludes,
690 newincludes,
690 newexcludes,
691 newexcludes,
691 )
692 )
692 finally:
693 finally:
693 remote.close()
694 remote.close()
694
695
695 return 0
696 return 0
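A minimal, self-contained sketch of the set arithmetic the tracked command applies above, which decides whether a request widens, narrows, or does nothing (classify_narrowspec_changes is a hypothetical name, not part of Mercurial):

def classify_narrowspec_changes(
    oldincludes, oldexcludes,
    addedincludes, removedincludes,
    addedexcludes, removedexcludes,
):
    # drop no-op requests: adding a pattern already tracked, or removing
    # one that was never there
    addedincludes = addedincludes - oldincludes
    removedincludes = removedincludes & oldincludes
    addedexcludes = addedexcludes - oldexcludes
    removedexcludes = removedexcludes & oldexcludes
    # more includes or fewer excludes widen; the opposite narrows
    widening = bool(addedincludes or removedexcludes)
    narrowing = bool(removedincludes or addedexcludes)
    return widening, narrowing

# re-adding an existing include is a no-op, so there is nothing to do
assert classify_narrowspec_changes(
    {b'path:src'}, set(), {b'path:src'}, set(), set(), set()
) == (False, False)
# dropping an exclude widens the checkout
assert classify_narrowspec_changes(
    {b'path:src'}, {b'path:src/tests'}, set(), set(), set(), {b'path:src/tests'}
) == (True, False)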
@@ -1,437 +1,446 b''
1 # remotefilelogserver.py - server logic for a remotefilelog server
1 # remotefilelogserver.py - server logic for a remotefilelog server
2 #
2 #
3 # Copyright 2013 Facebook, Inc.
3 # Copyright 2013 Facebook, Inc.
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 import os
8 import os
9 import stat
9 import stat
10 import time
10 import time
11 import zlib
11 import zlib
12
12
13 from mercurial.i18n import _
13 from mercurial.i18n import _
14 from mercurial.node import bin, hex
14 from mercurial.node import bin, hex
15 from mercurial.pycompat import open
15 from mercurial.pycompat import open
16 from mercurial import (
16 from mercurial import (
17 changegroup,
17 changegroup,
18 changelog,
18 changelog,
19 context,
19 context,
20 error,
20 error,
21 extensions,
21 extensions,
22 match,
22 match,
23 scmutil,
23 scmutil,
24 store,
24 store,
25 streamclone,
25 streamclone,
26 util,
26 util,
27 wireprotoserver,
27 wireprotoserver,
28 wireprototypes,
28 wireprototypes,
29 wireprotov1server,
29 wireprotov1server,
30 )
30 )
31 from . import (
31 from . import (
32 constants,
32 constants,
33 shallowutil,
33 shallowutil,
34 )
34 )
35
35
36 _sshv1server = wireprotoserver.sshv1protocolhandler
36 _sshv1server = wireprotoserver.sshv1protocolhandler
37
37
38
38
39 def setupserver(ui, repo):
39 def setupserver(ui, repo):
40 """Sets up a normal Mercurial repo so it can serve files to shallow repos."""
40 """Sets up a normal Mercurial repo so it can serve files to shallow repos."""
41 onetimesetup(ui)
41 onetimesetup(ui)
42
42
43 # don't send files to shallow clients during pulls
43 # don't send files to shallow clients during pulls
44 def generatefiles(
44 def generatefiles(
45 orig, self, changedfiles, linknodes, commonrevs, source, *args, **kwargs
45 orig, self, changedfiles, linknodes, commonrevs, source, *args, **kwargs
46 ):
46 ):
47 caps = self._bundlecaps or []
47 caps = self._bundlecaps or []
48 if constants.BUNDLE2_CAPABLITY in caps:
48 if constants.BUNDLE2_CAPABLITY in caps:
49 # only send files that don't match the specified patterns
49 # only send files that don't match the specified patterns
50 includepattern = None
50 includepattern = None
51 excludepattern = None
51 excludepattern = None
52 for cap in self._bundlecaps or []:
52 for cap in self._bundlecaps or []:
53 if cap.startswith(b"includepattern="):
53 if cap.startswith(b"includepattern="):
54 includepattern = cap[len(b"includepattern=") :].split(b'\0')
54 includepattern = cap[len(b"includepattern=") :].split(b'\0')
55 elif cap.startswith(b"excludepattern="):
55 elif cap.startswith(b"excludepattern="):
56 excludepattern = cap[len(b"excludepattern=") :].split(b'\0')
56 excludepattern = cap[len(b"excludepattern=") :].split(b'\0')
57
57
58 m = match.always()
58 m = match.always()
59 if includepattern or excludepattern:
59 if includepattern or excludepattern:
60 m = match.match(
60 m = match.match(
61 repo.root, b'', None, includepattern, excludepattern
61 repo.root, b'', None, includepattern, excludepattern
62 )
62 )
63
63
64 changedfiles = list([f for f in changedfiles if not m(f)])
64 changedfiles = list([f for f in changedfiles if not m(f)])
65 return orig(
65 return orig(
66 self, changedfiles, linknodes, commonrevs, source, *args, **kwargs
66 self, changedfiles, linknodes, commonrevs, source, *args, **kwargs
67 )
67 )
68
68
69 extensions.wrapfunction(
69 extensions.wrapfunction(
70 changegroup.cgpacker, b'generatefiles', generatefiles
70 changegroup.cgpacker, b'generatefiles', generatefiles
71 )
71 )
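A small stand-alone sketch of how the include/exclude patterns travel inside bundle capabilities as NUL-separated lists, which is what the wrapped generatefiles parses above (parse_pattern_caps is a hypothetical helper used only for illustration):

def parse_pattern_caps(bundlecaps):
    includepattern = excludepattern = None
    for cap in bundlecaps or []:
        if cap.startswith(b"includepattern="):
            includepattern = cap[len(b"includepattern="):].split(b'\0')
        elif cap.startswith(b"excludepattern="):
            excludepattern = cap[len(b"excludepattern="):].split(b'\0')
    return includepattern, excludepattern

caps = [b"includepattern=foo/*\0bar/*", b"excludepattern=foo/tests/*"]
assert parse_pattern_caps(caps) == ([b'foo/*', b'bar/*'], [b'foo/tests/*'])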
72
72
73
73
74 onetime = False
74 onetime = False
75
75
76
76
77 def onetimesetup(ui):
77 def onetimesetup(ui):
78 """Configures the wireprotocol for both clients and servers."""
78 """Configures the wireprotocol for both clients and servers."""
79 global onetime
79 global onetime
80 if onetime:
80 if onetime:
81 return
81 return
82 onetime = True
82 onetime = True
83
83
84 # support file content requests
84 # support file content requests
85 wireprotov1server.wireprotocommand(
85 wireprotov1server.wireprotocommand(
86 b'x_rfl_getflogheads', b'path', permission=b'pull'
86 b'x_rfl_getflogheads', b'path', permission=b'pull'
87 )(getflogheads)
87 )(getflogheads)
88 wireprotov1server.wireprotocommand(
88 wireprotov1server.wireprotocommand(
89 b'x_rfl_getfiles', b'', permission=b'pull'
89 b'x_rfl_getfiles', b'', permission=b'pull'
90 )(getfiles)
90 )(getfiles)
91 wireprotov1server.wireprotocommand(
91 wireprotov1server.wireprotocommand(
92 b'x_rfl_getfile', b'file node', permission=b'pull'
92 b'x_rfl_getfile', b'file node', permission=b'pull'
93 )(getfile)
93 )(getfile)
94
94
95 class streamstate:
95 class streamstate:
96 match = None
96 match = None
97 shallowremote = False
97 shallowremote = False
98 noflatmf = False
98 noflatmf = False
99
99
100 state = streamstate()
100 state = streamstate()
101
101
102 def stream_out_shallow(repo, proto, other):
102 def stream_out_shallow(repo, proto, other):
103 includepattern = None
103 includepattern = None
104 excludepattern = None
104 excludepattern = None
105 raw = other.get(b'includepattern')
105 raw = other.get(b'includepattern')
106 if raw:
106 if raw:
107 includepattern = raw.split(b'\0')
107 includepattern = raw.split(b'\0')
108 raw = other.get(b'excludepattern')
108 raw = other.get(b'excludepattern')
109 if raw:
109 if raw:
110 excludepattern = raw.split(b'\0')
110 excludepattern = raw.split(b'\0')
111
111
112 oldshallow = state.shallowremote
112 oldshallow = state.shallowremote
113 oldmatch = state.match
113 oldmatch = state.match
114 oldnoflatmf = state.noflatmf
114 oldnoflatmf = state.noflatmf
115 try:
115 try:
116 state.shallowremote = True
116 state.shallowremote = True
117 state.match = match.always()
117 state.match = match.always()
118 state.noflatmf = other.get(b'noflatmanifest') == b'True'
118 state.noflatmf = other.get(b'noflatmanifest') == b'True'
119 if includepattern or excludepattern:
119 if includepattern or excludepattern:
120 state.match = match.match(
120 state.match = match.match(
121 repo.root, b'', None, includepattern, excludepattern
121 repo.root, b'', None, includepattern, excludepattern
122 )
122 )
123 streamres = wireprotov1server.stream(repo, proto)
123 streamres = wireprotov1server.stream(repo, proto)
124
124
125 # Force the first value to execute, so the file list is computed
125 # Force the first value to execute, so the file list is computed
126 # within the try/finally scope
126 # within the try/finally scope
127 first = next(streamres.gen)
127 first = next(streamres.gen)
128 second = next(streamres.gen)
128 second = next(streamres.gen)
129
129
130 def gen():
130 def gen():
131 yield first
131 yield first
132 yield second
132 yield second
133 for value in streamres.gen:
133 for value in streamres.gen:
134 yield value
134 yield value
135
135
136 return wireprototypes.streamres(gen())
136 return wireprototypes.streamres(gen())
137 finally:
137 finally:
138 state.shallowremote = oldshallow
138 state.shallowremote = oldshallow
139 state.match = oldmatch
139 state.match = oldmatch
140 state.noflatmf = oldnoflatmf
140 state.noflatmf = oldnoflatmf
141
141
142 wireprotov1server.commands[b'stream_out_shallow'] = (
142 wireprotov1server.commands[b'stream_out_shallow'] = (
143 stream_out_shallow,
143 stream_out_shallow,
144 b'*',
144 b'*',
145 )
145 )
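stream_out_shallow pulls the first two values of the stream generator eagerly so the file list is computed while the temporary shallow state is still installed, then re-wraps the remainder. A minimal sketch of that pattern, with hypothetical names:

def eager_prefix(gen, n=2):
    # consume the first n items now, while the surrounding state is in effect
    head = [next(gen) for _ in range(n)]

    def rest():
        for item in head:
            yield item
        for item in gen:
            yield item

    return rest()

g = eager_prefix(iter([1, 2, 3, 4]))
assert list(g) == [1, 2, 3, 4]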
146
146
147 # don't clone filelogs to shallow clients
147 # don't clone filelogs to shallow clients
148 def _walkstreamfiles(orig, repo, matcher=None):
148 def _walkstreamfiles(orig, repo, matcher=None):
149 if state.shallowremote:
149 if state.shallowremote:
150 # if we are shallow ourselves, stream our local commits
150 # if we are shallow ourselves, stream our local commits
151 if shallowutil.isenabled(repo):
151 if shallowutil.isenabled(repo):
152 striplen = len(repo.store.path) + 1
152 striplen = len(repo.store.path) + 1
153 readdir = repo.store.rawvfs.readdir
153 readdir = repo.store.rawvfs.readdir
154 visit = [os.path.join(repo.store.path, b'data')]
154 visit = [os.path.join(repo.store.path, b'data')]
155 while visit:
155 while visit:
156 p = visit.pop()
156 p = visit.pop()
157 for f, kind, st in readdir(p, stat=True):
157 for f, kind, st in readdir(p, stat=True):
158 fp = p + b'/' + f
158 fp = p + b'/' + f
159 if kind == stat.S_IFREG:
159 if kind == stat.S_IFREG:
160 if not fp.endswith(b'.i') and not fp.endswith(
160 if not fp.endswith(b'.i') and not fp.endswith(
161 b'.d'
161 b'.d'
162 ):
162 ):
163 n = util.pconvert(fp[striplen:])
163 n = util.pconvert(fp[striplen:])
164 d = store.decodedir(n)
164 d = store.decodedir(n)
165 t = store.FILETYPE_OTHER
165 yield store.StoreEntry(
166 yield (t, d, st.st_size)
166 unencoded_path=d,
167 is_revlog=True,
168 revlog_type=None,
169 is_revlog_main=False,
170 is_volatile=False,
171 file_size=st.st_size,
172 )
173
167 if kind == stat.S_IFDIR:
174 if kind == stat.S_IFDIR:
168 visit.append(fp)
175 visit.append(fp)
169
176
170 if scmutil.istreemanifest(repo):
177 if scmutil.istreemanifest(repo):
171 for (t, u, s) in repo.store.datafiles():
178 for entry in repo.store.datafiles():
179 u = entry.unencoded_path
172 if u.startswith(b'meta/') and (
180 if u.startswith(b'meta/') and (
173 u.endswith(b'.i') or u.endswith(b'.d')
181 u.endswith(b'.i') or u.endswith(b'.d')
174 ):
182 ):
175 yield (t, u, s)
183 yield entry
176
184
177 # Return .d and .i files that do not match the shallow pattern
185 # Return .d and .i files that do not match the shallow pattern
178 match = state.match
186 match = state.match
179 if match and not match.always():
187 if match and not match.always():
180 for (t, u, s) in repo.store.datafiles():
188 for entry in repo.store.datafiles():
189 u = entry.unencoded_path
181 f = u[5:-2] # trim data/... and .i/.d
190 f = u[5:-2] # trim data/... and .i/.d
182 if not state.match(f):
191 if not state.match(f):
183 yield (t, u, s)
192 yield entry
184
193
185 for x in repo.store.topfiles():
194 for x in repo.store.topfiles():
186 if state.noflatmf and x[1][:11] == b'00manifest.':
195 if state.noflatmf and x[1][:11] == b'00manifest.':
187 continue
196 continue
188 yield x
197 yield x
189
198
190 elif shallowutil.isenabled(repo):
199 elif shallowutil.isenabled(repo):
191 # don't allow cloning from a shallow repo to a full repo
200 # don't allow cloning from a shallow repo to a full repo
192 # since it would require fetching every version of every
201 # since it would require fetching every version of every
193 # file in order to create the revlogs.
202 # file in order to create the revlogs.
194 raise error.Abort(
203 raise error.Abort(
195 _(b"Cannot clone from a shallow repo to a full repo.")
204 _(b"Cannot clone from a shallow repo to a full repo.")
196 )
205 )
197 else:
206 else:
198 for x in orig(repo, matcher):
207 for x in orig(repo, matcher):
199 yield x
208 yield x
200
209
201 extensions.wrapfunction(streamclone, b'_walkstreamfiles', _walkstreamfiles)
210 extensions.wrapfunction(streamclone, b'_walkstreamfiles', _walkstreamfiles)
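The hunks above are the heart of this changeset for remotefilelog: _walkstreamfiles used to yield (filetype, unencoded_path, size) tuples and now yields store.StoreEntry objects, so the wrapper builds entries itself and the datafiles() loops read entry.unencoded_path instead of unpacking positionally. The stand-in dataclass below only mirrors the attribute names used in the diff; it is not Mercurial's actual class:

from dataclasses import dataclass
from typing import Optional

@dataclass
class FakeStoreEntry:
    # attribute names mirror those passed to store.StoreEntry in the hunk above
    unencoded_path: bytes
    is_revlog: bool = False
    revlog_type: Optional[int] = None
    is_revlog_main: bool = False
    is_volatile: bool = False
    file_size: Optional[int] = None

# before: for (t, u, s) in repo.store.datafiles(): ...
# after:  for entry in repo.store.datafiles(): u = entry.unencoded_path
entry = FakeStoreEntry(unencoded_path=b'data/foo.c.i', is_revlog=True, file_size=42)
assert entry.unencoded_path.endswith(b'.i') and entry.file_size == 42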
202
211
203 # expose remotefilelog capabilities
212 # expose remotefilelog capabilities
204 def _capabilities(orig, repo, proto):
213 def _capabilities(orig, repo, proto):
205 caps = orig(repo, proto)
214 caps = orig(repo, proto)
206 if shallowutil.isenabled(repo) or ui.configbool(
215 if shallowutil.isenabled(repo) or ui.configbool(
207 b'remotefilelog', b'server'
216 b'remotefilelog', b'server'
208 ):
217 ):
209 if isinstance(proto, _sshv1server):
218 if isinstance(proto, _sshv1server):
210 # legacy getfiles method which only works over ssh
219 # legacy getfiles method which only works over ssh
211 caps.append(constants.NETWORK_CAP_LEGACY_SSH_GETFILES)
220 caps.append(constants.NETWORK_CAP_LEGACY_SSH_GETFILES)
212 caps.append(b'x_rfl_getflogheads')
221 caps.append(b'x_rfl_getflogheads')
213 caps.append(b'x_rfl_getfile')
222 caps.append(b'x_rfl_getfile')
214 return caps
223 return caps
215
224
216 extensions.wrapfunction(wireprotov1server, b'_capabilities', _capabilities)
225 extensions.wrapfunction(wireprotov1server, b'_capabilities', _capabilities)
217
226
218 def _adjustlinkrev(orig, self, *args, **kwargs):
227 def _adjustlinkrev(orig, self, *args, **kwargs):
219 # When generating file blobs, taking the real path is too slow on large
228 # When generating file blobs, taking the real path is too slow on large
220 # repos, so force it to just return the linkrev directly.
229 # repos, so force it to just return the linkrev directly.
221 repo = self._repo
230 repo = self._repo
222 if util.safehasattr(repo, b'forcelinkrev') and repo.forcelinkrev:
231 if util.safehasattr(repo, b'forcelinkrev') and repo.forcelinkrev:
223 return self._filelog.linkrev(self._filelog.rev(self._filenode))
232 return self._filelog.linkrev(self._filelog.rev(self._filenode))
224 return orig(self, *args, **kwargs)
233 return orig(self, *args, **kwargs)
225
234
226 extensions.wrapfunction(
235 extensions.wrapfunction(
227 context.basefilectx, b'_adjustlinkrev', _adjustlinkrev
236 context.basefilectx, b'_adjustlinkrev', _adjustlinkrev
228 )
237 )
229
238
230 def _iscmd(orig, cmd):
239 def _iscmd(orig, cmd):
231 if cmd == b'x_rfl_getfiles':
240 if cmd == b'x_rfl_getfiles':
232 return False
241 return False
233 return orig(cmd)
242 return orig(cmd)
234
243
235 extensions.wrapfunction(wireprotoserver, b'iscmd', _iscmd)
244 extensions.wrapfunction(wireprotoserver, b'iscmd', _iscmd)
236
245
237
246
238 def _loadfileblob(repo, cachepath, path, node):
247 def _loadfileblob(repo, cachepath, path, node):
239 filecachepath = os.path.join(cachepath, path, hex(node))
248 filecachepath = os.path.join(cachepath, path, hex(node))
240 if not os.path.exists(filecachepath) or os.path.getsize(filecachepath) == 0:
249 if not os.path.exists(filecachepath) or os.path.getsize(filecachepath) == 0:
241 filectx = repo.filectx(path, fileid=node)
250 filectx = repo.filectx(path, fileid=node)
242 if filectx.node() == repo.nullid:
251 if filectx.node() == repo.nullid:
243 repo.changelog = changelog.changelog(repo.svfs)
252 repo.changelog = changelog.changelog(repo.svfs)
244 filectx = repo.filectx(path, fileid=node)
253 filectx = repo.filectx(path, fileid=node)
245
254
246 text = createfileblob(filectx)
255 text = createfileblob(filectx)
247 # TODO configurable compression engines
256 # TODO configurable compression engines
248 text = zlib.compress(text)
257 text = zlib.compress(text)
249
258
250 # everything should be user & group read/writable
259 # everything should be user & group read/writable
251 oldumask = os.umask(0o002)
260 oldumask = os.umask(0o002)
252 try:
261 try:
253 dirname = os.path.dirname(filecachepath)
262 dirname = os.path.dirname(filecachepath)
254 if not os.path.exists(dirname):
263 if not os.path.exists(dirname):
255 try:
264 try:
256 os.makedirs(dirname)
265 os.makedirs(dirname)
257 except FileExistsError:
266 except FileExistsError:
258 pass
267 pass
259
268
260 f = None
269 f = None
261 try:
270 try:
262 f = util.atomictempfile(filecachepath, b"wb")
271 f = util.atomictempfile(filecachepath, b"wb")
263 f.write(text)
272 f.write(text)
264 except (IOError, OSError):
273 except (IOError, OSError):
265 # Don't abort if the user only has permission to read,
274 # Don't abort if the user only has permission to read,
266 # and not write.
275 # and not write.
267 pass
276 pass
268 finally:
277 finally:
269 if f:
278 if f:
270 f.close()
279 f.close()
271 finally:
280 finally:
272 os.umask(oldumask)
281 os.umask(oldumask)
273 else:
282 else:
274 with open(filecachepath, b"rb") as f:
283 with open(filecachepath, b"rb") as f:
275 text = f.read()
284 text = f.read()
276 return text
285 return text
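_loadfileblob keys its on-disk cache by file path plus hex node under a configurable root; a trivial sketch of that layout (cache_location is a hypothetical helper):

import os

def cache_location(cachepath, path, hexnode):
    # e.g. <cachepath>/foo/bar.c/<40-hex-node>
    return os.path.join(cachepath, path, hexnode)

loc = cache_location(b'/srv/hgcache', b'foo/bar.c', b'ab' * 20)
assert loc.endswith(b'ab' * 20)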
277
286
278
287
279 def getflogheads(repo, proto, path):
288 def getflogheads(repo, proto, path):
280 """A server api for requesting a filelog's heads"""
289 """A server api for requesting a filelog's heads"""
281 flog = repo.file(path)
290 flog = repo.file(path)
282 heads = flog.heads()
291 heads = flog.heads()
283 return b'\n'.join((hex(head) for head in heads if head != repo.nullid))
292 return b'\n'.join((hex(head) for head in heads if head != repo.nullid))
284
293
285
294
286 def getfile(repo, proto, file, node):
295 def getfile(repo, proto, file, node):
287 """A server api for requesting a particular version of a file. Can be used
296 """A server api for requesting a particular version of a file. Can be used
288 in batches to request many files at once. The return protocol is:
297 in batches to request many files at once. The return protocol is:
289 <errorcode>\0<data/errormsg> where <errorcode> is 0 for success or
298 <errorcode>\0<data/errormsg> where <errorcode> is 0 for success or
290 non-zero for an error.
299 non-zero for an error.
291
300
292 data is a compressed blob with revlog flag and ancestors information. See
301 data is a compressed blob with revlog flag and ancestors information. See
293 createfileblob for its content.
302 createfileblob for its content.
294 """
303 """
295 if shallowutil.isenabled(repo):
304 if shallowutil.isenabled(repo):
296 return b'1\0' + _(b'cannot fetch remote files from shallow repo')
305 return b'1\0' + _(b'cannot fetch remote files from shallow repo')
297 cachepath = repo.ui.config(b"remotefilelog", b"servercachepath")
306 cachepath = repo.ui.config(b"remotefilelog", b"servercachepath")
298 if not cachepath:
307 if not cachepath:
299 cachepath = os.path.join(repo.path, b"remotefilelogcache")
308 cachepath = os.path.join(repo.path, b"remotefilelogcache")
300 node = bin(node.strip())
309 node = bin(node.strip())
301 if node == repo.nullid:
310 if node == repo.nullid:
302 return b'0\0'
311 return b'0\0'
303 return b'0\0' + _loadfileblob(repo, cachepath, file, node)
312 return b'0\0' + _loadfileblob(repo, cachepath, file, node)
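The getfile wire format documented in the docstring is "<errorcode>\0<data/errormsg>". A client-side sketch of splitting such a reply (hypothetical helper, not part of remotefilelog):

def parse_getfile_response(raw):
    code, payload = raw.split(b'\0', 1)
    if code != b'0':
        raise RuntimeError(payload.decode('utf-8', 'replace'))
    return payload

assert parse_getfile_response(b'0\x00blob-bytes') == b'blob-bytes'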
304
313
305
314
306 def getfiles(repo, proto):
315 def getfiles(repo, proto):
307 """A server api for requesting particular versions of particular files."""
316 """A server api for requesting particular versions of particular files."""
308 if shallowutil.isenabled(repo):
317 if shallowutil.isenabled(repo):
309 raise error.Abort(_(b'cannot fetch remote files from shallow repo'))
318 raise error.Abort(_(b'cannot fetch remote files from shallow repo'))
310 if not isinstance(proto, _sshv1server):
319 if not isinstance(proto, _sshv1server):
311 raise error.Abort(_(b'cannot fetch remote files over non-ssh protocol'))
320 raise error.Abort(_(b'cannot fetch remote files over non-ssh protocol'))
312
321
313 def streamer():
322 def streamer():
314 fin = proto._fin
323 fin = proto._fin
315
324
316 cachepath = repo.ui.config(b"remotefilelog", b"servercachepath")
325 cachepath = repo.ui.config(b"remotefilelog", b"servercachepath")
317 if not cachepath:
326 if not cachepath:
318 cachepath = os.path.join(repo.path, b"remotefilelogcache")
327 cachepath = os.path.join(repo.path, b"remotefilelogcache")
319
328
320 while True:
329 while True:
321 request = fin.readline()[:-1]
330 request = fin.readline()[:-1]
322 if not request:
331 if not request:
323 break
332 break
324
333
325 node = bin(request[:40])
334 node = bin(request[:40])
326 if node == repo.nullid:
335 if node == repo.nullid:
327 yield b'0\n'
336 yield b'0\n'
328 continue
337 continue
329
338
330 path = request[40:]
339 path = request[40:]
331
340
332 text = _loadfileblob(repo, cachepath, path, node)
341 text = _loadfileblob(repo, cachepath, path, node)
333
342
334 yield b'%d\n%s' % (len(text), text)
343 yield b'%d\n%s' % (len(text), text)
335
344
336 # it would be better to only flush after processing a whole batch
345 # it would be better to only flush after processing a whole batch
337 # but currently we don't know if there are more requests coming
346 # but currently we don't know if there are more requests coming
338 proto._fout.flush()
347 proto._fout.flush()
339
348
340 return wireprototypes.streamres(streamer())
349 return wireprototypes.streamres(streamer())
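The x_rfl_getfiles framing used by the streamer above: each request line is a 40-character hex node immediately followed by the path, and each reply is the blob length, a newline, then exactly that many bytes (a bare "0\n" answers a null node). A sketch of that framing with hypothetical helper names:

import io

def build_request(hexnode, path):
    # one request per line: 40 hex characters immediately followed by the path
    assert len(hexnode) == 40
    return hexnode + path + b'\n'

def read_reply(stream):
    # each reply is "<length>\n" followed by exactly that many payload bytes
    length = int(stream.readline())
    return stream.read(length)

assert build_request(b'a' * 40, b'foo/bar.c') == b'a' * 40 + b'foo/bar.c\n'
assert read_reply(io.BytesIO(b'5\nhello')) == b'hello'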
341
350
342
351
343 def createfileblob(filectx):
352 def createfileblob(filectx):
344 """
353 """
345 format:
354 format:
346 v0:
355 v0:
347 str(len(rawtext)) + '\0' + rawtext + ancestortext
356 str(len(rawtext)) + '\0' + rawtext + ancestortext
348 v1:
357 v1:
349 'v1' + '\n' + metalist + '\0' + rawtext + ancestortext
358 'v1' + '\n' + metalist + '\0' + rawtext + ancestortext
350 metalist := metalist + '\n' + meta | meta
359 metalist := metalist + '\n' + meta | meta
351 meta := sizemeta | flagmeta
360 meta := sizemeta | flagmeta
352 sizemeta := METAKEYSIZE + str(len(rawtext))
361 sizemeta := METAKEYSIZE + str(len(rawtext))
353 flagmeta := METAKEYFLAG + str(flag)
362 flagmeta := METAKEYFLAG + str(flag)
354
363
355 note: sizemeta must exist. METAKEYFLAG and METAKEYSIZE must have a
364 note: sizemeta must exist. METAKEYFLAG and METAKEYSIZE must have a
356 length of 1.
365 length of 1.
357 """
366 """
358 flog = filectx.filelog()
367 flog = filectx.filelog()
359 frev = filectx.filerev()
368 frev = filectx.filerev()
360 revlogflags = flog._revlog.flags(frev)
369 revlogflags = flog._revlog.flags(frev)
361 if revlogflags == 0:
370 if revlogflags == 0:
362 # normal files
371 # normal files
363 text = filectx.data()
372 text = filectx.data()
364 else:
373 else:
365 # lfs, read raw revision data
374 # lfs, read raw revision data
366 text = flog.rawdata(frev)
375 text = flog.rawdata(frev)
367
376
368 repo = filectx._repo
377 repo = filectx._repo
369
378
370 ancestors = [filectx]
379 ancestors = [filectx]
371
380
372 try:
381 try:
373 repo.forcelinkrev = True
382 repo.forcelinkrev = True
374 ancestors.extend([f for f in filectx.ancestors()])
383 ancestors.extend([f for f in filectx.ancestors()])
375
384
376 ancestortext = b""
385 ancestortext = b""
377 for ancestorctx in ancestors:
386 for ancestorctx in ancestors:
378 parents = ancestorctx.parents()
387 parents = ancestorctx.parents()
379 p1 = repo.nullid
388 p1 = repo.nullid
380 p2 = repo.nullid
389 p2 = repo.nullid
381 if len(parents) > 0:
390 if len(parents) > 0:
382 p1 = parents[0].filenode()
391 p1 = parents[0].filenode()
383 if len(parents) > 1:
392 if len(parents) > 1:
384 p2 = parents[1].filenode()
393 p2 = parents[1].filenode()
385
394
386 copyname = b""
395 copyname = b""
387 rename = ancestorctx.renamed()
396 rename = ancestorctx.renamed()
388 if rename:
397 if rename:
389 copyname = rename[0]
398 copyname = rename[0]
390 linknode = ancestorctx.node()
399 linknode = ancestorctx.node()
391 ancestortext += b"%s%s%s%s%s\0" % (
400 ancestortext += b"%s%s%s%s%s\0" % (
392 ancestorctx.filenode(),
401 ancestorctx.filenode(),
393 p1,
402 p1,
394 p2,
403 p2,
395 linknode,
404 linknode,
396 copyname,
405 copyname,
397 )
406 )
398 finally:
407 finally:
399 repo.forcelinkrev = False
408 repo.forcelinkrev = False
400
409
401 header = shallowutil.buildfileblobheader(len(text), revlogflags)
410 header = shallowutil.buildfileblobheader(len(text), revlogflags)
402
411
403 return b"%s\0%s%s" % (header, text, ancestortext)
412 return b"%s\0%s%s" % (header, text, ancestortext)
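A sketch of splitting the blob layout that createfileblob documents: a small metadata header, a NUL, the raw text, then the ancestor records. The one-byte 's'/'f' meta keys shown here are illustrative placeholders for METAKEYSIZE/METAKEYFLAG, not quoted from the source:

def split_fileblob(blob):
    # everything up to the first NUL is the metadata header
    header, rest = blob.split(b'\0', 1)
    return header, rest

rawtext = b'print("hi")\n'
header = b'v1\ns%d\nf0' % len(rawtext)   # size meta and flag meta, one key byte each
blob = header + b'\0' + rawtext + b'<ancestortext>'
assert split_fileblob(blob) == (header, rawtext + b'<ancestortext>')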
404
413
405
414
406 def gcserver(ui, repo):
415 def gcserver(ui, repo):
407 if not repo.ui.configbool(b"remotefilelog", b"server"):
416 if not repo.ui.configbool(b"remotefilelog", b"server"):
408 return
417 return
409
418
410 neededfiles = set()
419 neededfiles = set()
411 heads = repo.revs(b"heads(tip~25000:) - null")
420 heads = repo.revs(b"heads(tip~25000:) - null")
412
421
413 cachepath = repo.vfs.join(b"remotefilelogcache")
422 cachepath = repo.vfs.join(b"remotefilelogcache")
414 for head in heads:
423 for head in heads:
415 mf = repo[head].manifest()
424 mf = repo[head].manifest()
416 for filename, filenode in mf.items():
425 for filename, filenode in mf.items():
417 filecachepath = os.path.join(cachepath, filename, hex(filenode))
426 filecachepath = os.path.join(cachepath, filename, hex(filenode))
418 neededfiles.add(filecachepath)
427 neededfiles.add(filecachepath)
419
428
420 # delete unneeded older files
429 # delete unneeded older files
421 days = repo.ui.configint(b"remotefilelog", b"serverexpiration")
430 days = repo.ui.configint(b"remotefilelog", b"serverexpiration")
422 expiration = time.time() - (days * 24 * 60 * 60)
431 expiration = time.time() - (days * 24 * 60 * 60)
423
432
424 progress = ui.makeprogress(_(b"removing old server cache"), unit=b"files")
433 progress = ui.makeprogress(_(b"removing old server cache"), unit=b"files")
425 progress.update(0)
434 progress.update(0)
426 for root, dirs, files in os.walk(cachepath):
435 for root, dirs, files in os.walk(cachepath):
427 for file in files:
436 for file in files:
428 filepath = os.path.join(root, file)
437 filepath = os.path.join(root, file)
429 progress.increment()
438 progress.increment()
430 if filepath in neededfiles:
439 if filepath in neededfiles:
431 continue
440 continue
432
441
433 stat = os.stat(filepath)
442 stat = os.stat(filepath)
434 if stat.st_mtime < expiration:
443 if stat.st_mtime < expiration:
435 os.remove(filepath)
444 os.remove(filepath)
436
445
437 progress.complete()
446 progress.complete()
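gcserver keeps cache files whose mtime is newer than the configured number of days; a one-line sketch of that cutoff:

import time

def is_expired(st_mtime, days):
    # anything last touched before (now - days) is eligible for removal
    return st_mtime < time.time() - days * 24 * 60 * 60

assert is_expired(0, 30)              # an epoch mtime is long past any cutoff
assert not is_expired(time.time(), 30)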
@@ -1,574 +1,576 b''
1 # repair.py - functions for repository repair for mercurial
1 # repair.py - functions for repository repair for mercurial
2 #
2 #
3 # Copyright 2005, 2006 Chris Mason <mason@suse.com>
3 # Copyright 2005, 2006 Chris Mason <mason@suse.com>
4 # Copyright 2007 Olivia Mackall
4 # Copyright 2007 Olivia Mackall
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9
9
10 from .i18n import _
10 from .i18n import _
11 from .node import (
11 from .node import (
12 hex,
12 hex,
13 short,
13 short,
14 )
14 )
15 from . import (
15 from . import (
16 bundle2,
16 bundle2,
17 changegroup,
17 changegroup,
18 discovery,
18 discovery,
19 error,
19 error,
20 exchange,
20 exchange,
21 obsolete,
21 obsolete,
22 obsutil,
22 obsutil,
23 pathutil,
23 pathutil,
24 phases,
24 phases,
25 requirements,
25 requirements,
26 scmutil,
26 scmutil,
27 transaction,
27 transaction,
28 util,
28 util,
29 )
29 )
30 from .utils import (
30 from .utils import (
31 hashutil,
31 hashutil,
32 urlutil,
32 urlutil,
33 )
33 )
34
34
35
35
36 def backupbundle(
36 def backupbundle(
37 repo,
37 repo,
38 bases,
38 bases,
39 heads,
39 heads,
40 node,
40 node,
41 suffix,
41 suffix,
42 compress=True,
42 compress=True,
43 obsolescence=True,
43 obsolescence=True,
44 tmp_backup=False,
44 tmp_backup=False,
45 ):
45 ):
46 """create a bundle with the specified revisions as a backup"""
46 """create a bundle with the specified revisions as a backup"""
47
47
48 backupdir = b"strip-backup"
48 backupdir = b"strip-backup"
49 vfs = repo.vfs
49 vfs = repo.vfs
50 if not vfs.isdir(backupdir):
50 if not vfs.isdir(backupdir):
51 vfs.mkdir(backupdir)
51 vfs.mkdir(backupdir)
52
52
53 # Include a hash of all the nodes in the filename for uniqueness
53 # Include a hash of all the nodes in the filename for uniqueness
54 allcommits = repo.set(b'%ln::%ln', bases, heads)
54 allcommits = repo.set(b'%ln::%ln', bases, heads)
55 allhashes = sorted(c.hex() for c in allcommits)
55 allhashes = sorted(c.hex() for c in allcommits)
56 totalhash = hashutil.sha1(b''.join(allhashes)).digest()
56 totalhash = hashutil.sha1(b''.join(allhashes)).digest()
57 name = b"%s/%s-%s-%s.hg" % (
57 name = b"%s/%s-%s-%s.hg" % (
58 backupdir,
58 backupdir,
59 short(node),
59 short(node),
60 hex(totalhash[:4]),
60 hex(totalhash[:4]),
61 suffix,
61 suffix,
62 )
62 )
63
63
64 cgversion = changegroup.localversion(repo)
64 cgversion = changegroup.localversion(repo)
65 comp = None
65 comp = None
66 if cgversion != b'01':
66 if cgversion != b'01':
67 bundletype = b"HG20"
67 bundletype = b"HG20"
68 if compress:
68 if compress:
69 comp = b'BZ'
69 comp = b'BZ'
70 elif compress:
70 elif compress:
71 bundletype = b"HG10BZ"
71 bundletype = b"HG10BZ"
72 else:
72 else:
73 bundletype = b"HG10UN"
73 bundletype = b"HG10UN"
74
74
75 outgoing = discovery.outgoing(repo, missingroots=bases, ancestorsof=heads)
75 outgoing = discovery.outgoing(repo, missingroots=bases, ancestorsof=heads)
76 contentopts = {
76 contentopts = {
77 b'cg.version': cgversion,
77 b'cg.version': cgversion,
78 b'obsolescence': obsolescence,
78 b'obsolescence': obsolescence,
79 b'phases': True,
79 b'phases': True,
80 }
80 }
81 return bundle2.writenewbundle(
81 return bundle2.writenewbundle(
82 repo.ui,
82 repo.ui,
83 repo,
83 repo,
84 b'strip',
84 b'strip',
85 name,
85 name,
86 bundletype,
86 bundletype,
87 outgoing,
87 outgoing,
88 contentopts,
88 contentopts,
89 vfs,
89 vfs,
90 compression=comp,
90 compression=comp,
91 allow_internal=tmp_backup,
91 allow_internal=tmp_backup,
92 )
92 )
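A sketch of the backup bundle naming scheme used above: the file name embeds the short form of the reference node plus 4 bytes of a hash over every commit hash in the backed-up set, so different strips get distinct names. hashutil.sha1 is Mercurial's wrapper; the sketch uses hashlib directly and a hypothetical helper name:

import hashlib

def backup_name(shortnode, allhashes, suffix=b'backup'):
    totalhash = hashlib.sha1(b''.join(sorted(allhashes))).digest()
    return b"strip-backup/%s-%s-%s.hg" % (
        shortnode, totalhash[:4].hex().encode(), suffix)

name = backup_name(b'deadbeef1234', [b'a' * 40, b'b' * 40])
assert name.startswith(b'strip-backup/deadbeef1234-') and name.endswith(b'-backup.hg')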
93
93
94
94
95 def _collectfiles(repo, striprev):
95 def _collectfiles(repo, striprev):
96 """find out the filelogs affected by the strip"""
96 """find out the filelogs affected by the strip"""
97 files = set()
97 files = set()
98
98
99 for x in range(striprev, len(repo)):
99 for x in range(striprev, len(repo)):
100 files.update(repo[x].files())
100 files.update(repo[x].files())
101
101
102 return sorted(files)
102 return sorted(files)
103
103
104
104
105 def _collectrevlog(revlog, striprev):
105 def _collectrevlog(revlog, striprev):
106 _, brokenset = revlog.getstrippoint(striprev)
106 _, brokenset = revlog.getstrippoint(striprev)
107 return [revlog.linkrev(r) for r in brokenset]
107 return [revlog.linkrev(r) for r in brokenset]
108
108
109
109
110 def _collectbrokencsets(repo, files, striprev):
110 def _collectbrokencsets(repo, files, striprev):
111 """return the changesets which will be broken by the truncation"""
111 """return the changesets which will be broken by the truncation"""
112 s = set()
112 s = set()
113
113
114 for revlog in manifestrevlogs(repo):
114 for revlog in manifestrevlogs(repo):
115 s.update(_collectrevlog(revlog, striprev))
115 s.update(_collectrevlog(revlog, striprev))
116 for fname in files:
116 for fname in files:
117 s.update(_collectrevlog(repo.file(fname), striprev))
117 s.update(_collectrevlog(repo.file(fname), striprev))
118
118
119 return s
119 return s
120
120
121
121
122 def strip(ui, repo, nodelist, backup=True, topic=b'backup'):
122 def strip(ui, repo, nodelist, backup=True, topic=b'backup'):
123 # This function requires the caller to lock the repo, but it operates
123 # This function requires the caller to lock the repo, but it operates
124 # within a transaction of its own, and thus requires there to be no current
124 # within a transaction of its own, and thus requires there to be no current
125 # transaction when it is called.
125 # transaction when it is called.
126 if repo.currenttransaction() is not None:
126 if repo.currenttransaction() is not None:
127 raise error.ProgrammingError(b'cannot strip from inside a transaction')
127 raise error.ProgrammingError(b'cannot strip from inside a transaction')
128
128
129 # Simple way to maintain backwards compatibility for this
129 # Simple way to maintain backwards compatibility for this
130 # argument.
130 # argument.
131 if backup in [b'none', b'strip']:
131 if backup in [b'none', b'strip']:
132 backup = False
132 backup = False
133
133
134 repo = repo.unfiltered()
134 repo = repo.unfiltered()
135 repo.destroying()
135 repo.destroying()
136 vfs = repo.vfs
136 vfs = repo.vfs
137 # load bookmark before changelog to avoid side effect from outdated
137 # load bookmark before changelog to avoid side effect from outdated
138 # changelog (see repo._refreshchangelog)
138 # changelog (see repo._refreshchangelog)
139 repo._bookmarks
139 repo._bookmarks
140 cl = repo.changelog
140 cl = repo.changelog
141
141
142 # TODO handle undo of merge sets
142 # TODO handle undo of merge sets
143 if isinstance(nodelist, bytes):
143 if isinstance(nodelist, bytes):
144 nodelist = [nodelist]
144 nodelist = [nodelist]
145 striplist = [cl.rev(node) for node in nodelist]
145 striplist = [cl.rev(node) for node in nodelist]
146 striprev = min(striplist)
146 striprev = min(striplist)
147
147
148 files = _collectfiles(repo, striprev)
148 files = _collectfiles(repo, striprev)
149 saverevs = _collectbrokencsets(repo, files, striprev)
149 saverevs = _collectbrokencsets(repo, files, striprev)
150
150
151 # Some revisions with rev > striprev may not be descendants of striprev.
151 # Some revisions with rev > striprev may not be descendants of striprev.
152 # We have to find these revisions and put them in a bundle, so that
152 # We have to find these revisions and put them in a bundle, so that
153 # we can restore them after the truncations.
153 # we can restore them after the truncations.
154 # To create the bundle we use repo.changegroupsubset which requires
154 # To create the bundle we use repo.changegroupsubset which requires
155 # the list of heads and bases of the set of interesting revisions.
155 # the list of heads and bases of the set of interesting revisions.
156 # (head = revision in the set that has no descendant in the set;
156 # (head = revision in the set that has no descendant in the set;
157 # base = revision in the set that has no ancestor in the set)
157 # base = revision in the set that has no ancestor in the set)
158 tostrip = set(striplist)
158 tostrip = set(striplist)
159 saveheads = set(saverevs)
159 saveheads = set(saverevs)
160 for r in cl.revs(start=striprev + 1):
160 for r in cl.revs(start=striprev + 1):
161 if any(p in tostrip for p in cl.parentrevs(r)):
161 if any(p in tostrip for p in cl.parentrevs(r)):
162 tostrip.add(r)
162 tostrip.add(r)
163
163
164 if r not in tostrip:
164 if r not in tostrip:
165 saverevs.add(r)
165 saverevs.add(r)
166 saveheads.difference_update(cl.parentrevs(r))
166 saveheads.difference_update(cl.parentrevs(r))
167 saveheads.add(r)
167 saveheads.add(r)
168 saveheads = [cl.node(r) for r in saveheads]
168 saveheads = [cl.node(r) for r in saveheads]
169
169
170 # compute base nodes
170 # compute base nodes
171 if saverevs:
171 if saverevs:
172 descendants = set(cl.descendants(saverevs))
172 descendants = set(cl.descendants(saverevs))
173 saverevs.difference_update(descendants)
173 saverevs.difference_update(descendants)
174 savebases = [cl.node(r) for r in saverevs]
174 savebases = [cl.node(r) for r in saverevs]
175 stripbases = [cl.node(r) for r in tostrip]
175 stripbases = [cl.node(r) for r in tostrip]
176
176
177 stripobsidx = obsmarkers = ()
177 stripobsidx = obsmarkers = ()
178 if repo.ui.configbool(b'devel', b'strip-obsmarkers'):
178 if repo.ui.configbool(b'devel', b'strip-obsmarkers'):
179 obsmarkers = obsutil.exclusivemarkers(repo, stripbases)
179 obsmarkers = obsutil.exclusivemarkers(repo, stripbases)
180 if obsmarkers:
180 if obsmarkers:
181 stripobsidx = [
181 stripobsidx = [
182 i for i, m in enumerate(repo.obsstore) if m in obsmarkers
182 i for i, m in enumerate(repo.obsstore) if m in obsmarkers
183 ]
183 ]
184
184
185 newbmtarget, updatebm = _bookmarkmovements(repo, tostrip)
185 newbmtarget, updatebm = _bookmarkmovements(repo, tostrip)
186
186
187 backupfile = None
187 backupfile = None
188 node = nodelist[-1]
188 node = nodelist[-1]
189 if backup:
189 if backup:
190 backupfile = _createstripbackup(repo, stripbases, node, topic)
190 backupfile = _createstripbackup(repo, stripbases, node, topic)
191 # create a changegroup for all the branches we need to keep
191 # create a changegroup for all the branches we need to keep
192 tmpbundlefile = None
192 tmpbundlefile = None
193 if saveheads:
193 if saveheads:
194 # do not compress temporary bundle if we remove it from disk later
194 # do not compress temporary bundle if we remove it from disk later
195 #
195 #
196 # We do not include obsolescence, it might re-introduce prune markers
196 # We do not include obsolescence, it might re-introduce prune markers
197 # we are trying to strip. This is harmless since the stripped markers
197 # we are trying to strip. This is harmless since the stripped markers
198 # are already backed up and we did not touch the markers for the
198 # are already backed up and we did not touch the markers for the
199 # saved changesets.
199 # saved changesets.
200 tmpbundlefile = backupbundle(
200 tmpbundlefile = backupbundle(
201 repo,
201 repo,
202 savebases,
202 savebases,
203 saveheads,
203 saveheads,
204 node,
204 node,
205 b'temp',
205 b'temp',
206 compress=False,
206 compress=False,
207 obsolescence=False,
207 obsolescence=False,
208 tmp_backup=True,
208 tmp_backup=True,
209 )
209 )
210
210
211 with ui.uninterruptible():
211 with ui.uninterruptible():
212 try:
212 try:
213 with repo.transaction(b"strip") as tr:
213 with repo.transaction(b"strip") as tr:
214 # TODO this code violates the interface abstraction of the
214 # TODO this code violates the interface abstraction of the
215 # transaction and makes assumptions that file storage is
215 # transaction and makes assumptions that file storage is
216 # using append-only files. We'll need some kind of storage
216 # using append-only files. We'll need some kind of storage
217 # API to handle stripping for us.
217 # API to handle stripping for us.
218 oldfiles = set(tr._offsetmap.keys())
218 oldfiles = set(tr._offsetmap.keys())
219 oldfiles.update(tr._newfiles)
219 oldfiles.update(tr._newfiles)
220
220
221 tr.startgroup()
221 tr.startgroup()
222 cl.strip(striprev, tr)
222 cl.strip(striprev, tr)
223 stripmanifest(repo, striprev, tr, files)
223 stripmanifest(repo, striprev, tr, files)
224
224
225 for fn in files:
225 for fn in files:
226 repo.file(fn).strip(striprev, tr)
226 repo.file(fn).strip(striprev, tr)
227 tr.endgroup()
227 tr.endgroup()
228
228
229 entries = tr.readjournal()
229 entries = tr.readjournal()
230
230
231 for file, troffset in entries:
231 for file, troffset in entries:
232 if file in oldfiles:
232 if file in oldfiles:
233 continue
233 continue
234 with repo.svfs(file, b'a', checkambig=True) as fp:
234 with repo.svfs(file, b'a', checkambig=True) as fp:
235 fp.truncate(troffset)
235 fp.truncate(troffset)
236 if troffset == 0:
236 if troffset == 0:
237 repo.store.markremoved(file)
237 repo.store.markremoved(file)
238
238
239 deleteobsmarkers(repo.obsstore, stripobsidx)
239 deleteobsmarkers(repo.obsstore, stripobsidx)
240 del repo.obsstore
240 del repo.obsstore
241 repo.invalidatevolatilesets()
241 repo.invalidatevolatilesets()
242 repo._phasecache.filterunknown(repo)
242 repo._phasecache.filterunknown(repo)
243
243
244 if tmpbundlefile:
244 if tmpbundlefile:
245 ui.note(_(b"adding branch\n"))
245 ui.note(_(b"adding branch\n"))
246 f = vfs.open(tmpbundlefile, b"rb")
246 f = vfs.open(tmpbundlefile, b"rb")
247 gen = exchange.readbundle(ui, f, tmpbundlefile, vfs)
247 gen = exchange.readbundle(ui, f, tmpbundlefile, vfs)
248 # silence internal shuffling chatter
248 # silence internal shuffling chatter
249 maybe_silent = (
249 maybe_silent = (
250 repo.ui.silent()
250 repo.ui.silent()
251 if not repo.ui.verbose
251 if not repo.ui.verbose
252 else util.nullcontextmanager()
252 else util.nullcontextmanager()
253 )
253 )
254 with maybe_silent:
254 with maybe_silent:
255 tmpbundleurl = b'bundle:' + vfs.join(tmpbundlefile)
255 tmpbundleurl = b'bundle:' + vfs.join(tmpbundlefile)
256 txnname = b'strip'
256 txnname = b'strip'
257 if not isinstance(gen, bundle2.unbundle20):
257 if not isinstance(gen, bundle2.unbundle20):
258 txnname = b"strip\n%s" % urlutil.hidepassword(
258 txnname = b"strip\n%s" % urlutil.hidepassword(
259 tmpbundleurl
259 tmpbundleurl
260 )
260 )
261 with repo.transaction(txnname) as tr:
261 with repo.transaction(txnname) as tr:
262 bundle2.applybundle(
262 bundle2.applybundle(
263 repo, gen, tr, source=b'strip', url=tmpbundleurl
263 repo, gen, tr, source=b'strip', url=tmpbundleurl
264 )
264 )
265 f.close()
265 f.close()
266
266
267 with repo.transaction(b'repair') as tr:
267 with repo.transaction(b'repair') as tr:
268 bmchanges = [(m, repo[newbmtarget].node()) for m in updatebm]
268 bmchanges = [(m, repo[newbmtarget].node()) for m in updatebm]
269 repo._bookmarks.applychanges(repo, tr, bmchanges)
269 repo._bookmarks.applychanges(repo, tr, bmchanges)
270
270
271 transaction.cleanup_undo_files(repo.ui.warn, repo.vfs_map)
271 transaction.cleanup_undo_files(repo.ui.warn, repo.vfs_map)
272
272
273 except: # re-raises
273 except: # re-raises
274 if backupfile:
274 if backupfile:
275 ui.warn(
275 ui.warn(
276 _(b"strip failed, backup bundle stored in '%s'\n")
276 _(b"strip failed, backup bundle stored in '%s'\n")
277 % vfs.join(backupfile)
277 % vfs.join(backupfile)
278 )
278 )
279 if tmpbundlefile:
279 if tmpbundlefile:
280 ui.warn(
280 ui.warn(
281 _(b"strip failed, unrecovered changes stored in '%s'\n")
281 _(b"strip failed, unrecovered changes stored in '%s'\n")
282 % vfs.join(tmpbundlefile)
282 % vfs.join(tmpbundlefile)
283 )
283 )
284 ui.warn(
284 ui.warn(
285 _(
285 _(
286 b"(fix the problem, then recover the changesets with "
286 b"(fix the problem, then recover the changesets with "
287 b"\"hg unbundle '%s'\")\n"
287 b"\"hg unbundle '%s'\")\n"
288 )
288 )
289 % vfs.join(tmpbundlefile)
289 % vfs.join(tmpbundlefile)
290 )
290 )
291 raise
291 raise
292 else:
292 else:
293 if tmpbundlefile:
293 if tmpbundlefile:
294 # Remove temporary bundle only if there were no exceptions
294 # Remove temporary bundle only if there were no exceptions
295 vfs.unlink(tmpbundlefile)
295 vfs.unlink(tmpbundlefile)
296
296
297 repo.destroyed()
297 repo.destroyed()
298 # return the backup file path (or None if 'backup' was False) so
298 # return the backup file path (or None if 'backup' was False) so
299 # extensions can use it
299 # extensions can use it
300 return backupfile
300 return backupfile
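The most delicate step in strip() is rolling append-only store files back to their pre-transaction size using the (file, offset) pairs read from the transaction journal. A toy in-memory sketch of that truncation (names are hypothetical):

import io

def truncate_to_journal(files, journal):
    # roll files back to the offset recorded when the strip transaction
    # touched them; a zero offset means the file ends up empty
    for name, offset in journal:
        files[name].truncate(offset)

files = {b'data/foo.i': io.BytesIO(b'0123456789')}
truncate_to_journal(files, [(b'data/foo.i', 4)])
assert files[b'data/foo.i'].getvalue() == b'0123'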
301
301
302
302
303 def softstrip(ui, repo, nodelist, backup=True, topic=b'backup'):
303 def softstrip(ui, repo, nodelist, backup=True, topic=b'backup'):
304 """perform a "soft" strip using the archived phase"""
304 """perform a "soft" strip using the archived phase"""
305 tostrip = [c.node() for c in repo.set(b'sort(%ln::)', nodelist)]
305 tostrip = [c.node() for c in repo.set(b'sort(%ln::)', nodelist)]
306 if not tostrip:
306 if not tostrip:
307 return None
307 return None
308
308
309 backupfile = None
309 backupfile = None
310 if backup:
310 if backup:
311 node = tostrip[0]
311 node = tostrip[0]
312 backupfile = _createstripbackup(repo, tostrip, node, topic)
312 backupfile = _createstripbackup(repo, tostrip, node, topic)
313
313
314 newbmtarget, updatebm = _bookmarkmovements(repo, tostrip)
314 newbmtarget, updatebm = _bookmarkmovements(repo, tostrip)
315 with repo.transaction(b'strip') as tr:
315 with repo.transaction(b'strip') as tr:
316 phases.retractboundary(repo, tr, phases.archived, tostrip)
316 phases.retractboundary(repo, tr, phases.archived, tostrip)
317 bmchanges = [(m, repo[newbmtarget].node()) for m in updatebm]
317 bmchanges = [(m, repo[newbmtarget].node()) for m in updatebm]
318 repo._bookmarks.applychanges(repo, tr, bmchanges)
318 repo._bookmarks.applychanges(repo, tr, bmchanges)
319 return backupfile
319 return backupfile
320
320
321
321
322 def _bookmarkmovements(repo, tostrip):
322 def _bookmarkmovements(repo, tostrip):
323 # compute necessary bookmark movement
323 # compute necessary bookmark movement
324 bm = repo._bookmarks
324 bm = repo._bookmarks
325 updatebm = []
325 updatebm = []
326 for m in bm:
326 for m in bm:
327 rev = repo[bm[m]].rev()
327 rev = repo[bm[m]].rev()
328 if rev in tostrip:
328 if rev in tostrip:
329 updatebm.append(m)
329 updatebm.append(m)
330 newbmtarget = None
330 newbmtarget = None
331 # If we need to move bookmarks, compute bookmark
331 # If we need to move bookmarks, compute bookmark
332 # targets. Otherwise we can skip doing this logic.
332 # targets. Otherwise we can skip doing this logic.
333 if updatebm:
333 if updatebm:
334 # For a set s, max(parents(s) - s) is the same as max(heads(::s - s)),
334 # For a set s, max(parents(s) - s) is the same as max(heads(::s - s)),
335 # but is much faster
335 # but is much faster
336 newbmtarget = repo.revs(b'max(parents(%ld) - (%ld))', tostrip, tostrip)
336 newbmtarget = repo.revs(b'max(parents(%ld) - (%ld))', tostrip, tostrip)
337 if newbmtarget:
337 if newbmtarget:
338 newbmtarget = repo[newbmtarget.first()].node()
338 newbmtarget = repo[newbmtarget.first()].node()
339 else:
339 else:
340 newbmtarget = b'.'
340 newbmtarget = b'.'
341 return newbmtarget, updatebm
341 return newbmtarget, updatebm
342
342
343
343
344 def _createstripbackup(repo, stripbases, node, topic):
344 def _createstripbackup(repo, stripbases, node, topic):
345 # backup the changeset we are about to strip
345 # backup the changeset we are about to strip
346 vfs = repo.vfs
346 vfs = repo.vfs
347 unfi = repo.unfiltered()
347 unfi = repo.unfiltered()
348 to_node = unfi.changelog.node
348 to_node = unfi.changelog.node
349 # internal changesets are internal implementation details that should not
349 # internal changesets are internal implementation details that should not
350 # leave the repository and should not be exposed to the users. In addition,
350 # leave the repository and should not be exposed to the users. In addition,
351 # features using them must be resistant to strip. See test case for more
351 # features using them must be resistant to strip. See test case for more
352 # details.
352 # details.
353 all_backup = unfi.revs(
353 all_backup = unfi.revs(
354 b"(%ln)::(%ld) and not _internal()",
354 b"(%ln)::(%ld) and not _internal()",
355 stripbases,
355 stripbases,
356 unfi.changelog.headrevs(),
356 unfi.changelog.headrevs(),
357 )
357 )
358 if not all_backup:
358 if not all_backup:
359 return None
359 return None
360
360
361 def to_nodes(revs):
361 def to_nodes(revs):
362 return [to_node(r) for r in revs]
362 return [to_node(r) for r in revs]
363
363
364 bases = to_nodes(unfi.revs("roots(%ld)", all_backup))
364 bases = to_nodes(unfi.revs("roots(%ld)", all_backup))
365 heads = to_nodes(unfi.revs("heads(%ld)", all_backup))
365 heads = to_nodes(unfi.revs("heads(%ld)", all_backup))
366 backupfile = backupbundle(repo, bases, heads, node, topic)
366 backupfile = backupbundle(repo, bases, heads, node, topic)
367 repo.ui.status(_(b"saved backup bundle to %s\n") % vfs.join(backupfile))
367 repo.ui.status(_(b"saved backup bundle to %s\n") % vfs.join(backupfile))
368 repo.ui.log(
368 repo.ui.log(
369 b"backupbundle", b"saved backup bundle to %s\n", vfs.join(backupfile)
369 b"backupbundle", b"saved backup bundle to %s\n", vfs.join(backupfile)
370 )
370 )
371 return backupfile
371 return backupfile
372
372
373
373
374 def safestriproots(ui, repo, nodes):
374 def safestriproots(ui, repo, nodes):
375 """return list of roots of nodes where descendants are covered by nodes"""
375 """return list of roots of nodes where descendants are covered by nodes"""
376 torev = repo.unfiltered().changelog.rev
376 torev = repo.unfiltered().changelog.rev
377 revs = {torev(n) for n in nodes}
377 revs = {torev(n) for n in nodes}
378 # tostrip = wanted - unsafe = wanted - ancestors(orphaned)
378 # tostrip = wanted - unsafe = wanted - ancestors(orphaned)
379 # orphaned = affected - wanted
379 # orphaned = affected - wanted
380 # affected = descendants(roots(wanted))
380 # affected = descendants(roots(wanted))
381 # wanted = revs
381 # wanted = revs
382 revset = b'%ld - ( ::( (roots(%ld):: and not _phase(%s)) -%ld) )'
382 revset = b'%ld - ( ::( (roots(%ld):: and not _phase(%s)) -%ld) )'
383 tostrip = set(repo.revs(revset, revs, revs, phases.internal, revs))
383 tostrip = set(repo.revs(revset, revs, revs, phases.internal, revs))
384 notstrip = revs - tostrip
384 notstrip = revs - tostrip
385 if notstrip:
385 if notstrip:
386 nodestr = b', '.join(sorted(short(repo[n].node()) for n in notstrip))
386 nodestr = b', '.join(sorted(short(repo[n].node()) for n in notstrip))
387 ui.warn(
387 ui.warn(
388 _(b'warning: orphaned descendants detected, not stripping %s\n')
388 _(b'warning: orphaned descendants detected, not stripping %s\n')
389 % nodestr
389 % nodestr
390 )
390 )
391 return [c.node() for c in repo.set(b'roots(%ld)', tostrip)]
391 return [c.node() for c in repo.set(b'roots(%ld)', tostrip)]
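safestriproots expresses "do not strip anything a kept descendant still needs" as a revset; the same rule in plain set terms, with toy graphs and a hypothetical helper:

def safe_to_strip(wanted, descendants_of, ancestors_of):
    # revisions that would be orphaned: descendants of the wanted set that
    # were not themselves requested
    orphaned = {d for w in wanted for d in descendants_of(w)} - wanted
    # anything an orphan depends on must be kept
    unsafe = {a for o in orphaned for a in ancestors_of(o)}
    return wanted - unsafe

graph_desc = {1: {2, 3}, 2: set(), 3: set()}   # rev -> descendants
graph_anc = {1: set(), 2: {1}, 3: {1}}         # rev -> ancestors
# stripping {1, 2} would orphan 3, whose ancestor 1 must therefore be kept
assert safe_to_strip({1, 2}, graph_desc.__getitem__, graph_anc.__getitem__) == {2}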
392
392
393
393
394 class stripcallback:
394 class stripcallback:
395 """used as a transaction postclose callback"""
395 """used as a transaction postclose callback"""
396
396
397 def __init__(self, ui, repo, backup, topic):
397 def __init__(self, ui, repo, backup, topic):
398 self.ui = ui
398 self.ui = ui
399 self.repo = repo
399 self.repo = repo
400 self.backup = backup
400 self.backup = backup
401 self.topic = topic or b'backup'
401 self.topic = topic or b'backup'
402 self.nodelist = []
402 self.nodelist = []
403
403
404 def addnodes(self, nodes):
404 def addnodes(self, nodes):
405 self.nodelist.extend(nodes)
405 self.nodelist.extend(nodes)
406
406
407 def __call__(self, tr):
407 def __call__(self, tr):
408 roots = safestriproots(self.ui, self.repo, self.nodelist)
408 roots = safestriproots(self.ui, self.repo, self.nodelist)
409 if roots:
409 if roots:
410 strip(self.ui, self.repo, roots, self.backup, self.topic)
410 strip(self.ui, self.repo, roots, self.backup, self.topic)
411
411
412
412
413 def delayedstrip(ui, repo, nodelist, topic=None, backup=True):
413 def delayedstrip(ui, repo, nodelist, topic=None, backup=True):
414 """like strip, but works inside a transaction and won't strip irrelevant revs
414 """like strip, but works inside a transaction and won't strip irrelevant revs
415
415
416 nodelist must explicitly contain all descendants. Otherwise a warning will
416 nodelist must explicitly contain all descendants. Otherwise a warning will
417 be printed that some nodes are not stripped.
417 be printed that some nodes are not stripped.
418
418
419 Will do a backup if `backup` is True. The last non-None "topic" will be
419 Will do a backup if `backup` is True. The last non-None "topic" will be
420 used as the backup topic name. The default backup topic name is "backup".
420 used as the backup topic name. The default backup topic name is "backup".
421 """
421 """
422 tr = repo.currenttransaction()
422 tr = repo.currenttransaction()
423 if not tr:
423 if not tr:
424 nodes = safestriproots(ui, repo, nodelist)
424 nodes = safestriproots(ui, repo, nodelist)
425 return strip(ui, repo, nodes, backup=backup, topic=topic)
425 return strip(ui, repo, nodes, backup=backup, topic=topic)
426 # transaction postclose callbacks are called in alphabetical order.
426 # transaction postclose callbacks are called in alphabetical order.
427 # use '\xff' as prefix so we are likely to be called last.
427 # use '\xff' as prefix so we are likely to be called last.
428 callback = tr.getpostclose(b'\xffstrip')
428 callback = tr.getpostclose(b'\xffstrip')
429 if callback is None:
429 if callback is None:
430 callback = stripcallback(ui, repo, backup=backup, topic=topic)
430 callback = stripcallback(ui, repo, backup=backup, topic=topic)
431 tr.addpostclose(b'\xffstrip', callback)
431 tr.addpostclose(b'\xffstrip', callback)
432 if topic:
432 if topic:
433 callback.topic = topic
433 callback.topic = topic
434 callback.addnodes(nodelist)
434 callback.addnodes(nodelist)
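delayedstrip piggybacks on the transaction's postclose callbacks: repeated calls inside one transaction share a single callback keyed b'\xffstrip', so node lists accumulate and the strip runs once, last. A toy sketch of that accumulation (DeferredStrip is a stand-in, not the real stripcallback):

class DeferredStrip:
    # stand-in for stripcallback: node lists accumulate, strip runs once
    def __init__(self):
        self.nodes = []
        self.stripped = None

    def addnodes(self, nodes):
        self.nodes.extend(nodes)

    def __call__(self, tr):
        self.stripped = list(self.nodes)   # real code strips the safe roots

postclose = {}
postclose.setdefault(b'\xffstrip', DeferredStrip()).addnodes([b'abc123'])
postclose.setdefault(b'\xffstrip', DeferredStrip()).addnodes([b'def456'])
# postclose callbacks run sorted by name; the '\xff' prefix sorts last
for name in sorted(postclose):
    postclose[name](tr=None)
assert postclose[b'\xffstrip'].stripped == [b'abc123', b'def456']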
435
435
436
436
437 def stripmanifest(repo, striprev, tr, files):
437 def stripmanifest(repo, striprev, tr, files):
438 for revlog in manifestrevlogs(repo):
438 for revlog in manifestrevlogs(repo):
439 revlog.strip(striprev, tr)
439 revlog.strip(striprev, tr)
440
440
441
441
442 def manifestrevlogs(repo):
442 def manifestrevlogs(repo):
443 yield repo.manifestlog.getstorage(b'')
443 yield repo.manifestlog.getstorage(b'')
444 if scmutil.istreemanifest(repo):
444 if scmutil.istreemanifest(repo):
445 # This logic is safe if treemanifest isn't enabled, but also
445 # This logic is safe if treemanifest isn't enabled, but also
446 # pointless, so we skip it if treemanifest isn't enabled.
446 # pointless, so we skip it if treemanifest isn't enabled.
447 for t, unencoded, size in repo.store.datafiles():
447 for entry in repo.store.datafiles():
448 unencoded = entry.unencoded_path
449 # XXX use the entry.revlog_type instead
448 if unencoded.startswith(b'meta/') and unencoded.endswith(
450 if unencoded.startswith(b'meta/') and unencoded.endswith(
449 b'00manifest.i'
451 b'00manifest.i'
450 ):
452 ):
451 dir = unencoded[5:-12]
453 dir = unencoded[5:-12]
452 yield repo.manifestlog.getstorage(dir)
454 yield repo.manifestlog.getstorage(dir)
453
455
454
456
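
A minimal sketch of the caller-side migration this changeset performs on `repo.store.datafiles()` users, assuming only the StoreEntry attributes visible in this diff (`unencoded_path`, `is_revlog`, `revlog_type`); `repo` stands for an already-loaded repository and `handle_path` is a hypothetical callback, not part of the changeset:

    # before: datafiles() yielded (file_type, unencoded_path, size) tuples
    for file_type, unencoded, size in repo.store.datafiles():
        if unencoded.endswith(b'.i'):
            handle_path(unencoded)

    # after: datafiles() yields StoreEntry objects; read attributes instead
    for entry in repo.store.datafiles():
        unencoded = entry.unencoded_path
        if unencoded.endswith(b'.i') and entry.is_revlog:
            handle_path(unencoded)
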
455 def rebuildfncache(ui, repo, only_data=False):
457 def rebuildfncache(ui, repo, only_data=False):
456 """Rebuilds the fncache file from repo history.
458 """Rebuilds the fncache file from repo history.
457
459
458 Missing entries will be added. Extra entries will be removed.
460 Missing entries will be added. Extra entries will be removed.
459 """
461 """
460 repo = repo.unfiltered()
462 repo = repo.unfiltered()
461
463
462 if requirements.FNCACHE_REQUIREMENT not in repo.requirements:
464 if requirements.FNCACHE_REQUIREMENT not in repo.requirements:
463 ui.warn(
465 ui.warn(
464 _(
466 _(
465 b'(not rebuilding fncache because repository does not '
467 b'(not rebuilding fncache because repository does not '
466 b'support fncache)\n'
468 b'support fncache)\n'
467 )
469 )
468 )
470 )
469 return
471 return
470
472
471 with repo.lock():
473 with repo.lock():
472 fnc = repo.store.fncache
474 fnc = repo.store.fncache
473 fnc.ensureloaded(warn=ui.warn)
475 fnc.ensureloaded(warn=ui.warn)
474
476
475 oldentries = set(fnc.entries)
477 oldentries = set(fnc.entries)
476 newentries = set()
478 newentries = set()
477 seenfiles = set()
479 seenfiles = set()
478
480
479 if only_data:
481 if only_data:
480 # Trust the listing of .i from the fncache, but not the .d. This is
482 # Trust the listing of .i from the fncache, but not the .d. This is
481 # much faster, because we only need to stat every possible .d file,
483 # much faster, because we only need to stat every possible .d file,
482 # instead of reading the full changelog
484 # instead of reading the full changelog
483 for f in fnc:
485 for f in fnc:
484 if f[:5] == b'data/' and f[-2:] == b'.i':
486 if f[:5] == b'data/' and f[-2:] == b'.i':
485 seenfiles.add(f[5:-2])
487 seenfiles.add(f[5:-2])
486 newentries.add(f)
488 newentries.add(f)
487 dataf = f[:-2] + b'.d'
489 dataf = f[:-2] + b'.d'
488 if repo.store._exists(dataf):
490 if repo.store._exists(dataf):
489 newentries.add(dataf)
491 newentries.add(dataf)
490 else:
492 else:
491 progress = ui.makeprogress(
493 progress = ui.makeprogress(
492 _(b'rebuilding'), unit=_(b'changesets'), total=len(repo)
494 _(b'rebuilding'), unit=_(b'changesets'), total=len(repo)
493 )
495 )
494 for rev in repo:
496 for rev in repo:
495 progress.update(rev)
497 progress.update(rev)
496
498
497 ctx = repo[rev]
499 ctx = repo[rev]
498 for f in ctx.files():
500 for f in ctx.files():
499 # This is to minimize I/O.
501 # This is to minimize I/O.
500 if f in seenfiles:
502 if f in seenfiles:
501 continue
503 continue
502 seenfiles.add(f)
504 seenfiles.add(f)
503
505
504 i = b'data/%s.i' % f
506 i = b'data/%s.i' % f
505 d = b'data/%s.d' % f
507 d = b'data/%s.d' % f
506
508
507 if repo.store._exists(i):
509 if repo.store._exists(i):
508 newentries.add(i)
510 newentries.add(i)
509 if repo.store._exists(d):
511 if repo.store._exists(d):
510 newentries.add(d)
512 newentries.add(d)
511
513
512 progress.complete()
514 progress.complete()
513
515
514 if requirements.TREEMANIFEST_REQUIREMENT in repo.requirements:
516 if requirements.TREEMANIFEST_REQUIREMENT in repo.requirements:
515 # This logic is safe if treemanifest isn't enabled, but also
517 # This logic is safe if treemanifest isn't enabled, but also
516 # pointless, so we skip it if treemanifest isn't enabled.
518 # pointless, so we skip it if treemanifest isn't enabled.
517 for dir in pathutil.dirs(seenfiles):
519 for dir in pathutil.dirs(seenfiles):
518 i = b'meta/%s/00manifest.i' % dir
520 i = b'meta/%s/00manifest.i' % dir
519 d = b'meta/%s/00manifest.d' % dir
521 d = b'meta/%s/00manifest.d' % dir
520
522
521 if repo.store._exists(i):
523 if repo.store._exists(i):
522 newentries.add(i)
524 newentries.add(i)
523 if repo.store._exists(d):
525 if repo.store._exists(d):
524 newentries.add(d)
526 newentries.add(d)
525
527
526 addcount = len(newentries - oldentries)
528 addcount = len(newentries - oldentries)
527 removecount = len(oldentries - newentries)
529 removecount = len(oldentries - newentries)
528 for p in sorted(oldentries - newentries):
530 for p in sorted(oldentries - newentries):
529 ui.write(_(b'removing %s\n') % p)
531 ui.write(_(b'removing %s\n') % p)
530 for p in sorted(newentries - oldentries):
532 for p in sorted(newentries - oldentries):
531 ui.write(_(b'adding %s\n') % p)
533 ui.write(_(b'adding %s\n') % p)
532
534
533 if addcount or removecount:
535 if addcount or removecount:
534 ui.write(
536 ui.write(
535 _(b'%d items added, %d removed from fncache\n')
537 _(b'%d items added, %d removed from fncache\n')
536 % (addcount, removecount)
538 % (addcount, removecount)
537 )
539 )
538 fnc.entries = newentries
540 fnc.entries = newentries
539 fnc._dirty = True
541 fnc._dirty = True
540
542
541 with repo.transaction(b'fncache') as tr:
543 with repo.transaction(b'fncache') as tr:
542 fnc.write(tr)
544 fnc.write(tr)
543 else:
545 else:
544 ui.write(_(b'fncache already up to date\n'))
546 ui.write(_(b'fncache already up to date\n'))
545
547
546
548
547 def deleteobsmarkers(obsstore, indices):
549 def deleteobsmarkers(obsstore, indices):
548 """Delete some obsmarkers from obsstore and return how many were deleted
550 """Delete some obsmarkers from obsstore and return how many were deleted
549
551
550 'indices' is a list of ints which are the indices
552 'indices' is a list of ints which are the indices
551 of the markers to be deleted.
553 of the markers to be deleted.
552
554
553 Every invocation of this function completely rewrites the obsstore file,
555 Every invocation of this function completely rewrites the obsstore file,
554 skipping the markers we want to be removed. The new temporary file is
556 skipping the markers we want to be removed. The new temporary file is
555 created, remaining markers are written there and on .close() this file
557 created, remaining markers are written there and on .close() this file
556 gets atomically renamed to obsstore, thus guaranteeing consistency."""
558 gets atomically renamed to obsstore, thus guaranteeing consistency."""
557 if not indices:
559 if not indices:
558 # we don't want to rewrite the obsstore with the same content
560 # we don't want to rewrite the obsstore with the same content
559 return
561 return
560
562
561 left = []
563 left = []
562 current = obsstore._all
564 current = obsstore._all
563 n = 0
565 n = 0
564 for i, m in enumerate(current):
566 for i, m in enumerate(current):
565 if i in indices:
567 if i in indices:
566 n += 1
568 n += 1
567 continue
569 continue
568 left.append(m)
570 left.append(m)
569
571
570 newobsstorefile = obsstore.svfs(b'obsstore', b'w', atomictemp=True)
572 newobsstorefile = obsstore.svfs(b'obsstore', b'w', atomictemp=True)
571 for bytes in obsolete.encodemarkers(left, True, obsstore._version):
573 for bytes in obsolete.encodemarkers(left, True, obsstore._version):
572 newobsstorefile.write(bytes)
574 newobsstorefile.write(bytes)
573 newobsstorefile.close()
575 newobsstorefile.close()
574 return n
576 return n
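
For orientation, a small sketch of how delayedstrip is meant to be used from inside an open transaction; the strip itself is deferred to the b'\xffstrip' postclose callback registered above. `rewrite_history` and `nodes_to_strip` are placeholders, not part of this changeset:

    from mercurial import repair

    def prune(ui, repo, nodes_to_strip):
        with repo.wlock(), repo.lock(), repo.transaction(b'prune') as tr:
            rewrite_history(repo, tr)  # hypothetical history-rewriting step
            # inside the transaction this only records the nodes; the actual
            # strip happens in stripcallback after the transaction closes
            repair.delayedstrip(ui, repo, nodes_to_strip, topic=b'prune')
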
@@ -1,883 +1,888 b''
1 # censor code related to censoring revision
1 # censor code related to censoring revision
2 # coding: utf8
2 # coding: utf8
3 #
3 #
4 # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
4 # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
5 # Copyright 2015 Google, Inc <martinvonz@google.com>
5 # Copyright 2015 Google, Inc <martinvonz@google.com>
6 #
6 #
7 # This software may be used and distributed according to the terms of the
7 # This software may be used and distributed according to the terms of the
8 # GNU General Public License version 2 or any later version.
8 # GNU General Public License version 2 or any later version.
9
9
10 import binascii
10 import binascii
11 import contextlib
11 import contextlib
12 import os
12 import os
13 import struct
13 import struct
14
14
15 from ..node import (
15 from ..node import (
16 nullrev,
16 nullrev,
17 )
17 )
18 from .constants import (
18 from .constants import (
19 COMP_MODE_PLAIN,
19 COMP_MODE_PLAIN,
20 ENTRY_DATA_COMPRESSED_LENGTH,
20 ENTRY_DATA_COMPRESSED_LENGTH,
21 ENTRY_DATA_COMPRESSION_MODE,
21 ENTRY_DATA_COMPRESSION_MODE,
22 ENTRY_DATA_OFFSET,
22 ENTRY_DATA_OFFSET,
23 ENTRY_DATA_UNCOMPRESSED_LENGTH,
23 ENTRY_DATA_UNCOMPRESSED_LENGTH,
24 ENTRY_DELTA_BASE,
24 ENTRY_DELTA_BASE,
25 ENTRY_LINK_REV,
25 ENTRY_LINK_REV,
26 ENTRY_NODE_ID,
26 ENTRY_NODE_ID,
27 ENTRY_PARENT_1,
27 ENTRY_PARENT_1,
28 ENTRY_PARENT_2,
28 ENTRY_PARENT_2,
29 ENTRY_SIDEDATA_COMPRESSED_LENGTH,
29 ENTRY_SIDEDATA_COMPRESSED_LENGTH,
30 ENTRY_SIDEDATA_COMPRESSION_MODE,
30 ENTRY_SIDEDATA_COMPRESSION_MODE,
31 ENTRY_SIDEDATA_OFFSET,
31 ENTRY_SIDEDATA_OFFSET,
32 REVIDX_ISCENSORED,
32 REVIDX_ISCENSORED,
33 REVLOGV0,
33 REVLOGV0,
34 REVLOGV1,
34 REVLOGV1,
35 )
35 )
36 from ..i18n import _
36 from ..i18n import _
37
37
38 from .. import (
38 from .. import (
39 error,
39 error,
40 mdiff,
40 mdiff,
41 pycompat,
41 pycompat,
42 revlogutils,
42 revlogutils,
43 util,
43 util,
44 )
44 )
45 from ..utils import (
45 from ..utils import (
46 storageutil,
46 storageutil,
47 )
47 )
48 from . import (
48 from . import (
49 constants,
49 constants,
50 deltas,
50 deltas,
51 )
51 )
52
52
53
53
54 def v1_censor(rl, tr, censornode, tombstone=b''):
54 def v1_censor(rl, tr, censornode, tombstone=b''):
55 """censors a revision in a "version 1" revlog"""
55 """censors a revision in a "version 1" revlog"""
56 assert rl._format_version == constants.REVLOGV1, rl._format_version
56 assert rl._format_version == constants.REVLOGV1, rl._format_version
57
57
58 # avoid cycle
58 # avoid cycle
59 from .. import revlog
59 from .. import revlog
60
60
61 censorrev = rl.rev(censornode)
61 censorrev = rl.rev(censornode)
62 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
62 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
63
63
64 # Rewriting the revlog in place is hard. Our strategy for censoring is
64 # Rewriting the revlog in place is hard. Our strategy for censoring is
65 # to create a new revlog, copy all revisions to it, then replace the
65 # to create a new revlog, copy all revisions to it, then replace the
66 # revlogs on transaction close.
66 # revlogs on transaction close.
67 #
67 #
68 # This is a bit dangerous. We could easily have a mismatch of state.
68 # This is a bit dangerous. We could easily have a mismatch of state.
69 newrl = revlog.revlog(
69 newrl = revlog.revlog(
70 rl.opener,
70 rl.opener,
71 target=rl.target,
71 target=rl.target,
72 radix=rl.radix,
72 radix=rl.radix,
73 postfix=b'tmpcensored',
73 postfix=b'tmpcensored',
74 censorable=True,
74 censorable=True,
75 )
75 )
76 newrl._format_version = rl._format_version
76 newrl._format_version = rl._format_version
77 newrl._format_flags = rl._format_flags
77 newrl._format_flags = rl._format_flags
78 newrl._generaldelta = rl._generaldelta
78 newrl._generaldelta = rl._generaldelta
79 newrl._parse_index = rl._parse_index
79 newrl._parse_index = rl._parse_index
80
80
81 for rev in rl.revs():
81 for rev in rl.revs():
82 node = rl.node(rev)
82 node = rl.node(rev)
83 p1, p2 = rl.parents(node)
83 p1, p2 = rl.parents(node)
84
84
85 if rev == censorrev:
85 if rev == censorrev:
86 newrl.addrawrevision(
86 newrl.addrawrevision(
87 tombstone,
87 tombstone,
88 tr,
88 tr,
89 rl.linkrev(censorrev),
89 rl.linkrev(censorrev),
90 p1,
90 p1,
91 p2,
91 p2,
92 censornode,
92 censornode,
93 constants.REVIDX_ISCENSORED,
93 constants.REVIDX_ISCENSORED,
94 )
94 )
95
95
96 if newrl.deltaparent(rev) != nullrev:
96 if newrl.deltaparent(rev) != nullrev:
97 m = _(b'censored revision stored as delta; cannot censor')
97 m = _(b'censored revision stored as delta; cannot censor')
98 h = _(
98 h = _(
99 b'censoring of revlogs is not fully implemented;'
99 b'censoring of revlogs is not fully implemented;'
100 b' please report this bug'
100 b' please report this bug'
101 )
101 )
102 raise error.Abort(m, hint=h)
102 raise error.Abort(m, hint=h)
103 continue
103 continue
104
104
105 if rl.iscensored(rev):
105 if rl.iscensored(rev):
106 if rl.deltaparent(rev) != nullrev:
106 if rl.deltaparent(rev) != nullrev:
107 m = _(
107 m = _(
108 b'cannot censor due to censored '
108 b'cannot censor due to censored '
109 b'revision having delta stored'
109 b'revision having delta stored'
110 )
110 )
111 raise error.Abort(m)
111 raise error.Abort(m)
112 rawtext = rl._chunk(rev)
112 rawtext = rl._chunk(rev)
113 else:
113 else:
114 rawtext = rl.rawdata(rev)
114 rawtext = rl.rawdata(rev)
115
115
116 newrl.addrawrevision(
116 newrl.addrawrevision(
117 rawtext, tr, rl.linkrev(rev), p1, p2, node, rl.flags(rev)
117 rawtext, tr, rl.linkrev(rev), p1, p2, node, rl.flags(rev)
118 )
118 )
119
119
120 tr.addbackup(rl._indexfile, location=b'store')
120 tr.addbackup(rl._indexfile, location=b'store')
121 if not rl._inline:
121 if not rl._inline:
122 tr.addbackup(rl._datafile, location=b'store')
122 tr.addbackup(rl._datafile, location=b'store')
123
123
124 rl.opener.rename(newrl._indexfile, rl._indexfile)
124 rl.opener.rename(newrl._indexfile, rl._indexfile)
125 if not rl._inline:
125 if not rl._inline:
126 rl.opener.rename(newrl._datafile, rl._datafile)
126 rl.opener.rename(newrl._datafile, rl._datafile)
127
127
128 rl.clearcaches()
128 rl.clearcaches()
129 rl._loadindex()
129 rl._loadindex()
130
130
131
131
132 def v2_censor(revlog, tr, censornode, tombstone=b''):
132 def v2_censor(revlog, tr, censornode, tombstone=b''):
133 """censors a revision in a "version 2" revlog"""
133 """censors a revision in a "version 2" revlog"""
134 assert revlog._format_version != REVLOGV0, revlog._format_version
134 assert revlog._format_version != REVLOGV0, revlog._format_version
135 assert revlog._format_version != REVLOGV1, revlog._format_version
135 assert revlog._format_version != REVLOGV1, revlog._format_version
136
136
137 censor_revs = {revlog.rev(censornode)}
137 censor_revs = {revlog.rev(censornode)}
138 _rewrite_v2(revlog, tr, censor_revs, tombstone)
138 _rewrite_v2(revlog, tr, censor_revs, tombstone)
139
139
140
140
141 def _rewrite_v2(revlog, tr, censor_revs, tombstone=b''):
141 def _rewrite_v2(revlog, tr, censor_revs, tombstone=b''):
142 """rewrite a revlog to censor some of its content
142 """rewrite a revlog to censor some of its content
143
143
144 General principle
144 General principle
145
145
146 We create new revlog files (index/data/sidedata) to copy the content of
146 We create new revlog files (index/data/sidedata) to copy the content of
147 the existing data without the censored data.
147 the existing data without the censored data.
148
148
149 We need to compute a new delta for any revision that used the censored
149 We need to compute a new delta for any revision that used the censored
150 revision as its delta base. As the cumulative size of the new deltas may be
150 revision as its delta base. As the cumulative size of the new deltas may be
151 large, we store them in a temporary file until they are stored in their
151 large, we store them in a temporary file until they are stored in their
152 final destination.
152 final destination.
153
153
154 All data before the censored data can be blindly copied. The rest needs
154 All data before the censored data can be blindly copied. The rest needs
155 to be copied as we go and the associated index entry needs adjustment.
155 to be copied as we go and the associated index entry needs adjustment.
156 """
156 """
157 assert revlog._format_version != REVLOGV0, revlog._format_version
157 assert revlog._format_version != REVLOGV0, revlog._format_version
158 assert revlog._format_version != REVLOGV1, revlog._format_version
158 assert revlog._format_version != REVLOGV1, revlog._format_version
159
159
160 old_index = revlog.index
160 old_index = revlog.index
161 docket = revlog._docket
161 docket = revlog._docket
162
162
163 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
163 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
164
164
165 first_excl_rev = min(censor_revs)
165 first_excl_rev = min(censor_revs)
166
166
167 first_excl_entry = revlog.index[first_excl_rev]
167 first_excl_entry = revlog.index[first_excl_rev]
168 index_cutoff = revlog.index.entry_size * first_excl_rev
168 index_cutoff = revlog.index.entry_size * first_excl_rev
169 data_cutoff = first_excl_entry[ENTRY_DATA_OFFSET] >> 16
169 data_cutoff = first_excl_entry[ENTRY_DATA_OFFSET] >> 16
170 sidedata_cutoff = revlog.sidedata_cut_off(first_excl_rev)
170 sidedata_cutoff = revlog.sidedata_cut_off(first_excl_rev)
171
171
172 with pycompat.unnamedtempfile(mode=b"w+b") as tmp_storage:
172 with pycompat.unnamedtempfile(mode=b"w+b") as tmp_storage:
173 # rev → (new_base, data_start, data_end, compression_mode)
173 # rev → (new_base, data_start, data_end, compression_mode)
174 rewritten_entries = _precompute_rewritten_delta(
174 rewritten_entries = _precompute_rewritten_delta(
175 revlog,
175 revlog,
176 old_index,
176 old_index,
177 censor_revs,
177 censor_revs,
178 tmp_storage,
178 tmp_storage,
179 )
179 )
180
180
181 all_files = _setup_new_files(
181 all_files = _setup_new_files(
182 revlog,
182 revlog,
183 index_cutoff,
183 index_cutoff,
184 data_cutoff,
184 data_cutoff,
185 sidedata_cutoff,
185 sidedata_cutoff,
186 )
186 )
187
187
188 # we don't need to open the old index file since its content already
188 # we don't need to open the old index file since its content already
189 # exists in a usable form in `old_index`.
189 # exists in a usable form in `old_index`.
190 with all_files() as open_files:
190 with all_files() as open_files:
191 (
191 (
192 old_data_file,
192 old_data_file,
193 old_sidedata_file,
193 old_sidedata_file,
194 new_index_file,
194 new_index_file,
195 new_data_file,
195 new_data_file,
196 new_sidedata_file,
196 new_sidedata_file,
197 ) = open_files
197 ) = open_files
198
198
199 # writing the censored revision
199 # writing the censored revision
200
200
201 # Writing all subsequent revisions
201 # Writing all subsequent revisions
202 for rev in range(first_excl_rev, len(old_index)):
202 for rev in range(first_excl_rev, len(old_index)):
203 if rev in censor_revs:
203 if rev in censor_revs:
204 _rewrite_censor(
204 _rewrite_censor(
205 revlog,
205 revlog,
206 old_index,
206 old_index,
207 open_files,
207 open_files,
208 rev,
208 rev,
209 tombstone,
209 tombstone,
210 )
210 )
211 else:
211 else:
212 _rewrite_simple(
212 _rewrite_simple(
213 revlog,
213 revlog,
214 old_index,
214 old_index,
215 open_files,
215 open_files,
216 rev,
216 rev,
217 rewritten_entries,
217 rewritten_entries,
218 tmp_storage,
218 tmp_storage,
219 )
219 )
220 docket.write(transaction=None, stripping=True)
220 docket.write(transaction=None, stripping=True)
221
221
222
222
223 def _precompute_rewritten_delta(
223 def _precompute_rewritten_delta(
224 revlog,
224 revlog,
225 old_index,
225 old_index,
226 excluded_revs,
226 excluded_revs,
227 tmp_storage,
227 tmp_storage,
228 ):
228 ):
229 """Compute new delta for revisions whose delta is based on revision that
229 """Compute new delta for revisions whose delta is based on revision that
230 will not survive as is.
230 will not survive as is.
231
231
232 Return a mapping: {rev → (new_base, data_start, data_end, compression_mode)}
232 Return a mapping: {rev → (new_base, data_start, data_end, compression_mode)}
233 """
233 """
234 dc = deltas.deltacomputer(revlog)
234 dc = deltas.deltacomputer(revlog)
235 rewritten_entries = {}
235 rewritten_entries = {}
236 first_excl_rev = min(excluded_revs)
236 first_excl_rev = min(excluded_revs)
237 with revlog._segmentfile._open_read() as dfh:
237 with revlog._segmentfile._open_read() as dfh:
238 for rev in range(first_excl_rev, len(old_index)):
238 for rev in range(first_excl_rev, len(old_index)):
239 if rev in excluded_revs:
239 if rev in excluded_revs:
240 # this revision will be preserved as is, so we don't need to
240 # this revision will be preserved as is, so we don't need to
241 # consider recomputing a delta.
241 # consider recomputing a delta.
242 continue
242 continue
243 entry = old_index[rev]
243 entry = old_index[rev]
244 if entry[ENTRY_DELTA_BASE] not in excluded_revs:
244 if entry[ENTRY_DELTA_BASE] not in excluded_revs:
245 continue
245 continue
246 # This is a revision that uses the censored revision as the base
246 # This is a revision that uses the censored revision as the base
247 # for its delta. We need a new delta for it.
247 # for its delta. We need a new delta for it.
248 if entry[ENTRY_DATA_UNCOMPRESSED_LENGTH] == 0:
248 if entry[ENTRY_DATA_UNCOMPRESSED_LENGTH] == 0:
249 # this revision is empty, we can delta against nullrev
249 # this revision is empty, we can delta against nullrev
250 rewritten_entries[rev] = (nullrev, 0, 0, COMP_MODE_PLAIN)
250 rewritten_entries[rev] = (nullrev, 0, 0, COMP_MODE_PLAIN)
251 else:
251 else:
252
252
253 text = revlog.rawdata(rev, _df=dfh)
253 text = revlog.rawdata(rev, _df=dfh)
254 info = revlogutils.revisioninfo(
254 info = revlogutils.revisioninfo(
255 node=entry[ENTRY_NODE_ID],
255 node=entry[ENTRY_NODE_ID],
256 p1=revlog.node(entry[ENTRY_PARENT_1]),
256 p1=revlog.node(entry[ENTRY_PARENT_1]),
257 p2=revlog.node(entry[ENTRY_PARENT_2]),
257 p2=revlog.node(entry[ENTRY_PARENT_2]),
258 btext=[text],
258 btext=[text],
259 textlen=len(text),
259 textlen=len(text),
260 cachedelta=None,
260 cachedelta=None,
261 flags=entry[ENTRY_DATA_OFFSET] & 0xFFFF,
261 flags=entry[ENTRY_DATA_OFFSET] & 0xFFFF,
262 )
262 )
263 d = dc.finddeltainfo(
263 d = dc.finddeltainfo(
264 info, dfh, excluded_bases=excluded_revs, target_rev=rev
264 info, dfh, excluded_bases=excluded_revs, target_rev=rev
265 )
265 )
266 default_comp = revlog._docket.default_compression_header
266 default_comp = revlog._docket.default_compression_header
267 comp_mode, d = deltas.delta_compression(default_comp, d)
267 comp_mode, d = deltas.delta_compression(default_comp, d)
268 # using `tell` is a bit lazy, but we are not here for speed
268 # using `tell` is a bit lazy, but we are not here for speed
269 start = tmp_storage.tell()
269 start = tmp_storage.tell()
270 tmp_storage.write(d.data[1])
270 tmp_storage.write(d.data[1])
271 end = tmp_storage.tell()
271 end = tmp_storage.tell()
272 rewritten_entries[rev] = (d.base, start, end, comp_mode)
272 rewritten_entries[rev] = (d.base, start, end, comp_mode)
273 return rewritten_entries
273 return rewritten_entries
274
274
275
275
276 def _setup_new_files(
276 def _setup_new_files(
277 revlog,
277 revlog,
278 index_cutoff,
278 index_cutoff,
279 data_cutoff,
279 data_cutoff,
280 sidedata_cutoff,
280 sidedata_cutoff,
281 ):
281 ):
282 """
282 """
283
283
284 return a context manager to open all the relevant files:
284 return a context manager to open all the relevant files:
285 - old_data_file,
285 - old_data_file,
286 - old_sidedata_file,
286 - old_sidedata_file,
287 - new_index_file,
287 - new_index_file,
288 - new_data_file,
288 - new_data_file,
289 - new_sidedata_file,
289 - new_sidedata_file,
290
290
291 The old_index_file is not here because it is accessed through the
291 The old_index_file is not here because it is accessed through the
292 `old_index` object of the caller function.
292 `old_index` object of the caller function.
293 """
293 """
294 docket = revlog._docket
294 docket = revlog._docket
295 old_index_filepath = revlog.opener.join(docket.index_filepath())
295 old_index_filepath = revlog.opener.join(docket.index_filepath())
296 old_data_filepath = revlog.opener.join(docket.data_filepath())
296 old_data_filepath = revlog.opener.join(docket.data_filepath())
297 old_sidedata_filepath = revlog.opener.join(docket.sidedata_filepath())
297 old_sidedata_filepath = revlog.opener.join(docket.sidedata_filepath())
298
298
299 new_index_filepath = revlog.opener.join(docket.new_index_file())
299 new_index_filepath = revlog.opener.join(docket.new_index_file())
300 new_data_filepath = revlog.opener.join(docket.new_data_file())
300 new_data_filepath = revlog.opener.join(docket.new_data_file())
301 new_sidedata_filepath = revlog.opener.join(docket.new_sidedata_file())
301 new_sidedata_filepath = revlog.opener.join(docket.new_sidedata_file())
302
302
303 util.copyfile(old_index_filepath, new_index_filepath, nb_bytes=index_cutoff)
303 util.copyfile(old_index_filepath, new_index_filepath, nb_bytes=index_cutoff)
304 util.copyfile(old_data_filepath, new_data_filepath, nb_bytes=data_cutoff)
304 util.copyfile(old_data_filepath, new_data_filepath, nb_bytes=data_cutoff)
305 util.copyfile(
305 util.copyfile(
306 old_sidedata_filepath,
306 old_sidedata_filepath,
307 new_sidedata_filepath,
307 new_sidedata_filepath,
308 nb_bytes=sidedata_cutoff,
308 nb_bytes=sidedata_cutoff,
309 )
309 )
310 revlog.opener.register_file(docket.index_filepath())
310 revlog.opener.register_file(docket.index_filepath())
311 revlog.opener.register_file(docket.data_filepath())
311 revlog.opener.register_file(docket.data_filepath())
312 revlog.opener.register_file(docket.sidedata_filepath())
312 revlog.opener.register_file(docket.sidedata_filepath())
313
313
314 docket.index_end = index_cutoff
314 docket.index_end = index_cutoff
315 docket.data_end = data_cutoff
315 docket.data_end = data_cutoff
316 docket.sidedata_end = sidedata_cutoff
316 docket.sidedata_end = sidedata_cutoff
317
317
318 # reload the revlog internal information
318 # reload the revlog internal information
319 revlog.clearcaches()
319 revlog.clearcaches()
320 revlog._loadindex(docket=docket)
320 revlog._loadindex(docket=docket)
321
321
322 @contextlib.contextmanager
322 @contextlib.contextmanager
323 def all_files_opener():
323 def all_files_opener():
324 # hide opening in a helper function to please check-code, black
324 # hide opening in a helper function to please check-code, black
325 # and various Python versions at the same time
325 # and various Python versions at the same time
326 with open(old_data_filepath, 'rb') as old_data_file:
326 with open(old_data_filepath, 'rb') as old_data_file:
327 with open(old_sidedata_filepath, 'rb') as old_sidedata_file:
327 with open(old_sidedata_filepath, 'rb') as old_sidedata_file:
328 with open(new_index_filepath, 'r+b') as new_index_file:
328 with open(new_index_filepath, 'r+b') as new_index_file:
329 with open(new_data_filepath, 'r+b') as new_data_file:
329 with open(new_data_filepath, 'r+b') as new_data_file:
330 with open(
330 with open(
331 new_sidedata_filepath, 'r+b'
331 new_sidedata_filepath, 'r+b'
332 ) as new_sidedata_file:
332 ) as new_sidedata_file:
333 new_index_file.seek(0, os.SEEK_END)
333 new_index_file.seek(0, os.SEEK_END)
334 assert new_index_file.tell() == index_cutoff
334 assert new_index_file.tell() == index_cutoff
335 new_data_file.seek(0, os.SEEK_END)
335 new_data_file.seek(0, os.SEEK_END)
336 assert new_data_file.tell() == data_cutoff
336 assert new_data_file.tell() == data_cutoff
337 new_sidedata_file.seek(0, os.SEEK_END)
337 new_sidedata_file.seek(0, os.SEEK_END)
338 assert new_sidedata_file.tell() == sidedata_cutoff
338 assert new_sidedata_file.tell() == sidedata_cutoff
339 yield (
339 yield (
340 old_data_file,
340 old_data_file,
341 old_sidedata_file,
341 old_sidedata_file,
342 new_index_file,
342 new_index_file,
343 new_data_file,
343 new_data_file,
344 new_sidedata_file,
344 new_sidedata_file,
345 )
345 )
346
346
347 return all_files_opener
347 return all_files_opener
348
348
349
349
350 def _rewrite_simple(
350 def _rewrite_simple(
351 revlog,
351 revlog,
352 old_index,
352 old_index,
353 all_files,
353 all_files,
354 rev,
354 rev,
355 rewritten_entries,
355 rewritten_entries,
356 tmp_storage,
356 tmp_storage,
357 ):
357 ):
358 """append a normal revision to the index after the rewritten one(s)"""
358 """append a normal revision to the index after the rewritten one(s)"""
359 (
359 (
360 old_data_file,
360 old_data_file,
361 old_sidedata_file,
361 old_sidedata_file,
362 new_index_file,
362 new_index_file,
363 new_data_file,
363 new_data_file,
364 new_sidedata_file,
364 new_sidedata_file,
365 ) = all_files
365 ) = all_files
366 entry = old_index[rev]
366 entry = old_index[rev]
367 flags = entry[ENTRY_DATA_OFFSET] & 0xFFFF
367 flags = entry[ENTRY_DATA_OFFSET] & 0xFFFF
368 old_data_offset = entry[ENTRY_DATA_OFFSET] >> 16
368 old_data_offset = entry[ENTRY_DATA_OFFSET] >> 16
369
369
370 if rev not in rewritten_entries:
370 if rev not in rewritten_entries:
371 old_data_file.seek(old_data_offset)
371 old_data_file.seek(old_data_offset)
372 new_data_size = entry[ENTRY_DATA_COMPRESSED_LENGTH]
372 new_data_size = entry[ENTRY_DATA_COMPRESSED_LENGTH]
373 new_data = old_data_file.read(new_data_size)
373 new_data = old_data_file.read(new_data_size)
374 data_delta_base = entry[ENTRY_DELTA_BASE]
374 data_delta_base = entry[ENTRY_DELTA_BASE]
375 d_comp_mode = entry[ENTRY_DATA_COMPRESSION_MODE]
375 d_comp_mode = entry[ENTRY_DATA_COMPRESSION_MODE]
376 else:
376 else:
377 (
377 (
378 data_delta_base,
378 data_delta_base,
379 start,
379 start,
380 end,
380 end,
381 d_comp_mode,
381 d_comp_mode,
382 ) = rewritten_entries[rev]
382 ) = rewritten_entries[rev]
383 new_data_size = end - start
383 new_data_size = end - start
384 tmp_storage.seek(start)
384 tmp_storage.seek(start)
385 new_data = tmp_storage.read(new_data_size)
385 new_data = tmp_storage.read(new_data_size)
386
386
387 # It might be faster to group consecutive read/write operations;
387 # It might be faster to group consecutive read/write operations;
388 # however, this is censor, an operation that is not focused
388 # however, this is censor, an operation that is not focused
389 # on stellar performance. So I have not written this
389 # on stellar performance. So I have not written this
390 # optimisation yet.
390 # optimisation yet.
391 new_data_offset = new_data_file.tell()
391 new_data_offset = new_data_file.tell()
392 new_data_file.write(new_data)
392 new_data_file.write(new_data)
393
393
394 sidedata_size = entry[ENTRY_SIDEDATA_COMPRESSED_LENGTH]
394 sidedata_size = entry[ENTRY_SIDEDATA_COMPRESSED_LENGTH]
395 new_sidedata_offset = new_sidedata_file.tell()
395 new_sidedata_offset = new_sidedata_file.tell()
396 if 0 < sidedata_size:
396 if 0 < sidedata_size:
397 old_sidedata_offset = entry[ENTRY_SIDEDATA_OFFSET]
397 old_sidedata_offset = entry[ENTRY_SIDEDATA_OFFSET]
398 old_sidedata_file.seek(old_sidedata_offset)
398 old_sidedata_file.seek(old_sidedata_offset)
399 new_sidedata = old_sidedata_file.read(sidedata_size)
399 new_sidedata = old_sidedata_file.read(sidedata_size)
400 new_sidedata_file.write(new_sidedata)
400 new_sidedata_file.write(new_sidedata)
401
401
402 data_uncompressed_length = entry[ENTRY_DATA_UNCOMPRESSED_LENGTH]
402 data_uncompressed_length = entry[ENTRY_DATA_UNCOMPRESSED_LENGTH]
403 sd_com_mode = entry[ENTRY_SIDEDATA_COMPRESSION_MODE]
403 sd_com_mode = entry[ENTRY_SIDEDATA_COMPRESSION_MODE]
404 assert data_delta_base <= rev, (data_delta_base, rev)
404 assert data_delta_base <= rev, (data_delta_base, rev)
405
405
406 new_entry = revlogutils.entry(
406 new_entry = revlogutils.entry(
407 flags=flags,
407 flags=flags,
408 data_offset=new_data_offset,
408 data_offset=new_data_offset,
409 data_compressed_length=new_data_size,
409 data_compressed_length=new_data_size,
410 data_uncompressed_length=data_uncompressed_length,
410 data_uncompressed_length=data_uncompressed_length,
411 data_delta_base=data_delta_base,
411 data_delta_base=data_delta_base,
412 link_rev=entry[ENTRY_LINK_REV],
412 link_rev=entry[ENTRY_LINK_REV],
413 parent_rev_1=entry[ENTRY_PARENT_1],
413 parent_rev_1=entry[ENTRY_PARENT_1],
414 parent_rev_2=entry[ENTRY_PARENT_2],
414 parent_rev_2=entry[ENTRY_PARENT_2],
415 node_id=entry[ENTRY_NODE_ID],
415 node_id=entry[ENTRY_NODE_ID],
416 sidedata_offset=new_sidedata_offset,
416 sidedata_offset=new_sidedata_offset,
417 sidedata_compressed_length=sidedata_size,
417 sidedata_compressed_length=sidedata_size,
418 data_compression_mode=d_comp_mode,
418 data_compression_mode=d_comp_mode,
419 sidedata_compression_mode=sd_com_mode,
419 sidedata_compression_mode=sd_com_mode,
420 )
420 )
421 revlog.index.append(new_entry)
421 revlog.index.append(new_entry)
422 entry_bin = revlog.index.entry_binary(rev)
422 entry_bin = revlog.index.entry_binary(rev)
423 new_index_file.write(entry_bin)
423 new_index_file.write(entry_bin)
424
424
425 revlog._docket.index_end = new_index_file.tell()
425 revlog._docket.index_end = new_index_file.tell()
426 revlog._docket.data_end = new_data_file.tell()
426 revlog._docket.data_end = new_data_file.tell()
427 revlog._docket.sidedata_end = new_sidedata_file.tell()
427 revlog._docket.sidedata_end = new_sidedata_file.tell()
428
428
429
429
430 def _rewrite_censor(
430 def _rewrite_censor(
431 revlog,
431 revlog,
432 old_index,
432 old_index,
433 all_files,
433 all_files,
434 rev,
434 rev,
435 tombstone,
435 tombstone,
436 ):
436 ):
437 """rewrite and append a censored revision"""
437 """rewrite and append a censored revision"""
438 (
438 (
439 old_data_file,
439 old_data_file,
440 old_sidedata_file,
440 old_sidedata_file,
441 new_index_file,
441 new_index_file,
442 new_data_file,
442 new_data_file,
443 new_sidedata_file,
443 new_sidedata_file,
444 ) = all_files
444 ) = all_files
445 entry = old_index[rev]
445 entry = old_index[rev]
446
446
447 # XXX consider trying the default compression too
447 # XXX consider trying the default compression too
448 new_data_size = len(tombstone)
448 new_data_size = len(tombstone)
449 new_data_offset = new_data_file.tell()
449 new_data_offset = new_data_file.tell()
450 new_data_file.write(tombstone)
450 new_data_file.write(tombstone)
451
451
452 # we are not adding any sidedata as they might leak info about the censored version
452 # we are not adding any sidedata as they might leak info about the censored version
453
453
454 link_rev = entry[ENTRY_LINK_REV]
454 link_rev = entry[ENTRY_LINK_REV]
455
455
456 p1 = entry[ENTRY_PARENT_1]
456 p1 = entry[ENTRY_PARENT_1]
457 p2 = entry[ENTRY_PARENT_2]
457 p2 = entry[ENTRY_PARENT_2]
458
458
459 new_entry = revlogutils.entry(
459 new_entry = revlogutils.entry(
460 flags=constants.REVIDX_ISCENSORED,
460 flags=constants.REVIDX_ISCENSORED,
461 data_offset=new_data_offset,
461 data_offset=new_data_offset,
462 data_compressed_length=new_data_size,
462 data_compressed_length=new_data_size,
463 data_uncompressed_length=new_data_size,
463 data_uncompressed_length=new_data_size,
464 data_delta_base=rev,
464 data_delta_base=rev,
465 link_rev=link_rev,
465 link_rev=link_rev,
466 parent_rev_1=p1,
466 parent_rev_1=p1,
467 parent_rev_2=p2,
467 parent_rev_2=p2,
468 node_id=entry[ENTRY_NODE_ID],
468 node_id=entry[ENTRY_NODE_ID],
469 sidedata_offset=0,
469 sidedata_offset=0,
470 sidedata_compressed_length=0,
470 sidedata_compressed_length=0,
471 data_compression_mode=COMP_MODE_PLAIN,
471 data_compression_mode=COMP_MODE_PLAIN,
472 sidedata_compression_mode=COMP_MODE_PLAIN,
472 sidedata_compression_mode=COMP_MODE_PLAIN,
473 )
473 )
474 revlog.index.append(new_entry)
474 revlog.index.append(new_entry)
475 entry_bin = revlog.index.entry_binary(rev)
475 entry_bin = revlog.index.entry_binary(rev)
476 new_index_file.write(entry_bin)
476 new_index_file.write(entry_bin)
477 revlog._docket.index_end = new_index_file.tell()
477 revlog._docket.index_end = new_index_file.tell()
478 revlog._docket.data_end = new_data_file.tell()
478 revlog._docket.data_end = new_data_file.tell()
479
479
480
480
481 def _get_filename_from_filelog_index(path):
481 def _get_filename_from_filelog_index(path):
482 # Drop the extension and the `data/` prefix
482 # Drop the extension and the `data/` prefix
483 path_part = path.rsplit(b'.', 1)[0].split(b'/', 1)
483 path_part = path.rsplit(b'.', 1)[0].split(b'/', 1)
484 if len(path_part) < 2:
484 if len(path_part) < 2:
485 msg = _(b"cannot recognize filelog from filename: '%s'")
485 msg = _(b"cannot recognize filelog from filename: '%s'")
486 msg %= path
486 msg %= path
487 raise error.Abort(msg)
487 raise error.Abort(msg)
488
488
489 return path_part[1]
489 return path_part[1]
490
490
491
491
492 def _filelog_from_filename(repo, path):
492 def _filelog_from_filename(repo, path):
493 """Returns the filelog for the given `path`. Stolen from `engine.py`"""
493 """Returns the filelog for the given `path`. Stolen from `engine.py`"""
494
494
495 from .. import filelog # avoid cycle
495 from .. import filelog # avoid cycle
496
496
497 fl = filelog.filelog(repo.svfs, path)
497 fl = filelog.filelog(repo.svfs, path)
498 return fl
498 return fl
499
499
500
500
501 def _write_swapped_parents(repo, rl, rev, offset, fp):
501 def _write_swapped_parents(repo, rl, rev, offset, fp):
502 """Swaps p1 and p2 and overwrites the revlog entry for `rev` in `fp`"""
502 """Swaps p1 and p2 and overwrites the revlog entry for `rev` in `fp`"""
503 from ..pure import parsers # avoid cycle
503 from ..pure import parsers # avoid cycle
504
504
505 if repo._currentlock(repo._lockref) is None:
505 if repo._currentlock(repo._lockref) is None:
506 # Let's be paranoid about it
506 # Let's be paranoid about it
507 msg = "repo needs to be locked to rewrite parents"
507 msg = "repo needs to be locked to rewrite parents"
508 raise error.ProgrammingError(msg)
508 raise error.ProgrammingError(msg)
509
509
510 index_format = parsers.IndexObject.index_format
510 index_format = parsers.IndexObject.index_format
511 entry = rl.index[rev]
511 entry = rl.index[rev]
512 new_entry = list(entry)
512 new_entry = list(entry)
513 new_entry[5], new_entry[6] = entry[6], entry[5]
513 new_entry[5], new_entry[6] = entry[6], entry[5]
514 packed = index_format.pack(*new_entry[:8])
514 packed = index_format.pack(*new_entry[:8])
515 fp.seek(offset)
515 fp.seek(offset)
516 fp.write(packed)
516 fp.write(packed)
517
517
518
518
519 def _reorder_filelog_parents(repo, fl, to_fix):
519 def _reorder_filelog_parents(repo, fl, to_fix):
520 """
520 """
521 Swaps p1 and p2 for all `to_fix` revisions of filelog `fl` and writes the
521 Swaps p1 and p2 for all `to_fix` revisions of filelog `fl` and writes the
522 new version to disk, overwriting the old one with a rename.
522 new version to disk, overwriting the old one with a rename.
523 """
523 """
524 from ..pure import parsers # avoid cycle
524 from ..pure import parsers # avoid cycle
525
525
526 ui = repo.ui
526 ui = repo.ui
527 assert len(to_fix) > 0
527 assert len(to_fix) > 0
528 rl = fl._revlog
528 rl = fl._revlog
529 if rl._format_version != constants.REVLOGV1:
529 if rl._format_version != constants.REVLOGV1:
530 msg = "expected version 1 revlog, got version '%d'" % rl._format_version
530 msg = "expected version 1 revlog, got version '%d'" % rl._format_version
531 raise error.ProgrammingError(msg)
531 raise error.ProgrammingError(msg)
532
532
533 index_file = rl._indexfile
533 index_file = rl._indexfile
534 new_file_path = index_file + b'.tmp-parents-fix'
534 new_file_path = index_file + b'.tmp-parents-fix'
535 repaired_msg = _(b"repaired revision %d of 'filelog %s'\n")
535 repaired_msg = _(b"repaired revision %d of 'filelog %s'\n")
536
536
537 with ui.uninterruptible():
537 with ui.uninterruptible():
538 try:
538 try:
539 util.copyfile(
539 util.copyfile(
540 rl.opener.join(index_file),
540 rl.opener.join(index_file),
541 rl.opener.join(new_file_path),
541 rl.opener.join(new_file_path),
542 checkambig=rl._checkambig,
542 checkambig=rl._checkambig,
543 )
543 )
544
544
545 with rl.opener(new_file_path, mode=b"r+") as fp:
545 with rl.opener(new_file_path, mode=b"r+") as fp:
546 if rl._inline:
546 if rl._inline:
547 index = parsers.InlinedIndexObject(fp.read())
547 index = parsers.InlinedIndexObject(fp.read())
548 for rev in fl.revs():
548 for rev in fl.revs():
549 if rev in to_fix:
549 if rev in to_fix:
550 offset = index._calculate_index(rev)
550 offset = index._calculate_index(rev)
551 _write_swapped_parents(repo, rl, rev, offset, fp)
551 _write_swapped_parents(repo, rl, rev, offset, fp)
552 ui.write(repaired_msg % (rev, index_file))
552 ui.write(repaired_msg % (rev, index_file))
553 else:
553 else:
554 index_format = parsers.IndexObject.index_format
554 index_format = parsers.IndexObject.index_format
555 for rev in to_fix:
555 for rev in to_fix:
556 offset = rev * index_format.size
556 offset = rev * index_format.size
557 _write_swapped_parents(repo, rl, rev, offset, fp)
557 _write_swapped_parents(repo, rl, rev, offset, fp)
558 ui.write(repaired_msg % (rev, index_file))
558 ui.write(repaired_msg % (rev, index_file))
559
559
560 rl.opener.rename(new_file_path, index_file)
560 rl.opener.rename(new_file_path, index_file)
561 rl.clearcaches()
561 rl.clearcaches()
562 rl._loadindex()
562 rl._loadindex()
563 finally:
563 finally:
564 util.tryunlink(new_file_path)
564 util.tryunlink(new_file_path)
565
565
566
566
567 def _is_revision_affected(fl, filerev, metadata_cache=None):
567 def _is_revision_affected(fl, filerev, metadata_cache=None):
568 full_text = lambda: fl._revlog.rawdata(filerev)
568 full_text = lambda: fl._revlog.rawdata(filerev)
569 parent_revs = lambda: fl._revlog.parentrevs(filerev)
569 parent_revs = lambda: fl._revlog.parentrevs(filerev)
570 return _is_revision_affected_inner(
570 return _is_revision_affected_inner(
571 full_text, parent_revs, filerev, metadata_cache
571 full_text, parent_revs, filerev, metadata_cache
572 )
572 )
573
573
574
574
575 def _is_revision_affected_inner(
575 def _is_revision_affected_inner(
576 full_text,
576 full_text,
577 parents_revs,
577 parents_revs,
578 filerev,
578 filerev,
579 metadata_cache=None,
579 metadata_cache=None,
580 ):
580 ):
581 """Mercurial currently (5.9rc0) uses `p1 == nullrev and p2 != nullrev` as a
581 """Mercurial currently (5.9rc0) uses `p1 == nullrev and p2 != nullrev` as a
582 special meaning compared to the reverse in the context of filelog-based
582 special meaning compared to the reverse in the context of filelog-based
583 copytracing. issue6528 exists because new code assumed that parent ordering
583 copytracing. issue6528 exists because new code assumed that parent ordering
584 didn't matter, so this detects if the revision contains metadata (since
584 didn't matter, so this detects if the revision contains metadata (since
585 it's only used for filelog-based copytracing) and its parents are in the
585 it's only used for filelog-based copytracing) and its parents are in the
586 "wrong" order."""
586 "wrong" order."""
587 try:
587 try:
588 raw_text = full_text()
588 raw_text = full_text()
589 except error.CensoredNodeError:
589 except error.CensoredNodeError:
590 # We don't care about censored nodes as they never carry metadata
590 # We don't care about censored nodes as they never carry metadata
591 return False
591 return False
592
592
593 # raw text can be a `memoryview`, which doesn't implement `startswith`
593 # raw text can be a `memoryview`, which doesn't implement `startswith`
594 has_meta = bytes(raw_text[:2]) == b'\x01\n'
594 has_meta = bytes(raw_text[:2]) == b'\x01\n'
595 if metadata_cache is not None:
595 if metadata_cache is not None:
596 metadata_cache[filerev] = has_meta
596 metadata_cache[filerev] = has_meta
597 if has_meta:
597 if has_meta:
598 (p1, p2) = parents_revs()
598 (p1, p2) = parents_revs()
599 if p1 != nullrev and p2 == nullrev:
599 if p1 != nullrev and p2 == nullrev:
600 return True
600 return True
601 return False
601 return False
602
602
603
603
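
The affected-revision test above reduces to two checks; a standalone sketch, assuming the raw filelog text and the parent revisions are already available:

    from mercurial.node import nullrev

    def looks_affected(raw_text, p1, p2):
        # copy metadata is flagged by a b'\x01\n' prefix on the raw text
        has_meta = bytes(raw_text[:2]) == b'\x01\n'
        # issue6528 shows up as metadata combined with the "wrong" parent
        # order: p1 set and p2 null instead of the reverse
        return has_meta and p1 != nullrev and p2 == nullrev
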
604 def _is_revision_affected_fast(repo, fl, filerev, metadata_cache):
604 def _is_revision_affected_fast(repo, fl, filerev, metadata_cache):
605 rl = fl._revlog
605 rl = fl._revlog
606 is_censored = lambda: rl.iscensored(filerev)
606 is_censored = lambda: rl.iscensored(filerev)
607 delta_base = lambda: rl.deltaparent(filerev)
607 delta_base = lambda: rl.deltaparent(filerev)
608 delta = lambda: rl._chunk(filerev)
608 delta = lambda: rl._chunk(filerev)
609 full_text = lambda: rl.rawdata(filerev)
609 full_text = lambda: rl.rawdata(filerev)
610 parent_revs = lambda: rl.parentrevs(filerev)
610 parent_revs = lambda: rl.parentrevs(filerev)
611 return _is_revision_affected_fast_inner(
611 return _is_revision_affected_fast_inner(
612 is_censored,
612 is_censored,
613 delta_base,
613 delta_base,
614 delta,
614 delta,
615 full_text,
615 full_text,
616 parent_revs,
616 parent_revs,
617 filerev,
617 filerev,
618 metadata_cache,
618 metadata_cache,
619 )
619 )
620
620
621
621
622 def _is_revision_affected_fast_inner(
622 def _is_revision_affected_fast_inner(
623 is_censored,
623 is_censored,
624 delta_base,
624 delta_base,
625 delta,
625 delta,
626 full_text,
626 full_text,
627 parent_revs,
627 parent_revs,
628 filerev,
628 filerev,
629 metadata_cache,
629 metadata_cache,
630 ):
630 ):
631 """Optimization fast-path for `_is_revision_affected`.
631 """Optimization fast-path for `_is_revision_affected`.
632
632
633 `metadata_cache` is a dict of `{rev: has_metadata}` which allows any
633 `metadata_cache` is a dict of `{rev: has_metadata}` which allows any
634 revision to check if its base has metadata, saving computation of the full
634 revision to check if its base has metadata, saving computation of the full
635 text by instead looking at the current delta.
635 text by instead looking at the current delta.
636
636
637 This optimization only works if the revisions are looked at in order."""
637 This optimization only works if the revisions are looked at in order."""
638
638
639 if is_censored():
639 if is_censored():
640 # Censored revisions don't contain metadata, so they cannot be affected
640 # Censored revisions don't contain metadata, so they cannot be affected
641 metadata_cache[filerev] = False
641 metadata_cache[filerev] = False
642 return False
642 return False
643
643
644 p1, p2 = parent_revs()
644 p1, p2 = parent_revs()
645 if p1 == nullrev or p2 != nullrev:
645 if p1 == nullrev or p2 != nullrev:
646 return False
646 return False
647
647
648 delta_parent = delta_base()
648 delta_parent = delta_base()
649 parent_has_metadata = metadata_cache.get(delta_parent)
649 parent_has_metadata = metadata_cache.get(delta_parent)
650 if parent_has_metadata is None:
650 if parent_has_metadata is None:
651 return _is_revision_affected_inner(
651 return _is_revision_affected_inner(
652 full_text,
652 full_text,
653 parent_revs,
653 parent_revs,
654 filerev,
654 filerev,
655 metadata_cache,
655 metadata_cache,
656 )
656 )
657
657
658 chunk = delta()
658 chunk = delta()
659 if not len(chunk):
659 if not len(chunk):
660 # No diff for this revision
660 # No diff for this revision
661 return parent_has_metadata
661 return parent_has_metadata
662
662
663 header_length = 12
663 header_length = 12
664 if len(chunk) < header_length:
664 if len(chunk) < header_length:
665 raise error.Abort(_(b"patch cannot be decoded"))
665 raise error.Abort(_(b"patch cannot be decoded"))
666
666
667 start, _end, _length = struct.unpack(b">lll", chunk[:header_length])
667 start, _end, _length = struct.unpack(b">lll", chunk[:header_length])
668
668
669 if start < 2: # len(b'\x01\n') == 2
669 if start < 2: # len(b'\x01\n') == 2
670 # This delta does *something* to the metadata marker (if any).
670 # This delta does *something* to the metadata marker (if any).
671 # Check it the slow way
671 # Check it the slow way
672 is_affected = _is_revision_affected_inner(
672 is_affected = _is_revision_affected_inner(
673 full_text,
673 full_text,
674 parent_revs,
674 parent_revs,
675 filerev,
675 filerev,
676 metadata_cache,
676 metadata_cache,
677 )
677 )
678 return is_affected
678 return is_affected
679
679
680 # The diff did not remove or add the metadata header, so it is in the same
680 # The diff did not remove or add the metadata header, so it is in the same
681 # situation as its parent
681 # situation as its parent
682 metadata_cache[filerev] = parent_has_metadata
682 metadata_cache[filerev] = parent_has_metadata
683 return parent_has_metadata
683 return parent_has_metadata
684
684
685
685
686 def _from_report(ui, repo, context, from_report, dry_run):
686 def _from_report(ui, repo, context, from_report, dry_run):
687 """
687 """
688 Fix the revisions given in the `from_report` file, but still check if the
688 Fix the revisions given in the `from_report` file, but still check if the
689 revisions are indeed affected to prevent an unfortunate cyclic situation
689 revisions are indeed affected to prevent an unfortunate cyclic situation
690 where we'd swap well-ordered parents again.
690 where we'd swap well-ordered parents again.
691
691
692 See the doc for `debug_fix_issue6528` for the format documentation.
692 See the doc for `debug_fix_issue6528` for the format documentation.
693 """
693 """
694 ui.write(_(b"loading report file '%s'\n") % from_report)
694 ui.write(_(b"loading report file '%s'\n") % from_report)
695
695
696 with context(), open(from_report, mode='rb') as f:
696 with context(), open(from_report, mode='rb') as f:
697 for line in f.read().split(b'\n'):
697 for line in f.read().split(b'\n'):
698 if not line:
698 if not line:
699 continue
699 continue
700 filenodes, filename = line.split(b' ', 1)
700 filenodes, filename = line.split(b' ', 1)
701 fl = _filelog_from_filename(repo, filename)
701 fl = _filelog_from_filename(repo, filename)
702 to_fix = set(
702 to_fix = set(
703 fl.rev(binascii.unhexlify(n)) for n in filenodes.split(b',')
703 fl.rev(binascii.unhexlify(n)) for n in filenodes.split(b',')
704 )
704 )
705 excluded = set()
705 excluded = set()
706
706
707 for filerev in to_fix:
707 for filerev in to_fix:
708 if _is_revision_affected(fl, filerev):
708 if _is_revision_affected(fl, filerev):
709 msg = b"found affected revision %d for filelog '%s'\n"
709 msg = b"found affected revision %d for filelog '%s'\n"
710 ui.warn(msg % (filerev, filename))
710 ui.warn(msg % (filerev, filename))
711 else:
711 else:
712 msg = _(b"revision %s of file '%s' is not affected\n")
712 msg = _(b"revision %s of file '%s' is not affected\n")
713 msg %= (binascii.hexlify(fl.node(filerev)), filename)
713 msg %= (binascii.hexlify(fl.node(filerev)), filename)
714 ui.warn(msg)
714 ui.warn(msg)
715 excluded.add(filerev)
715 excluded.add(filerev)
716
716
717 to_fix = to_fix - excluded
717 to_fix = to_fix - excluded
718 if not to_fix:
718 if not to_fix:
719 msg = _(b"no affected revisions were found for '%s'\n")
719 msg = _(b"no affected revisions were found for '%s'\n")
720 ui.write(msg % filename)
720 ui.write(msg % filename)
721 continue
721 continue
722 if not dry_run:
722 if not dry_run:
723 _reorder_filelog_parents(repo, fl, sorted(to_fix))
723 _reorder_filelog_parents(repo, fl, sorted(to_fix))
724
724
725
725
726 def filter_delta_issue6528(revlog, deltas_iter):
726 def filter_delta_issue6528(revlog, deltas_iter):
727 """filter incomind deltas to repaire issue 6528 on the fly"""
727 """filter incomind deltas to repaire issue 6528 on the fly"""
728 metadata_cache = {}
728 metadata_cache = {}
729
729
730 deltacomputer = deltas.deltacomputer(revlog)
730 deltacomputer = deltas.deltacomputer(revlog)
731
731
732 for rev, d in enumerate(deltas_iter, len(revlog)):
732 for rev, d in enumerate(deltas_iter, len(revlog)):
733 (
733 (
734 node,
734 node,
735 p1_node,
735 p1_node,
736 p2_node,
736 p2_node,
737 linknode,
737 linknode,
738 deltabase,
738 deltabase,
739 delta,
739 delta,
740 flags,
740 flags,
741 sidedata,
741 sidedata,
742 ) = d
742 ) = d
743
743
744 if not revlog.index.has_node(deltabase):
744 if not revlog.index.has_node(deltabase):
745 raise error.LookupError(
745 raise error.LookupError(
746 deltabase, revlog.radix, _(b'unknown parent')
746 deltabase, revlog.radix, _(b'unknown parent')
747 )
747 )
748 base_rev = revlog.rev(deltabase)
748 base_rev = revlog.rev(deltabase)
749 if not revlog.index.has_node(p1_node):
749 if not revlog.index.has_node(p1_node):
750 raise error.LookupError(p1_node, revlog.radix, _(b'unknown parent'))
750 raise error.LookupError(p1_node, revlog.radix, _(b'unknown parent'))
751 p1_rev = revlog.rev(p1_node)
751 p1_rev = revlog.rev(p1_node)
752 if not revlog.index.has_node(p2_node):
752 if not revlog.index.has_node(p2_node):
753 raise error.LookupError(p2_node, revlog.radix, _(b'unknown parent'))
753 raise error.LookupError(p2_node, revlog.radix, _(b'unknown parent'))
754 p2_rev = revlog.rev(p2_node)
754 p2_rev = revlog.rev(p2_node)
755
755
756 is_censored = lambda: bool(flags & REVIDX_ISCENSORED)
756 is_censored = lambda: bool(flags & REVIDX_ISCENSORED)
757 delta_base = lambda: revlog.rev(delta_base)
757 delta_base = lambda: revlog.rev(delta_base)
758 delta_base = lambda: base_rev
758 delta_base = lambda: base_rev
759 parent_revs = lambda: (p1_rev, p2_rev)
759 parent_revs = lambda: (p1_rev, p2_rev)
760
760
761 def full_text():
761 def full_text():
762 # note: being able to reuse the full text computation in the
762 # note: being able to reuse the full text computation in the
763 # underlying addrevision would be useful; however, this is a bit too
763 # underlying addrevision would be useful; however, this is a bit too
764 # intrusive for the "quick" issue6528 fix we are writing before the
764 # intrusive for the "quick" issue6528 fix we are writing before the
765 # 5.8 release
765 # 5.8 release
766 textlen = mdiff.patchedsize(revlog.size(base_rev), delta)
766 textlen = mdiff.patchedsize(revlog.size(base_rev), delta)
767
767
768 revinfo = revlogutils.revisioninfo(
768 revinfo = revlogutils.revisioninfo(
769 node,
769 node,
770 p1_node,
770 p1_node,
771 p2_node,
771 p2_node,
772 [None],
772 [None],
773 textlen,
773 textlen,
774 (base_rev, delta),
774 (base_rev, delta),
775 flags,
775 flags,
776 )
776 )
777 # cached by the global "writing" context
777 # cached by the global "writing" context
778 assert revlog._writinghandles is not None
778 assert revlog._writinghandles is not None
779 if revlog._inline:
779 if revlog._inline:
780 fh = revlog._writinghandles[0]
780 fh = revlog._writinghandles[0]
781 else:
781 else:
782 fh = revlog._writinghandles[1]
782 fh = revlog._writinghandles[1]
783 return deltacomputer.buildtext(revinfo, fh)
783 return deltacomputer.buildtext(revinfo, fh)
784
784
785 is_affected = _is_revision_affected_fast_inner(
785 is_affected = _is_revision_affected_fast_inner(
786 is_censored,
786 is_censored,
787 delta_base,
787 delta_base,
788 lambda: delta,
788 lambda: delta,
789 full_text,
789 full_text,
790 parent_revs,
790 parent_revs,
791 rev,
791 rev,
792 metadata_cache,
792 metadata_cache,
793 )
793 )
794 if is_affected:
794 if is_affected:
795 d = (
795 d = (
796 node,
796 node,
797 p2_node,
797 p2_node,
798 p1_node,
798 p1_node,
799 linknode,
799 linknode,
800 deltabase,
800 deltabase,
801 delta,
801 delta,
802 flags,
802 flags,
803 sidedata,
803 sidedata,
804 )
804 )
805 yield d
805 yield d
806
806
807
807
808 def repair_issue6528(
808 def repair_issue6528(
809 ui, repo, dry_run=False, to_report=None, from_report=None, paranoid=False
809 ui, repo, dry_run=False, to_report=None, from_report=None, paranoid=False
810 ):
810 ):
811 from .. import store # avoid cycle
811 from .. import store # avoid cycle
812
812
813 @contextlib.contextmanager
813 @contextlib.contextmanager
814 def context():
814 def context():
815 if dry_run or to_report: # No need for locking
815 if dry_run or to_report: # No need for locking
816 yield
816 yield
817 else:
817 else:
818 with repo.wlock(), repo.lock():
818 with repo.wlock(), repo.lock():
819 yield
819 yield
820
820
821 if from_report:
821 if from_report:
822 return _from_report(ui, repo, context, from_report, dry_run)
822 return _from_report(ui, repo, context, from_report, dry_run)
823
823
824 report_entries = []
824 report_entries = []
825
825
826 with context():
826 with context():
827 files = list(
827 files = list(
828 (file_type, path)
828 entry
829 for (file_type, path, _s) in repo.store.datafiles()
829 for entry in repo.store.datafiles()
830 if path.endswith(b'.i') and file_type & store.FILEFLAGS_FILELOG
830 if (
831 entry.unencoded_path.endswith(b'.i')
832 and entry.is_revlog
833 and entry.revlog_type == store.FILEFLAGS_FILELOG
834 )
831 )
835 )
832
836
833 progress = ui.makeprogress(
837 progress = ui.makeprogress(
834 _(b"looking for affected revisions"),
838 _(b"looking for affected revisions"),
835 unit=_(b"filelogs"),
839 unit=_(b"filelogs"),
836 total=len(files),
840 total=len(files),
837 )
841 )
838 found_nothing = True
842 found_nothing = True
839
843
840 for file_type, path in files:
844 for entry in files:
845 path = entry.unencoded_path
841 progress.increment()
846 progress.increment()
842 filename = _get_filename_from_filelog_index(path)
847 filename = _get_filename_from_filelog_index(path)
843 fl = _filelog_from_filename(repo, filename)
848 fl = _filelog_from_filename(repo, filename)
844
849
845 # Set of filerevs (or hex filenodes if `to_report`) that need fixing
850 # Set of filerevs (or hex filenodes if `to_report`) that need fixing
846 to_fix = set()
851 to_fix = set()
847 metadata_cache = {}
852 metadata_cache = {}
848 for filerev in fl.revs():
853 for filerev in fl.revs():
849 affected = _is_revision_affected_fast(
854 affected = _is_revision_affected_fast(
850 repo, fl, filerev, metadata_cache
855 repo, fl, filerev, metadata_cache
851 )
856 )
852 if paranoid:
857 if paranoid:
853 slow = _is_revision_affected(fl, filerev)
858 slow = _is_revision_affected(fl, filerev)
854 if slow != affected:
859 if slow != affected:
855 msg = _(b"paranoid check failed for '%s' at node %s")
860 msg = _(b"paranoid check failed for '%s' at node %s")
856 node = binascii.hexlify(fl.node(filerev))
861 node = binascii.hexlify(fl.node(filerev))
857 raise error.Abort(msg % (filename, node))
862 raise error.Abort(msg % (filename, node))
858 if affected:
863 if affected:
859 msg = b"found affected revision %d for filelog '%s'\n"
864 msg = b"found affected revision %d for filelog '%s'\n"
860 ui.warn(msg % (filerev, path))
865 ui.warn(msg % (filerev, path))
861 found_nothing = False
866 found_nothing = False
862 if not dry_run:
867 if not dry_run:
863 if to_report:
868 if to_report:
864 to_fix.add(binascii.hexlify(fl.node(filerev)))
869 to_fix.add(binascii.hexlify(fl.node(filerev)))
865 else:
870 else:
866 to_fix.add(filerev)
871 to_fix.add(filerev)
867
872
868 if to_fix:
873 if to_fix:
869 to_fix = sorted(to_fix)
874 to_fix = sorted(to_fix)
870 if to_report:
875 if to_report:
871 report_entries.append((filename, to_fix))
876 report_entries.append((filename, to_fix))
872 else:
877 else:
873 _reorder_filelog_parents(repo, fl, to_fix)
878 _reorder_filelog_parents(repo, fl, to_fix)
874
879
875 if found_nothing:
880 if found_nothing:
876 ui.write(_(b"no affected revisions were found\n"))
881 ui.write(_(b"no affected revisions were found\n"))
877
882
878 if to_report and report_entries:
883 if to_report and report_entries:
879 with open(to_report, mode="wb") as f:
884 with open(to_report, mode="wb") as f:
880 for path, to_fix in report_entries:
885 for path, to_fix in report_entries:
881 f.write(b"%s %s\n" % (b",".join(to_fix), path))
886 f.write(b"%s %s\n" % (b",".join(to_fix), path))
882
887
883 progress.complete()
888 progress.complete()
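The hunk above shows the consumer side of the new API: callers stop unpacking (file_type, path, size) tuples and read named attributes off each StoreEntry instead. A minimal sketch of that pattern, not part of this changeset (the helper name is hypothetical and `repo` is assumed to be an open local repository):

    from mercurial import store

    def iter_filelog_indexes(repo):
        # yield (unencoded path, size) for every filelog ".i" file, using the
        # new StoreEntry attributes instead of positional tuple fields
        for entry in repo.store.datafiles():
            if (
                entry.unencoded_path.endswith(b'.i')
                and entry.is_revlog
                and entry.revlog_type == store.FILEFLAGS_FILELOG
            ):
                yield entry.unencoded_path, entry.file_size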
@@ -1,855 +1,901 b''
1 # store.py - repository store handling for Mercurial
1 # store.py - repository store handling for Mercurial
2 #
2 #
3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8
8
9 import functools
9 import functools
10 import os
10 import os
11 import re
11 import re
12 import stat
12 import stat
13 from typing import Generator
13
14
14 from .i18n import _
15 from .i18n import _
15 from .pycompat import getattr
16 from .pycompat import getattr
17 from .thirdparty import attr
16 from .node import hex
18 from .node import hex
17 from . import (
19 from . import (
18 changelog,
20 changelog,
19 error,
21 error,
20 manifest,
22 manifest,
21 policy,
23 policy,
22 pycompat,
24 pycompat,
23 util,
25 util,
24 vfs as vfsmod,
26 vfs as vfsmod,
25 )
27 )
26 from .utils import hashutil
28 from .utils import hashutil
27
29
28 parsers = policy.importmod('parsers')
30 parsers = policy.importmod('parsers')
29 # how many bytes should be read from fncache in one read
31 # how many bytes should be read from fncache in one read
30 # This is done to prevent loading large fncache files into memory
32 # This is done to prevent loading large fncache files into memory
31 fncache_chunksize = 10 ** 6
33 fncache_chunksize = 10 ** 6
32
34
33
35
34 def _matchtrackedpath(path, matcher):
36 def _matchtrackedpath(path, matcher):
35 """parses a fncache entry and returns whether the entry is tracking a path
37 """parses a fncache entry and returns whether the entry is tracking a path
36 matched by matcher or not.
38 matched by matcher or not.
37
39
38 If matcher is None, returns True"""
40 If matcher is None, returns True"""
39
41
40 if matcher is None:
42 if matcher is None:
41 return True
43 return True
42 path = decodedir(path)
44 path = decodedir(path)
43 if path.startswith(b'data/'):
45 if path.startswith(b'data/'):
44 return matcher(path[len(b'data/') : -len(b'.i')])
46 return matcher(path[len(b'data/') : -len(b'.i')])
45 elif path.startswith(b'meta/'):
47 elif path.startswith(b'meta/'):
46 return matcher.visitdir(path[len(b'meta/') : -len(b'/00manifest.i')])
48 return matcher.visitdir(path[len(b'meta/') : -len(b'/00manifest.i')])
47
49
48 raise error.ProgrammingError(b"cannot decode path %s" % path)
50 raise error.ProgrammingError(b"cannot decode path %s" % path)
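To illustrate the dispatch above (not part of the changeset): a `data/` entry is checked as an exact file path, a `meta/` entry as a directory, and a missing matcher accepts everything. The matcher class here is a simplified stand-in for Mercurial's real matcher objects:

    class AcceptAllMatcher:
        def __call__(self, path):      # exact-path check used for data/ entries
            return True

        def visitdir(self, dirpath):   # directory check used for meta/ entries
            return True

    m = AcceptAllMatcher()
    _matchtrackedpath(b'data/foo/bar.i', m)         # calls m(b'foo/bar')
    _matchtrackedpath(b'meta/foo/00manifest.i', m)  # calls m.visitdir(b'foo')
    _matchtrackedpath(b'data/foo/bar.i', None)      # True: no matcher means "match all"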
49
51
50
52
51 # This avoids a collision between a file named foo and a dir named
53 # This avoids a collision between a file named foo and a dir named
52 # foo.i or foo.d
54 # foo.i or foo.d
53 def _encodedir(path):
55 def _encodedir(path):
54 """
56 """
55 >>> _encodedir(b'data/foo.i')
57 >>> _encodedir(b'data/foo.i')
56 'data/foo.i'
58 'data/foo.i'
57 >>> _encodedir(b'data/foo.i/bla.i')
59 >>> _encodedir(b'data/foo.i/bla.i')
58 'data/foo.i.hg/bla.i'
60 'data/foo.i.hg/bla.i'
59 >>> _encodedir(b'data/foo.i.hg/bla.i')
61 >>> _encodedir(b'data/foo.i.hg/bla.i')
60 'data/foo.i.hg.hg/bla.i'
62 'data/foo.i.hg.hg/bla.i'
61 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
63 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
62 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
64 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
63 """
65 """
64 return (
66 return (
65 path.replace(b".hg/", b".hg.hg/")
67 path.replace(b".hg/", b".hg.hg/")
66 .replace(b".i/", b".i.hg/")
68 .replace(b".i/", b".i.hg/")
67 .replace(b".d/", b".d.hg/")
69 .replace(b".d/", b".d.hg/")
68 )
70 )
69
71
70
72
71 encodedir = getattr(parsers, 'encodedir', _encodedir)
73 encodedir = getattr(parsers, 'encodedir', _encodedir)
72
74
73
75
74 def decodedir(path):
76 def decodedir(path):
75 """
77 """
76 >>> decodedir(b'data/foo.i')
78 >>> decodedir(b'data/foo.i')
77 'data/foo.i'
79 'data/foo.i'
78 >>> decodedir(b'data/foo.i.hg/bla.i')
80 >>> decodedir(b'data/foo.i.hg/bla.i')
79 'data/foo.i/bla.i'
81 'data/foo.i/bla.i'
80 >>> decodedir(b'data/foo.i.hg.hg/bla.i')
82 >>> decodedir(b'data/foo.i.hg.hg/bla.i')
81 'data/foo.i.hg/bla.i'
83 'data/foo.i.hg/bla.i'
82 """
84 """
83 if b".hg/" not in path:
85 if b".hg/" not in path:
84 return path
86 return path
85 return (
87 return (
86 path.replace(b".d.hg/", b".d/")
88 path.replace(b".d.hg/", b".d/")
87 .replace(b".i.hg/", b".i/")
89 .replace(b".i.hg/", b".i/")
88 .replace(b".hg.hg/", b".hg/")
90 .replace(b".hg.hg/", b".hg/")
89 )
91 )
90
92
91
93
92 def _reserved():
94 def _reserved():
93 """characters that are problematic for filesystems
95 """characters that are problematic for filesystems
94
96
95 * ascii escapes (0..31)
97 * ascii escapes (0..31)
96 * ascii hi (126..255)
98 * ascii hi (126..255)
97 * windows specials
99 * windows specials
98
100
99 these characters will be escaped by encodefunctions
101 these characters will be escaped by encodefunctions
100 """
102 """
101 winreserved = [ord(x) for x in u'\\:*?"<>|']
103 winreserved = [ord(x) for x in u'\\:*?"<>|']
102 for x in range(32):
104 for x in range(32):
103 yield x
105 yield x
104 for x in range(126, 256):
106 for x in range(126, 256):
105 yield x
107 yield x
106 for x in winreserved:
108 for x in winreserved:
107 yield x
109 yield x
108
110
109
111
110 def _buildencodefun():
112 def _buildencodefun():
111 """
113 """
112 >>> enc, dec = _buildencodefun()
114 >>> enc, dec = _buildencodefun()
113
115
114 >>> enc(b'nothing/special.txt')
116 >>> enc(b'nothing/special.txt')
115 'nothing/special.txt'
117 'nothing/special.txt'
116 >>> dec(b'nothing/special.txt')
118 >>> dec(b'nothing/special.txt')
117 'nothing/special.txt'
119 'nothing/special.txt'
118
120
119 >>> enc(b'HELLO')
121 >>> enc(b'HELLO')
120 '_h_e_l_l_o'
122 '_h_e_l_l_o'
121 >>> dec(b'_h_e_l_l_o')
123 >>> dec(b'_h_e_l_l_o')
122 'HELLO'
124 'HELLO'
123
125
124 >>> enc(b'hello:world?')
126 >>> enc(b'hello:world?')
125 'hello~3aworld~3f'
127 'hello~3aworld~3f'
126 >>> dec(b'hello~3aworld~3f')
128 >>> dec(b'hello~3aworld~3f')
127 'hello:world?'
129 'hello:world?'
128
130
129 >>> enc(b'the\\x07quick\\xADshot')
131 >>> enc(b'the\\x07quick\\xADshot')
130 'the~07quick~adshot'
132 'the~07quick~adshot'
131 >>> dec(b'the~07quick~adshot')
133 >>> dec(b'the~07quick~adshot')
132 'the\\x07quick\\xadshot'
134 'the\\x07quick\\xadshot'
133 """
135 """
134 e = b'_'
136 e = b'_'
135 xchr = pycompat.bytechr
137 xchr = pycompat.bytechr
136 asciistr = list(map(xchr, range(127)))
138 asciistr = list(map(xchr, range(127)))
137 capitals = list(range(ord(b"A"), ord(b"Z") + 1))
139 capitals = list(range(ord(b"A"), ord(b"Z") + 1))
138
140
139 cmap = {x: x for x in asciistr}
141 cmap = {x: x for x in asciistr}
140 for x in _reserved():
142 for x in _reserved():
141 cmap[xchr(x)] = b"~%02x" % x
143 cmap[xchr(x)] = b"~%02x" % x
142 for x in capitals + [ord(e)]:
144 for x in capitals + [ord(e)]:
143 cmap[xchr(x)] = e + xchr(x).lower()
145 cmap[xchr(x)] = e + xchr(x).lower()
144
146
145 dmap = {}
147 dmap = {}
146 for k, v in cmap.items():
148 for k, v in cmap.items():
147 dmap[v] = k
149 dmap[v] = k
148
150
149 def decode(s):
151 def decode(s):
150 i = 0
152 i = 0
151 while i < len(s):
153 while i < len(s):
152 for l in range(1, 4):
154 for l in range(1, 4):
153 try:
155 try:
154 yield dmap[s[i : i + l]]
156 yield dmap[s[i : i + l]]
155 i += l
157 i += l
156 break
158 break
157 except KeyError:
159 except KeyError:
158 pass
160 pass
159 else:
161 else:
160 raise KeyError
162 raise KeyError
161
163
162 return (
164 return (
163 lambda s: b''.join([cmap[s[c : c + 1]] for c in range(len(s))]),
165 lambda s: b''.join([cmap[s[c : c + 1]] for c in range(len(s))]),
164 lambda s: b''.join(list(decode(s))),
166 lambda s: b''.join(list(decode(s))),
165 )
167 )
166
168
167
169
168 _encodefname, _decodefname = _buildencodefun()
170 _encodefname, _decodefname = _buildencodefun()
169
171
170
172
171 def encodefilename(s):
173 def encodefilename(s):
172 """
174 """
173 >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
175 >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
174 'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
176 'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
175 """
177 """
176 return _encodefname(encodedir(s))
178 return _encodefname(encodedir(s))
177
179
178
180
179 def decodefilename(s):
181 def decodefilename(s):
180 """
182 """
181 >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
183 >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
182 'foo.i/bar.d/bla.hg/hi:world?/HELLO'
184 'foo.i/bar.d/bla.hg/hi:world?/HELLO'
183 """
185 """
184 return decodedir(_decodefname(s))
186 return decodedir(_decodefname(s))
185
187
186
188
187 def _buildlowerencodefun():
189 def _buildlowerencodefun():
188 """
190 """
189 >>> f = _buildlowerencodefun()
191 >>> f = _buildlowerencodefun()
190 >>> f(b'nothing/special.txt')
192 >>> f(b'nothing/special.txt')
191 'nothing/special.txt'
193 'nothing/special.txt'
192 >>> f(b'HELLO')
194 >>> f(b'HELLO')
193 'hello'
195 'hello'
194 >>> f(b'hello:world?')
196 >>> f(b'hello:world?')
195 'hello~3aworld~3f'
197 'hello~3aworld~3f'
196 >>> f(b'the\\x07quick\\xADshot')
198 >>> f(b'the\\x07quick\\xADshot')
197 'the~07quick~adshot'
199 'the~07quick~adshot'
198 """
200 """
199 xchr = pycompat.bytechr
201 xchr = pycompat.bytechr
200 cmap = {xchr(x): xchr(x) for x in range(127)}
202 cmap = {xchr(x): xchr(x) for x in range(127)}
201 for x in _reserved():
203 for x in _reserved():
202 cmap[xchr(x)] = b"~%02x" % x
204 cmap[xchr(x)] = b"~%02x" % x
203 for x in range(ord(b"A"), ord(b"Z") + 1):
205 for x in range(ord(b"A"), ord(b"Z") + 1):
204 cmap[xchr(x)] = xchr(x).lower()
206 cmap[xchr(x)] = xchr(x).lower()
205
207
206 def lowerencode(s):
208 def lowerencode(s):
207 return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])
209 return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])
208
210
209 return lowerencode
211 return lowerencode
210
212
211
213
212 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
214 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
213
215
214 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
216 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
215 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
217 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
216 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
218 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
217
219
218
220
219 def _auxencode(path, dotencode):
221 def _auxencode(path, dotencode):
220 """
222 """
221 Encodes filenames containing names reserved by Windows or which end in
223 Encodes filenames containing names reserved by Windows or which end in
222 period or space. Does not touch other single reserved characters c.
224 period or space. Does not touch other single reserved characters c.
223 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
225 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
224 Additionally encodes space or period at the beginning, if dotencode is
226 Additionally encodes space or period at the beginning, if dotencode is
225 True. Parameter path is assumed to be all lowercase.
227 True. Parameter path is assumed to be all lowercase.
226 A segment only needs encoding if a reserved name appears as a
228 A segment only needs encoding if a reserved name appears as a
227 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
229 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
228 doesn't need encoding.
230 doesn't need encoding.
229
231
230 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
232 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
231 >>> _auxencode(s.split(b'/'), True)
233 >>> _auxencode(s.split(b'/'), True)
232 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
234 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
233 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
235 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
234 >>> _auxencode(s.split(b'/'), False)
236 >>> _auxencode(s.split(b'/'), False)
235 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
237 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
236 >>> _auxencode([b'foo. '], True)
238 >>> _auxencode([b'foo. '], True)
237 ['foo.~20']
239 ['foo.~20']
238 >>> _auxencode([b' .foo'], True)
240 >>> _auxencode([b' .foo'], True)
239 ['~20.foo']
241 ['~20.foo']
240 """
242 """
241 for i, n in enumerate(path):
243 for i, n in enumerate(path):
242 if not n:
244 if not n:
243 continue
245 continue
244 if dotencode and n[0] in b'. ':
246 if dotencode and n[0] in b'. ':
245 n = b"~%02x" % ord(n[0:1]) + n[1:]
247 n = b"~%02x" % ord(n[0:1]) + n[1:]
246 path[i] = n
248 path[i] = n
247 else:
249 else:
248 l = n.find(b'.')
250 l = n.find(b'.')
249 if l == -1:
251 if l == -1:
250 l = len(n)
252 l = len(n)
251 if (l == 3 and n[:3] in _winres3) or (
253 if (l == 3 and n[:3] in _winres3) or (
252 l == 4
254 l == 4
253 and n[3:4] <= b'9'
255 and n[3:4] <= b'9'
254 and n[3:4] >= b'1'
256 and n[3:4] >= b'1'
255 and n[:3] in _winres4
257 and n[:3] in _winres4
256 ):
258 ):
257 # encode third letter ('aux' -> 'au~78')
259 # encode third letter ('aux' -> 'au~78')
258 ec = b"~%02x" % ord(n[2:3])
260 ec = b"~%02x" % ord(n[2:3])
259 n = n[0:2] + ec + n[3:]
261 n = n[0:2] + ec + n[3:]
260 path[i] = n
262 path[i] = n
261 if n[-1] in b'. ':
263 if n[-1] in b'. ':
262 # encode last period or space ('foo...' -> 'foo..~2e')
264 # encode last period or space ('foo...' -> 'foo..~2e')
263 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
265 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
264 return path
266 return path
265
267
266
268
267 _maxstorepathlen = 120
269 _maxstorepathlen = 120
268 _dirprefixlen = 8
270 _dirprefixlen = 8
269 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
271 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
270
272
271
273
272 def _hashencode(path, dotencode):
274 def _hashencode(path, dotencode):
273 digest = hex(hashutil.sha1(path).digest())
275 digest = hex(hashutil.sha1(path).digest())
274 le = lowerencode(path[5:]).split(b'/') # skips prefix 'data/' or 'meta/'
276 le = lowerencode(path[5:]).split(b'/') # skips prefix 'data/' or 'meta/'
275 parts = _auxencode(le, dotencode)
277 parts = _auxencode(le, dotencode)
276 basename = parts[-1]
278 basename = parts[-1]
277 _root, ext = os.path.splitext(basename)
279 _root, ext = os.path.splitext(basename)
278 sdirs = []
280 sdirs = []
279 sdirslen = 0
281 sdirslen = 0
280 for p in parts[:-1]:
282 for p in parts[:-1]:
281 d = p[:_dirprefixlen]
283 d = p[:_dirprefixlen]
282 if d[-1] in b'. ':
284 if d[-1] in b'. ':
283 # Windows can't access dirs ending in period or space
285 # Windows can't access dirs ending in period or space
284 d = d[:-1] + b'_'
286 d = d[:-1] + b'_'
285 if sdirslen == 0:
287 if sdirslen == 0:
286 t = len(d)
288 t = len(d)
287 else:
289 else:
288 t = sdirslen + 1 + len(d)
290 t = sdirslen + 1 + len(d)
289 if t > _maxshortdirslen:
291 if t > _maxshortdirslen:
290 break
292 break
291 sdirs.append(d)
293 sdirs.append(d)
292 sdirslen = t
294 sdirslen = t
293 dirs = b'/'.join(sdirs)
295 dirs = b'/'.join(sdirs)
294 if len(dirs) > 0:
296 if len(dirs) > 0:
295 dirs += b'/'
297 dirs += b'/'
296 res = b'dh/' + dirs + digest + ext
298 res = b'dh/' + dirs + digest + ext
297 spaceleft = _maxstorepathlen - len(res)
299 spaceleft = _maxstorepathlen - len(res)
298 if spaceleft > 0:
300 if spaceleft > 0:
299 filler = basename[:spaceleft]
301 filler = basename[:spaceleft]
300 res = b'dh/' + dirs + filler + digest + ext
302 res = b'dh/' + dirs + filler + digest + ext
301 return res
303 return res
302
304
303
305
304 def _hybridencode(path, dotencode):
306 def _hybridencode(path, dotencode):
305 """encodes path with a length limit
307 """encodes path with a length limit
306
308
307 Encodes all paths that begin with 'data/', according to the following.
309 Encodes all paths that begin with 'data/', according to the following.
308
310
309 Default encoding (reversible):
311 Default encoding (reversible):
310
312
311 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
313 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
312 characters are encoded as '~xx', where xx is the two digit hex code
314 characters are encoded as '~xx', where xx is the two digit hex code
313 of the character (see encodefilename).
315 of the character (see encodefilename).
314 Relevant path components consisting of Windows reserved filenames are
316 Relevant path components consisting of Windows reserved filenames are
315 masked by encoding the third character ('aux' -> 'au~78', see _auxencode).
317 masked by encoding the third character ('aux' -> 'au~78', see _auxencode).
316
318
317 Hashed encoding (not reversible):
319 Hashed encoding (not reversible):
318
320
319 If the default-encoded path is longer than _maxstorepathlen, a
321 If the default-encoded path is longer than _maxstorepathlen, a
320 non-reversible hybrid hashing of the path is done instead.
322 non-reversible hybrid hashing of the path is done instead.
321 This encoding uses up to _dirprefixlen characters of all directory
323 This encoding uses up to _dirprefixlen characters of all directory
322 levels of the lowerencoded path, but not more levels than can fit into
324 levels of the lowerencoded path, but not more levels than can fit into
323 _maxshortdirslen.
325 _maxshortdirslen.
324 Then follows the filler followed by the sha digest of the full path.
326 Then follows the filler followed by the sha digest of the full path.
325 The filler is the beginning of the basename of the lowerencoded path
327 The filler is the beginning of the basename of the lowerencoded path
326 (the basename is everything after the last path separator). The filler
328 (the basename is everything after the last path separator). The filler
327 is as long as possible, filling in characters from the basename until
329 is as long as possible, filling in characters from the basename until
328 the encoded path has _maxstorepathlen characters (or all chars of the
330 the encoded path has _maxstorepathlen characters (or all chars of the
329 basename have been taken).
331 basename have been taken).
330 The extension (e.g. '.i' or '.d') is preserved.
332 The extension (e.g. '.i' or '.d') is preserved.
331
333
332 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
334 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
333 encoding was used.
335 encoding was used.
334 """
336 """
335 path = encodedir(path)
337 path = encodedir(path)
336 ef = _encodefname(path).split(b'/')
338 ef = _encodefname(path).split(b'/')
337 res = b'/'.join(_auxencode(ef, dotencode))
339 res = b'/'.join(_auxencode(ef, dotencode))
338 if len(res) > _maxstorepathlen:
340 if len(res) > _maxstorepathlen:
339 res = _hashencode(path, dotencode)
341 res = _hashencode(path, dotencode)
340 return res
342 return res
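A couple of illustrative calls (not in the changeset, written as if evaluated inside this module in the spirit of its doctests) showing both branches; the digest in the hashed form is elided because it depends on the SHA-1 of the input:

    _hybridencode(b'data/Some/Path.txt.i', True)
    # -> b'data/_some/_path.txt.i'  (reversible default encoding)

    long_path = b'data/' + b'x' * 200 + b'.i'
    _hybridencode(long_path, True).startswith(b'dh/')
    # -> True: default encoding exceeds _maxstorepathlen (120), so the
    #    non-reversible hashed form is used instead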
341
343
342
344
343 def _pathencode(path):
345 def _pathencode(path):
344 de = encodedir(path)
346 de = encodedir(path)
345 if len(path) > _maxstorepathlen:
347 if len(path) > _maxstorepathlen:
346 return _hashencode(de, True)
348 return _hashencode(de, True)
347 ef = _encodefname(de).split(b'/')
349 ef = _encodefname(de).split(b'/')
348 res = b'/'.join(_auxencode(ef, True))
350 res = b'/'.join(_auxencode(ef, True))
349 if len(res) > _maxstorepathlen:
351 if len(res) > _maxstorepathlen:
350 return _hashencode(de, True)
352 return _hashencode(de, True)
351 return res
353 return res
352
354
353
355
354 _pathencode = getattr(parsers, 'pathencode', _pathencode)
356 _pathencode = getattr(parsers, 'pathencode', _pathencode)
355
357
356
358
357 def _plainhybridencode(f):
359 def _plainhybridencode(f):
358 return _hybridencode(f, False)
360 return _hybridencode(f, False)
359
361
360
362
361 def _calcmode(vfs):
363 def _calcmode(vfs):
362 try:
364 try:
363 # files in .hg/ will be created using this mode
365 # files in .hg/ will be created using this mode
364 mode = vfs.stat().st_mode
366 mode = vfs.stat().st_mode
365 # avoid some useless chmods
367 # avoid some useless chmods
366 if (0o777 & ~util.umask) == (0o777 & mode):
368 if (0o777 & ~util.umask) == (0o777 & mode):
367 mode = None
369 mode = None
368 except OSError:
370 except OSError:
369 mode = None
371 mode = None
370 return mode
372 return mode
371
373
372
374
373 _data = [
375 _data = [
374 b'bookmarks',
376 b'bookmarks',
375 b'narrowspec',
377 b'narrowspec',
376 b'data',
378 b'data',
377 b'meta',
379 b'meta',
378 b'00manifest.d',
380 b'00manifest.d',
379 b'00manifest.i',
381 b'00manifest.i',
380 b'00changelog.d',
382 b'00changelog.d',
381 b'00changelog.i',
383 b'00changelog.i',
382 b'phaseroots',
384 b'phaseroots',
383 b'obsstore',
385 b'obsstore',
384 b'requires',
386 b'requires',
385 ]
387 ]
386
388
387 REVLOG_FILES_MAIN_EXT = (b'.i',)
389 REVLOG_FILES_MAIN_EXT = (b'.i',)
388 REVLOG_FILES_OTHER_EXT = (
390 REVLOG_FILES_OTHER_EXT = (
389 b'.idx',
391 b'.idx',
390 b'.d',
392 b'.d',
391 b'.dat',
393 b'.dat',
392 b'.n',
394 b'.n',
393 b'.nd',
395 b'.nd',
394 b'.sda',
396 b'.sda',
395 )
397 )
396 # files that are "volatile" and might change between listing and streaming
398 # files that are "volatile" and might change between listing and streaming
397 #
399 #
398 # note: the ".nd" files are nodemap data and won't "change" but they might be
400 # note: the ".nd" files are nodemap data and won't "change" but they might be
399 # deleted.
401 # deleted.
400 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')
402 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')
401
403
402 # some exception to the above matching
404 # some exception to the above matching
403 #
405 #
404 # XXX This is currently not in use because of issue6542
406 # XXX This is currently not in use because of issue6542
405 EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
407 EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
406
408
407
409
408 def is_revlog(f, kind, st):
410 def is_revlog(f, kind, st):
409 if kind != stat.S_IFREG:
411 if kind != stat.S_IFREG:
410 return None
412 return None
411 return revlog_type(f)
413 return revlog_type(f)
412
414
413
415
414 def revlog_type(f):
416 def revlog_type(f):
415 # XXX we need to filter `undo.` created by the transaction here, however
417 # XXX we need to filter `undo.` created by the transaction here, however
416 # being naive about it also filter revlog for `undo.*` files, leading to
418 # being naive about it also filter revlog for `undo.*` files, leading to
417 # issue6542. So we no longer use EXCLUDED.
419 # issue6542. So we no longer use EXCLUDED.
418 if f.endswith(REVLOG_FILES_MAIN_EXT):
420 if f.endswith(REVLOG_FILES_MAIN_EXT):
419 return FILEFLAGS_REVLOG_MAIN
421 return FILEFLAGS_REVLOG_MAIN
420 elif f.endswith(REVLOG_FILES_OTHER_EXT):
422 elif f.endswith(REVLOG_FILES_OTHER_EXT):
421 t = FILETYPE_FILELOG_OTHER
423 t = FILETYPE_FILELOG_OTHER
422 if f.endswith(REVLOG_FILES_VOLATILE_EXT):
424 if f.endswith(REVLOG_FILES_VOLATILE_EXT):
423 t |= FILEFLAGS_VOLATILE
425 t |= FILEFLAGS_VOLATILE
424 return t
426 return t
425 return None
427 return None
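For orientation, a few illustrative calls (not part of the changeset) and the flags they return under the matching rules above; the FILEFLAGS_*/FILETYPE_* constants are the ones defined just below:

    revlog_type(b'data/foo.i')    # FILEFLAGS_REVLOG_MAIN (main revlog entry point)
    revlog_type(b'data/foo.d')    # FILETYPE_FILELOG_OTHER (secondary revlog file)
    revlog_type(b'data/foo.nd')   # FILETYPE_FILELOG_OTHER | FILEFLAGS_VOLATILE (nodemap data)
    revlog_type(b'data/foo.txt')  # None (not a revlog file)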
426
428
427
429
428 # the file is part of changelog data
430 # the file is part of changelog data
429 FILEFLAGS_CHANGELOG = 1 << 13
431 FILEFLAGS_CHANGELOG = 1 << 13
430 # the file is part of manifest data
432 # the file is part of manifest data
431 FILEFLAGS_MANIFESTLOG = 1 << 12
433 FILEFLAGS_MANIFESTLOG = 1 << 12
432 # the file is part of filelog data
434 # the file is part of filelog data
433 FILEFLAGS_FILELOG = 1 << 11
435 FILEFLAGS_FILELOG = 1 << 11
434 # files that are not directly part of a revlog
436 # files that are not directly part of a revlog
435 FILEFLAGS_OTHER = 1 << 10
437 FILEFLAGS_OTHER = 1 << 10
436
438
437 # the main entry point for a revlog
439 # the main entry point for a revlog
438 FILEFLAGS_REVLOG_MAIN = 1 << 1
440 FILEFLAGS_REVLOG_MAIN = 1 << 1
439 # a secondary file for a revlog
441 # a secondary file for a revlog
440 FILEFLAGS_REVLOG_OTHER = 1 << 0
442 FILEFLAGS_REVLOG_OTHER = 1 << 0
441
443
442 # files that are "volatile" and might change between listing and streaming
444 # files that are "volatile" and might change between listing and streaming
443 FILEFLAGS_VOLATILE = 1 << 20
445 FILEFLAGS_VOLATILE = 1 << 20
444
446
445 FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
447 FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
446 FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
448 FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
447 FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
449 FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
448 FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
450 FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
449 FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
451 FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
450 FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
452 FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
451 FILETYPE_OTHER = FILEFLAGS_OTHER
453 FILETYPE_OTHER = FILEFLAGS_OTHER
452
454
453
455
456 @attr.s(slots=True)
457 class StoreEntry:
458 """An entry in the store
459
460 This is returned by `store.walk` and represents some data in the store."""
461
462 unencoded_path = attr.ib()
463 is_revlog = attr.ib(default=False)
464 revlog_type = attr.ib(default=None)
465 is_revlog_main = attr.ib(default=None)
466 is_volatile = attr.ib(default=False)
467 file_size = attr.ib(default=None)
468
469
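As a rough sketch (the file size is invented for illustration), here is the entry that `basicstore.datafiles()` below now yields for a tracked file `foo`, next to the tuple it replaces:

    entry = StoreEntry(
        unencoded_path=b'data/foo.i',
        is_revlog=True,
        revlog_type=FILEFLAGS_FILELOG,
        is_revlog_main=True,       # the ".i" file is the revlog's main entry point
        is_volatile=False,
        file_size=1234,            # illustrative only
    )
    # previous tuple form: (FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN, b'data/foo.i', 1234)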
454 class basicstore:
470 class basicstore:
455 '''base class for local repository stores'''
471 '''base class for local repository stores'''
456
472
457 def __init__(self, path, vfstype):
473 def __init__(self, path, vfstype):
458 vfs = vfstype(path)
474 vfs = vfstype(path)
459 self.path = vfs.base
475 self.path = vfs.base
460 self.createmode = _calcmode(vfs)
476 self.createmode = _calcmode(vfs)
461 vfs.createmode = self.createmode
477 vfs.createmode = self.createmode
462 self.rawvfs = vfs
478 self.rawvfs = vfs
463 self.vfs = vfsmod.filtervfs(vfs, encodedir)
479 self.vfs = vfsmod.filtervfs(vfs, encodedir)
464 self.opener = self.vfs
480 self.opener = self.vfs
465
481
466 def join(self, f):
482 def join(self, f):
467 return self.path + b'/' + encodedir(f)
483 return self.path + b'/' + encodedir(f)
468
484
469 def _walk(self, relpath, recurse):
485 def _walk(self, relpath, recurse):
470 '''yields (revlog_type, unencoded, size)'''
486 '''yields (revlog_type, unencoded, size)'''
471 path = self.path
487 path = self.path
472 if relpath:
488 if relpath:
473 path += b'/' + relpath
489 path += b'/' + relpath
474 striplen = len(self.path) + 1
490 striplen = len(self.path) + 1
475 l = []
491 l = []
476 if self.rawvfs.isdir(path):
492 if self.rawvfs.isdir(path):
477 visit = [path]
493 visit = [path]
478 readdir = self.rawvfs.readdir
494 readdir = self.rawvfs.readdir
479 while visit:
495 while visit:
480 p = visit.pop()
496 p = visit.pop()
481 for f, kind, st in readdir(p, stat=True):
497 for f, kind, st in readdir(p, stat=True):
482 fp = p + b'/' + f
498 fp = p + b'/' + f
483 rl_type = is_revlog(f, kind, st)
499 rl_type = is_revlog(f, kind, st)
484 if rl_type is not None:
500 if rl_type is not None:
485 n = util.pconvert(fp[striplen:])
501 n = util.pconvert(fp[striplen:])
486 l.append((rl_type, decodedir(n), st.st_size))
502 l.append((rl_type, decodedir(n), st.st_size))
487 elif kind == stat.S_IFDIR and recurse:
503 elif kind == stat.S_IFDIR and recurse:
488 visit.append(fp)
504 visit.append(fp)
489 l.sort()
505 l.sort()
490 return l
506 return l
491
507
492 def changelog(self, trypending, concurrencychecker=None):
508 def changelog(self, trypending, concurrencychecker=None):
493 return changelog.changelog(
509 return changelog.changelog(
494 self.vfs,
510 self.vfs,
495 trypending=trypending,
511 trypending=trypending,
496 concurrencychecker=concurrencychecker,
512 concurrencychecker=concurrencychecker,
497 )
513 )
498
514
499 def manifestlog(self, repo, storenarrowmatch):
515 def manifestlog(self, repo, storenarrowmatch):
500 rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
516 rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
501 return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
517 return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
502
518
503 def datafiles(self, matcher=None, undecodable=None):
519 def datafiles(
520 self, matcher=None, undecodable=None
521 ) -> Generator[StoreEntry, None, None]:
504 """Like walk, but excluding the changelog and root manifest.
522 """Like walk, but excluding the changelog and root manifest.
505
523
506 When [undecodable] is None, revlog names that can't be
524 When [undecodable] is None, revlog names that can't be
507 decoded cause an exception. When it is provided, it should
525 decoded cause an exception. When it is provided, it should
508 be a list and the filenames that can't be decoded are added
526 be a list and the filenames that can't be decoded are added
509 to it instead. This is very rarely needed."""
527 to it instead. This is very rarely needed."""
510 files = self._walk(b'data', True) + self._walk(b'meta', True)
528 files = self._walk(b'data', True) + self._walk(b'meta', True)
511 for (t, u, s) in files:
529 for (t, u, s) in files:
512 if t is not None:
530 if t is not None:
513 yield (FILEFLAGS_FILELOG | t, u, s)
531 yield StoreEntry(
532 unencoded_path=u,
533 is_revlog=True,
534 revlog_type=FILEFLAGS_FILELOG,
535 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
536 is_volatile=bool(t & FILEFLAGS_VOLATILE),
537 file_size=s,
538 )
514
539
515 def topfiles(self):
540 def topfiles(self) -> Generator[StoreEntry, None, None]:
516 # yield manifest before changelog
541 # yield manifest before changelog
517 files = reversed(self._walk(b'', False))
542 files = reversed(self._walk(b'', False))
518 for (t, u, s) in files:
543 for (t, u, s) in files:
519 if u.startswith(b'00changelog'):
544 if u.startswith(b'00changelog'):
520 yield (FILEFLAGS_CHANGELOG | t, u, s)
545 revlog_type = FILEFLAGS_CHANGELOG
521 elif u.startswith(b'00manifest'):
546 elif u.startswith(b'00manifest'):
522 yield (FILEFLAGS_MANIFESTLOG | t, u, s)
547 revlog_type = FILEFLAGS_MANIFESTLOG
523 else:
548 else:
524 yield (FILETYPE_OTHER | t, u, s)
549 revlog_type = None
550 yield StoreEntry(
551 unencoded_path=u,
552 is_revlog=revlog_type is not None,
553 revlog_type=revlog_type,
554 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
555 is_volatile=bool(t & FILEFLAGS_VOLATILE),
556 file_size=s,
557 )
525
558
526 def walk(self, matcher=None):
559 def walk(self, matcher=None) -> Generator[StoreEntry, None, None]:
527 """return files related to data storage (ie: revlogs)
560 """return files related to data storage (ie: revlogs)
528
561
529 yields (file_type, unencoded, size)
562 yields (file_type, unencoded, size)
530
563
531 if a matcher is passed, only storage files of tracked paths
564 if a matcher is passed, only storage files of tracked paths
532 that match the matcher are yielded
565 that match the matcher are yielded
533 """
566 """
534 # yield data files first
567 # yield data files first
535 for x in self.datafiles(matcher):
568 for x in self.datafiles(matcher):
536 yield x
569 yield x
537 for x in self.topfiles():
570 for x in self.topfiles():
538 yield x
571 yield x
539
572
540 def copylist(self):
573 def copylist(self):
541 return _data
574 return _data
542
575
543 def write(self, tr):
576 def write(self, tr):
544 pass
577 pass
545
578
546 def invalidatecaches(self):
579 def invalidatecaches(self):
547 pass
580 pass
548
581
549 def markremoved(self, fn):
582 def markremoved(self, fn):
550 pass
583 pass
551
584
552 def __contains__(self, path):
585 def __contains__(self, path):
553 '''Checks if the store contains path'''
586 '''Checks if the store contains path'''
554 path = b"/".join((b"data", path))
587 path = b"/".join((b"data", path))
555 # file?
588 # file?
556 if self.vfs.exists(path + b".i"):
589 if self.vfs.exists(path + b".i"):
557 return True
590 return True
558 # dir?
591 # dir?
559 if not path.endswith(b"/"):
592 if not path.endswith(b"/"):
560 path = path + b"/"
593 path = path + b"/"
561 return self.vfs.exists(path)
594 return self.vfs.exists(path)
562
595
563
596
564 class encodedstore(basicstore):
597 class encodedstore(basicstore):
565 def __init__(self, path, vfstype):
598 def __init__(self, path, vfstype):
566 vfs = vfstype(path + b'/store')
599 vfs = vfstype(path + b'/store')
567 self.path = vfs.base
600 self.path = vfs.base
568 self.createmode = _calcmode(vfs)
601 self.createmode = _calcmode(vfs)
569 vfs.createmode = self.createmode
602 vfs.createmode = self.createmode
570 self.rawvfs = vfs
603 self.rawvfs = vfs
571 self.vfs = vfsmod.filtervfs(vfs, encodefilename)
604 self.vfs = vfsmod.filtervfs(vfs, encodefilename)
572 self.opener = self.vfs
605 self.opener = self.vfs
573
606
574 # note: topfiles would also need a decode phase. It is just that in
607 # note: topfiles would also need a decode phase. It is just that in
575 # practice we do not have any file outside of `data/` that needs encoding.
608 # practice we do not have any file outside of `data/` that needs encoding.
576 # However that might change so we should probably add a test and encoding
609 # However that might change so we should probably add a test and encoding
577 # decoding for it too. see issue6548
610 # decoding for it too. see issue6548
578
611
579 def datafiles(self, matcher=None, undecodable=None):
612 def datafiles(
580 for t, f1, size in super(encodedstore, self).datafiles():
613 self, matcher=None, undecodable=None
614 ) -> Generator[StoreEntry, None, None]:
615 for entry in super(encodedstore, self).datafiles():
581 try:
616 try:
617 f1 = entry.unencoded_path
582 f2 = decodefilename(f1)
618 f2 = decodefilename(f1)
583 except KeyError:
619 except KeyError:
584 if undecodable is None:
620 if undecodable is None:
585 msg = _(b'undecodable revlog name %s') % f1
621 msg = _(b'undecodable revlog name %s') % f1
586 raise error.StorageError(msg)
622 raise error.StorageError(msg)
587 else:
623 else:
588 undecodable.append(f1)
624 undecodable.append(f1)
589 continue
625 continue
590 if not _matchtrackedpath(f2, matcher):
626 if not _matchtrackedpath(f2, matcher):
591 continue
627 continue
592 yield t, f2, size
628 entry.unencoded_path = f2
629 yield entry
593
630
594 def join(self, f):
631 def join(self, f):
595 return self.path + b'/' + encodefilename(f)
632 return self.path + b'/' + encodefilename(f)
596
633
597 def copylist(self):
634 def copylist(self):
598 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
635 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
599
636
600
637
601 class fncache:
638 class fncache:
602 # the filename used to be partially encoded
639 # the filename used to be partially encoded
603 # hence the encodedir/decodedir dance
640 # hence the encodedir/decodedir dance
604 def __init__(self, vfs):
641 def __init__(self, vfs):
605 self.vfs = vfs
642 self.vfs = vfs
606 self._ignores = set()
643 self._ignores = set()
607 self.entries = None
644 self.entries = None
608 self._dirty = False
645 self._dirty = False
609 # set of new additions to fncache
646 # set of new additions to fncache
610 self.addls = set()
647 self.addls = set()
611
648
612 def ensureloaded(self, warn=None):
649 def ensureloaded(self, warn=None):
613 """read the fncache file if not already read.
650 """read the fncache file if not already read.
614
651
615 If the file on disk is corrupted, raise. If warn is provided,
652 If the file on disk is corrupted, raise. If warn is provided,
616 warn and keep going instead."""
653 warn and keep going instead."""
617 if self.entries is None:
654 if self.entries is None:
618 self._load(warn)
655 self._load(warn)
619
656
620 def _load(self, warn=None):
657 def _load(self, warn=None):
621 '''fill the entries from the fncache file'''
658 '''fill the entries from the fncache file'''
622 self._dirty = False
659 self._dirty = False
623 try:
660 try:
624 fp = self.vfs(b'fncache', mode=b'rb')
661 fp = self.vfs(b'fncache', mode=b'rb')
625 except IOError:
662 except IOError:
626 # skip nonexistent file
663 # skip nonexistent file
627 self.entries = set()
664 self.entries = set()
628 return
665 return
629
666
630 self.entries = set()
667 self.entries = set()
631 chunk = b''
668 chunk = b''
632 for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
669 for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
633 chunk += c
670 chunk += c
634 try:
671 try:
635 p = chunk.rindex(b'\n')
672 p = chunk.rindex(b'\n')
636 self.entries.update(decodedir(chunk[: p + 1]).splitlines())
673 self.entries.update(decodedir(chunk[: p + 1]).splitlines())
637 chunk = chunk[p + 1 :]
674 chunk = chunk[p + 1 :]
638 except ValueError:
675 except ValueError:
639 # substring '\n' not found, maybe the entry is bigger than the
676 # substring '\n' not found, maybe the entry is bigger than the
640 # chunksize, so let's keep iterating
677 # chunksize, so let's keep iterating
641 pass
678 pass
642
679
643 if chunk:
680 if chunk:
644 msg = _(b"fncache does not ends with a newline")
681 msg = _(b"fncache does not ends with a newline")
645 if warn:
682 if warn:
646 warn(msg + b'\n')
683 warn(msg + b'\n')
647 else:
684 else:
648 raise error.Abort(
685 raise error.Abort(
649 msg,
686 msg,
650 hint=_(
687 hint=_(
651 b"use 'hg debugrebuildfncache' to "
688 b"use 'hg debugrebuildfncache' to "
652 b"rebuild the fncache"
689 b"rebuild the fncache"
653 ),
690 ),
654 )
691 )
655 self._checkentries(fp, warn)
692 self._checkentries(fp, warn)
656 fp.close()
693 fp.close()
657
694
658 def _checkentries(self, fp, warn):
695 def _checkentries(self, fp, warn):
659 """make sure there is no empty string in entries"""
696 """make sure there is no empty string in entries"""
660 if b'' in self.entries:
697 if b'' in self.entries:
661 fp.seek(0)
698 fp.seek(0)
662 for n, line in enumerate(fp):
699 for n, line in enumerate(fp):
663 if not line.rstrip(b'\n'):
700 if not line.rstrip(b'\n'):
664 t = _(b'invalid entry in fncache, line %d') % (n + 1)
701 t = _(b'invalid entry in fncache, line %d') % (n + 1)
665 if warn:
702 if warn:
666 warn(t + b'\n')
703 warn(t + b'\n')
667 else:
704 else:
668 raise error.Abort(t)
705 raise error.Abort(t)
669
706
670 def write(self, tr):
707 def write(self, tr):
671 if self._dirty:
708 if self._dirty:
672 assert self.entries is not None
709 assert self.entries is not None
673 self.entries = self.entries | self.addls
710 self.entries = self.entries | self.addls
674 self.addls = set()
711 self.addls = set()
675 tr.addbackup(b'fncache')
712 tr.addbackup(b'fncache')
676 fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
713 fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
677 if self.entries:
714 if self.entries:
678 fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
715 fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
679 fp.close()
716 fp.close()
680 self._dirty = False
717 self._dirty = False
681 if self.addls:
718 if self.addls:
682 # if we have just new entries, let's append them to the fncache
719 # if we have just new entries, let's append them to the fncache
683 tr.addbackup(b'fncache')
720 tr.addbackup(b'fncache')
684 fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
721 fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
685 if self.addls:
722 if self.addls:
686 fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
723 fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
687 fp.close()
724 fp.close()
688 self.entries = None
725 self.entries = None
689 self.addls = set()
726 self.addls = set()
690
727
691 def addignore(self, fn):
728 def addignore(self, fn):
692 self._ignores.add(fn)
729 self._ignores.add(fn)
693
730
694 def add(self, fn):
731 def add(self, fn):
695 if fn in self._ignores:
732 if fn in self._ignores:
696 return
733 return
697 if self.entries is None:
734 if self.entries is None:
698 self._load()
735 self._load()
699 if fn not in self.entries:
736 if fn not in self.entries:
700 self.addls.add(fn)
737 self.addls.add(fn)
701
738
702 def remove(self, fn):
739 def remove(self, fn):
703 if self.entries is None:
740 if self.entries is None:
704 self._load()
741 self._load()
705 if fn in self.addls:
742 if fn in self.addls:
706 self.addls.remove(fn)
743 self.addls.remove(fn)
707 return
744 return
708 try:
745 try:
709 self.entries.remove(fn)
746 self.entries.remove(fn)
710 self._dirty = True
747 self._dirty = True
711 except KeyError:
748 except KeyError:
712 pass
749 pass
713
750
714 def __contains__(self, fn):
751 def __contains__(self, fn):
715 if fn in self.addls:
752 if fn in self.addls:
716 return True
753 return True
717 if self.entries is None:
754 if self.entries is None:
718 self._load()
755 self._load()
719 return fn in self.entries
756 return fn in self.entries
720
757
721 def __iter__(self):
758 def __iter__(self):
722 if self.entries is None:
759 if self.entries is None:
723 self._load()
760 self._load()
724 return iter(self.entries | self.addls)
761 return iter(self.entries | self.addls)
725
762
726
763
727 class _fncachevfs(vfsmod.proxyvfs):
764 class _fncachevfs(vfsmod.proxyvfs):
728 def __init__(self, vfs, fnc, encode):
765 def __init__(self, vfs, fnc, encode):
729 vfsmod.proxyvfs.__init__(self, vfs)
766 vfsmod.proxyvfs.__init__(self, vfs)
730 self.fncache = fnc
767 self.fncache = fnc
731 self.encode = encode
768 self.encode = encode
732
769
733 def __call__(self, path, mode=b'r', *args, **kw):
770 def __call__(self, path, mode=b'r', *args, **kw):
734 encoded = self.encode(path)
771 encoded = self.encode(path)
735 if (
772 if (
736 mode not in (b'r', b'rb')
773 mode not in (b'r', b'rb')
737 and (path.startswith(b'data/') or path.startswith(b'meta/'))
774 and (path.startswith(b'data/') or path.startswith(b'meta/'))
738 and revlog_type(path) is not None
775 and revlog_type(path) is not None
739 ):
776 ):
740 # do not trigger a fncache load when adding a file that already is
777 # do not trigger a fncache load when adding a file that already is
741 # known to exist.
778 # known to exist.
742 notload = self.fncache.entries is None and self.vfs.exists(encoded)
779 notload = self.fncache.entries is None and self.vfs.exists(encoded)
743 if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
780 if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
744 # when appending to an existing file, if the file has size zero,
781 # when appending to an existing file, if the file has size zero,
745 # it should be considered as missing. Such zero-size files are
782 # it should be considered as missing. Such zero-size files are
746 # the result of truncation when a transaction is aborted.
783 # the result of truncation when a transaction is aborted.
747 notload = False
784 notload = False
748 if not notload:
785 if not notload:
749 self.fncache.add(path)
786 self.fncache.add(path)
750 return self.vfs(encoded, mode, *args, **kw)
787 return self.vfs(encoded, mode, *args, **kw)
751
788
752 def join(self, path):
789 def join(self, path):
753 if path:
790 if path:
754 return self.vfs.join(self.encode(path))
791 return self.vfs.join(self.encode(path))
755 else:
792 else:
756 return self.vfs.join(path)
793 return self.vfs.join(path)
757
794
758 def register_file(self, path):
795 def register_file(self, path):
759 """generic hook point to lets fncache steer its stew"""
796 """generic hook point to lets fncache steer its stew"""
760 if path.startswith(b'data/') or path.startswith(b'meta/'):
797 if path.startswith(b'data/') or path.startswith(b'meta/'):
761 self.fncache.add(path)
798 self.fncache.add(path)
762
799
763
800
764 class fncachestore(basicstore):
801 class fncachestore(basicstore):
765 def __init__(self, path, vfstype, dotencode):
802 def __init__(self, path, vfstype, dotencode):
766 if dotencode:
803 if dotencode:
767 encode = _pathencode
804 encode = _pathencode
768 else:
805 else:
769 encode = _plainhybridencode
806 encode = _plainhybridencode
770 self.encode = encode
807 self.encode = encode
771 vfs = vfstype(path + b'/store')
808 vfs = vfstype(path + b'/store')
772 self.path = vfs.base
809 self.path = vfs.base
773 self.pathsep = self.path + b'/'
810 self.pathsep = self.path + b'/'
774 self.createmode = _calcmode(vfs)
811 self.createmode = _calcmode(vfs)
775 vfs.createmode = self.createmode
812 vfs.createmode = self.createmode
776 self.rawvfs = vfs
813 self.rawvfs = vfs
777 fnc = fncache(vfs)
814 fnc = fncache(vfs)
778 self.fncache = fnc
815 self.fncache = fnc
779 self.vfs = _fncachevfs(vfs, fnc, encode)
816 self.vfs = _fncachevfs(vfs, fnc, encode)
780 self.opener = self.vfs
817 self.opener = self.vfs
781
818
782 def join(self, f):
819 def join(self, f):
783 return self.pathsep + self.encode(f)
820 return self.pathsep + self.encode(f)
784
821
785 def getsize(self, path):
822 def getsize(self, path):
786 return self.rawvfs.stat(path).st_size
823 return self.rawvfs.stat(path).st_size
787
824
788 def datafiles(self, matcher=None, undecodable=None):
825 def datafiles(
826 self, matcher=None, undecodable=None
827 ) -> Generator[StoreEntry, None, None]:
789 for f in sorted(self.fncache):
828 for f in sorted(self.fncache):
790 if not _matchtrackedpath(f, matcher):
829 if not _matchtrackedpath(f, matcher):
791 continue
830 continue
792 ef = self.encode(f)
831 ef = self.encode(f)
793 t = revlog_type(f)
832 t = revlog_type(f)
794 if t is None:
833 if t is None:
795 # Note: this should not be in the fncache then…
834 # Note: this should not be in the fncache then…
796 #
835 #
797 # However the fncache might contain such files added by
836 # However the fncache might contain such files added by
798 # a previous version of Mercurial.
837 # a previous version of Mercurial.
799 continue
838 continue
800 t |= FILEFLAGS_FILELOG
839 t |= FILEFLAGS_FILELOG
801 try:
840 try:
802 yield t, f, self.getsize(ef)
841 yield StoreEntry(
842 unencoded_path=f,
843 is_revlog=True,
844 revlog_type=FILEFLAGS_FILELOG,
845 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
846 is_volatile=bool(t & FILEFLAGS_VOLATILE),
847 file_size=self.getsize(ef),
848 )
803 except FileNotFoundError:
849 except FileNotFoundError:
804 pass
850 pass
805
851
806 def copylist(self):
852 def copylist(self):
807 d = (
853 d = (
808 b'bookmarks',
854 b'bookmarks',
809 b'narrowspec',
855 b'narrowspec',
810 b'data',
856 b'data',
811 b'meta',
857 b'meta',
812 b'dh',
858 b'dh',
813 b'fncache',
859 b'fncache',
814 b'phaseroots',
860 b'phaseroots',
815 b'obsstore',
861 b'obsstore',
816 b'00manifest.d',
862 b'00manifest.d',
817 b'00manifest.i',
863 b'00manifest.i',
818 b'00changelog.d',
864 b'00changelog.d',
819 b'00changelog.i',
865 b'00changelog.i',
820 b'requires',
866 b'requires',
821 )
867 )
822 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]
868 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]
823
869
824 def write(self, tr):
870 def write(self, tr):
825 self.fncache.write(tr)
871 self.fncache.write(tr)
826
872
827 def invalidatecaches(self):
873 def invalidatecaches(self):
828 self.fncache.entries = None
874 self.fncache.entries = None
829 self.fncache.addls = set()
875 self.fncache.addls = set()
830
876
831 def markremoved(self, fn):
877 def markremoved(self, fn):
832 self.fncache.remove(fn)
878 self.fncache.remove(fn)
833
879
834 def _exists(self, f):
880 def _exists(self, f):
835 ef = self.encode(f)
881 ef = self.encode(f)
836 try:
882 try:
837 self.getsize(ef)
883 self.getsize(ef)
838 return True
884 return True
839 except FileNotFoundError:
885 except FileNotFoundError:
840 return False
886 return False
841
887
842 def __contains__(self, path):
888 def __contains__(self, path):
843 '''Checks if the store contains path'''
889 '''Checks if the store contains path'''
844 path = b"/".join((b"data", path))
890 path = b"/".join((b"data", path))
845 # check for files (exact match)
891 # check for files (exact match)
846 e = path + b'.i'
892 e = path + b'.i'
847 if e in self.fncache and self._exists(e):
893 if e in self.fncache and self._exists(e):
848 return True
894 return True
849 # now check for directories (prefix match)
895 # now check for directories (prefix match)
850 if not path.endswith(b'/'):
896 if not path.endswith(b'/'):
851 path += b'/'
897 path += b'/'
852 for e in self.fncache:
898 for e in self.fncache:
853 if e.startswith(path) and self._exists(e):
899 if e.startswith(path) and self._exists(e):
854 return True
900 return True
855 return False
901 return False
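Membership checks against the store keep the same semantics before and after this change. A short sketch of what the `__contains__` implementations above answer, with `repo` assumed to be an open local repository:

    b'foo/bar' in repo.store   # True if the filelog index data/foo/bar.i exists
    b'foo' in repo.store       # also True if "foo" is a directory of tracked files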
@@ -1,935 +1,937 b''
1 # streamclone.py - producing and consuming streaming repository data
1 # streamclone.py - producing and consuming streaming repository data
2 #
2 #
3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8
8
9 import contextlib
9 import contextlib
10 import os
10 import os
11 import struct
11 import struct
12
12
13 from .i18n import _
13 from .i18n import _
14 from .pycompat import open
14 from .pycompat import open
15 from .interfaces import repository
15 from .interfaces import repository
16 from . import (
16 from . import (
17 bookmarks,
17 bookmarks,
18 cacheutil,
18 cacheutil,
19 error,
19 error,
20 narrowspec,
20 narrowspec,
21 phases,
21 phases,
22 pycompat,
22 pycompat,
23 requirements as requirementsmod,
23 requirements as requirementsmod,
24 scmutil,
24 scmutil,
25 store,
25 store,
26 transaction,
26 transaction,
27 util,
27 util,
28 )
28 )
29 from .revlogutils import (
29 from .revlogutils import (
30 nodemap,
30 nodemap,
31 )
31 )
32
32
33
33
34 def new_stream_clone_requirements(default_requirements, streamed_requirements):
34 def new_stream_clone_requirements(default_requirements, streamed_requirements):
35 """determine the final set of requirement for a new stream clone
35 """determine the final set of requirement for a new stream clone
36
36
37 this method combine the "default" requirements that a new repository would
37 this method combine the "default" requirements that a new repository would
38 use with the constaint we get from the stream clone content. We keep local
38 use with the constaint we get from the stream clone content. We keep local
39 configuration choice when possible.
39 configuration choice when possible.
40 """
40 """
41 requirements = set(default_requirements)
41 requirements = set(default_requirements)
42 requirements -= requirementsmod.STREAM_FIXED_REQUIREMENTS
42 requirements -= requirementsmod.STREAM_FIXED_REQUIREMENTS
43 requirements.update(streamed_requirements)
43 requirements.update(streamed_requirements)
44 return requirements
44 return requirements
45
45
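In other words, local format choices that are not stream-fixed survive, while anything in STREAM_FIXED_REQUIREMENTS is taken from the stream itself. A tiny sketch, not part of the changeset, with illustrative (non-exhaustive) requirement names:

    defaults = {b'store', b'fncache', b'dotencode', b'revlogv1'}
    from_stream = {b'store', b'revlogv1', b'sparserevlog'}
    combined = new_stream_clone_requirements(defaults, from_stream)
    # combined == (defaults - requirementsmod.STREAM_FIXED_REQUIREMENTS) | from_stream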
46
46
47 def streamed_requirements(repo):
47 def streamed_requirements(repo):
48 """the set of requirement the new clone will have to support
48 """the set of requirement the new clone will have to support
49
49
50 This is used for advertising the stream options and to generate the actual
50 This is used for advertising the stream options and to generate the actual
51 stream content."""
51 stream content."""
52 requiredformats = (
52 requiredformats = (
53 repo.requirements & requirementsmod.STREAM_FIXED_REQUIREMENTS
53 repo.requirements & requirementsmod.STREAM_FIXED_REQUIREMENTS
54 )
54 )
55 return requiredformats
55 return requiredformats
56
56
57
57
58 def canperformstreamclone(pullop, bundle2=False):
58 def canperformstreamclone(pullop, bundle2=False):
59 """Whether it is possible to perform a streaming clone as part of pull.
59 """Whether it is possible to perform a streaming clone as part of pull.
60
60
61 ``bundle2`` will cause the function to consider stream clone through
61 ``bundle2`` will cause the function to consider stream clone through
62 bundle2 and only through bundle2.
62 bundle2 and only through bundle2.
63
63
64 Returns a tuple of (supported, requirements). ``supported`` is True if
64 Returns a tuple of (supported, requirements). ``supported`` is True if
65 streaming clone is supported and False otherwise. ``requirements`` is
65 streaming clone is supported and False otherwise. ``requirements`` is
66 a set of repo requirements from the remote, or ``None`` if stream clone
66 a set of repo requirements from the remote, or ``None`` if stream clone
67 isn't supported.
67 isn't supported.
68 """
68 """
69 repo = pullop.repo
69 repo = pullop.repo
70 remote = pullop.remote
70 remote = pullop.remote
71
71
72 bundle2supported = False
72 bundle2supported = False
73 if pullop.canusebundle2:
73 if pullop.canusebundle2:
74 if b'v2' in pullop.remotebundle2caps.get(b'stream', []):
74 if b'v2' in pullop.remotebundle2caps.get(b'stream', []):
75 bundle2supported = True
75 bundle2supported = True
76 # else
76 # else
77 # Server doesn't support bundle2 stream clone or doesn't support
77 # Server doesn't support bundle2 stream clone or doesn't support
78 # the versions we support. Fall back and possibly allow legacy.
78 # the versions we support. Fall back and possibly allow legacy.
79
79
80 # Ensures legacy code path uses available bundle2.
80 # Ensures legacy code path uses available bundle2.
81 if bundle2supported and not bundle2:
81 if bundle2supported and not bundle2:
82 return False, None
82 return False, None
83 # Ensures bundle2 doesn't try to do a stream clone if it isn't supported.
83 # Ensures bundle2 doesn't try to do a stream clone if it isn't supported.
84 elif bundle2 and not bundle2supported:
84 elif bundle2 and not bundle2supported:
85 return False, None
85 return False, None
86
86
87 # Streaming clone only works on empty repositories.
87 # Streaming clone only works on empty repositories.
88 if len(repo):
88 if len(repo):
89 return False, None
89 return False, None
90
90
91 # Streaming clone only works if all data is being requested.
91 # Streaming clone only works if all data is being requested.
92 if pullop.heads:
92 if pullop.heads:
93 return False, None
93 return False, None
94
94
95 streamrequested = pullop.streamclonerequested
95 streamrequested = pullop.streamclonerequested
96
96
97 # If we don't have a preference, let the server decide for us. This
97 # If we don't have a preference, let the server decide for us. This
98 # likely only comes into play in LANs.
98 # likely only comes into play in LANs.
99 if streamrequested is None:
99 if streamrequested is None:
100 # The server can advertise whether to prefer streaming clone.
100 # The server can advertise whether to prefer streaming clone.
101 streamrequested = remote.capable(b'stream-preferred')
101 streamrequested = remote.capable(b'stream-preferred')
102
102
103 if not streamrequested:
103 if not streamrequested:
104 return False, None
104 return False, None
105
105
106 # In order for stream clone to work, the client has to support all the
106 # In order for stream clone to work, the client has to support all the
107 # requirements advertised by the server.
107 # requirements advertised by the server.
108 #
108 #
109 # The server advertises its requirements via the "stream" and "streamreqs"
109 # The server advertises its requirements via the "stream" and "streamreqs"
110 # capability. "stream" (a value-less capability) is advertised if and only
110 # capability. "stream" (a value-less capability) is advertised if and only
111 # if the only requirement is "revlogv1." Else, the "streamreqs" capability
111 # if the only requirement is "revlogv1." Else, the "streamreqs" capability
112 # is advertised and contains a comma-delimited list of requirements.
112 # is advertised and contains a comma-delimited list of requirements.
113 requirements = set()
113 requirements = set()
114 if remote.capable(b'stream'):
114 if remote.capable(b'stream'):
115 requirements.add(requirementsmod.REVLOGV1_REQUIREMENT)
115 requirements.add(requirementsmod.REVLOGV1_REQUIREMENT)
116 else:
116 else:
117 streamreqs = remote.capable(b'streamreqs')
117 streamreqs = remote.capable(b'streamreqs')
118 # This is weird and shouldn't happen with modern servers.
118 # This is weird and shouldn't happen with modern servers.
119 if not streamreqs:
119 if not streamreqs:
120 pullop.repo.ui.warn(
120 pullop.repo.ui.warn(
121 _(
121 _(
122 b'warning: stream clone requested but server has them '
122 b'warning: stream clone requested but server has them '
123 b'disabled\n'
123 b'disabled\n'
124 )
124 )
125 )
125 )
126 return False, None
126 return False, None
127
127
128 streamreqs = set(streamreqs.split(b','))
128 streamreqs = set(streamreqs.split(b','))
129 # Server requires something we don't support. Bail.
129 # Server requires something we don't support. Bail.
130 missingreqs = streamreqs - repo.supported
130 missingreqs = streamreqs - repo.supported
131 if missingreqs:
131 if missingreqs:
132 pullop.repo.ui.warn(
132 pullop.repo.ui.warn(
133 _(
133 _(
134 b'warning: stream clone requested but client is missing '
134 b'warning: stream clone requested but client is missing '
135 b'requirements: %s\n'
135 b'requirements: %s\n'
136 )
136 )
137 % b', '.join(sorted(missingreqs))
137 % b', '.join(sorted(missingreqs))
138 )
138 )
139 pullop.repo.ui.warn(
139 pullop.repo.ui.warn(
140 _(
140 _(
141 b'(see https://www.mercurial-scm.org/wiki/MissingRequirement '
141 b'(see https://www.mercurial-scm.org/wiki/MissingRequirement '
142 b'for more information)\n'
142 b'for more information)\n'
143 )
143 )
144 )
144 )
145 return False, None
145 return False, None
146 requirements = streamreqs
146 requirements = streamreqs
147
147
148 return True, requirements
148 return True, requirements
149
149
150
150
151 def maybeperformlegacystreamclone(pullop):
151 def maybeperformlegacystreamclone(pullop):
152 """Possibly perform a legacy stream clone operation.
152 """Possibly perform a legacy stream clone operation.
153
153
154 Legacy stream clones are performed as part of pull but before all other
154 Legacy stream clones are performed as part of pull but before all other
155 operations.
155 operations.
156
156
157 A legacy stream clone will not be performed if a bundle2 stream clone is
157 A legacy stream clone will not be performed if a bundle2 stream clone is
158 supported.
158 supported.
159 """
159 """
160 from . import localrepo
160 from . import localrepo
161
161
162 supported, requirements = canperformstreamclone(pullop)
162 supported, requirements = canperformstreamclone(pullop)
163
163
164 if not supported:
164 if not supported:
165 return
165 return
166
166
167 repo = pullop.repo
167 repo = pullop.repo
168 remote = pullop.remote
168 remote = pullop.remote
169
169
170 # Save remote branchmap. We will use it later to speed up branchcache
170 # Save remote branchmap. We will use it later to speed up branchcache
171 # creation.
171 # creation.
172 rbranchmap = None
172 rbranchmap = None
173 if remote.capable(b'branchmap'):
173 if remote.capable(b'branchmap'):
174 with remote.commandexecutor() as e:
174 with remote.commandexecutor() as e:
175 rbranchmap = e.callcommand(b'branchmap', {}).result()
175 rbranchmap = e.callcommand(b'branchmap', {}).result()
176
176
177 repo.ui.status(_(b'streaming all changes\n'))
177 repo.ui.status(_(b'streaming all changes\n'))
178
178
179 with remote.commandexecutor() as e:
179 with remote.commandexecutor() as e:
180 fp = e.callcommand(b'stream_out', {}).result()
180 fp = e.callcommand(b'stream_out', {}).result()
181
181
182 # TODO strictly speaking, this code should all be inside the context
182 # TODO strictly speaking, this code should all be inside the context
183 # manager because the context manager is supposed to ensure all wire state
183 # manager because the context manager is supposed to ensure all wire state
184 # is flushed when exiting. But the legacy peers don't do this, so it
184 # is flushed when exiting. But the legacy peers don't do this, so it
185 # doesn't matter.
185 # doesn't matter.
186 l = fp.readline()
186 l = fp.readline()
187 try:
187 try:
188 resp = int(l)
188 resp = int(l)
189 except ValueError:
189 except ValueError:
190 raise error.ResponseError(
190 raise error.ResponseError(
191 _(b'unexpected response from remote server:'), l
191 _(b'unexpected response from remote server:'), l
192 )
192 )
193 if resp == 1:
193 if resp == 1:
194 raise error.Abort(_(b'operation forbidden by server'))
194 raise error.Abort(_(b'operation forbidden by server'))
195 elif resp == 2:
195 elif resp == 2:
196 raise error.Abort(_(b'locking the remote repository failed'))
196 raise error.Abort(_(b'locking the remote repository failed'))
197 elif resp != 0:
197 elif resp != 0:
198 raise error.Abort(_(b'the server sent an unknown error code'))
198 raise error.Abort(_(b'the server sent an unknown error code'))
199
199
200 l = fp.readline()
200 l = fp.readline()
201 try:
201 try:
202 filecount, bytecount = map(int, l.split(b' ', 1))
202 filecount, bytecount = map(int, l.split(b' ', 1))
203 except (ValueError, TypeError):
203 except (ValueError, TypeError):
204 raise error.ResponseError(
204 raise error.ResponseError(
205 _(b'unexpected response from remote server:'), l
205 _(b'unexpected response from remote server:'), l
206 )
206 )
207
207
208 with repo.lock():
208 with repo.lock():
209 consumev1(repo, fp, filecount, bytecount)
209 consumev1(repo, fp, filecount, bytecount)
210 repo.requirements = new_stream_clone_requirements(
210 repo.requirements = new_stream_clone_requirements(
211 repo.requirements,
211 repo.requirements,
212 requirements,
212 requirements,
213 )
213 )
214 repo.svfs.options = localrepo.resolvestorevfsoptions(
214 repo.svfs.options = localrepo.resolvestorevfsoptions(
215 repo.ui, repo.requirements, repo.features
215 repo.ui, repo.requirements, repo.features
216 )
216 )
217 scmutil.writereporequirements(repo)
217 scmutil.writereporequirements(repo)
218 nodemap.post_stream_cleanup(repo)
218 nodemap.post_stream_cleanup(repo)
219
219
220 if rbranchmap:
220 if rbranchmap:
221 repo._branchcaches.replace(repo, rbranchmap)
221 repo._branchcaches.replace(repo, rbranchmap)
222
222
223 repo.invalidate()
223 repo.invalidate()
224
224
225
225
226 def allowservergeneration(repo):
226 def allowservergeneration(repo):
227 """Whether streaming clones are allowed from the server."""
227 """Whether streaming clones are allowed from the server."""
228 if repository.REPO_FEATURE_STREAM_CLONE not in repo.features:
228 if repository.REPO_FEATURE_STREAM_CLONE not in repo.features:
229 return False
229 return False
230
230
231 if not repo.ui.configbool(b'server', b'uncompressed', untrusted=True):
231 if not repo.ui.configbool(b'server', b'uncompressed', untrusted=True):
232 return False
232 return False
233
233
234 # The way stream clone works makes it impossible to hide secret changesets.
234 # The way stream clone works makes it impossible to hide secret changesets.
235 # So don't allow this by default.
235 # So don't allow this by default.
236 secret = phases.hassecret(repo)
236 secret = phases.hassecret(repo)
237 if secret:
237 if secret:
238 return repo.ui.configbool(b'server', b'uncompressedallowsecret')
238 return repo.ui.configbool(b'server', b'uncompressedallowsecret')
239
239
240 return True
240 return True
241
241
242
242
243 # This is its own function so extensions can override it.
243 # This is its own function so extensions can override it.
244 def _walkstreamfiles(repo, matcher=None):
244 def _walkstreamfiles(repo, matcher=None):
245 return repo.store.walk(matcher)
245 return repo.store.walk(matcher)
246
246
247
247
248 def generatev1(repo):
248 def generatev1(repo):
249 """Emit content for version 1 of a streaming clone.
249 """Emit content for version 1 of a streaming clone.
250
250
251 This returns a 3-tuple of (file count, byte size, data iterator).
251 This returns a 3-tuple of (file count, byte size, data iterator).
252
252
253 The data iterator consists of N entries, one for each file being transferred.
253 The data iterator consists of N entries, one for each file being transferred.
254 Each file entry starts as a line with the file name and integer size
254 Each file entry starts as a line with the file name and integer size
255 delimited by a null byte.
255 delimited by a null byte.
256
256
257 The raw file data follows. Following the raw file data is the next file
257 The raw file data follows. Following the raw file data is the next file
258 entry, or EOF.
258 entry, or EOF.
259
259
260 When used on the wire protocol, an additional line indicating protocol
260 When used on the wire protocol, an additional line indicating protocol
261 success will be prepended to the stream. This function is not responsible
261 success will be prepended to the stream. This function is not responsible
262 for adding it.
262 for adding it.
263
263
264 This function will obtain a repository lock to ensure a consistent view of
264 This function will obtain a repository lock to ensure a consistent view of
265 the store is captured. It therefore may raise LockError.
265 the store is captured. It therefore may raise LockError.
266 """
266 """
267 entries = []
267 entries = []
268 total_bytes = 0
268 total_bytes = 0
269 # Get consistent snapshot of repo, lock during scan.
269 # Get consistent snapshot of repo, lock during scan.
270 with repo.lock():
270 with repo.lock():
271 repo.ui.debug(b'scanning\n')
271 repo.ui.debug(b'scanning\n')
272 for file_type, name, size in _walkstreamfiles(repo):
272 for entry in _walkstreamfiles(repo):
273 if size:
273 if entry.file_size:
274 entries.append((name, size))
274 entries.append((entry.unencoded_path, entry.file_size))
275 total_bytes += size
275 total_bytes += entry.file_size
276 _test_sync_point_walk_1(repo)
276 _test_sync_point_walk_1(repo)
277 _test_sync_point_walk_2(repo)
277 _test_sync_point_walk_2(repo)
278
278
279 repo.ui.debug(
279 repo.ui.debug(
280 b'%d files, %d bytes to transfer\n' % (len(entries), total_bytes)
280 b'%d files, %d bytes to transfer\n' % (len(entries), total_bytes)
281 )
281 )
282
282
283 svfs = repo.svfs
283 svfs = repo.svfs
284 debugflag = repo.ui.debugflag
284 debugflag = repo.ui.debugflag
285
285
286 def emitrevlogdata():
286 def emitrevlogdata():
287 for name, size in entries:
287 for name, size in entries:
288 if debugflag:
288 if debugflag:
289 repo.ui.debug(b'sending %s (%d bytes)\n' % (name, size))
289 repo.ui.debug(b'sending %s (%d bytes)\n' % (name, size))
290 # partially encode name over the wire for backwards compat
290 # partially encode name over the wire for backwards compat
291 yield b'%s\0%d\n' % (store.encodedir(name), size)
291 yield b'%s\0%d\n' % (store.encodedir(name), size)
292 # auditing at this stage is both pointless (paths are already
292 # auditing at this stage is both pointless (paths are already
293 # trusted by the local repo) and expensive
293 # trusted by the local repo) and expensive
294 with svfs(name, b'rb', auditpath=False) as fp:
294 with svfs(name, b'rb', auditpath=False) as fp:
295 if size <= 65536:
295 if size <= 65536:
296 yield fp.read(size)
296 yield fp.read(size)
297 else:
297 else:
298 for chunk in util.filechunkiter(fp, limit=size):
298 for chunk in util.filechunkiter(fp, limit=size):
299 yield chunk
299 yield chunk
300
300
301 return len(entries), total_bytes, emitrevlogdata()
301 return len(entries), total_bytes, emitrevlogdata()
302
302
303
303
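# Editor's note: illustrative sketch, not part of the changeset. It shows a
# minimal reader for the data iterator described in the generatev1()
# docstring: each entry is a "<name>\0<size>\n" line followed by exactly
# <size> bytes of raw file data. The real consumer is consumev1() below,
# which additionally decodes the partially encoded names.
def _read_v1_entries_sketch(fp, filecount):
    """yield (encoded name, data) pairs from a v1 entry stream"""
    for _ in range(filecount):
        line = fp.readline()
        name, size = line.rstrip(b'\n').split(b'\0', 1)
        yield name, fp.read(int(size))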
304 def generatev1wireproto(repo):
304 def generatev1wireproto(repo):
305 """Emit content for version 1 of streaming clone suitable for the wire.
305 """Emit content for version 1 of streaming clone suitable for the wire.
306
306
307 This is the data output from ``generatev1()`` with 2 header lines. The
307 This is the data output from ``generatev1()`` with 2 header lines. The
308 first line indicates overall success. The 2nd contains the file count and
308 first line indicates overall success. The 2nd contains the file count and
309 byte size of payload.
309 byte size of payload.
310
310
311 The success line contains "0" for success, "1" for stream generation not
311 The success line contains "0" for success, "1" for stream generation not
312 allowed, and "2" for error locking the repository (possibly indicating
312 allowed, and "2" for error locking the repository (possibly indicating
313 a permissions error for the server process).
313 a permissions error for the server process).
314 """
314 """
315 if not allowservergeneration(repo):
315 if not allowservergeneration(repo):
316 yield b'1\n'
316 yield b'1\n'
317 return
317 return
318
318
319 try:
319 try:
320 filecount, bytecount, it = generatev1(repo)
320 filecount, bytecount, it = generatev1(repo)
321 except error.LockError:
321 except error.LockError:
322 yield b'2\n'
322 yield b'2\n'
323 return
323 return
324
324
325 # Indicates successful response.
325 # Indicates successful response.
326 yield b'0\n'
326 yield b'0\n'
327 yield b'%d %d\n' % (filecount, bytecount)
327 yield b'%d %d\n' % (filecount, bytecount)
328 for chunk in it:
328 for chunk in it:
329 yield chunk
329 yield chunk
330
330
331
331
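# Editor's note: illustrative sketch, not part of the changeset. The wire
# framing produced by generatev1wireproto() is two text lines in front of the
# raw entries: a status code ("0" success, "1" generation not allowed, "2"
# lock error) and "<filecount> <bytecount>". maybeperformlegacystreamclone()
# above is the authoritative reader; this only isolates the framing.
def _read_v1_wire_header_sketch(fp):
    status = int(fp.readline())
    if status != 0:
        raise ValueError('stream clone refused with code %d' % status)
    filecount, bytecount = map(int, fp.readline().split(b' ', 1))
    return filecount, bytecount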
332 def generatebundlev1(repo, compression=b'UN'):
332 def generatebundlev1(repo, compression=b'UN'):
333 """Emit content for version 1 of a stream clone bundle.
333 """Emit content for version 1 of a stream clone bundle.
334
334
335 The first 4 bytes of the output ("HGS1") denote this as stream clone
335 The first 4 bytes of the output ("HGS1") denote this as stream clone
336 bundle version 1.
336 bundle version 1.
337
337
338 The next 2 bytes indicate the compression type. Only "UN" is currently
338 The next 2 bytes indicate the compression type. Only "UN" is currently
339 supported.
339 supported.
340
340
341 The next 16 bytes are two 64-bit big endian unsigned integers indicating
341 The next 16 bytes are two 64-bit big endian unsigned integers indicating
342 file count and byte count, respectively.
342 file count and byte count, respectively.
343
343
344 The next 2 bytes are a 16-bit big endian unsigned short declaring the length
344 The next 2 bytes are a 16-bit big endian unsigned short declaring the length
345 of the requirements string, including a trailing \0. The following N bytes
345 of the requirements string, including a trailing \0. The following N bytes
346 are the requirements string, which is ASCII containing a comma-delimited
346 are the requirements string, which is ASCII containing a comma-delimited
347 list of repo requirements that are needed to support the data.
347 list of repo requirements that are needed to support the data.
348
348
349 The remaining content is the output of ``generatev1()`` (which may be
349 The remaining content is the output of ``generatev1()`` (which may be
350 compressed in the future).
350 compressed in the future).
351
351
352 Returns a tuple of (requirements, data generator).
352 Returns a tuple of (requirements, data generator).
353 """
353 """
354 if compression != b'UN':
354 if compression != b'UN':
355 raise ValueError(b'we do not support the compression argument yet')
355 raise ValueError(b'we do not support the compression argument yet')
356
356
357 requirements = streamed_requirements(repo)
357 requirements = streamed_requirements(repo)
358 requires = b','.join(sorted(requirements))
358 requires = b','.join(sorted(requirements))
359
359
360 def gen():
360 def gen():
361 yield b'HGS1'
361 yield b'HGS1'
362 yield compression
362 yield compression
363
363
364 filecount, bytecount, it = generatev1(repo)
364 filecount, bytecount, it = generatev1(repo)
365 repo.ui.status(
365 repo.ui.status(
366 _(b'writing %d bytes for %d files\n') % (bytecount, filecount)
366 _(b'writing %d bytes for %d files\n') % (bytecount, filecount)
367 )
367 )
368
368
369 yield struct.pack(b'>QQ', filecount, bytecount)
369 yield struct.pack(b'>QQ', filecount, bytecount)
370 yield struct.pack(b'>H', len(requires) + 1)
370 yield struct.pack(b'>H', len(requires) + 1)
371 yield requires + b'\0'
371 yield requires + b'\0'
372
372
373 # This is where we'll add compression in the future.
373 # This is where we'll add compression in the future.
374 assert compression == b'UN'
374 assert compression == b'UN'
375
375
376 progress = repo.ui.makeprogress(
376 progress = repo.ui.makeprogress(
377 _(b'bundle'), total=bytecount, unit=_(b'bytes')
377 _(b'bundle'), total=bytecount, unit=_(b'bytes')
378 )
378 )
379 progress.update(0)
379 progress.update(0)
380
380
381 for chunk in it:
381 for chunk in it:
382 progress.increment(step=len(chunk))
382 progress.increment(step=len(chunk))
383 yield chunk
383 yield chunk
384
384
385 progress.complete()
385 progress.complete()
386
386
387 return requirements, gen()
387 return requirements, gen()
388
388
389
389
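# Editor's note: illustrative sketch, not part of the changeset. It assembles
# the fixed-size header described in the generatebundlev1() docstring, using
# the module-level struct import; readbundle1header() below parses the same
# layout back. For example, requirements {b'generaldelta', b'revlogv1'} are
# serialized as the 22-byte string b'generaldelta,revlogv1\0'.
def _bundle_v1_header_sketch(filecount, bytecount, requirements):
    requires = b','.join(sorted(requirements))
    return (
        b'HGS1'  # 4-byte magic
        + b'UN'  # 2-byte compression identifier
        + struct.pack(b'>QQ', filecount, bytecount)  # two big-endian uint64
        + struct.pack(b'>H', len(requires) + 1)  # length incl. trailing NUL
        + requires
        + b'\0'
    )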
390 def consumev1(repo, fp, filecount, bytecount):
390 def consumev1(repo, fp, filecount, bytecount):
391 """Apply the contents from version 1 of a streaming clone file handle.
391 """Apply the contents from version 1 of a streaming clone file handle.
392
392
393 This takes the output from "stream_out" and applies it to the specified
393 This takes the output from "stream_out" and applies it to the specified
394 repository.
394 repository.
395
395
396 Like "stream_out," the status line added by the wire protocol is not
396 Like "stream_out," the status line added by the wire protocol is not
397 handled by this function.
397 handled by this function.
398 """
398 """
399 with repo.lock():
399 with repo.lock():
400 repo.ui.status(
400 repo.ui.status(
401 _(b'%d files to transfer, %s of data\n')
401 _(b'%d files to transfer, %s of data\n')
402 % (filecount, util.bytecount(bytecount))
402 % (filecount, util.bytecount(bytecount))
403 )
403 )
404 progress = repo.ui.makeprogress(
404 progress = repo.ui.makeprogress(
405 _(b'clone'), total=bytecount, unit=_(b'bytes')
405 _(b'clone'), total=bytecount, unit=_(b'bytes')
406 )
406 )
407 progress.update(0)
407 progress.update(0)
408 start = util.timer()
408 start = util.timer()
409
409
410 # TODO: get rid of (potential) inconsistency
410 # TODO: get rid of (potential) inconsistency
411 #
411 #
412 # If transaction is started and any @filecache property is
412 # If transaction is started and any @filecache property is
413 # changed at this point, it causes inconsistency between
413 # changed at this point, it causes inconsistency between
414 # in-memory cached property and streamclone-ed file on the
414 # in-memory cached property and streamclone-ed file on the
415 # disk. Nested transaction prevents transaction scope "clone"
415 # disk. Nested transaction prevents transaction scope "clone"
416 # below from writing in-memory changes out at the end of it,
416 # below from writing in-memory changes out at the end of it,
417 # even though in-memory changes are discarded at the end of it
417 # even though in-memory changes are discarded at the end of it
418 # regardless of transaction nesting.
418 # regardless of transaction nesting.
419 #
419 #
420 # But transaction nesting can't be simply prohibited, because
420 # But transaction nesting can't be simply prohibited, because
421 # nesting occurs also in ordinary case (e.g. enabling
421 # nesting occurs also in ordinary case (e.g. enabling
422 # clonebundles).
422 # clonebundles).
423
423
424 with repo.transaction(b'clone'):
424 with repo.transaction(b'clone'):
425 with repo.svfs.backgroundclosing(repo.ui, expectedcount=filecount):
425 with repo.svfs.backgroundclosing(repo.ui, expectedcount=filecount):
426 for i in range(filecount):
426 for i in range(filecount):
427 # XXX doesn't support '\n' or '\r' in filenames
427 # XXX doesn't support '\n' or '\r' in filenames
428 l = fp.readline()
428 l = fp.readline()
429 try:
429 try:
430 name, size = l.split(b'\0', 1)
430 name, size = l.split(b'\0', 1)
431 size = int(size)
431 size = int(size)
432 except (ValueError, TypeError):
432 except (ValueError, TypeError):
433 raise error.ResponseError(
433 raise error.ResponseError(
434 _(b'unexpected response from remote server:'), l
434 _(b'unexpected response from remote server:'), l
435 )
435 )
436 if repo.ui.debugflag:
436 if repo.ui.debugflag:
437 repo.ui.debug(
437 repo.ui.debug(
438 b'adding %s (%s)\n' % (name, util.bytecount(size))
438 b'adding %s (%s)\n' % (name, util.bytecount(size))
439 )
439 )
440 # for backwards compat, name was partially encoded
440 # for backwards compat, name was partially encoded
441 path = store.decodedir(name)
441 path = store.decodedir(name)
442 with repo.svfs(path, b'w', backgroundclose=True) as ofp:
442 with repo.svfs(path, b'w', backgroundclose=True) as ofp:
443 for chunk in util.filechunkiter(fp, limit=size):
443 for chunk in util.filechunkiter(fp, limit=size):
444 progress.increment(step=len(chunk))
444 progress.increment(step=len(chunk))
445 ofp.write(chunk)
445 ofp.write(chunk)
446
446
447 # force @filecache properties to be reloaded from
447 # force @filecache properties to be reloaded from
448 # streamclone-ed file at next access
448 # streamclone-ed file at next access
449 repo.invalidate(clearfilecache=True)
449 repo.invalidate(clearfilecache=True)
450
450
451 elapsed = util.timer() - start
451 elapsed = util.timer() - start
452 if elapsed <= 0:
452 if elapsed <= 0:
453 elapsed = 0.001
453 elapsed = 0.001
454 progress.complete()
454 progress.complete()
455 repo.ui.status(
455 repo.ui.status(
456 _(b'transferred %s in %.1f seconds (%s/sec)\n')
456 _(b'transferred %s in %.1f seconds (%s/sec)\n')
457 % (
457 % (
458 util.bytecount(bytecount),
458 util.bytecount(bytecount),
459 elapsed,
459 elapsed,
460 util.bytecount(bytecount / elapsed),
460 util.bytecount(bytecount / elapsed),
461 )
461 )
462 )
462 )
463
463
464
464
465 def readbundle1header(fp):
465 def readbundle1header(fp):
466 compression = fp.read(2)
466 compression = fp.read(2)
467 if compression != b'UN':
467 if compression != b'UN':
468 raise error.Abort(
468 raise error.Abort(
469 _(
469 _(
470 b'only uncompressed stream clone bundles are '
470 b'only uncompressed stream clone bundles are '
471 b'supported; got %s'
471 b'supported; got %s'
472 )
472 )
473 % compression
473 % compression
474 )
474 )
475
475
476 filecount, bytecount = struct.unpack(b'>QQ', fp.read(16))
476 filecount, bytecount = struct.unpack(b'>QQ', fp.read(16))
477 requireslen = struct.unpack(b'>H', fp.read(2))[0]
477 requireslen = struct.unpack(b'>H', fp.read(2))[0]
478 requires = fp.read(requireslen)
478 requires = fp.read(requireslen)
479
479
480 if not requires.endswith(b'\0'):
480 if not requires.endswith(b'\0'):
481 raise error.Abort(
481 raise error.Abort(
482 _(
482 _(
483 b'malformed stream clone bundle: '
483 b'malformed stream clone bundle: '
484 b'requirements not properly encoded'
484 b'requirements not properly encoded'
485 )
485 )
486 )
486 )
487
487
488 requirements = set(requires.rstrip(b'\0').split(b','))
488 requirements = set(requires.rstrip(b'\0').split(b','))
489
489
490 return filecount, bytecount, requirements
490 return filecount, bytecount, requirements
491
491
492
492
493 def applybundlev1(repo, fp):
493 def applybundlev1(repo, fp):
494 """Apply the content from a stream clone bundle version 1.
494 """Apply the content from a stream clone bundle version 1.
495
495
496 We assume the 4 byte header has been read and validated and the file handle
496 We assume the 4 byte header has been read and validated and the file handle
497 is at the 2 byte compression identifier.
497 is at the 2 byte compression identifier.
498 """
498 """
499 if len(repo):
499 if len(repo):
500 raise error.Abort(
500 raise error.Abort(
501 _(b'cannot apply stream clone bundle on non-empty repo')
501 _(b'cannot apply stream clone bundle on non-empty repo')
502 )
502 )
503
503
504 filecount, bytecount, requirements = readbundle1header(fp)
504 filecount, bytecount, requirements = readbundle1header(fp)
505 missingreqs = requirements - repo.supported
505 missingreqs = requirements - repo.supported
506 if missingreqs:
506 if missingreqs:
507 raise error.Abort(
507 raise error.Abort(
508 _(b'unable to apply stream clone: unsupported format: %s')
508 _(b'unable to apply stream clone: unsupported format: %s')
509 % b', '.join(sorted(missingreqs))
509 % b', '.join(sorted(missingreqs))
510 )
510 )
511
511
512 consumev1(repo, fp, filecount, bytecount)
512 consumev1(repo, fp, filecount, bytecount)
513 nodemap.post_stream_cleanup(repo)
513 nodemap.post_stream_cleanup(repo)
514
514
515
515
516 class streamcloneapplier:
516 class streamcloneapplier:
517 """Class to manage applying streaming clone bundles.
517 """Class to manage applying streaming clone bundles.
518
518
519 We need to wrap ``applybundlev1()`` in a dedicated type to enable bundle
519 We need to wrap ``applybundlev1()`` in a dedicated type to enable bundle
520 readers to perform bundle type-specific functionality.
520 readers to perform bundle type-specific functionality.
521 """
521 """
522
522
523 def __init__(self, fh):
523 def __init__(self, fh):
524 self._fh = fh
524 self._fh = fh
525
525
526 def apply(self, repo):
526 def apply(self, repo):
527 return applybundlev1(repo, self._fh)
527 return applybundlev1(repo, self._fh)
528
528
529
529
530 # type of file to stream
530 # type of file to stream
531 _fileappend = 0 # append only file
531 _fileappend = 0 # append only file
532 _filefull = 1 # full snapshot file
532 _filefull = 1 # full snapshot file
533
533
534 # Source of the file
534 # Source of the file
535 _srcstore = b's' # store (svfs)
535 _srcstore = b's' # store (svfs)
536 _srccache = b'c' # cache (cache)
536 _srccache = b'c' # cache (cache)
537
537
538 # This is its own function so extensions can override it.
538 # This is its own function so extensions can override it.
539 def _walkstreamfullstorefiles(repo):
539 def _walkstreamfullstorefiles(repo):
540 """list snapshot file from the store"""
540 """list snapshot file from the store"""
541 fnames = []
541 fnames = []
542 if not repo.publishing():
542 if not repo.publishing():
543 fnames.append(b'phaseroots')
543 fnames.append(b'phaseroots')
544 return fnames
544 return fnames
545
545
546
546
547 def _filterfull(entry, copy, vfsmap):
547 def _filterfull(entry, copy, vfsmap):
548 """actually copy the snapshot files"""
548 """actually copy the snapshot files"""
549 src, name, ftype, data = entry
549 src, name, ftype, data = entry
550 if ftype != _filefull:
550 if ftype != _filefull:
551 return entry
551 return entry
552 return (src, name, ftype, copy(vfsmap[src].join(name)))
552 return (src, name, ftype, copy(vfsmap[src].join(name)))
553
553
554
554
555 @contextlib.contextmanager
555 @contextlib.contextmanager
556 def maketempcopies():
556 def maketempcopies():
557 """return a function to temporary copy file"""
557 """return a function to temporary copy file"""
558
558
559 files = []
559 files = []
560 dst_dir = pycompat.mkdtemp(prefix=b'hg-clone-')
560 dst_dir = pycompat.mkdtemp(prefix=b'hg-clone-')
561 try:
561 try:
562
562
563 def copy(src):
563 def copy(src):
564 fd, dst = pycompat.mkstemp(
564 fd, dst = pycompat.mkstemp(
565 prefix=os.path.basename(src), dir=dst_dir
565 prefix=os.path.basename(src), dir=dst_dir
566 )
566 )
567 os.close(fd)
567 os.close(fd)
568 files.append(dst)
568 files.append(dst)
569 util.copyfiles(src, dst, hardlink=True)
569 util.copyfiles(src, dst, hardlink=True)
570 return dst
570 return dst
571
571
572 yield copy
572 yield copy
573 finally:
573 finally:
574 for tmp in files:
574 for tmp in files:
575 util.tryunlink(tmp)
575 util.tryunlink(tmp)
576 util.tryrmdir(dst_dir)
576 util.tryrmdir(dst_dir)
577
577
578
578
579 def _makemap(repo):
579 def _makemap(repo):
580 """make a (src -> vfs) map for the repo"""
580 """make a (src -> vfs) map for the repo"""
581 vfsmap = {
581 vfsmap = {
582 _srcstore: repo.svfs,
582 _srcstore: repo.svfs,
583 _srccache: repo.cachevfs,
583 _srccache: repo.cachevfs,
584 }
584 }
585 # we keep repo.vfs out of the map on purpose, there are too many dangers there
585 # we keep repo.vfs out of the map on purpose, there are too many dangers there
586 # (eg: .hg/hgrc)
586 # (eg: .hg/hgrc)
587 assert repo.vfs not in vfsmap.values()
587 assert repo.vfs not in vfsmap.values()
588
588
589 return vfsmap
589 return vfsmap
590
590
591
591
592 def _emit2(repo, entries, totalfilesize):
592 def _emit2(repo, entries, totalfilesize):
593 """actually emit the stream bundle"""
593 """actually emit the stream bundle"""
594 vfsmap = _makemap(repo)
594 vfsmap = _makemap(repo)
595 # we keep repo.vfs out of the map on purpose, there are too many dangers there
595 # we keep repo.vfs out of the map on purpose, there are too many dangers there
596 # (eg: .hg/hgrc),
596 # (eg: .hg/hgrc),
597 #
597 #
598 # this assert is duplicated (from _makemap) as an author might think this is
598 # this assert is duplicated (from _makemap) as an author might think this is
599 # fine, while it really is not.
599 # fine, while it really is not.
600 if repo.vfs in vfsmap.values():
600 if repo.vfs in vfsmap.values():
601 raise error.ProgrammingError(
601 raise error.ProgrammingError(
602 b'repo.vfs must not be added to vfsmap for security reasons'
602 b'repo.vfs must not be added to vfsmap for security reasons'
603 )
603 )
604
604
605 progress = repo.ui.makeprogress(
605 progress = repo.ui.makeprogress(
606 _(b'bundle'), total=totalfilesize, unit=_(b'bytes')
606 _(b'bundle'), total=totalfilesize, unit=_(b'bytes')
607 )
607 )
608 progress.update(0)
608 progress.update(0)
609 with maketempcopies() as copy, progress:
609 with maketempcopies() as copy, progress:
610 # copy is delayed until we are in the try
610 # copy is delayed until we are in the try
611 entries = [_filterfull(e, copy, vfsmap) for e in entries]
611 entries = [_filterfull(e, copy, vfsmap) for e in entries]
612 yield None # this releases the lock on the repository
612 yield None # this releases the lock on the repository
613 totalbytecount = 0
613 totalbytecount = 0
614
614
615 for src, name, ftype, data in entries:
615 for src, name, ftype, data in entries:
616 vfs = vfsmap[src]
616 vfs = vfsmap[src]
617 yield src
617 yield src
618 yield util.uvarintencode(len(name))
618 yield util.uvarintencode(len(name))
619 if ftype == _fileappend:
619 if ftype == _fileappend:
620 fp = vfs(name)
620 fp = vfs(name)
621 size = data
621 size = data
622 elif ftype == _filefull:
622 elif ftype == _filefull:
623 fp = open(data, b'rb')
623 fp = open(data, b'rb')
624 size = util.fstat(fp).st_size
624 size = util.fstat(fp).st_size
625 bytecount = 0
625 bytecount = 0
626 try:
626 try:
627 yield util.uvarintencode(size)
627 yield util.uvarintencode(size)
628 yield name
628 yield name
629 if size <= 65536:
629 if size <= 65536:
630 chunks = (fp.read(size),)
630 chunks = (fp.read(size),)
631 else:
631 else:
632 chunks = util.filechunkiter(fp, limit=size)
632 chunks = util.filechunkiter(fp, limit=size)
633 for chunk in chunks:
633 for chunk in chunks:
634 bytecount += len(chunk)
634 bytecount += len(chunk)
635 totalbytecount += len(chunk)
635 totalbytecount += len(chunk)
636 progress.update(totalbytecount)
636 progress.update(totalbytecount)
637 yield chunk
637 yield chunk
638 if bytecount != size:
638 if bytecount != size:
639 # Would most likely be caused by a race due to `hg strip` or
639 # Would most likely be caused by a race due to `hg strip` or
640 # a revlog split
640 # a revlog split
641 raise error.Abort(
641 raise error.Abort(
642 _(
642 _(
643 b'clone could only read %d bytes from %s, but '
643 b'clone could only read %d bytes from %s, but '
644 b'expected %d bytes'
644 b'expected %d bytes'
645 )
645 )
646 % (bytecount, name, size)
646 % (bytecount, name, size)
647 )
647 )
648 finally:
648 finally:
649 fp.close()
649 fp.close()
650
650
651
651
652 def _test_sync_point_walk_1(repo):
652 def _test_sync_point_walk_1(repo):
653 """a function for synchronisation during tests"""
653 """a function for synchronisation during tests"""
654
654
655
655
656 def _test_sync_point_walk_2(repo):
656 def _test_sync_point_walk_2(repo):
657 """a function for synchronisation during tests"""
657 """a function for synchronisation during tests"""
658
658
659
659
660 def _v2_walk(repo, includes, excludes, includeobsmarkers):
660 def _v2_walk(repo, includes, excludes, includeobsmarkers):
661 """emit a seris of files information useful to clone a repo
661 """emit a seris of files information useful to clone a repo
662
662
663 return (entries, totalfilesize)
663 return (entries, totalfilesize)
664
664
665 entries is a list of tuples (vfs-key, file-path, file-type, size)
665 entries is a list of tuples (vfs-key, file-path, file-type, size)
666
666
667 - `vfs-key`: a key to the right vfs to write the file (see _makemap)
667 - `vfs-key`: a key to the right vfs to write the file (see _makemap)
668 - `name`: file path of the file to copy (to be fed to the vfs)
668 - `name`: file path of the file to copy (to be fed to the vfs)
669 - `file-type`: does this file need to be copied with the source lock?
669 - `file-type`: does this file need to be copied with the source lock?
670 - `size`: the size of the file (or None)
670 - `size`: the size of the file (or None)
671 """
671 """
672 assert repo._currentlock(repo._lockref) is not None
672 assert repo._currentlock(repo._lockref) is not None
673 entries = []
673 entries = []
674 totalfilesize = 0
674 totalfilesize = 0
675
675
676 matcher = None
676 matcher = None
677 if includes or excludes:
677 if includes or excludes:
678 matcher = narrowspec.match(repo.root, includes, excludes)
678 matcher = narrowspec.match(repo.root, includes, excludes)
679
679
680 for rl_type, name, size in _walkstreamfiles(repo, matcher):
680 for entry in _walkstreamfiles(repo, matcher):
681 if size:
681 if entry.file_size:
682 ft = _fileappend
682 ft = _fileappend
683 if rl_type & store.FILEFLAGS_VOLATILE:
683 if entry.is_volatile:
684 ft = _filefull
684 ft = _filefull
685 entries.append((_srcstore, name, ft, size))
685 entries.append(
686 totalfilesize += size
686 (_srcstore, entry.unencoded_path, ft, entry.file_size)
687 )
688 totalfilesize += entry.file_size
687 for name in _walkstreamfullstorefiles(repo):
689 for name in _walkstreamfullstorefiles(repo):
688 if repo.svfs.exists(name):
690 if repo.svfs.exists(name):
689 totalfilesize += repo.svfs.lstat(name).st_size
691 totalfilesize += repo.svfs.lstat(name).st_size
690 entries.append((_srcstore, name, _filefull, None))
692 entries.append((_srcstore, name, _filefull, None))
691 if includeobsmarkers and repo.svfs.exists(b'obsstore'):
693 if includeobsmarkers and repo.svfs.exists(b'obsstore'):
692 totalfilesize += repo.svfs.lstat(b'obsstore').st_size
694 totalfilesize += repo.svfs.lstat(b'obsstore').st_size
693 entries.append((_srcstore, b'obsstore', _filefull, None))
695 entries.append((_srcstore, b'obsstore', _filefull, None))
694 for name in cacheutil.cachetocopy(repo):
696 for name in cacheutil.cachetocopy(repo):
695 if repo.cachevfs.exists(name):
697 if repo.cachevfs.exists(name):
696 totalfilesize += repo.cachevfs.lstat(name).st_size
698 totalfilesize += repo.cachevfs.lstat(name).st_size
697 entries.append((_srccache, name, _filefull, None))
699 entries.append((_srccache, name, _filefull, None))
698 return entries, totalfilesize
700 return entries, totalfilesize
699
701
700
702
701 def generatev2(repo, includes, excludes, includeobsmarkers):
703 def generatev2(repo, includes, excludes, includeobsmarkers):
702 """Emit content for version 2 of a streaming clone.
704 """Emit content for version 2 of a streaming clone.
703
705
704 the data stream consists of the following entries:
706 the data stream consists of the following entries:
705 1) A char representing the file destination (eg: store or cache)
707 1) A char representing the file destination (eg: store or cache)
706 2) A varint containing the length of the filename
708 2) A varint containing the length of the filename
707 3) A varint containing the length of file data
709 3) A varint containing the length of file data
708 4) N bytes containing the filename (the internal, store-agnostic form)
710 4) N bytes containing the filename (the internal, store-agnostic form)
709 5) N bytes containing the file data
711 5) N bytes containing the file data
710
712
711 Returns a 3-tuple of (file count, file size, data iterator).
713 Returns a 3-tuple of (file count, file size, data iterator).
712 """
714 """
713
715
714 with repo.lock():
716 with repo.lock():
715
717
716 repo.ui.debug(b'scanning\n')
718 repo.ui.debug(b'scanning\n')
717
719
718 entries, totalfilesize = _v2_walk(
720 entries, totalfilesize = _v2_walk(
719 repo,
721 repo,
720 includes=includes,
722 includes=includes,
721 excludes=excludes,
723 excludes=excludes,
722 includeobsmarkers=includeobsmarkers,
724 includeobsmarkers=includeobsmarkers,
723 )
725 )
724
726
725 chunks = _emit2(repo, entries, totalfilesize)
727 chunks = _emit2(repo, entries, totalfilesize)
726 first = next(chunks)
728 first = next(chunks)
727 assert first is None
729 assert first is None
728 _test_sync_point_walk_1(repo)
730 _test_sync_point_walk_1(repo)
729 _test_sync_point_walk_2(repo)
731 _test_sync_point_walk_2(repo)
730
732
731 return len(entries), totalfilesize, chunks
733 return len(entries), totalfilesize, chunks
732
734
733
735
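# Editor's note: illustrative sketch, not part of the changeset. It spells
# out the per-entry framing of the v2 stream documented in generatev2():
# destination key, uvarint name length, uvarint data length, name, data.
# consumev2() below is the authoritative reader.
def _read_v2_entry_sketch(fp):
    src = util.readexactly(fp, 1)  # b's' (store) or b'c' (cache)
    namelen = util.uvarintdecodestream(fp)
    datalen = util.uvarintdecodestream(fp)
    name = util.readexactly(fp, namelen)  # store-agnostic file name
    data = util.readexactly(fp, datalen)  # raw file content
    return src, name, data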
734 @contextlib.contextmanager
736 @contextlib.contextmanager
735 def nested(*ctxs):
737 def nested(*ctxs):
736 this = ctxs[0]
738 this = ctxs[0]
737 rest = ctxs[1:]
739 rest = ctxs[1:]
738 with this:
740 with this:
739 if rest:
741 if rest:
740 with nested(*rest):
742 with nested(*rest):
741 yield
743 yield
742 else:
744 else:
743 yield
745 yield
744
746
745
747
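# Editor's note: illustrative usage sketch, not part of the changeset.
# nested() enters a sequence of context managers recursively; consumev2()
# below uses it to open one backgroundclosing() context per destination vfs:
#
#   ctxs = (vfs.backgroundclosing(repo.ui) for vfs in vfsmap.values())
#   with nested(*ctxs):
#       ...  # all contexts are active here and exited in reverse order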
746 def consumev2(repo, fp, filecount, filesize):
748 def consumev2(repo, fp, filecount, filesize):
747 """Apply the contents from a version 2 streaming clone.
749 """Apply the contents from a version 2 streaming clone.
748
750
749 Data is read from an object that only needs to provide a ``read(size)``
751 Data is read from an object that only needs to provide a ``read(size)``
750 method.
752 method.
751 """
753 """
752 with repo.lock():
754 with repo.lock():
753 repo.ui.status(
755 repo.ui.status(
754 _(b'%d files to transfer, %s of data\n')
756 _(b'%d files to transfer, %s of data\n')
755 % (filecount, util.bytecount(filesize))
757 % (filecount, util.bytecount(filesize))
756 )
758 )
757
759
758 start = util.timer()
760 start = util.timer()
759 progress = repo.ui.makeprogress(
761 progress = repo.ui.makeprogress(
760 _(b'clone'), total=filesize, unit=_(b'bytes')
762 _(b'clone'), total=filesize, unit=_(b'bytes')
761 )
763 )
762 progress.update(0)
764 progress.update(0)
763
765
764 vfsmap = _makemap(repo)
766 vfsmap = _makemap(repo)
765 # we keep repo.vfs out of the map on purpose, there are too many dangers
767 # we keep repo.vfs out of the map on purpose, there are too many dangers
766 # there (eg: .hg/hgrc),
768 # there (eg: .hg/hgrc),
767 #
769 #
768 # this assert is duplicated (from _makemap) as an author might think this
770 # this assert is duplicated (from _makemap) as an author might think this
769 # is fine, while it really is not.
771 # is fine, while it really is not.
770 if repo.vfs in vfsmap.values():
772 if repo.vfs in vfsmap.values():
771 raise error.ProgrammingError(
773 raise error.ProgrammingError(
772 b'repo.vfs must not be added to vfsmap for security reasons'
774 b'repo.vfs must not be added to vfsmap for security reasons'
773 )
775 )
774
776
775 with repo.transaction(b'clone'):
777 with repo.transaction(b'clone'):
776 ctxs = (vfs.backgroundclosing(repo.ui) for vfs in vfsmap.values())
778 ctxs = (vfs.backgroundclosing(repo.ui) for vfs in vfsmap.values())
777 with nested(*ctxs):
779 with nested(*ctxs):
778 for i in range(filecount):
780 for i in range(filecount):
779 src = util.readexactly(fp, 1)
781 src = util.readexactly(fp, 1)
780 vfs = vfsmap[src]
782 vfs = vfsmap[src]
781 namelen = util.uvarintdecodestream(fp)
783 namelen = util.uvarintdecodestream(fp)
782 datalen = util.uvarintdecodestream(fp)
784 datalen = util.uvarintdecodestream(fp)
783
785
784 name = util.readexactly(fp, namelen)
786 name = util.readexactly(fp, namelen)
785
787
786 if repo.ui.debugflag:
788 if repo.ui.debugflag:
787 repo.ui.debug(
789 repo.ui.debug(
788 b'adding [%s] %s (%s)\n'
790 b'adding [%s] %s (%s)\n'
789 % (src, name, util.bytecount(datalen))
791 % (src, name, util.bytecount(datalen))
790 )
792 )
791
793
792 with vfs(name, b'w') as ofp:
794 with vfs(name, b'w') as ofp:
793 for chunk in util.filechunkiter(fp, limit=datalen):
795 for chunk in util.filechunkiter(fp, limit=datalen):
794 progress.increment(step=len(chunk))
796 progress.increment(step=len(chunk))
795 ofp.write(chunk)
797 ofp.write(chunk)
796
798
797 # force @filecache properties to be reloaded from
799 # force @filecache properties to be reloaded from
798 # streamclone-ed file at next access
800 # streamclone-ed file at next access
799 repo.invalidate(clearfilecache=True)
801 repo.invalidate(clearfilecache=True)
800
802
801 elapsed = util.timer() - start
803 elapsed = util.timer() - start
802 if elapsed <= 0:
804 if elapsed <= 0:
803 elapsed = 0.001
805 elapsed = 0.001
804 repo.ui.status(
806 repo.ui.status(
805 _(b'transferred %s in %.1f seconds (%s/sec)\n')
807 _(b'transferred %s in %.1f seconds (%s/sec)\n')
806 % (
808 % (
807 util.bytecount(progress.pos),
809 util.bytecount(progress.pos),
808 elapsed,
810 elapsed,
809 util.bytecount(progress.pos / elapsed),
811 util.bytecount(progress.pos / elapsed),
810 )
812 )
811 )
813 )
812 progress.complete()
814 progress.complete()
813
815
814
816
815 def applybundlev2(repo, fp, filecount, filesize, requirements):
817 def applybundlev2(repo, fp, filecount, filesize, requirements):
816 from . import localrepo
818 from . import localrepo
817
819
818 missingreqs = [r for r in requirements if r not in repo.supported]
820 missingreqs = [r for r in requirements if r not in repo.supported]
819 if missingreqs:
821 if missingreqs:
820 raise error.Abort(
822 raise error.Abort(
821 _(b'unable to apply stream clone: unsupported format: %s')
823 _(b'unable to apply stream clone: unsupported format: %s')
822 % b', '.join(sorted(missingreqs))
824 % b', '.join(sorted(missingreqs))
823 )
825 )
824
826
825 consumev2(repo, fp, filecount, filesize)
827 consumev2(repo, fp, filecount, filesize)
826
828
827 repo.requirements = new_stream_clone_requirements(
829 repo.requirements = new_stream_clone_requirements(
828 repo.requirements,
830 repo.requirements,
829 requirements,
831 requirements,
830 )
832 )
831 repo.svfs.options = localrepo.resolvestorevfsoptions(
833 repo.svfs.options = localrepo.resolvestorevfsoptions(
832 repo.ui, repo.requirements, repo.features
834 repo.ui, repo.requirements, repo.features
833 )
835 )
834 scmutil.writereporequirements(repo)
836 scmutil.writereporequirements(repo)
835 nodemap.post_stream_cleanup(repo)
837 nodemap.post_stream_cleanup(repo)
836
838
837
839
838 def _copy_files(src_vfs_map, dst_vfs_map, entries, progress):
840 def _copy_files(src_vfs_map, dst_vfs_map, entries, progress):
839 hardlink = [True]
841 hardlink = [True]
840
842
841 def copy_used():
843 def copy_used():
842 hardlink[0] = False
844 hardlink[0] = False
843 progress.topic = _(b'copying')
845 progress.topic = _(b'copying')
844
846
845 for k, path, size in entries:
847 for k, path, size in entries:
846 src_vfs = src_vfs_map[k]
848 src_vfs = src_vfs_map[k]
847 dst_vfs = dst_vfs_map[k]
849 dst_vfs = dst_vfs_map[k]
848 src_path = src_vfs.join(path)
850 src_path = src_vfs.join(path)
849 dst_path = dst_vfs.join(path)
851 dst_path = dst_vfs.join(path)
850 # We cannot use dirname and makedirs of dst_vfs here because the store
852 # We cannot use dirname and makedirs of dst_vfs here because the store
851 # encoding confuses them. See issue 6581 for details.
853 # encoding confuses them. See issue 6581 for details.
852 dirname = os.path.dirname(dst_path)
854 dirname = os.path.dirname(dst_path)
853 if not os.path.exists(dirname):
855 if not os.path.exists(dirname):
854 util.makedirs(dirname)
856 util.makedirs(dirname)
855 dst_vfs.register_file(path)
857 dst_vfs.register_file(path)
856 # XXX we could use the #nb_bytes argument.
858 # XXX we could use the #nb_bytes argument.
857 util.copyfile(
859 util.copyfile(
858 src_path,
860 src_path,
859 dst_path,
861 dst_path,
860 hardlink=hardlink[0],
862 hardlink=hardlink[0],
861 no_hardlink_cb=copy_used,
863 no_hardlink_cb=copy_used,
862 check_fs_hardlink=False,
864 check_fs_hardlink=False,
863 )
865 )
864 progress.increment()
866 progress.increment()
865 return hardlink[0]
867 return hardlink[0]
866
868
867
869
868 def local_copy(src_repo, dest_repo):
870 def local_copy(src_repo, dest_repo):
869 """copy all content from one local repository to another
871 """copy all content from one local repository to another
870
872
871 This is useful for local clones"""
873 This is useful for local clones"""
872 src_store_requirements = {
874 src_store_requirements = {
873 r
875 r
874 for r in src_repo.requirements
876 for r in src_repo.requirements
875 if r not in requirementsmod.WORKING_DIR_REQUIREMENTS
877 if r not in requirementsmod.WORKING_DIR_REQUIREMENTS
876 }
878 }
877 dest_store_requirements = {
879 dest_store_requirements = {
878 r
880 r
879 for r in dest_repo.requirements
881 for r in dest_repo.requirements
880 if r not in requirementsmod.WORKING_DIR_REQUIREMENTS
882 if r not in requirementsmod.WORKING_DIR_REQUIREMENTS
881 }
883 }
882 assert src_store_requirements == dest_store_requirements
884 assert src_store_requirements == dest_store_requirements
883
885
884 with dest_repo.lock():
886 with dest_repo.lock():
885 with src_repo.lock():
887 with src_repo.lock():
886
888
887 # bookmarks are not integrated into the streaming as they might use the
889 # bookmarks are not integrated into the streaming as they might use the
888 # `repo.vfs`, and there is too much sensitive data accessible
890 # `repo.vfs`, and there is too much sensitive data accessible
889 # through `repo.vfs` to expose it to streaming clone.
891 # through `repo.vfs` to expose it to streaming clone.
890 src_book_vfs = bookmarks.bookmarksvfs(src_repo)
892 src_book_vfs = bookmarks.bookmarksvfs(src_repo)
891 srcbookmarks = src_book_vfs.join(b'bookmarks')
893 srcbookmarks = src_book_vfs.join(b'bookmarks')
892 bm_count = 0
894 bm_count = 0
893 if os.path.exists(srcbookmarks):
895 if os.path.exists(srcbookmarks):
894 bm_count = 1
896 bm_count = 1
895
897
896 entries, totalfilesize = _v2_walk(
898 entries, totalfilesize = _v2_walk(
897 src_repo,
899 src_repo,
898 includes=None,
900 includes=None,
899 excludes=None,
901 excludes=None,
900 includeobsmarkers=True,
902 includeobsmarkers=True,
901 )
903 )
902 src_vfs_map = _makemap(src_repo)
904 src_vfs_map = _makemap(src_repo)
903 dest_vfs_map = _makemap(dest_repo)
905 dest_vfs_map = _makemap(dest_repo)
904 progress = src_repo.ui.makeprogress(
906 progress = src_repo.ui.makeprogress(
905 topic=_(b'linking'),
907 topic=_(b'linking'),
906 total=len(entries) + bm_count,
908 total=len(entries) + bm_count,
907 unit=_(b'files'),
909 unit=_(b'files'),
908 )
910 )
909 # copy files
911 # copy files
910 #
912 #
911 # We could copy the full file while the source repository is locked
913 # We could copy the full file while the source repository is locked
912 # and the other one without the lock. However, in the linking case,
914 # and the other one without the lock. However, in the linking case,
913 # this would also require checks that nobody is appending any data
915 # this would also require checks that nobody is appending any data
914 # to the files while we do the clone, so this is not done yet. We
916 # to the files while we do the clone, so this is not done yet. We
915 # could do this blindly when copying files.
917 # could do this blindly when copying files.
916 files = ((k, path, size) for k, path, ftype, size in entries)
918 files = ((k, path, size) for k, path, ftype, size in entries)
917 hardlink = _copy_files(src_vfs_map, dest_vfs_map, files, progress)
919 hardlink = _copy_files(src_vfs_map, dest_vfs_map, files, progress)
918
920
919 # copy bookmarks over
921 # copy bookmarks over
920 if bm_count:
922 if bm_count:
921 dst_book_vfs = bookmarks.bookmarksvfs(dest_repo)
923 dst_book_vfs = bookmarks.bookmarksvfs(dest_repo)
922 dstbookmarks = dst_book_vfs.join(b'bookmarks')
924 dstbookmarks = dst_book_vfs.join(b'bookmarks')
923 util.copyfile(srcbookmarks, dstbookmarks)
925 util.copyfile(srcbookmarks, dstbookmarks)
924 progress.complete()
926 progress.complete()
925 if hardlink:
927 if hardlink:
926 msg = b'linked %d files\n'
928 msg = b'linked %d files\n'
927 else:
929 else:
928 msg = b'copied %d files\n'
930 msg = b'copied %d files\n'
929 src_repo.ui.debug(msg % (len(entries) + bm_count))
931 src_repo.ui.debug(msg % (len(entries) + bm_count))
930
932
931 with dest_repo.transaction(b"localclone") as tr:
933 with dest_repo.transaction(b"localclone") as tr:
932 dest_repo.store.write(tr)
934 dest_repo.store.write(tr)
933
935
934 # clean up transaction files as they do not make sense
936 # clean up transaction files as they do not make sense
935 transaction.cleanup_undo_files(dest_repo.ui.warn, dest_repo.vfs_map)
937 transaction.cleanup_undo_files(dest_repo.ui.warn, dest_repo.vfs_map)
@@ -1,689 +1,690 b''
1 # upgrade.py - functions for in place upgrade of Mercurial repository
1 # upgrade.py - functions for in place upgrade of Mercurial repository
2 #
2 #
3 # Copyright (c) 2016-present, Gregory Szorc
3 # Copyright (c) 2016-present, Gregory Szorc
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8
8
9 import stat
9 import stat
10
10
11 from ..i18n import _
11 from ..i18n import _
12 from ..pycompat import getattr
12 from ..pycompat import getattr
13 from .. import (
13 from .. import (
14 changelog,
14 changelog,
15 error,
15 error,
16 filelog,
16 filelog,
17 manifest,
17 manifest,
18 metadata,
18 metadata,
19 pycompat,
19 pycompat,
20 requirements,
20 requirements,
21 scmutil,
21 scmutil,
22 store,
22 store,
23 util,
23 util,
24 vfs as vfsmod,
24 vfs as vfsmod,
25 )
25 )
26 from ..revlogutils import (
26 from ..revlogutils import (
27 constants as revlogconst,
27 constants as revlogconst,
28 flagutil,
28 flagutil,
29 nodemap,
29 nodemap,
30 sidedata as sidedatamod,
30 sidedata as sidedatamod,
31 )
31 )
32 from . import actions as upgrade_actions
32 from . import actions as upgrade_actions
33
33
34
34
35 def get_sidedata_helpers(srcrepo, dstrepo):
35 def get_sidedata_helpers(srcrepo, dstrepo):
36 use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
36 use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
37 sequential = pycompat.iswindows or not use_w
37 sequential = pycompat.iswindows or not use_w
38 if not sequential:
38 if not sequential:
39 srcrepo.register_sidedata_computer(
39 srcrepo.register_sidedata_computer(
40 revlogconst.KIND_CHANGELOG,
40 revlogconst.KIND_CHANGELOG,
41 sidedatamod.SD_FILES,
41 sidedatamod.SD_FILES,
42 (sidedatamod.SD_FILES,),
42 (sidedatamod.SD_FILES,),
43 metadata._get_worker_sidedata_adder(srcrepo, dstrepo),
43 metadata._get_worker_sidedata_adder(srcrepo, dstrepo),
44 flagutil.REVIDX_HASCOPIESINFO,
44 flagutil.REVIDX_HASCOPIESINFO,
45 replace=True,
45 replace=True,
46 )
46 )
47 return sidedatamod.get_sidedata_helpers(srcrepo, dstrepo._wanted_sidedata)
47 return sidedatamod.get_sidedata_helpers(srcrepo, dstrepo._wanted_sidedata)
48
48
49
49
50 def _revlogfrompath(repo, rl_type, path):
50 def _revlogfrompath(repo, rl_type, path):
51 """Obtain a revlog from a repo path.
51 """Obtain a revlog from a repo path.
52
52
53 An instance of the appropriate class is returned.
53 An instance of the appropriate class is returned.
54 """
54 """
55 if rl_type & store.FILEFLAGS_CHANGELOG:
55 if rl_type & store.FILEFLAGS_CHANGELOG:
56 return changelog.changelog(repo.svfs)
56 return changelog.changelog(repo.svfs)
57 elif rl_type & store.FILEFLAGS_MANIFESTLOG:
57 elif rl_type & store.FILEFLAGS_MANIFESTLOG:
58 mandir = b''
58 mandir = b''
59 if b'/' in path:
59 if b'/' in path:
60 mandir = path.rsplit(b'/', 1)[0]
60 mandir = path.rsplit(b'/', 1)[0]
61 return manifest.manifestrevlog(
61 return manifest.manifestrevlog(
62 repo.nodeconstants, repo.svfs, tree=mandir
62 repo.nodeconstants, repo.svfs, tree=mandir
63 )
63 )
64 else:
64 else:
65 # drop the extension and the `data/` prefix
65 # drop the extension and the `data/` prefix
66 path_part = path.rsplit(b'.', 1)[0].split(b'/', 1)
66 path_part = path.rsplit(b'.', 1)[0].split(b'/', 1)
67 if len(path_part) < 2:
67 if len(path_part) < 2:
68 msg = _(b'cannot recognize revlog from filename: %s')
68 msg = _(b'cannot recognize revlog from filename: %s')
69 msg %= path
69 msg %= path
70 raise error.Abort(msg)
70 raise error.Abort(msg)
71 path = path_part[1]
71 path = path_part[1]
72 return filelog.filelog(repo.svfs, path)
72 return filelog.filelog(repo.svfs, path)
73
73
74
74
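# Editor's note: illustrative summary, not part of the changeset, of how
# _revlogfrompath() maps store paths to revlog classes (paths below are
# hypothetical examples):
#
#   FILEFLAGS_CHANGELOG   -> changelog.changelog(repo.svfs)
#   FILEFLAGS_MANIFESTLOG -> manifestrevlog(..., tree=<everything before the
#                            last '/' in the path, or b'' for the root>)
#   FILEFLAGS_FILELOG     -> filelog.filelog(repo.svfs, path) where the
#                            b'data/' prefix and the extension are stripped,
#                            e.g. b'data/foo.txt.i' -> b'foo.txt'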
75 def _copyrevlog(tr, destrepo, oldrl, rl_type, unencodedname):
75 def _copyrevlog(tr, destrepo, oldrl, rl_type, unencodedname):
76 """copy all relevant files for `oldrl` into `destrepo` store
76 """copy all relevant files for `oldrl` into `destrepo` store
77
77
78 Files are copied "as is" without any transformation. The copy is performed
78 Files are copied "as is" without any transformation. The copy is performed
79 without extra checks. Callers are responsible for making sure the copied
79 without extra checks. Callers are responsible for making sure the copied
80 content is compatible with format of the destination repository.
80 content is compatible with format of the destination repository.
81 """
81 """
82 oldrl = getattr(oldrl, '_revlog', oldrl)
82 oldrl = getattr(oldrl, '_revlog', oldrl)
83 newrl = _revlogfrompath(destrepo, rl_type, unencodedname)
83 newrl = _revlogfrompath(destrepo, rl_type, unencodedname)
84 newrl = getattr(newrl, '_revlog', newrl)
84 newrl = getattr(newrl, '_revlog', newrl)
85
85
86 oldvfs = oldrl.opener
86 oldvfs = oldrl.opener
87 newvfs = newrl.opener
87 newvfs = newrl.opener
88 oldindex = oldvfs.join(oldrl._indexfile)
88 oldindex = oldvfs.join(oldrl._indexfile)
89 newindex = newvfs.join(newrl._indexfile)
89 newindex = newvfs.join(newrl._indexfile)
90 olddata = oldvfs.join(oldrl._datafile)
90 olddata = oldvfs.join(oldrl._datafile)
91 newdata = newvfs.join(newrl._datafile)
91 newdata = newvfs.join(newrl._datafile)
92
92
93 with newvfs(newrl._indexfile, b'w'):
93 with newvfs(newrl._indexfile, b'w'):
94 pass # create all the directories
94 pass # create all the directories
95
95
96 util.copyfile(oldindex, newindex)
96 util.copyfile(oldindex, newindex)
97 copydata = oldrl.opener.exists(oldrl._datafile)
97 copydata = oldrl.opener.exists(oldrl._datafile)
98 if copydata:
98 if copydata:
99 util.copyfile(olddata, newdata)
99 util.copyfile(olddata, newdata)
100
100
101 if rl_type & store.FILEFLAGS_FILELOG:
101 if rl_type & store.FILEFLAGS_FILELOG:
102 destrepo.svfs.fncache.add(unencodedname)
102 destrepo.svfs.fncache.add(unencodedname)
103 if copydata:
103 if copydata:
104 destrepo.svfs.fncache.add(unencodedname[:-2] + b'.d')
104 destrepo.svfs.fncache.add(unencodedname[:-2] + b'.d')
105
105
106
106
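A small sketch of the copy pattern `_copyrevlog` follows: the `.i` index is always copied, the `.d` data file only when it exists (small revlogs keep their data inline in the index), and the copied names are recorded in an fncache-like set. Plain filesystem paths and a Python set stand in for Mercurial's vfs and fncache objects:

import os
import shutil


def copy_revlog_files(src_index, dst_index, fncache):
    """Copy a revlog's files: the .i index always, the .d data file if present."""
    # create the destination directory hierarchy, like opening the index for write
    os.makedirs(os.path.dirname(dst_index) or '.', exist_ok=True)
    shutil.copyfile(src_index, dst_index)
    fncache.add(dst_index)

    # inline revlogs keep their data inside the .i file and have no .d at all
    src_data = src_index[:-2] + '.d'
    if os.path.exists(src_data):
        shutil.copyfile(src_data, dst_index[:-2] + '.d')
        fncache.add(dst_index[:-2] + '.d')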
107 UPGRADE_CHANGELOG = b"changelog"
107 UPGRADE_CHANGELOG = b"changelog"
108 UPGRADE_MANIFEST = b"manifest"
108 UPGRADE_MANIFEST = b"manifest"
109 UPGRADE_FILELOGS = b"all-filelogs"
109 UPGRADE_FILELOGS = b"all-filelogs"
110
110
111 UPGRADE_ALL_REVLOGS = frozenset(
111 UPGRADE_ALL_REVLOGS = frozenset(
112 [UPGRADE_CHANGELOG, UPGRADE_MANIFEST, UPGRADE_FILELOGS]
112 [UPGRADE_CHANGELOG, UPGRADE_MANIFEST, UPGRADE_FILELOGS]
113 )
113 )
114
114
115
115
116 def matchrevlog(revlogfilter, rl_type):
116 def matchrevlog(revlogfilter, rl_type):
117 """check if a revlog is selected for cloning.
117 """check if a revlog is selected for cloning.
118
118
119 In other words, does the revlog need any updates, or can it be
119 In other words, does the revlog need any updates, or can it be
120 blindly copied?
120 blindly copied?
121
121
122 The store entry is checked against the passed filter"""
122 The store entry is checked against the passed filter"""
123 if rl_type & store.FILEFLAGS_CHANGELOG:
123 if rl_type & store.FILEFLAGS_CHANGELOG:
124 return UPGRADE_CHANGELOG in revlogfilter
124 return UPGRADE_CHANGELOG in revlogfilter
125 elif rl_type & store.FILEFLAGS_MANIFESTLOG:
125 elif rl_type & store.FILEFLAGS_MANIFESTLOG:
126 return UPGRADE_MANIFEST in revlogfilter
126 return UPGRADE_MANIFEST in revlogfilter
127 assert rl_type & store.FILEFLAGS_FILELOG
127 assert rl_type & store.FILEFLAGS_FILELOG
128 return UPGRADE_FILELOGS in revlogfilter
128 return UPGRADE_FILELOGS in revlogfilter
129
129
130
130
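A self-contained sketch of the selection done by `matchrevlog`, with stand-in bit flags in place of the `store.FILEFLAGS_*` constants:

FLAG_CHANGELOG = 1 << 0    # stand-ins for store.FILEFLAGS_*
FLAG_MANIFESTLOG = 1 << 1
FLAG_FILELOG = 1 << 2

UPGRADE_CHANGELOG = b"changelog"
UPGRADE_MANIFEST = b"manifest"
UPGRADE_FILELOGS = b"all-filelogs"


def match_revlog(revlogfilter, rl_type):
    """Return True when a revlog of this type must be re-cloned,
    False when it can be blindly copied."""
    if rl_type & FLAG_CHANGELOG:
        return UPGRADE_CHANGELOG in revlogfilter
    if rl_type & FLAG_MANIFESTLOG:
        return UPGRADE_MANIFEST in revlogfilter
    return UPGRADE_FILELOGS in revlogfilter


# only filelogs are selected for processing here
assert match_revlog(frozenset([UPGRADE_FILELOGS]), FLAG_FILELOG)
assert not match_revlog(frozenset([UPGRADE_FILELOGS]), FLAG_CHANGELOG)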
131 def _perform_clone(
131 def _perform_clone(
132 ui,
132 ui,
133 dstrepo,
133 dstrepo,
134 tr,
134 tr,
135 old_revlog,
135 old_revlog,
136 rl_type,
136 rl_type,
137 unencoded,
137 unencoded,
138 upgrade_op,
138 upgrade_op,
139 sidedata_helpers,
139 sidedata_helpers,
140 oncopiedrevision,
140 oncopiedrevision,
141 ):
141 ):
142 """returns the new revlog object created"""
142 """returns the new revlog object created"""
143 newrl = None
143 newrl = None
144 if matchrevlog(upgrade_op.revlogs_to_process, rl_type):
144 if matchrevlog(upgrade_op.revlogs_to_process, rl_type):
145 ui.note(
145 ui.note(
146 _(b'cloning %d revisions from %s\n') % (len(old_revlog), unencoded)
146 _(b'cloning %d revisions from %s\n') % (len(old_revlog), unencoded)
147 )
147 )
148 newrl = _revlogfrompath(dstrepo, rl_type, unencoded)
148 newrl = _revlogfrompath(dstrepo, rl_type, unencoded)
149 old_revlog.clone(
149 old_revlog.clone(
150 tr,
150 tr,
151 newrl,
151 newrl,
152 addrevisioncb=oncopiedrevision,
152 addrevisioncb=oncopiedrevision,
153 deltareuse=upgrade_op.delta_reuse_mode,
153 deltareuse=upgrade_op.delta_reuse_mode,
154 forcedeltabothparents=upgrade_op.force_re_delta_both_parents,
154 forcedeltabothparents=upgrade_op.force_re_delta_both_parents,
155 sidedata_helpers=sidedata_helpers,
155 sidedata_helpers=sidedata_helpers,
156 )
156 )
157 else:
157 else:
158 msg = _(b'blindly copying %s containing %i revisions\n')
158 msg = _(b'blindly copying %s containing %i revisions\n')
159 ui.note(msg % (unencoded, len(old_revlog)))
159 ui.note(msg % (unencoded, len(old_revlog)))
160 _copyrevlog(tr, dstrepo, old_revlog, rl_type, unencoded)
160 _copyrevlog(tr, dstrepo, old_revlog, rl_type, unencoded)
161
161
162 newrl = _revlogfrompath(dstrepo, rl_type, unencoded)
162 newrl = _revlogfrompath(dstrepo, rl_type, unencoded)
163 return newrl
163 return newrl
164
164
165
165
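`_perform_clone` therefore boils down to a two-way branch: re-clone revision by revision when the revlog's category is selected, otherwise copy its files verbatim, and in both cases hand back a usable revlog for the destination. A hedged sketch with callables standing in for the real clone, copy and reopen machinery:

def perform_clone(selected, rl_type, matcher, clone_fn, copy_fn, reopen_fn):
    """Re-clone the revlog when its category is selected, otherwise blind-copy,
    then return a handle on the resulting revlog (whatever reopen_fn yields)."""
    if matcher(selected, rl_type):
        clone_fn()   # revision-by-revision rewrite into the destination
    else:
        copy_fn()    # byte-for-byte copy of the index/data files
    return reopen_fn()


# no-op demonstration: a "filelog" is selected for processing, so clone_fn runs
calls = []
perform_clone(
    selected=frozenset([b'all-filelogs']),
    rl_type=1 << 2,                                   # stand-in for FILEFLAGS_FILELOG
    matcher=lambda sel, t: b'all-filelogs' in sel,
    clone_fn=lambda: calls.append('clone'),
    copy_fn=lambda: calls.append('copy'),
    reopen_fn=lambda: 'new-revlog',
)
assert calls == ['clone']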
166 def _clonerevlogs(
166 def _clonerevlogs(
167 ui,
167 ui,
168 srcrepo,
168 srcrepo,
169 dstrepo,
169 dstrepo,
170 tr,
170 tr,
171 upgrade_op,
171 upgrade_op,
172 ):
172 ):
173 """Copy revlogs between 2 repos."""
173 """Copy revlogs between 2 repos."""
174 revcount = 0
174 revcount = 0
175 srcsize = 0
175 srcsize = 0
176 srcrawsize = 0
176 srcrawsize = 0
177 dstsize = 0
177 dstsize = 0
178 fcount = 0
178 fcount = 0
179 frevcount = 0
179 frevcount = 0
180 fsrcsize = 0
180 fsrcsize = 0
181 frawsize = 0
181 frawsize = 0
182 fdstsize = 0
182 fdstsize = 0
183 mcount = 0
183 mcount = 0
184 mrevcount = 0
184 mrevcount = 0
185 msrcsize = 0
185 msrcsize = 0
186 mrawsize = 0
186 mrawsize = 0
187 mdstsize = 0
187 mdstsize = 0
188 crevcount = 0
188 crevcount = 0
189 csrcsize = 0
189 csrcsize = 0
190 crawsize = 0
190 crawsize = 0
191 cdstsize = 0
191 cdstsize = 0
192
192
193 alldatafiles = list(srcrepo.store.walk())
193 alldatafiles = list(srcrepo.store.walk())
194 # mapping of data files which need to be cloned
194 # mapping of data files which need to be cloned
195 # key is unencoded filename
195 # key is unencoded filename
196 # value is revlog_object_from_srcrepo
196 # value is revlog_object_from_srcrepo
197 manifests = {}
197 manifests = {}
198 changelogs = {}
198 changelogs = {}
199 filelogs = {}
199 filelogs = {}
200
200
201 # Perform a pass to collect metadata. This validates we can open all
201 # Perform a pass to collect metadata. This validates we can open all
202 # source files and allows a unified progress bar to be displayed.
202 # source files and allows a unified progress bar to be displayed.
203 for rl_type, unencoded, size in alldatafiles:
203 for entry in alldatafiles:
204 if not rl_type & store.FILEFLAGS_REVLOG_MAIN:
204 if not (entry.is_revlog and entry.is_revlog_main):
205 continue
205 continue
206 unencoded = entry.unencoded_path
206
207
207 # the store.walk function will wrongly pick up transaction backups and
208 # the store.walk function will wrongly pick up transaction backups and
208 # get confused. As a quick fix for the 5.9 release, we ignore those.
209 # get confused. As a quick fix for the 5.9 release, we ignore those.
209 # (this is not a module constant because it seems better to keep the
210 # (this is not a module constant because it seems better to keep the
210 # hack together)
211 # hack together)
211 skip_undo = (
212 skip_undo = (
212 b'undo.backup.00changelog.i',
213 b'undo.backup.00changelog.i',
213 b'undo.backup.00manifest.i',
214 b'undo.backup.00manifest.i',
214 )
215 )
215 if unencoded in skip_undo:
216 if unencoded in skip_undo:
216 continue
217 continue
217
218
218 rl = _revlogfrompath(srcrepo, rl_type, unencoded)
219 rl = _revlogfrompath(srcrepo, entry.revlog_type, unencoded)
219
220
220 info = rl.storageinfo(
221 info = rl.storageinfo(
221 exclusivefiles=True,
222 exclusivefiles=True,
222 revisionscount=True,
223 revisionscount=True,
223 trackedsize=True,
224 trackedsize=True,
224 storedsize=True,
225 storedsize=True,
225 )
226 )
226
227
227 revcount += info[b'revisionscount'] or 0
228 revcount += info[b'revisionscount'] or 0
228 datasize = info[b'storedsize'] or 0
229 datasize = info[b'storedsize'] or 0
229 rawsize = info[b'trackedsize'] or 0
230 rawsize = info[b'trackedsize'] or 0
230
231
231 srcsize += datasize
232 srcsize += datasize
232 srcrawsize += rawsize
233 srcrawsize += rawsize
233
234
234 # This is for the separate progress bars.
235 # This is for the separate progress bars.
235 if rl_type & store.FILEFLAGS_CHANGELOG:
236 if entry.revlog_type & store.FILEFLAGS_CHANGELOG:
236 changelogs[unencoded] = rl_type
237 changelogs[unencoded] = entry.revlog_type
237 crevcount += len(rl)
238 crevcount += len(rl)
238 csrcsize += datasize
239 csrcsize += datasize
239 crawsize += rawsize
240 crawsize += rawsize
240 elif rl_type & store.FILEFLAGS_MANIFESTLOG:
241 elif entry.revlog_type & store.FILEFLAGS_MANIFESTLOG:
241 manifests[unencoded] = rl_type
242 manifests[unencoded] = entry.revlog_type
242 mcount += 1
243 mcount += 1
243 mrevcount += len(rl)
244 mrevcount += len(rl)
244 msrcsize += datasize
245 msrcsize += datasize
245 mrawsize += rawsize
246 mrawsize += rawsize
246 elif rl_type & store.FILEFLAGS_FILELOG:
247 elif entry.revlog_type & store.FILEFLAGS_FILELOG:
247 filelogs[unencoded] = rl_type
248 filelogs[unencoded] = entry.revlog_type
248 fcount += 1
249 fcount += 1
249 frevcount += len(rl)
250 frevcount += len(rl)
250 fsrcsize += datasize
251 fsrcsize += datasize
251 frawsize += rawsize
252 frawsize += rawsize
252 else:
253 else:
253 error.ProgrammingError(b'unknown revlog type')
253 raise error.ProgrammingError(b'unknown revlog type')
254 raise error.ProgrammingError(b'unknown revlog type')
255
255 if not revcount:
256 if not revcount:
256 return
257 return
257
258
258 ui.status(
259 ui.status(
259 _(
260 _(
260 b'migrating %d total revisions (%d in filelogs, %d in manifests, '
261 b'migrating %d total revisions (%d in filelogs, %d in manifests, '
261 b'%d in changelog)\n'
262 b'%d in changelog)\n'
262 )
263 )
263 % (revcount, frevcount, mrevcount, crevcount)
264 % (revcount, frevcount, mrevcount, crevcount)
264 )
265 )
265 ui.status(
266 ui.status(
266 _(b'migrating %s in store; %s tracked data\n')
267 _(b'migrating %s in store; %s tracked data\n')
267 % ((util.bytecount(srcsize), util.bytecount(srcrawsize)))
268 % ((util.bytecount(srcsize), util.bytecount(srcrawsize)))
268 )
269 )
269
270
270 # Used to keep track of progress.
271 # Used to keep track of progress.
271 progress = None
272 progress = None
272
273
273 def oncopiedrevision(rl, rev, node):
274 def oncopiedrevision(rl, rev, node):
274 progress.increment()
275 progress.increment()
275
276
276 sidedata_helpers = get_sidedata_helpers(srcrepo, dstrepo)
277 sidedata_helpers = get_sidedata_helpers(srcrepo, dstrepo)
277
278
278 # Migrating filelogs
279 # Migrating filelogs
279 ui.status(
280 ui.status(
280 _(
281 _(
281 b'migrating %d filelogs containing %d revisions '
282 b'migrating %d filelogs containing %d revisions '
282 b'(%s in store; %s tracked data)\n'
283 b'(%s in store; %s tracked data)\n'
283 )
284 )
284 % (
285 % (
285 fcount,
286 fcount,
286 frevcount,
287 frevcount,
287 util.bytecount(fsrcsize),
288 util.bytecount(fsrcsize),
288 util.bytecount(frawsize),
289 util.bytecount(frawsize),
289 )
290 )
290 )
291 )
291 progress = srcrepo.ui.makeprogress(_(b'file revisions'), total=frevcount)
292 progress = srcrepo.ui.makeprogress(_(b'file revisions'), total=frevcount)
292 for unencoded, rl_type in sorted(filelogs.items()):
293 for unencoded, rl_type in sorted(filelogs.items()):
293 oldrl = _revlogfrompath(srcrepo, rl_type, unencoded)
294 oldrl = _revlogfrompath(srcrepo, rl_type, unencoded)
294
295
295 newrl = _perform_clone(
296 newrl = _perform_clone(
296 ui,
297 ui,
297 dstrepo,
298 dstrepo,
298 tr,
299 tr,
299 oldrl,
300 oldrl,
300 rl_type,
301 rl_type,
301 unencoded,
302 unencoded,
302 upgrade_op,
303 upgrade_op,
303 sidedata_helpers,
304 sidedata_helpers,
304 oncopiedrevision,
305 oncopiedrevision,
305 )
306 )
306 info = newrl.storageinfo(storedsize=True)
307 info = newrl.storageinfo(storedsize=True)
307 fdstsize += info[b'storedsize'] or 0
308 fdstsize += info[b'storedsize'] or 0
308 ui.status(
309 ui.status(
309 _(
310 _(
310 b'finished migrating %d filelog revisions across %d '
311 b'finished migrating %d filelog revisions across %d '
311 b'filelogs; change in size: %s\n'
312 b'filelogs; change in size: %s\n'
312 )
313 )
313 % (frevcount, fcount, util.bytecount(fdstsize - fsrcsize))
314 % (frevcount, fcount, util.bytecount(fdstsize - fsrcsize))
314 )
315 )
315
316
316 # Migrating manifests
317 # Migrating manifests
317 ui.status(
318 ui.status(
318 _(
319 _(
319 b'migrating %d manifests containing %d revisions '
320 b'migrating %d manifests containing %d revisions '
320 b'(%s in store; %s tracked data)\n'
321 b'(%s in store; %s tracked data)\n'
321 )
322 )
322 % (
323 % (
323 mcount,
324 mcount,
324 mrevcount,
325 mrevcount,
325 util.bytecount(msrcsize),
326 util.bytecount(msrcsize),
326 util.bytecount(mrawsize),
327 util.bytecount(mrawsize),
327 )
328 )
328 )
329 )
329 if progress:
330 if progress:
330 progress.complete()
331 progress.complete()
331 progress = srcrepo.ui.makeprogress(
332 progress = srcrepo.ui.makeprogress(
332 _(b'manifest revisions'), total=mrevcount
333 _(b'manifest revisions'), total=mrevcount
333 )
334 )
334 for unencoded, rl_type in sorted(manifests.items()):
335 for unencoded, rl_type in sorted(manifests.items()):
335 oldrl = _revlogfrompath(srcrepo, rl_type, unencoded)
336 oldrl = _revlogfrompath(srcrepo, rl_type, unencoded)
336 newrl = _perform_clone(
337 newrl = _perform_clone(
337 ui,
338 ui,
338 dstrepo,
339 dstrepo,
339 tr,
340 tr,
340 oldrl,
341 oldrl,
341 rl_type,
342 rl_type,
342 unencoded,
343 unencoded,
343 upgrade_op,
344 upgrade_op,
344 sidedata_helpers,
345 sidedata_helpers,
345 oncopiedrevision,
346 oncopiedrevision,
346 )
347 )
347 info = newrl.storageinfo(storedsize=True)
348 info = newrl.storageinfo(storedsize=True)
348 mdstsize += info[b'storedsize'] or 0
349 mdstsize += info[b'storedsize'] or 0
349 ui.status(
350 ui.status(
350 _(
351 _(
351 b'finished migrating %d manifest revisions across %d '
352 b'finished migrating %d manifest revisions across %d '
352 b'manifests; change in size: %s\n'
353 b'manifests; change in size: %s\n'
353 )
354 )
354 % (mrevcount, mcount, util.bytecount(mdstsize - msrcsize))
355 % (mrevcount, mcount, util.bytecount(mdstsize - msrcsize))
355 )
356 )
356
357
357 # Migrating changelog
358 # Migrating changelog
358 ui.status(
359 ui.status(
359 _(
360 _(
360 b'migrating changelog containing %d revisions '
361 b'migrating changelog containing %d revisions '
361 b'(%s in store; %s tracked data)\n'
362 b'(%s in store; %s tracked data)\n'
362 )
363 )
363 % (
364 % (
364 crevcount,
365 crevcount,
365 util.bytecount(csrcsize),
366 util.bytecount(csrcsize),
366 util.bytecount(crawsize),
367 util.bytecount(crawsize),
367 )
368 )
368 )
369 )
369 if progress:
370 if progress:
370 progress.complete()
371 progress.complete()
371 progress = srcrepo.ui.makeprogress(
372 progress = srcrepo.ui.makeprogress(
372 _(b'changelog revisions'), total=crevcount
373 _(b'changelog revisions'), total=crevcount
373 )
374 )
374 for unencoded, rl_type in sorted(changelogs.items()):
375 for unencoded, rl_type in sorted(changelogs.items()):
375 oldrl = _revlogfrompath(srcrepo, rl_type, unencoded)
376 oldrl = _revlogfrompath(srcrepo, rl_type, unencoded)
376 newrl = _perform_clone(
377 newrl = _perform_clone(
377 ui,
378 ui,
378 dstrepo,
379 dstrepo,
379 tr,
380 tr,
380 oldrl,
381 oldrl,
381 rl_type,
382 rl_type,
382 unencoded,
383 unencoded,
383 upgrade_op,
384 upgrade_op,
384 sidedata_helpers,
385 sidedata_helpers,
385 oncopiedrevision,
386 oncopiedrevision,
386 )
387 )
387 info = newrl.storageinfo(storedsize=True)
388 info = newrl.storageinfo(storedsize=True)
388 cdstsize += info[b'storedsize'] or 0
389 cdstsize += info[b'storedsize'] or 0
389 progress.complete()
390 progress.complete()
390 ui.status(
391 ui.status(
391 _(
392 _(
392 b'finished migrating %d changelog revisions; change in size: '
393 b'finished migrating %d changelog revisions; change in size: '
393 b'%s\n'
394 b'%s\n'
394 )
395 )
395 % (crevcount, util.bytecount(cdstsize - csrcsize))
396 % (crevcount, util.bytecount(cdstsize - csrcsize))
396 )
397 )
397
398
398 dstsize = fdstsize + mdstsize + cdstsize
399 dstsize = fdstsize + mdstsize + cdstsize
399 ui.status(
400 ui.status(
400 _(
401 _(
401 b'finished migrating %d total revisions; total change in store '
402 b'finished migrating %d total revisions; total change in store '
402 b'size: %s\n'
403 b'size: %s\n'
403 )
404 )
404 % (revcount, util.bytecount(dstsize - srcsize))
405 % (revcount, util.bytecount(dstsize - srcsize))
405 )
406 )
406
407
407
408
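`_clonerevlogs` keeps per-category counters (filelogs, manifests, changelog) so it can report revision counts and the change in store size at each stage. A self-contained sketch of that bookkeeping, with a rough byte formatter standing in for util.bytecount:

def bytecount(n):
    """Very rough stand-in for Mercurial's util.bytecount."""
    value = float(n)
    for unit in ('bytes', 'KB', 'MB', 'GB', 'TB'):
        if abs(value) < 1024 or unit == 'TB':
            return '%.2f %s' % (value, unit)
        value /= 1024.0


def report_category(name, revcount, srcsize, dstsize):
    return 'finished migrating %d %s revisions; change in size: %s' % (
        revcount,
        name,
        bytecount(dstsize - srcsize),
    )


# e.g. filelogs shrank by one mebibyte during the clone
print(report_category('filelog', 1500, 10 * 1024 ** 2, 9 * 1024 ** 2))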
408 def _files_to_copy_post_revlog_clone(srcrepo):
409 def _files_to_copy_post_revlog_clone(srcrepo):
409 """yields files which should be copied to destination after revlogs
410 """yields files which should be copied to destination after revlogs
410 are cloned"""
411 are cloned"""
411 for path, kind, st in sorted(srcrepo.store.vfs.readdir(b'', stat=True)):
412 for path, kind, st in sorted(srcrepo.store.vfs.readdir(b'', stat=True)):
412 # don't copy revlogs as they are already cloned
413 # don't copy revlogs as they are already cloned
413 if store.revlog_type(path) is not None:
414 if store.revlog_type(path) is not None:
414 continue
415 continue
415 # Skip transaction related files.
416 # Skip transaction related files.
416 if path.startswith(b'undo'):
417 if path.startswith(b'undo'):
417 continue
418 continue
418 # Only copy regular files.
419 # Only copy regular files.
419 if kind != stat.S_IFREG:
420 if kind != stat.S_IFREG:
420 continue
421 continue
421 # Skip other skipped files.
422 # Skip other skipped files.
422 if path in (b'lock', b'fncache'):
423 if path in (b'lock', b'fncache'):
423 continue
424 continue
424 # TODO: should we skip cache too?
425 # TODO: should we skip cache too?
425
426
426 yield path
427 yield path
427
428
428
429
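A standalone approximation of the filtering in `_files_to_copy_post_revlog_clone`: revlogs are skipped because they were already cloned, as are transaction leftovers, non-regular files and a couple of housekeeping files. The suffix tuple below is only an approximation of what store.revlog_type() recognizes:

import os
import stat


def files_to_copy(store_dir):
    """Yield store files worth copying after the revlogs have been cloned."""
    revlog_suffixes = ('.i', '.d', '.n', '.nd')   # rough stand-in for store.revlog_type()
    skipped = {'lock', 'fncache'}
    for name in sorted(os.listdir(store_dir)):
        if name.endswith(revlog_suffixes):
            continue                              # already cloned
        if name.startswith('undo'):
            continue                              # transaction related files
        st = os.lstat(os.path.join(store_dir, name))
        if not stat.S_ISREG(st.st_mode):
            continue                              # only copy regular files
        if name in skipped:
            continue
        yield name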
429 def _replacestores(currentrepo, upgradedrepo, backupvfs, upgrade_op):
430 def _replacestores(currentrepo, upgradedrepo, backupvfs, upgrade_op):
430 """Replace the stores after current repository is upgraded
431 """Replace the stores after current repository is upgraded
431
432
432 Creates a backup of current repository store at backup path
433 Creates a backup of current repository store at backup path
433 Replaces store files in the current repo with those from the upgraded one
434 Replaces store files in the current repo with those from the upgraded one
434
435
435 Arguments:
436 Arguments:
436 currentrepo: repo object of current repository
437 currentrepo: repo object of current repository
437 upgradedrepo: repo object of the upgraded data
438 upgradedrepo: repo object of the upgraded data
438 backupvfs: vfs object for the backup path
439 backupvfs: vfs object for the backup path
439 upgrade_op: upgrade operation object
440 upgrade_op: upgrade operation object
440 to be used to decide what all is upgraded
441 to be used to decide what all is upgraded
441 """
442 """
442 # TODO: don't blindly rename everything in store
443 # TODO: don't blindly rename everything in store
443 # There can be upgrades where store is not touched at all
444 # There can be upgrades where store is not touched at all
444 if upgrade_op.backup_store:
445 if upgrade_op.backup_store:
445 util.rename(currentrepo.spath, backupvfs.join(b'store'))
446 util.rename(currentrepo.spath, backupvfs.join(b'store'))
446 else:
447 else:
447 currentrepo.vfs.rmtree(b'store', forcibly=True)
448 currentrepo.vfs.rmtree(b'store', forcibly=True)
448 util.rename(upgradedrepo.spath, currentrepo.spath)
449 util.rename(upgradedrepo.spath, currentrepo.spath)
449
450
450
451
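The swap in `_replacestores` is essentially a pair of renames: move the current store aside (or delete it when no backup is requested), then rename the upgraded store into place. A runnable toy version over temporary directories; the directory names are illustrative, not Mercurial's actual layout:

import os
import shutil
import tempfile


def replace_store(current_store, upgraded_store, backup_dir=None):
    """Swap `upgraded_store` into the place of `current_store`."""
    if backup_dir is not None:
        os.rename(current_store, os.path.join(backup_dir, 'store'))
    else:
        shutil.rmtree(current_store)
    os.rename(upgraded_store, current_store)


# toy demonstration
root = tempfile.mkdtemp()
cur = os.path.join(root, 'store')
new = os.path.join(root, 'store-upgraded')
bak = os.path.join(root, 'backup')
for d in (cur, new, bak):
    os.makedirs(d)
replace_store(cur, new, backup_dir=bak)
assert os.path.isdir(os.path.join(bak, 'store'))
shutil.rmtree(root)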
451 def finishdatamigration(ui, srcrepo, dstrepo, requirements):
452 def finishdatamigration(ui, srcrepo, dstrepo, requirements):
452 """Hook point for extensions to perform additional actions during upgrade.
453 """Hook point for extensions to perform additional actions during upgrade.
453
454
454 This function is called after revlogs and store files have been copied but
455 This function is called after revlogs and store files have been copied but
455 before the new store is swapped into the original location.
456 before the new store is swapped into the original location.
456 """
457 """
457
458
458
459
459 def upgrade(ui, srcrepo, dstrepo, upgrade_op):
460 def upgrade(ui, srcrepo, dstrepo, upgrade_op):
460 """Do the low-level work of upgrading a repository.
461 """Do the low-level work of upgrading a repository.
461
462
462 The upgrade is effectively performed as a copy between a source
463 The upgrade is effectively performed as a copy between a source
463 repository and a temporary destination repository.
464 repository and a temporary destination repository.
464
465
465 The source repository is unmodified for as long as possible so the
466 The source repository is unmodified for as long as possible so the
466 upgrade can abort at any time without causing loss of service for
467 upgrade can abort at any time without causing loss of service for
467 readers and without corrupting the source repository.
468 readers and without corrupting the source repository.
468 """
469 """
469 assert srcrepo.currentwlock()
470 assert srcrepo.currentwlock()
470 assert dstrepo.currentwlock()
471 assert dstrepo.currentwlock()
471 backuppath = None
472 backuppath = None
472 backupvfs = None
473 backupvfs = None
473
474
474 ui.status(
475 ui.status(
475 _(
476 _(
476 b'(it is safe to interrupt this process any time before '
477 b'(it is safe to interrupt this process any time before '
477 b'data migration completes)\n'
478 b'data migration completes)\n'
478 )
479 )
479 )
480 )
480
481
481 if upgrade_actions.dirstatev2 in upgrade_op.upgrade_actions:
482 if upgrade_actions.dirstatev2 in upgrade_op.upgrade_actions:
482 ui.status(_(b'upgrading to dirstate-v2 from v1\n'))
483 ui.status(_(b'upgrading to dirstate-v2 from v1\n'))
483 upgrade_dirstate(ui, srcrepo, upgrade_op, b'v1', b'v2')
484 upgrade_dirstate(ui, srcrepo, upgrade_op, b'v1', b'v2')
484 upgrade_op.upgrade_actions.remove(upgrade_actions.dirstatev2)
485 upgrade_op.upgrade_actions.remove(upgrade_actions.dirstatev2)
485
486
486 if upgrade_actions.dirstatev2 in upgrade_op.removed_actions:
487 if upgrade_actions.dirstatev2 in upgrade_op.removed_actions:
487 ui.status(_(b'downgrading from dirstate-v2 to v1\n'))
488 ui.status(_(b'downgrading from dirstate-v2 to v1\n'))
488 upgrade_dirstate(ui, srcrepo, upgrade_op, b'v2', b'v1')
489 upgrade_dirstate(ui, srcrepo, upgrade_op, b'v2', b'v1')
489 upgrade_op.removed_actions.remove(upgrade_actions.dirstatev2)
490 upgrade_op.removed_actions.remove(upgrade_actions.dirstatev2)
490
491
491 if upgrade_actions.dirstatetrackedkey in upgrade_op.upgrade_actions:
492 if upgrade_actions.dirstatetrackedkey in upgrade_op.upgrade_actions:
492 ui.status(_(b'create dirstate-tracked-hint file\n'))
493 ui.status(_(b'create dirstate-tracked-hint file\n'))
493 upgrade_tracked_hint(ui, srcrepo, upgrade_op, add=True)
494 upgrade_tracked_hint(ui, srcrepo, upgrade_op, add=True)
494 upgrade_op.upgrade_actions.remove(upgrade_actions.dirstatetrackedkey)
495 upgrade_op.upgrade_actions.remove(upgrade_actions.dirstatetrackedkey)
495 elif upgrade_actions.dirstatetrackedkey in upgrade_op.removed_actions:
496 elif upgrade_actions.dirstatetrackedkey in upgrade_op.removed_actions:
496 ui.status(_(b'remove dirstate-tracked-hint file\n'))
497 ui.status(_(b'remove dirstate-tracked-hint file\n'))
497 upgrade_tracked_hint(ui, srcrepo, upgrade_op, add=False)
498 upgrade_tracked_hint(ui, srcrepo, upgrade_op, add=False)
498 upgrade_op.removed_actions.remove(upgrade_actions.dirstatetrackedkey)
499 upgrade_op.removed_actions.remove(upgrade_actions.dirstatetrackedkey)
499
500
500 if not (upgrade_op.upgrade_actions or upgrade_op.removed_actions):
501 if not (upgrade_op.upgrade_actions or upgrade_op.removed_actions):
501 return
502 return
502
503
503 if upgrade_op.requirements_only:
504 if upgrade_op.requirements_only:
504 ui.status(_(b'upgrading repository requirements\n'))
505 ui.status(_(b'upgrading repository requirements\n'))
505 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
506 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
506 # if there is only one action and that is persistent nodemap upgrade
507 # if there is only one action and that is persistent nodemap upgrade
507 # directly write the nodemap file and update requirements instead of going
508 # directly write the nodemap file and update requirements instead of going
508 # through the whole cloning process
509 # through the whole cloning process
509 elif (
510 elif (
510 len(upgrade_op.upgrade_actions) == 1
511 len(upgrade_op.upgrade_actions) == 1
511 and b'persistent-nodemap' in upgrade_op.upgrade_actions_names
512 and b'persistent-nodemap' in upgrade_op.upgrade_actions_names
512 and not upgrade_op.removed_actions
513 and not upgrade_op.removed_actions
513 ):
514 ):
514 ui.status(
515 ui.status(
515 _(b'upgrading repository to use persistent nodemap feature\n')
516 _(b'upgrading repository to use persistent nodemap feature\n')
516 )
517 )
517 with srcrepo.transaction(b'upgrade') as tr:
518 with srcrepo.transaction(b'upgrade') as tr:
518 unfi = srcrepo.unfiltered()
519 unfi = srcrepo.unfiltered()
519 cl = unfi.changelog
520 cl = unfi.changelog
520 nodemap.persist_nodemap(tr, cl, force=True)
521 nodemap.persist_nodemap(tr, cl, force=True)
521 # we want to directly operate on the underlying revlog to force
522 # we want to directly operate on the underlying revlog to force
522 # create a nodemap file. This is fine since this is upgrade code
523 # create a nodemap file. This is fine since this is upgrade code
523 # and it heavily relies on repository being revlog based
524 # and it heavily relies on repository being revlog based
524 # hence accessing private attributes can be justified
525 # hence accessing private attributes can be justified
525 nodemap.persist_nodemap(
526 nodemap.persist_nodemap(
526 tr, unfi.manifestlog._rootstore._revlog, force=True
527 tr, unfi.manifestlog._rootstore._revlog, force=True
527 )
528 )
528 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
529 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
529 elif (
530 elif (
530 len(upgrade_op.removed_actions) == 1
531 len(upgrade_op.removed_actions) == 1
531 and [
532 and [
532 x
533 x
533 for x in upgrade_op.removed_actions
534 for x in upgrade_op.removed_actions
534 if x.name == b'persistent-nodemap'
535 if x.name == b'persistent-nodemap'
535 ]
536 ]
536 and not upgrade_op.upgrade_actions
537 and not upgrade_op.upgrade_actions
537 ):
538 ):
538 ui.status(
539 ui.status(
539 _(b'downgrading repository to not use persistent nodemap feature\n')
540 _(b'downgrading repository to not use persistent nodemap feature\n')
540 )
541 )
541 with srcrepo.transaction(b'upgrade') as tr:
542 with srcrepo.transaction(b'upgrade') as tr:
542 unfi = srcrepo.unfiltered()
543 unfi = srcrepo.unfiltered()
543 cl = unfi.changelog
544 cl = unfi.changelog
544 nodemap.delete_nodemap(tr, srcrepo, cl)
545 nodemap.delete_nodemap(tr, srcrepo, cl)
545 # check comment 20 lines above for accessing private attributes
546 # check comment 20 lines above for accessing private attributes
546 nodemap.delete_nodemap(
547 nodemap.delete_nodemap(
547 tr, srcrepo, unfi.manifestlog._rootstore._revlog
548 tr, srcrepo, unfi.manifestlog._rootstore._revlog
548 )
549 )
549 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
550 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
550 else:
551 else:
551 with dstrepo.transaction(b'upgrade') as tr:
552 with dstrepo.transaction(b'upgrade') as tr:
552 _clonerevlogs(
553 _clonerevlogs(
553 ui,
554 ui,
554 srcrepo,
555 srcrepo,
555 dstrepo,
556 dstrepo,
556 tr,
557 tr,
557 upgrade_op,
558 upgrade_op,
558 )
559 )
559
560
560 # Now copy other files in the store directory.
561 # Now copy other files in the store directory.
561 for p in _files_to_copy_post_revlog_clone(srcrepo):
562 for p in _files_to_copy_post_revlog_clone(srcrepo):
562 srcrepo.ui.status(_(b'copying %s\n') % p)
563 srcrepo.ui.status(_(b'copying %s\n') % p)
563 src = srcrepo.store.rawvfs.join(p)
564 src = srcrepo.store.rawvfs.join(p)
564 dst = dstrepo.store.rawvfs.join(p)
565 dst = dstrepo.store.rawvfs.join(p)
565 util.copyfile(src, dst, copystat=True)
566 util.copyfile(src, dst, copystat=True)
566
567
567 finishdatamigration(ui, srcrepo, dstrepo, requirements)
568 finishdatamigration(ui, srcrepo, dstrepo, requirements)
568
569
569 ui.status(_(b'data fully upgraded in a temporary repository\n'))
570 ui.status(_(b'data fully upgraded in a temporary repository\n'))
570
571
571 if upgrade_op.backup_store:
572 if upgrade_op.backup_store:
572 backuppath = pycompat.mkdtemp(
573 backuppath = pycompat.mkdtemp(
573 prefix=b'upgradebackup.', dir=srcrepo.path
574 prefix=b'upgradebackup.', dir=srcrepo.path
574 )
575 )
575 backupvfs = vfsmod.vfs(backuppath)
576 backupvfs = vfsmod.vfs(backuppath)
576
577
577 # Make a backup of requires file first, as it is the first to be modified.
578 # Make a backup of requires file first, as it is the first to be modified.
578 util.copyfile(
579 util.copyfile(
579 srcrepo.vfs.join(b'requires'), backupvfs.join(b'requires')
580 srcrepo.vfs.join(b'requires'), backupvfs.join(b'requires')
580 )
581 )
581
582
582 # We install an arbitrary requirement that clients must not support
583 # We install an arbitrary requirement that clients must not support
583 # as a mechanism to lock out new clients during the data swap. This is
584 # as a mechanism to lock out new clients during the data swap. This is
584 # better than allowing a client to continue while the repository is in
585 # better than allowing a client to continue while the repository is in
585 # an inconsistent state.
586 # an inconsistent state.
586 ui.status(
587 ui.status(
587 _(
588 _(
588 b'marking source repository as being upgraded; clients will be '
589 b'marking source repository as being upgraded; clients will be '
589 b'unable to read from repository\n'
590 b'unable to read from repository\n'
590 )
591 )
591 )
592 )
592 scmutil.writereporequirements(
593 scmutil.writereporequirements(
593 srcrepo, srcrepo.requirements | {b'upgradeinprogress'}
594 srcrepo, srcrepo.requirements | {b'upgradeinprogress'}
594 )
595 )
595
596
596 ui.status(_(b'starting in-place swap of repository data\n'))
597 ui.status(_(b'starting in-place swap of repository data\n'))
597 if upgrade_op.backup_store:
598 if upgrade_op.backup_store:
598 ui.status(
599 ui.status(
599 _(b'replaced files will be backed up at %s\n') % backuppath
600 _(b'replaced files will be backed up at %s\n') % backuppath
600 )
601 )
601
602
602 # Now swap in the new store directory. Doing it as a rename should make
603 # Now swap in the new store directory. Doing it as a rename should make
603 # the operation nearly instantaneous and atomic (at least in well-behaved
604 # the operation nearly instantaneous and atomic (at least in well-behaved
604 # environments).
605 # environments).
605 ui.status(_(b'replacing store...\n'))
606 ui.status(_(b'replacing store...\n'))
606 tstart = util.timer()
607 tstart = util.timer()
607 _replacestores(srcrepo, dstrepo, backupvfs, upgrade_op)
608 _replacestores(srcrepo, dstrepo, backupvfs, upgrade_op)
608 elapsed = util.timer() - tstart
609 elapsed = util.timer() - tstart
609 ui.status(
610 ui.status(
610 _(
611 _(
611 b'store replacement complete; repository was inconsistent for '
612 b'store replacement complete; repository was inconsistent for '
612 b'%0.1fs\n'
613 b'%0.1fs\n'
613 )
614 )
614 % elapsed
615 % elapsed
615 )
616 )
616
617
617 # We first write the requirements file. Any new requirements will lock
618 # We first write the requirements file. Any new requirements will lock
618 # out legacy clients.
619 # out legacy clients.
619 ui.status(
620 ui.status(
620 _(
621 _(
621 b'finalizing requirements file and making repository readable '
622 b'finalizing requirements file and making repository readable '
622 b'again\n'
623 b'again\n'
623 )
624 )
624 )
625 )
625 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
626 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
626
627
627 if upgrade_op.backup_store:
628 if upgrade_op.backup_store:
628 # The lock file from the old store won't be removed because nothing has a
629 # The lock file from the old store won't be removed because nothing has a
629 # reference to its new location. So clean it up manually. Alternatively, we
630 # reference to its new location. So clean it up manually. Alternatively, we
630 # could update srcrepo.svfs and other variables to point to the new
631 # could update srcrepo.svfs and other variables to point to the new
631 # location. This is simpler.
632 # location. This is simpler.
632 assert backupvfs is not None # help pytype
633 assert backupvfs is not None # help pytype
633 backupvfs.unlink(b'store/lock')
634 backupvfs.unlink(b'store/lock')
634
635
635 return backuppath
636 return backuppath
636
637
637
638
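During the store swap, `upgrade` adds an extra `upgradeinprogress` entry to the requires file so other clients refuse to open the half-migrated repository, and writes the final requirements once the swap is done. A minimal standalone imitation of that lock-out dance over a plain text file; the requirement names other than `upgradeinprogress` are illustrative only:

import os
import shutil
import tempfile


def write_requirements(path, requirements):
    with open(path, 'w') as fp:
        for req in sorted(requirements):
            fp.write(req + '\n')


repo = tempfile.mkdtemp()
requires = os.path.join(repo, 'requires')
current = {'revlogv1', 'store', 'fncache'}
final = current | {'some-new-format'}          # whatever the upgrade adds

# 1. lock out other clients while the store is being swapped
write_requirements(requires, current | {'upgradeinprogress'})
# ... swap the store directories here ...
# 2. make the repository readable again with its final requirements
write_requirements(requires, final)
shutil.rmtree(repo)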
638 def upgrade_dirstate(ui, srcrepo, upgrade_op, old, new):
639 def upgrade_dirstate(ui, srcrepo, upgrade_op, old, new):
639 if upgrade_op.backup_store:
640 if upgrade_op.backup_store:
640 backuppath = pycompat.mkdtemp(
641 backuppath = pycompat.mkdtemp(
641 prefix=b'upgradebackup.', dir=srcrepo.path
642 prefix=b'upgradebackup.', dir=srcrepo.path
642 )
643 )
643 ui.status(_(b'replaced files will be backed up at %s\n') % backuppath)
644 ui.status(_(b'replaced files will be backed up at %s\n') % backuppath)
644 backupvfs = vfsmod.vfs(backuppath)
645 backupvfs = vfsmod.vfs(backuppath)
645 util.copyfile(
646 util.copyfile(
646 srcrepo.vfs.join(b'requires'), backupvfs.join(b'requires')
647 srcrepo.vfs.join(b'requires'), backupvfs.join(b'requires')
647 )
648 )
648 try:
649 try:
649 util.copyfile(
650 util.copyfile(
650 srcrepo.vfs.join(b'dirstate'), backupvfs.join(b'dirstate')
651 srcrepo.vfs.join(b'dirstate'), backupvfs.join(b'dirstate')
651 )
652 )
652 except FileNotFoundError:
653 except FileNotFoundError:
653 # The dirstate does not exist on an empty repo or a repo with no
654 # The dirstate does not exist on an empty repo or a repo with no
654 # revision checked out
655 # revision checked out
655 pass
656 pass
656
657
657 assert srcrepo.dirstate._use_dirstate_v2 == (old == b'v2')
658 assert srcrepo.dirstate._use_dirstate_v2 == (old == b'v2')
658 use_v2 = new == b'v2'
659 use_v2 = new == b'v2'
659 if use_v2:
660 if use_v2:
660 # Write the requirements *before* upgrading
661 # Write the requirements *before* upgrading
661 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
662 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
662
663
663 srcrepo.dirstate._map.preload()
664 srcrepo.dirstate._map.preload()
664 srcrepo.dirstate._use_dirstate_v2 = use_v2
665 srcrepo.dirstate._use_dirstate_v2 = use_v2
665 srcrepo.dirstate._map._use_dirstate_v2 = use_v2
666 srcrepo.dirstate._map._use_dirstate_v2 = use_v2
666 srcrepo.dirstate._dirty = True
667 srcrepo.dirstate._dirty = True
667 try:
668 try:
668 srcrepo.vfs.unlink(b'dirstate')
669 srcrepo.vfs.unlink(b'dirstate')
669 except FileNotFoundError:
670 except FileNotFoundError:
670 # The dirstate does not exist on an empty repo or a repo with no
671 # The dirstate does not exist on an empty repo or a repo with no
671 # revision checked out
672 # revision checked out
672 pass
673 pass
673
674
674 srcrepo.dirstate.write(None)
675 srcrepo.dirstate.write(None)
675 if not use_v2:
676 if not use_v2:
676 # Remove the v2 requirement *after* downgrading
677 # Remove the v2 requirement *after* downgrading
677 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
678 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
678
679
679
680
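`upgrade_dirstate` is deliberate about ordering: when moving to dirstate-v2 the new requirement is written before the dirstate is rewritten, and when downgrading the v2 requirement is dropped only afterwards. A sketch of that ordering with placeholder callables:

def switch_dirstate_format(use_v2, write_requirements, rewrite_dirstate):
    """Order the requirement write relative to the dirstate rewrite,
    mirroring the before/after comments in upgrade_dirstate."""
    if use_v2:
        write_requirements()   # declare v2 first...
        rewrite_dirstate()     # ...then write the v2 dirstate
    else:
        rewrite_dirstate()     # write the v1 dirstate first...
        write_requirements()   # ...then drop the v2 requirement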
680 def upgrade_tracked_hint(ui, srcrepo, upgrade_op, add):
681 def upgrade_tracked_hint(ui, srcrepo, upgrade_op, add):
681 if add:
682 if add:
682 srcrepo.dirstate._use_tracked_hint = True
683 srcrepo.dirstate._use_tracked_hint = True
683 srcrepo.dirstate._dirty = True
684 srcrepo.dirstate._dirty = True
684 srcrepo.dirstate._dirty_tracked_set = True
685 srcrepo.dirstate._dirty_tracked_set = True
685 srcrepo.dirstate.write(None)
686 srcrepo.dirstate.write(None)
686 if not add:
687 if not add:
687 srcrepo.dirstate.delete_tracked_hint()
688 srcrepo.dirstate.delete_tracked_hint()
688
689
689 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
690 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
@@ -1,621 +1,625 b''
1 # verify.py - repository integrity checking for Mercurial
1 # verify.py - repository integrity checking for Mercurial
2 #
2 #
3 # Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8
8
9 import os
9 import os
10
10
11 from .i18n import _
11 from .i18n import _
12 from .node import short
12 from .node import short
13 from .utils import stringutil
13 from .utils import stringutil
14
14
15 from . import (
15 from . import (
16 error,
16 error,
17 pycompat,
17 pycompat,
18 requirements,
18 requirements,
19 revlog,
19 revlog,
20 util,
20 util,
21 )
21 )
22
22
23 VERIFY_DEFAULT = 0
23 VERIFY_DEFAULT = 0
24 VERIFY_FULL = 1
24 VERIFY_FULL = 1
25
25
26
26
27 def verify(repo, level=None):
27 def verify(repo, level=None):
28 with repo.lock():
28 with repo.lock():
29 v = verifier(repo, level)
29 v = verifier(repo, level)
30 return v.verify()
30 return v.verify()
31
31
32
32
33 def _normpath(f):
33 def _normpath(f):
34 # under hg < 2.4, convert didn't sanitize paths properly, so a
34 # under hg < 2.4, convert didn't sanitize paths properly, so a
35 # converted repo may contain repeated slashes
35 # converted repo may contain repeated slashes
36 while b'//' in f:
36 while b'//' in f:
37 f = f.replace(b'//', b'/')
37 f = f.replace(b'//', b'/')
38 return f
38 return f
39
39
40
40
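`_normpath` simply collapses the doubled slashes that pre-2.4 `convert` could leave behind; for instance:

def normpath(f: bytes) -> bytes:
    while b'//' in f:
        f = f.replace(b'//', b'/')
    return f


assert normpath(b'dir//sub///file.txt') == b'dir/sub/file.txt'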
41 HINT_FNCACHE = _(
41 HINT_FNCACHE = _(
42 b'hint: run "hg debugrebuildfncache" to recover from corrupt fncache\n'
42 b'hint: run "hg debugrebuildfncache" to recover from corrupt fncache\n'
43 )
43 )
44
44
45 WARN_PARENT_DIR_UNKNOWN_REV = _(
45 WARN_PARENT_DIR_UNKNOWN_REV = _(
46 b"parent-directory manifest refers to unknown revision %s"
46 b"parent-directory manifest refers to unknown revision %s"
47 )
47 )
48
48
49 WARN_UNKNOWN_COPY_SOURCE = _(
49 WARN_UNKNOWN_COPY_SOURCE = _(
50 b"warning: copy source of '%s' not in parents of %s"
50 b"warning: copy source of '%s' not in parents of %s"
51 )
51 )
52
52
53 WARN_NULLID_COPY_SOURCE = _(
53 WARN_NULLID_COPY_SOURCE = _(
54 b"warning: %s@%s: copy source revision is nullid %s:%s\n"
54 b"warning: %s@%s: copy source revision is nullid %s:%s\n"
55 )
55 )
56
56
57
57
58 class verifier:
58 class verifier:
59 def __init__(self, repo, level=None):
59 def __init__(self, repo, level=None):
60 self.repo = repo.unfiltered()
60 self.repo = repo.unfiltered()
61 self.ui = repo.ui
61 self.ui = repo.ui
62 self.match = repo.narrowmatch()
62 self.match = repo.narrowmatch()
63 if level is None:
63 if level is None:
64 level = VERIFY_DEFAULT
64 level = VERIFY_DEFAULT
65 self._level = level
65 self._level = level
66 self.badrevs = set()
66 self.badrevs = set()
67 self.errors = 0
67 self.errors = 0
68 self.warnings = 0
68 self.warnings = 0
69 self.havecl = len(repo.changelog) > 0
69 self.havecl = len(repo.changelog) > 0
70 self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
70 self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
71 self.revlogv1 = repo.changelog._format_version != revlog.REVLOGV0
71 self.revlogv1 = repo.changelog._format_version != revlog.REVLOGV0
72 self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__)
72 self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__)
73 self.refersmf = False
73 self.refersmf = False
74 self.fncachewarned = False
74 self.fncachewarned = False
75 # developer config: verify.skipflags
75 # developer config: verify.skipflags
76 self.skipflags = repo.ui.configint(b'verify', b'skipflags')
76 self.skipflags = repo.ui.configint(b'verify', b'skipflags')
77 self.warnorphanstorefiles = True
77 self.warnorphanstorefiles = True
78
78
79 def _warn(self, msg):
79 def _warn(self, msg):
80 """record a "warning" level issue"""
80 """record a "warning" level issue"""
81 self.ui.warn(msg + b"\n")
81 self.ui.warn(msg + b"\n")
82 self.warnings += 1
82 self.warnings += 1
83
83
84 def _err(self, linkrev, msg, filename=None):
84 def _err(self, linkrev, msg, filename=None):
85 """record a "error" level issue"""
85 """record a "error" level issue"""
86 if linkrev is not None:
86 if linkrev is not None:
87 self.badrevs.add(linkrev)
87 self.badrevs.add(linkrev)
88 linkrev = b"%d" % linkrev
88 linkrev = b"%d" % linkrev
89 else:
89 else:
90 linkrev = b'?'
90 linkrev = b'?'
91 msg = b"%s: %s" % (linkrev, msg)
91 msg = b"%s: %s" % (linkrev, msg)
92 if filename:
92 if filename:
93 msg = b"%s@%s" % (filename, msg)
93 msg = b"%s@%s" % (filename, msg)
94 self.ui.warn(b" " + msg + b"\n")
94 self.ui.warn(b" " + msg + b"\n")
95 self.errors += 1
95 self.errors += 1
96
96
97 def _exc(self, linkrev, msg, inst, filename=None):
97 def _exc(self, linkrev, msg, inst, filename=None):
98 """record exception raised during the verify process"""
98 """record exception raised during the verify process"""
99 fmsg = stringutil.forcebytestr(inst)
99 fmsg = stringutil.forcebytestr(inst)
100 if not fmsg:
100 if not fmsg:
101 fmsg = pycompat.byterepr(inst)
101 fmsg = pycompat.byterepr(inst)
102 self._err(linkrev, b"%s: %s" % (msg, fmsg), filename)
102 self._err(linkrev, b"%s: %s" % (msg, fmsg), filename)
103
103
104 def _checkrevlog(self, obj, name, linkrev):
104 def _checkrevlog(self, obj, name, linkrev):
105 """verify high level property of a revlog
105 """verify high level property of a revlog
106
106
107 - revlog is present,
107 - revlog is present,
108 - revlog is non-empty,
108 - revlog is non-empty,
109 - sizes (index and data) are correct,
109 - sizes (index and data) are correct,
110 - revlog's format version is correct.
110 - revlog's format version is correct.
111 """
111 """
112 if not len(obj) and (self.havecl or self.havemf):
112 if not len(obj) and (self.havecl or self.havemf):
113 self._err(linkrev, _(b"empty or missing %s") % name)
113 self._err(linkrev, _(b"empty or missing %s") % name)
114 return
114 return
115
115
116 d = obj.checksize()
116 d = obj.checksize()
117 if d[0]:
117 if d[0]:
118 self._err(None, _(b"data length off by %d bytes") % d[0], name)
118 self._err(None, _(b"data length off by %d bytes") % d[0], name)
119 if d[1]:
119 if d[1]:
120 self._err(None, _(b"index contains %d extra bytes") % d[1], name)
120 self._err(None, _(b"index contains %d extra bytes") % d[1], name)
121
121
122 if obj._format_version != revlog.REVLOGV0:
122 if obj._format_version != revlog.REVLOGV0:
123 if not self.revlogv1:
123 if not self.revlogv1:
124 self._warn(_(b"warning: `%s' uses revlog format 1") % name)
124 self._warn(_(b"warning: `%s' uses revlog format 1") % name)
125 elif self.revlogv1:
125 elif self.revlogv1:
126 self._warn(_(b"warning: `%s' uses revlog format 0") % name)
126 self._warn(_(b"warning: `%s' uses revlog format 0") % name)
127
127
128 def _checkentry(self, obj, i, node, seen, linkrevs, f):
128 def _checkentry(self, obj, i, node, seen, linkrevs, f):
129 """verify a single revlog entry
129 """verify a single revlog entry
130
130
131 arguments are:
131 arguments are:
132 - obj: the source revlog
132 - obj: the source revlog
133 - i: the revision number
133 - i: the revision number
134 - node: the revision node id
134 - node: the revision node id
135 - seen: nodes previously seen for this revlog
135 - seen: nodes previously seen for this revlog
136 - linkrevs: [changelog-revisions] introducing "node"
136 - linkrevs: [changelog-revisions] introducing "node"
137 - f: string label ("changelog", "manifest", or filename)
137 - f: string label ("changelog", "manifest", or filename)
138
138
139 Performs the following checks:
139 Performs the following checks:
140 - linkrev points to an existing changelog revision,
140 - linkrev points to an existing changelog revision,
141 - linkrev points to a changelog revision that introduces this revision,
141 - linkrev points to a changelog revision that introduces this revision,
142 - linkrev points to the lowest of these changesets,
142 - linkrev points to the lowest of these changesets,
143 - both parents exist in the revlog,
143 - both parents exist in the revlog,
144 - the revision is not duplicated.
144 - the revision is not duplicated.
145
145
146 Return the linkrev of the revision (or None for changelog's revisions).
146 Return the linkrev of the revision (or None for changelog's revisions).
147 """
147 """
148 lr = obj.linkrev(obj.rev(node))
148 lr = obj.linkrev(obj.rev(node))
149 if lr < 0 or (self.havecl and lr not in linkrevs):
149 if lr < 0 or (self.havecl and lr not in linkrevs):
150 if lr < 0 or lr >= len(self.repo.changelog):
150 if lr < 0 or lr >= len(self.repo.changelog):
151 msg = _(b"rev %d points to nonexistent changeset %d")
151 msg = _(b"rev %d points to nonexistent changeset %d")
152 else:
152 else:
153 msg = _(b"rev %d points to unexpected changeset %d")
153 msg = _(b"rev %d points to unexpected changeset %d")
154 self._err(None, msg % (i, lr), f)
154 self._err(None, msg % (i, lr), f)
155 if linkrevs:
155 if linkrevs:
156 if f and len(linkrevs) > 1:
156 if f and len(linkrevs) > 1:
157 try:
157 try:
158 # attempt to filter down to real linkrevs
158 # attempt to filter down to real linkrevs
159 linkrevs = [
159 linkrevs = [
160 lr for lr in linkrevs
160 lr for lr in linkrevs
161 if self.lrugetctx(lr)[f].filenode() == node
161 if self.lrugetctx(lr)[f].filenode() == node
162 ]
162 ]
163 except Exception:
163 except Exception:
164 pass
164 pass
165 msg = _(b" (expected %s)")
165 msg = _(b" (expected %s)")
166 msg %= b" ".join(map(pycompat.bytestr, linkrevs))
166 msg %= b" ".join(map(pycompat.bytestr, linkrevs))
167 self._warn(msg)
167 self._warn(msg)
168 lr = None # can't be trusted
168 lr = None # can't be trusted
169
169
170 try:
170 try:
171 p1, p2 = obj.parents(node)
171 p1, p2 = obj.parents(node)
172 if p1 not in seen and p1 != self.repo.nullid:
172 if p1 not in seen and p1 != self.repo.nullid:
173 msg = _(b"unknown parent 1 %s of %s") % (short(p1), short(node))
173 msg = _(b"unknown parent 1 %s of %s") % (short(p1), short(node))
174 self._err(lr, msg, f)
174 self._err(lr, msg, f)
175 if p2 not in seen and p2 != self.repo.nullid:
175 if p2 not in seen and p2 != self.repo.nullid:
176 msg = _(b"unknown parent 2 %s of %s") % (short(p2), short(node))
176 msg = _(b"unknown parent 2 %s of %s") % (short(p2), short(node))
177 self._err(lr, msg, f)
177 self._err(lr, msg, f)
178 except Exception as inst:
178 except Exception as inst:
179 self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)
179 self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)
180
180
181 if node in seen:
181 if node in seen:
182 self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
182 self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
183 seen[node] = i
183 seen[node] = i
184 return lr
184 return lr
185
185
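A self-contained sketch of two of the `_checkentry` checks: the linkrev must point into the changelog, and a node must not appear twice in the same revlog. A plain list and dict stand in for the verifier's error bookkeeping and `seen` map:

def check_entry(rev, node, linkrev, changelog_len, seen, errors):
    """Record problems for one revlog entry; return the (possibly untrusted) linkrev."""
    if linkrev < 0 or linkrev >= changelog_len:
        errors.append('rev %d points to nonexistent changeset %d' % (rev, linkrev))
        linkrev = None            # can't be trusted any more
    if node in seen:
        errors.append('duplicate revision %d (%d)' % (rev, seen[node]))
    seen[node] = rev
    return linkrev


errs, seen = [], {}
check_entry(0, b'\x11' * 20, 0, changelog_len=1, seen=seen, errors=errs)
check_entry(1, b'\x11' * 20, 5, changelog_len=1, seen=seen, errors=errs)
assert len(errs) == 2             # bad linkrev and duplicate node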
186 def verify(self):
186 def verify(self):
187 """verify the content of the Mercurial repository
187 """verify the content of the Mercurial repository
188
188
189 This method runs all verifications, displaying issues as they are found.
189 This method runs all verifications, displaying issues as they are found.
190
190
191 Return 1 if any errors have been encountered, 0 otherwise."""
191 Return 1 if any errors have been encountered, 0 otherwise."""
192 # initial validation and generic report
192 # initial validation and generic report
193 repo = self.repo
193 repo = self.repo
194 ui = repo.ui
194 ui = repo.ui
195 if not repo.url().startswith(b'file:'):
195 if not repo.url().startswith(b'file:'):
196 raise error.Abort(_(b"cannot verify bundle or remote repos"))
196 raise error.Abort(_(b"cannot verify bundle or remote repos"))
197
197
198 if os.path.exists(repo.sjoin(b"journal")):
198 if os.path.exists(repo.sjoin(b"journal")):
199 ui.warn(_(b"abandoned transaction found - run hg recover\n"))
199 ui.warn(_(b"abandoned transaction found - run hg recover\n"))
200
200
201 if ui.verbose or not self.revlogv1:
201 if ui.verbose or not self.revlogv1:
202 ui.status(
202 ui.status(
203 _(b"repository uses revlog format %d\n")
203 _(b"repository uses revlog format %d\n")
204 % (self.revlogv1 and 1 or 0)
204 % (self.revlogv1 and 1 or 0)
205 )
205 )
206
206
207 # data verification
207 # data verification
208 mflinkrevs, filelinkrevs = self._verifychangelog()
208 mflinkrevs, filelinkrevs = self._verifychangelog()
209 filenodes = self._verifymanifest(mflinkrevs)
209 filenodes = self._verifymanifest(mflinkrevs)
210 del mflinkrevs
210 del mflinkrevs
211 self._crosscheckfiles(filelinkrevs, filenodes)
211 self._crosscheckfiles(filelinkrevs, filenodes)
212 totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)
212 totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)
213
213
214 if self.errors:
214 if self.errors:
215 ui.warn(_(b"not checking dirstate because of previous errors\n"))
215 ui.warn(_(b"not checking dirstate because of previous errors\n"))
216 dirstate_errors = 0
216 dirstate_errors = 0
217 else:
217 else:
218 dirstate_errors = self._verify_dirstate()
218 dirstate_errors = self._verify_dirstate()
219
219
220 # final report
220 # final report
221 ui.status(
221 ui.status(
222 _(b"checked %d changesets with %d changes to %d files\n")
222 _(b"checked %d changesets with %d changes to %d files\n")
223 % (len(repo.changelog), filerevisions, totalfiles)
223 % (len(repo.changelog), filerevisions, totalfiles)
224 )
224 )
225 if self.warnings:
225 if self.warnings:
226 ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
226 ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
227 if self.fncachewarned:
227 if self.fncachewarned:
228 ui.warn(HINT_FNCACHE)
228 ui.warn(HINT_FNCACHE)
229 if self.errors:
229 if self.errors:
230 ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
230 ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
231 if self.badrevs:
231 if self.badrevs:
232 msg = _(b"(first damaged changeset appears to be %d)\n")
232 msg = _(b"(first damaged changeset appears to be %d)\n")
233 msg %= min(self.badrevs)
233 msg %= min(self.badrevs)
234 ui.warn(msg)
234 ui.warn(msg)
235 if dirstate_errors:
235 if dirstate_errors:
236 ui.warn(
236 ui.warn(
237 _(b"dirstate inconsistent with current parent's manifest\n")
237 _(b"dirstate inconsistent with current parent's manifest\n")
238 )
238 )
239 ui.warn(_(b"%d dirstate errors\n") % dirstate_errors)
239 ui.warn(_(b"%d dirstate errors\n") % dirstate_errors)
240 return 1
240 return 1
241 return 0
241 return 0
242
242
243 def _verifychangelog(self):
243 def _verifychangelog(self):
244 """verify the changelog of a repository
244 """verify the changelog of a repository
245
245
246 The following checks are performed:
246 The following checks are performed:
247 - all of `_checkrevlog` checks,
247 - all of `_checkrevlog` checks,
248 - all of `_checkentry` checks (for each revision),
248 - all of `_checkentry` checks (for each revision),
249 - each revision can be read.
249 - each revision can be read.
250
250
251 The function returns some of the data observed in the changesets as a
251 The function returns some of the data observed in the changesets as a
252 (mflinkrevs, filelinkrevs) tuple:
252 (mflinkrevs, filelinkrevs) tuple:
253 - mflinkrevs: is a { manifest-node -> [changelog-rev] } mapping
253 - mflinkrevs: is a { manifest-node -> [changelog-rev] } mapping
254 - filelinkrevs: is a { file-path -> [changelog-rev] } mapping
254 - filelinkrevs: is a { file-path -> [changelog-rev] } mapping
255
255
256 If a matcher was specified, filelinkrevs will only contain matched
256 If a matcher was specified, filelinkrevs will only contain matched
257 files.
257 files.
258 """
258 """
259 ui = self.ui
259 ui = self.ui
260 repo = self.repo
260 repo = self.repo
261 match = self.match
261 match = self.match
262 cl = repo.changelog
262 cl = repo.changelog
263
263
264 ui.status(_(b"checking changesets\n"))
264 ui.status(_(b"checking changesets\n"))
265 mflinkrevs = {}
265 mflinkrevs = {}
266 filelinkrevs = {}
266 filelinkrevs = {}
267 seen = {}
267 seen = {}
268 self._checkrevlog(cl, b"changelog", 0)
268 self._checkrevlog(cl, b"changelog", 0)
269 progress = ui.makeprogress(
269 progress = ui.makeprogress(
270 _(b'checking'), unit=_(b'changesets'), total=len(repo)
270 _(b'checking'), unit=_(b'changesets'), total=len(repo)
271 )
271 )
272 for i in repo:
272 for i in repo:
273 progress.update(i)
273 progress.update(i)
274 n = cl.node(i)
274 n = cl.node(i)
275 self._checkentry(cl, i, n, seen, [i], b"changelog")
275 self._checkentry(cl, i, n, seen, [i], b"changelog")
276
276
277 try:
277 try:
278 changes = cl.read(n)
278 changes = cl.read(n)
279 if changes[0] != self.repo.nullid:
279 if changes[0] != self.repo.nullid:
280 mflinkrevs.setdefault(changes[0], []).append(i)
280 mflinkrevs.setdefault(changes[0], []).append(i)
281 self.refersmf = True
281 self.refersmf = True
282 for f in changes[3]:
282 for f in changes[3]:
283 if match(f):
283 if match(f):
284 filelinkrevs.setdefault(_normpath(f), []).append(i)
284 filelinkrevs.setdefault(_normpath(f), []).append(i)
285 except Exception as inst:
285 except Exception as inst:
286 self.refersmf = True
286 self.refersmf = True
287 self._exc(i, _(b"unpacking changeset %s") % short(n), inst)
287 self._exc(i, _(b"unpacking changeset %s") % short(n), inst)
288 progress.complete()
288 progress.complete()
289 return mflinkrevs, filelinkrevs
289 return mflinkrevs, filelinkrevs
290
290
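A standalone sketch of the bookkeeping `_verifychangelog` performs per changeset: each non-null manifest node is mapped to the revisions referencing it, and each touched file accumulates its linkrevs. Changesets are modeled here as plain (manifest_node, files) tuples rather than real changelog entries:

def collect_linkrevs(changesets):
    """changesets: iterable of (manifest_node, files) per revision index."""
    mflinkrevs = {}
    filelinkrevs = {}
    for rev, (manifest_node, files) in enumerate(changesets):
        if manifest_node is not None:        # stand-in for the nullid check
            mflinkrevs.setdefault(manifest_node, []).append(rev)
        for f in files:
            filelinkrevs.setdefault(f, []).append(rev)
    return mflinkrevs, filelinkrevs


mf, fl = collect_linkrevs([(b'm0', [b'a']), (b'm1', [b'a', b'b'])])
assert mf == {b'm0': [0], b'm1': [1]}
assert fl == {b'a': [0, 1], b'b': [1]}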
291 def _verifymanifest(
291 def _verifymanifest(
292 self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
292 self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
293 ):
293 ):
294 """verify the manifestlog content
294 """verify the manifestlog content
295
295
296 Inputs:
296 Inputs:
297 - mflinkrevs: a {manifest-node -> [changelog-revisions]} mapping
297 - mflinkrevs: a {manifest-node -> [changelog-revisions]} mapping
298 - dir: a subdirectory to check (for tree manifest repo)
298 - dir: a subdirectory to check (for tree manifest repo)
299 - storefiles: set of currently "orphan" files.
299 - storefiles: set of currently "orphan" files.
300 - subdirprogress: a progress object
300 - subdirprogress: a progress object
301
301
302 This function checks:
302 This function checks:
303 * all of `_checkrevlog` checks (for all manifest related revlogs)
303 * all of `_checkrevlog` checks (for all manifest related revlogs)
304 * all of `_checkentry` checks (for all manifest related revisions)
304 * all of `_checkentry` checks (for all manifest related revisions)
305 * nodes for subdirectories exist in the sub-directory manifest
305 * nodes for subdirectories exist in the sub-directory manifest
306 * each manifest entry has a file path
306 * each manifest entry has a file path
307 * each manifest node referred to in mflinkrevs exists in the manifest log
307 * each manifest node referred to in mflinkrevs exists in the manifest log
308
308
309 If tree manifest is in use and a matcher is specified, only the
309 If tree manifest is in use and a matcher is specified, only the
310 sub-directories matching it will be verified.
310 sub-directories matching it will be verified.
311
311
312 return a two level mapping:
312 return a two level mapping:
313 {"path" -> { filenode -> changelog-revision}}
313 {"path" -> { filenode -> changelog-revision}}
314
314
315 This mapping primarily contains entries for every file in the
315 This mapping primarily contains entries for every file in the
316 repository. In addition, when tree-manifest is used, it also contains
316 repository. In addition, when tree-manifest is used, it also contains
317 sub-directory entries.
317 sub-directory entries.
318
318
319 If a matcher is provided, only matching paths will be included.
319 If a matcher is provided, only matching paths will be included.
320 """
320 """
321 repo = self.repo
322 ui = self.ui
323 match = self.match
324 mfl = self.repo.manifestlog
325 mf = mfl.getstorage(dir)
326
327 if not dir:
328 self.ui.status(_(b"checking manifests\n"))
329
330 filenodes = {}
331 subdirnodes = {}
332 seen = {}
333 label = b"manifest"
334 if dir:
335 label = dir
336 revlogfiles = mf.files()
337 storefiles.difference_update(revlogfiles)
338 if subdirprogress: # should be true since we're in a subdirectory
339 subdirprogress.increment()
340 if self.refersmf:
341 # Do not check manifest if there are only changelog entries with
342 # null manifests.
343 self._checkrevlog(mf._revlog, label, 0)
344 progress = ui.makeprogress(
345 _(b'checking'), unit=_(b'manifests'), total=len(mf)
346 )
347 for i in mf:
348 if not dir:
349 progress.update(i)
350 n = mf.node(i)
351 lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
352 if n in mflinkrevs:
353 del mflinkrevs[n]
354 elif dir:
355 msg = _(b"%s not in parent-directory manifest") % short(n)
356 self._err(lr, msg, label)
357 else:
358 self._err(lr, _(b"%s not in changesets") % short(n), label)
359
360 try:
361 mfdelta = mfl.get(dir, n).readdelta(shallow=True)
362 for f, fn, fl in mfdelta.iterentries():
363 if not f:
364 self._err(lr, _(b"entry without name in manifest"))
365 elif f == b"/dev/null": # ignore this in very old repos
366 continue
367 fullpath = dir + _normpath(f)
368 if fl == b't':
369 if not match.visitdir(fullpath):
370 continue
371 sdn = subdirnodes.setdefault(fullpath + b'/', {})
372 sdn.setdefault(fn, []).append(lr)
373 else:
374 if not match(fullpath):
375 continue
376 filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
377 except Exception as inst:
378 self._exc(lr, _(b"reading delta %s") % short(n), inst, label)
379 if self._level >= VERIFY_FULL:
380 try:
381 # Various issues can affect manifest. So we read each full
382 # text from storage. This triggers the checks from the core
383 # code (eg: hash verification, filename are ordered, etc.)
384 mfdelta = mfl.get(dir, n).read()
385 except Exception as inst:
386 msg = _(b"reading full manifest %s") % short(n)
387 self._exc(lr, msg, inst, label)
388
389 if not dir:
390 progress.complete()
391
392 if self.havemf:
393 # since we delete entry in `mflinkrevs` during iteration, any
394 # remaining entries are "missing". We need to issue errors for them.
395 changesetpairs = [(c, m) for m in mflinkrevs for c in mflinkrevs[m]]
396 for c, m in sorted(changesetpairs):
397 if dir:
398 self._err(c, WARN_PARENT_DIR_UNKNOWN_REV % short(m), label)
399 else:
400 msg = _(b"changeset refers to unknown revision %s")
401 msg %= short(m)
402 self._err(c, msg, label)
403
404 if not dir and subdirnodes:
405 self.ui.status(_(b"checking directory manifests\n"))
406 storefiles = set()
407 subdirs = set()
408 revlogv1 = self.revlogv1
409 undecodable = []
410 for t, f, size in repo.store.datafiles(undecodable=undecodable):
410 for entry in repo.store.datafiles(undecodable=undecodable):
411 f = entry.unencoded_path
412 size = entry.file_size
413 if (size > 0 or not revlogv1) and f.startswith(b'meta/'):
414 storefiles.add(_normpath(f))
415 subdirs.add(os.path.dirname(f))
416 for f in undecodable:
417 self._err(None, _(b"cannot decode filename '%s'") % f)
418 subdirprogress = ui.makeprogress(
419 _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
420 )
421
422 for subdir, linkrevs in subdirnodes.items():
423 subdirfilenodes = self._verifymanifest(
424 linkrevs, subdir, storefiles, subdirprogress
425 )
426 for f, onefilenodes in subdirfilenodes.items():
427 filenodes.setdefault(f, {}).update(onefilenodes)
428
429 if not dir and subdirnodes:
430 assert subdirprogress is not None # help pytype
431 subdirprogress.complete()
432 if self.warnorphanstorefiles:
433 for f in sorted(storefiles):
434 self._warn(_(b"warning: orphan data file '%s'") % f)
435
436 return filenodes
437
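The hunk above (and its twin in _verifyfiles below) is the point of this changeset: repo.store.datafiles() now yields entry objects whose fields are read by name (entry.unencoded_path, entry.file_size) instead of being unpacked from a positional tuple. Below is a minimal, self-contained sketch of that access pattern; the StoreEntry dataclass and collect_meta_files helper are invented stand-ins for illustration, not Mercurial's real classes, and the _normpath call is omitted.

import os
from dataclasses import dataclass


@dataclass
class StoreEntry:
    # illustrative stand-in for the entry objects yielded by datafiles()
    unencoded_path: bytes
    file_size: int


def collect_meta_files(entries, revlogv1=True):
    """Mimic the meta/ filtering done in _verifymanifest (sketch only)."""
    storefiles = set()
    subdirs = set()
    for entry in entries:            # new style: iterate over entry objects
        f = entry.unencoded_path     # previously: for t, f, size in ...
        size = entry.file_size
        if (size > 0 or not revlogv1) and f.startswith(b'meta/'):
            storefiles.add(f)
            subdirs.add(os.path.dirname(f))
    return storefiles, subdirs


entries = [
    StoreEntry(b'meta/foo/00manifest.i', 64),
    StoreEntry(b'data/foo/bar.i', 128),       # skipped: not under meta/
    StoreEntry(b'meta/baz/00manifest.i', 0),  # skipped for revlogv1: empty
]
print(collect_meta_files(entries))

Named attributes keep call sites readable if more fields are later added to store entries, which appears to be the motivation for moving away from positional tuples.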
438 def _crosscheckfiles(self, filelinkrevs, filenodes):
439 repo = self.repo
440 ui = self.ui
441 ui.status(_(b"crosschecking files in changesets and manifests\n"))
442
443 total = len(filelinkrevs) + len(filenodes)
444 progress = ui.makeprogress(
445 _(b'crosschecking'), unit=_(b'files'), total=total
446 )
447 if self.havemf:
448 for f in sorted(filelinkrevs):
449 progress.increment()
450 if f not in filenodes:
451 lr = filelinkrevs[f][0]
452 self._err(lr, _(b"in changeset but not in manifest"), f)
453
454 if self.havecl:
455 for f in sorted(filenodes):
456 progress.increment()
457 if f not in filelinkrevs:
458 try:
459 fl = repo.file(f)
460 lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
461 except Exception:
462 lr = None
463 self._err(lr, _(b"in manifest but not in changeset"), f)
464
465 progress.complete()
466
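Stripped of the ui and progress plumbing, _crosscheckfiles above is a symmetric comparison of the key sets of the two mappings built earlier. A toy sketch with made-up data (the names here are invented for the example and are not Mercurial APIs):

def crosscheck(filelinkrevs, filenodes):
    # filelinkrevs: {path: [changelog revs]} built from changesets
    # filenodes:    {path: {filenode: linkrev}} built from manifests
    problems = []
    for f in sorted(filelinkrevs):
        if f not in filenodes:
            problems.append((filelinkrevs[f][0], 'in changeset but not in manifest', f))
    for f in sorted(filenodes):
        if f not in filelinkrevs:
            problems.append((None, 'in manifest but not in changeset', f))
    return problems


filelinkrevs = {b'a.txt': [0], b'b.txt': [1]}
filenodes = {b'a.txt': {b'\x11' * 20: 0}, b'c.txt': {b'\x22' * 20: 2}}
for lr, msg, f in crosscheck(filelinkrevs, filenodes):
    print(lr, msg, f)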
467 def _verifyfiles(self, filenodes, filelinkrevs):
468 repo = self.repo
469 ui = self.ui
470 lrugetctx = self.lrugetctx
471 revlogv1 = self.revlogv1
472 havemf = self.havemf
473 ui.status(_(b"checking files\n"))
474
475 storefiles = set()
476 undecodable = []
475 for t, f, size in repo.store.datafiles(undecodable=undecodable):
477 for entry in repo.store.datafiles(undecodable=undecodable):
478 size = entry.file_size
479 f = entry.unencoded_path
480 if (size > 0 or not revlogv1) and f.startswith(b'data/'):
481 storefiles.add(_normpath(f))
482 for f in undecodable:
483 self._err(None, _(b"cannot decode filename '%s'") % f)
484
485 state = {
486 # TODO this assumes revlog storage for changelog.
487 b'expectedversion': self.repo.changelog._format_version,
488 b'skipflags': self.skipflags,
489 # experimental config: censor.policy
490 b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
491 }
492
493 files = sorted(set(filenodes) | set(filelinkrevs))
494 revisions = 0
495 progress = ui.makeprogress(
496 _(b'checking'), unit=_(b'files'), total=len(files)
497 )
498 for i, f in enumerate(files):
499 progress.update(i, item=f)
500 try:
501 linkrevs = filelinkrevs[f]
502 except KeyError:
503 # in manifest but not in changelog
504 linkrevs = []
505
506 if linkrevs:
507 lr = linkrevs[0]
508 else:
509 lr = None
510
511 try:
512 fl = repo.file(f)
513 except error.StorageError as e:
514 self._err(lr, _(b"broken revlog! (%s)") % e, f)
515 continue
516
517 for ff in fl.files():
518 try:
519 storefiles.remove(ff)
520 except KeyError:
521 if self.warnorphanstorefiles:
522 msg = _(b" warning: revlog '%s' not in fncache!")
523 self._warn(msg % ff)
524 self.fncachewarned = True
525
526 if not len(fl) and (self.havecl or self.havemf):
527 self._err(lr, _(b"empty or missing %s") % f)
528 else:
529 # Guard against implementations not setting this.
530 state[b'skipread'] = set()
531 state[b'safe_renamed'] = set()
532
533 for problem in fl.verifyintegrity(state):
534 if problem.node is not None:
535 linkrev = fl.linkrev(fl.rev(problem.node))
536 else:
537 linkrev = None
538
539 if problem.warning:
540 self._warn(problem.warning)
541 elif problem.error:
542 linkrev_msg = linkrev if linkrev is not None else lr
543 self._err(linkrev_msg, problem.error, f)
544 else:
545 raise error.ProgrammingError(
546 b'problem instance does not set warning or error '
547 b'attribute: %s' % problem.msg
548 )
549
550 seen = {}
551 for i in fl:
552 revisions += 1
553 n = fl.node(i)
554 lr = self._checkentry(fl, i, n, seen, linkrevs, f)
555 if f in filenodes:
556 if havemf and n not in filenodes[f]:
557 self._err(lr, _(b"%s not in manifests") % (short(n)), f)
558 else:
559 del filenodes[f][n]
560
561 if n in state[b'skipread'] and n not in state[b'safe_renamed']:
562 continue
563
564 # check renames
565 try:
566 # This requires resolving fulltext (at least on revlogs,
567 # though not with LFS revisions). We may want
568 # ``verifyintegrity()`` to pass a set of nodes with
569 # rename metadata as an optimization.
570 rp = fl.renamed(n)
571 if rp:
572 if lr is not None and ui.verbose:
573 ctx = lrugetctx(lr)
574 if not any(rp[0] in pctx for pctx in ctx.parents()):
575 self._warn(WARN_UNKNOWN_COPY_SOURCE % (f, ctx))
576 fl2 = repo.file(rp[0])
577 if not len(fl2):
578 m = _(b"empty or missing copy source revlog %s:%s")
579 self._err(lr, m % (rp[0], short(rp[1])), f)
580 elif rp[1] == self.repo.nullid:
581 msg = WARN_NULLID_COPY_SOURCE
582 msg %= (f, lr, rp[0], short(rp[1]))
583 ui.note(msg)
584 else:
585 fl2.rev(rp[1])
586 except Exception as inst:
587 self._exc(
588 lr, _(b"checking rename of %s") % short(n), inst, f
589 )
590
591 # cross-check
592 if f in filenodes:
593 fns = [(v, k) for k, v in filenodes[f].items()]
594 for lr, node in sorted(fns):
595 msg = _(b"manifest refers to unknown revision %s")
596 self._err(lr, msg % short(node), f)
597 progress.complete()
598
599 if self.warnorphanstorefiles:
600 for f in sorted(storefiles):
601 self._warn(_(b"warning: orphan data file '%s'") % f)
602
603 return len(files), revisions
604
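The "orphan data file" warnings at the end of _verifyfiles come from straightforward set bookkeeping: every data/ path reported by the store starts out in storefiles, each filelog discards the store paths backing it, and whatever remains is reported as orphaned. A compact sketch, where store_paths and filelog_files are invented inputs standing in for the store entries and for repo.file(f).files():

def find_orphan_data_files(store_paths, filelog_files):
    # store_paths:   paths reported by the store (entry.unencoded_path values)
    # filelog_files: {tracked file: [store paths backing its filelog]}
    storefiles = {p for p in store_paths if p.startswith(b'data/')}
    for backing in filelog_files.values():
        for ff in backing:
            storefiles.discard(ff)  # the verifier uses remove() and warns on KeyError
    return sorted(storefiles)


store_paths = [b'data/a.txt.i', b'data/stale.txt.i']
filelog_files = {b'a.txt': [b'data/a.txt.i']}
print(find_orphan_data_files(store_paths, filelog_files))  # [b'data/stale.txt.i']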
605 def _verify_dirstate(self):
606 """Check that the dirstate is consistent with the parent's manifest"""
607 repo = self.repo
608 ui = self.ui
609 ui.status(_(b"checking dirstate\n"))
610
611 parent1, parent2 = repo.dirstate.parents()
612 m1 = repo[parent1].manifest()
613 m2 = repo[parent2].manifest()
614 dirstate_errors = 0
615
616 is_narrow = requirements.NARROW_REQUIREMENT in repo.requirements
617 narrow_matcher = repo.narrowmatch() if is_narrow else None
618
619 for err in repo.dirstate.verify(m1, m2, parent1, narrow_matcher):
620 ui.error(err)
621 dirstate_errors += 1
622
623 if dirstate_errors:
624 self.errors += dirstate_errors
625 return dirstate_errors
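_verify_dirstate delegates the real checking to repo.dirstate.verify(...) and only counts and reports the error strings it yields. Purely as a rough illustration of the kind of consistency property such a check can enforce (not a line-for-line rendition of what the real method verifies), here is a toy check that every file the dirstate tracks as clean is present in the first parent's manifest; all names below are invented for the example:

def toy_dirstate_check(tracked_clean, p1_manifest):
    # tracked_clean: files the dirstate believes are clean
    # p1_manifest:   {path: filenode} of the working copy's first parent
    errors = []
    for f in sorted(tracked_clean):
        if f not in p1_manifest:
            errors.append(b"%s tracked as clean but absent from p1 manifest" % f)
    return errors


errors = toy_dirstate_check({b'a.txt', b'ghost.txt'}, {b'a.txt': b'\x11' * 20})
for e in errors:
    print(e.decode())
print(len(errors), 'dirstate error(s)')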