##// END OF EJS Templates
verify: allow suppressing warnings about extra files...
Gregory Szorc -
r37435:76d2115c default
parent child Browse files
Show More
@@ -1,487 +1,492 b''
1 # verify.py - repository integrity checking for Mercurial
1 # verify.py - repository integrity checking for Mercurial
2 #
2 #
3 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import os
10 import os
11
11
12 from .i18n import _
12 from .i18n import _
13 from .node import (
13 from .node import (
14 nullid,
14 nullid,
15 short,
15 short,
16 )
16 )
17
17
18 from . import (
18 from . import (
19 error,
19 error,
20 pycompat,
20 pycompat,
21 revlog,
21 revlog,
22 scmutil,
22 scmutil,
23 util,
23 util,
24 )
24 )
25
25
26 def verify(repo):
26 def verify(repo):
27 with repo.lock():
27 with repo.lock():
28 return verifier(repo).verify()
28 return verifier(repo).verify()
29
29
30 def _normpath(f):
30 def _normpath(f):
31 # under hg < 2.4, convert didn't sanitize paths properly, so a
31 # under hg < 2.4, convert didn't sanitize paths properly, so a
32 # converted repo may contain repeated slashes
32 # converted repo may contain repeated slashes
33 while '//' in f:
33 while '//' in f:
34 f = f.replace('//', '/')
34 f = f.replace('//', '/')
35 return f
35 return f
36
36
37 class verifier(object):
37 class verifier(object):
38 # The match argument is always None in hg core, but e.g. the narrowhg
38 # The match argument is always None in hg core, but e.g. the narrowhg
39 # extension will pass in a matcher here.
39 # extension will pass in a matcher here.
40 def __init__(self, repo, match=None):
40 def __init__(self, repo, match=None):
41 self.repo = repo.unfiltered()
41 self.repo = repo.unfiltered()
42 self.ui = repo.ui
42 self.ui = repo.ui
43 self.match = match or scmutil.matchall(repo)
43 self.match = match or scmutil.matchall(repo)
44 self.badrevs = set()
44 self.badrevs = set()
45 self.errors = 0
45 self.errors = 0
46 self.warnings = 0
46 self.warnings = 0
47 self.havecl = len(repo.changelog) > 0
47 self.havecl = len(repo.changelog) > 0
48 self.havemf = len(repo.manifestlog._revlog) > 0
48 self.havemf = len(repo.manifestlog._revlog) > 0
49 self.revlogv1 = repo.changelog.version != revlog.REVLOGV0
49 self.revlogv1 = repo.changelog.version != revlog.REVLOGV0
50 self.lrugetctx = util.lrucachefunc(repo.__getitem__)
50 self.lrugetctx = util.lrucachefunc(repo.__getitem__)
51 self.refersmf = False
51 self.refersmf = False
52 self.fncachewarned = False
52 self.fncachewarned = False
53 # developer config: verify.skipflags
53 # developer config: verify.skipflags
54 self.skipflags = repo.ui.configint('verify', 'skipflags')
54 self.skipflags = repo.ui.configint('verify', 'skipflags')
55 self.warnorphanstorefiles = True
55
56
56 def warn(self, msg):
57 def warn(self, msg):
57 self.ui.warn(msg + "\n")
58 self.ui.warn(msg + "\n")
58 self.warnings += 1
59 self.warnings += 1
59
60
60 def err(self, linkrev, msg, filename=None):
61 def err(self, linkrev, msg, filename=None):
61 if linkrev is not None:
62 if linkrev is not None:
62 self.badrevs.add(linkrev)
63 self.badrevs.add(linkrev)
63 linkrev = "%d" % linkrev
64 linkrev = "%d" % linkrev
64 else:
65 else:
65 linkrev = '?'
66 linkrev = '?'
66 msg = "%s: %s" % (linkrev, msg)
67 msg = "%s: %s" % (linkrev, msg)
67 if filename:
68 if filename:
68 msg = "%s@%s" % (filename, msg)
69 msg = "%s@%s" % (filename, msg)
69 self.ui.warn(" " + msg + "\n")
70 self.ui.warn(" " + msg + "\n")
70 self.errors += 1
71 self.errors += 1
71
72
72 def exc(self, linkrev, msg, inst, filename=None):
73 def exc(self, linkrev, msg, inst, filename=None):
73 fmsg = pycompat.bytestr(inst)
74 fmsg = pycompat.bytestr(inst)
74 if not fmsg:
75 if not fmsg:
75 fmsg = pycompat.byterepr(inst)
76 fmsg = pycompat.byterepr(inst)
76 self.err(linkrev, "%s: %s" % (msg, fmsg), filename)
77 self.err(linkrev, "%s: %s" % (msg, fmsg), filename)
77
78
78 def checklog(self, obj, name, linkrev):
79 def checklog(self, obj, name, linkrev):
79 if not len(obj) and (self.havecl or self.havemf):
80 if not len(obj) and (self.havecl or self.havemf):
80 self.err(linkrev, _("empty or missing %s") % name)
81 self.err(linkrev, _("empty or missing %s") % name)
81 return
82 return
82
83
83 d = obj.checksize()
84 d = obj.checksize()
84 if d[0]:
85 if d[0]:
85 self.err(None, _("data length off by %d bytes") % d[0], name)
86 self.err(None, _("data length off by %d bytes") % d[0], name)
86 if d[1]:
87 if d[1]:
87 self.err(None, _("index contains %d extra bytes") % d[1], name)
88 self.err(None, _("index contains %d extra bytes") % d[1], name)
88
89
89 if obj.version != revlog.REVLOGV0:
90 if obj.version != revlog.REVLOGV0:
90 if not self.revlogv1:
91 if not self.revlogv1:
91 self.warn(_("warning: `%s' uses revlog format 1") % name)
92 self.warn(_("warning: `%s' uses revlog format 1") % name)
92 elif self.revlogv1:
93 elif self.revlogv1:
93 self.warn(_("warning: `%s' uses revlog format 0") % name)
94 self.warn(_("warning: `%s' uses revlog format 0") % name)
94
95
95 def checkentry(self, obj, i, node, seen, linkrevs, f):
96 def checkentry(self, obj, i, node, seen, linkrevs, f):
96 lr = obj.linkrev(obj.rev(node))
97 lr = obj.linkrev(obj.rev(node))
97 if lr < 0 or (self.havecl and lr not in linkrevs):
98 if lr < 0 or (self.havecl and lr not in linkrevs):
98 if lr < 0 or lr >= len(self.repo.changelog):
99 if lr < 0 or lr >= len(self.repo.changelog):
99 msg = _("rev %d points to nonexistent changeset %d")
100 msg = _("rev %d points to nonexistent changeset %d")
100 else:
101 else:
101 msg = _("rev %d points to unexpected changeset %d")
102 msg = _("rev %d points to unexpected changeset %d")
102 self.err(None, msg % (i, lr), f)
103 self.err(None, msg % (i, lr), f)
103 if linkrevs:
104 if linkrevs:
104 if f and len(linkrevs) > 1:
105 if f and len(linkrevs) > 1:
105 try:
106 try:
106 # attempt to filter down to real linkrevs
107 # attempt to filter down to real linkrevs
107 linkrevs = [l for l in linkrevs
108 linkrevs = [l for l in linkrevs
108 if self.lrugetctx(l)[f].filenode() == node]
109 if self.lrugetctx(l)[f].filenode() == node]
109 except Exception:
110 except Exception:
110 pass
111 pass
111 self.warn(_(" (expected %s)") % " ".join
112 self.warn(_(" (expected %s)") % " ".join
112 (map(pycompat.bytestr, linkrevs)))
113 (map(pycompat.bytestr, linkrevs)))
113 lr = None # can't be trusted
114 lr = None # can't be trusted
114
115
115 try:
116 try:
116 p1, p2 = obj.parents(node)
117 p1, p2 = obj.parents(node)
117 if p1 not in seen and p1 != nullid:
118 if p1 not in seen and p1 != nullid:
118 self.err(lr, _("unknown parent 1 %s of %s") %
119 self.err(lr, _("unknown parent 1 %s of %s") %
119 (short(p1), short(node)), f)
120 (short(p1), short(node)), f)
120 if p2 not in seen and p2 != nullid:
121 if p2 not in seen and p2 != nullid:
121 self.err(lr, _("unknown parent 2 %s of %s") %
122 self.err(lr, _("unknown parent 2 %s of %s") %
122 (short(p2), short(node)), f)
123 (short(p2), short(node)), f)
123 except Exception as inst:
124 except Exception as inst:
124 self.exc(lr, _("checking parents of %s") % short(node), inst, f)
125 self.exc(lr, _("checking parents of %s") % short(node), inst, f)
125
126
126 if node in seen:
127 if node in seen:
127 self.err(lr, _("duplicate revision %d (%d)") % (i, seen[node]), f)
128 self.err(lr, _("duplicate revision %d (%d)") % (i, seen[node]), f)
128 seen[node] = i
129 seen[node] = i
129 return lr
130 return lr
130
131
131 def verify(self):
132 def verify(self):
132 repo = self.repo
133 repo = self.repo
133
134
134 ui = repo.ui
135 ui = repo.ui
135
136
136 if not repo.url().startswith('file:'):
137 if not repo.url().startswith('file:'):
137 raise error.Abort(_("cannot verify bundle or remote repos"))
138 raise error.Abort(_("cannot verify bundle or remote repos"))
138
139
139 if os.path.exists(repo.sjoin("journal")):
140 if os.path.exists(repo.sjoin("journal")):
140 ui.warn(_("abandoned transaction found - run hg recover\n"))
141 ui.warn(_("abandoned transaction found - run hg recover\n"))
141
142
142 if ui.verbose or not self.revlogv1:
143 if ui.verbose or not self.revlogv1:
143 ui.status(_("repository uses revlog format %d\n") %
144 ui.status(_("repository uses revlog format %d\n") %
144 (self.revlogv1 and 1 or 0))
145 (self.revlogv1 and 1 or 0))
145
146
146 mflinkrevs, filelinkrevs = self._verifychangelog()
147 mflinkrevs, filelinkrevs = self._verifychangelog()
147
148
148 filenodes = self._verifymanifest(mflinkrevs)
149 filenodes = self._verifymanifest(mflinkrevs)
149 del mflinkrevs
150 del mflinkrevs
150
151
151 self._crosscheckfiles(filelinkrevs, filenodes)
152 self._crosscheckfiles(filelinkrevs, filenodes)
152
153
153 totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)
154 totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)
154
155
155 ui.status(_("%d files, %d changesets, %d total revisions\n") %
156 ui.status(_("%d files, %d changesets, %d total revisions\n") %
156 (totalfiles, len(repo.changelog), filerevisions))
157 (totalfiles, len(repo.changelog), filerevisions))
157 if self.warnings:
158 if self.warnings:
158 ui.warn(_("%d warnings encountered!\n") % self.warnings)
159 ui.warn(_("%d warnings encountered!\n") % self.warnings)
159 if self.fncachewarned:
160 if self.fncachewarned:
160 ui.warn(_('hint: run "hg debugrebuildfncache" to recover from '
161 ui.warn(_('hint: run "hg debugrebuildfncache" to recover from '
161 'corrupt fncache\n'))
162 'corrupt fncache\n'))
162 if self.errors:
163 if self.errors:
163 ui.warn(_("%d integrity errors encountered!\n") % self.errors)
164 ui.warn(_("%d integrity errors encountered!\n") % self.errors)
164 if self.badrevs:
165 if self.badrevs:
165 ui.warn(_("(first damaged changeset appears to be %d)\n")
166 ui.warn(_("(first damaged changeset appears to be %d)\n")
166 % min(self.badrevs))
167 % min(self.badrevs))
167 return 1
168 return 1
168
169
169 def _verifychangelog(self):
170 def _verifychangelog(self):
170 ui = self.ui
171 ui = self.ui
171 repo = self.repo
172 repo = self.repo
172 match = self.match
173 match = self.match
173 cl = repo.changelog
174 cl = repo.changelog
174
175
175 ui.status(_("checking changesets\n"))
176 ui.status(_("checking changesets\n"))
176 mflinkrevs = {}
177 mflinkrevs = {}
177 filelinkrevs = {}
178 filelinkrevs = {}
178 seen = {}
179 seen = {}
179 self.checklog(cl, "changelog", 0)
180 self.checklog(cl, "changelog", 0)
180 total = len(repo)
181 total = len(repo)
181 for i in repo:
182 for i in repo:
182 ui.progress(_('checking'), i, total=total, unit=_('changesets'))
183 ui.progress(_('checking'), i, total=total, unit=_('changesets'))
183 n = cl.node(i)
184 n = cl.node(i)
184 self.checkentry(cl, i, n, seen, [i], "changelog")
185 self.checkentry(cl, i, n, seen, [i], "changelog")
185
186
186 try:
187 try:
187 changes = cl.read(n)
188 changes = cl.read(n)
188 if changes[0] != nullid:
189 if changes[0] != nullid:
189 mflinkrevs.setdefault(changes[0], []).append(i)
190 mflinkrevs.setdefault(changes[0], []).append(i)
190 self.refersmf = True
191 self.refersmf = True
191 for f in changes[3]:
192 for f in changes[3]:
192 if match(f):
193 if match(f):
193 filelinkrevs.setdefault(_normpath(f), []).append(i)
194 filelinkrevs.setdefault(_normpath(f), []).append(i)
194 except Exception as inst:
195 except Exception as inst:
195 self.refersmf = True
196 self.refersmf = True
196 self.exc(i, _("unpacking changeset %s") % short(n), inst)
197 self.exc(i, _("unpacking changeset %s") % short(n), inst)
197 ui.progress(_('checking'), None)
198 ui.progress(_('checking'), None)
198 return mflinkrevs, filelinkrevs
199 return mflinkrevs, filelinkrevs
199
200
200 def _verifymanifest(self, mflinkrevs, dir="", storefiles=None,
201 def _verifymanifest(self, mflinkrevs, dir="", storefiles=None,
201 progress=None):
202 progress=None):
202 repo = self.repo
203 repo = self.repo
203 ui = self.ui
204 ui = self.ui
204 match = self.match
205 match = self.match
205 mfl = self.repo.manifestlog
206 mfl = self.repo.manifestlog
206 mf = mfl._revlog.dirlog(dir)
207 mf = mfl._revlog.dirlog(dir)
207
208
208 if not dir:
209 if not dir:
209 self.ui.status(_("checking manifests\n"))
210 self.ui.status(_("checking manifests\n"))
210
211
211 filenodes = {}
212 filenodes = {}
212 subdirnodes = {}
213 subdirnodes = {}
213 seen = {}
214 seen = {}
214 label = "manifest"
215 label = "manifest"
215 if dir:
216 if dir:
216 label = dir
217 label = dir
217 revlogfiles = mf.files()
218 revlogfiles = mf.files()
218 storefiles.difference_update(revlogfiles)
219 storefiles.difference_update(revlogfiles)
219 if progress: # should be true since we're in a subdirectory
220 if progress: # should be true since we're in a subdirectory
220 progress()
221 progress()
221 if self.refersmf:
222 if self.refersmf:
222 # Do not check manifest if there are only changelog entries with
223 # Do not check manifest if there are only changelog entries with
223 # null manifests.
224 # null manifests.
224 self.checklog(mf, label, 0)
225 self.checklog(mf, label, 0)
225 total = len(mf)
226 total = len(mf)
226 for i in mf:
227 for i in mf:
227 if not dir:
228 if not dir:
228 ui.progress(_('checking'), i, total=total, unit=_('manifests'))
229 ui.progress(_('checking'), i, total=total, unit=_('manifests'))
229 n = mf.node(i)
230 n = mf.node(i)
230 lr = self.checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
231 lr = self.checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
231 if n in mflinkrevs:
232 if n in mflinkrevs:
232 del mflinkrevs[n]
233 del mflinkrevs[n]
233 elif dir:
234 elif dir:
234 self.err(lr, _("%s not in parent-directory manifest") %
235 self.err(lr, _("%s not in parent-directory manifest") %
235 short(n), label)
236 short(n), label)
236 else:
237 else:
237 self.err(lr, _("%s not in changesets") % short(n), label)
238 self.err(lr, _("%s not in changesets") % short(n), label)
238
239
239 try:
240 try:
240 mfdelta = mfl.get(dir, n).readdelta(shallow=True)
241 mfdelta = mfl.get(dir, n).readdelta(shallow=True)
241 for f, fn, fl in mfdelta.iterentries():
242 for f, fn, fl in mfdelta.iterentries():
242 if not f:
243 if not f:
243 self.err(lr, _("entry without name in manifest"))
244 self.err(lr, _("entry without name in manifest"))
244 elif f == "/dev/null": # ignore this in very old repos
245 elif f == "/dev/null": # ignore this in very old repos
245 continue
246 continue
246 fullpath = dir + _normpath(f)
247 fullpath = dir + _normpath(f)
247 if fl == 't':
248 if fl == 't':
248 if not match.visitdir(fullpath):
249 if not match.visitdir(fullpath):
249 continue
250 continue
250 subdirnodes.setdefault(fullpath + '/', {}).setdefault(
251 subdirnodes.setdefault(fullpath + '/', {}).setdefault(
251 fn, []).append(lr)
252 fn, []).append(lr)
252 else:
253 else:
253 if not match(fullpath):
254 if not match(fullpath):
254 continue
255 continue
255 filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
256 filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
256 except Exception as inst:
257 except Exception as inst:
257 self.exc(lr, _("reading delta %s") % short(n), inst, label)
258 self.exc(lr, _("reading delta %s") % short(n), inst, label)
258 if not dir:
259 if not dir:
259 ui.progress(_('checking'), None)
260 ui.progress(_('checking'), None)
260
261
261 if self.havemf:
262 if self.havemf:
262 for c, m in sorted([(c, m) for m in mflinkrevs
263 for c, m in sorted([(c, m) for m in mflinkrevs
263 for c in mflinkrevs[m]]):
264 for c in mflinkrevs[m]]):
264 if dir:
265 if dir:
265 self.err(c, _("parent-directory manifest refers to unknown "
266 self.err(c, _("parent-directory manifest refers to unknown "
266 "revision %s") % short(m), label)
267 "revision %s") % short(m), label)
267 else:
268 else:
268 self.err(c, _("changeset refers to unknown revision %s") %
269 self.err(c, _("changeset refers to unknown revision %s") %
269 short(m), label)
270 short(m), label)
270
271
271 if not dir and subdirnodes:
272 if not dir and subdirnodes:
272 self.ui.status(_("checking directory manifests\n"))
273 self.ui.status(_("checking directory manifests\n"))
273 storefiles = set()
274 storefiles = set()
274 subdirs = set()
275 subdirs = set()
275 revlogv1 = self.revlogv1
276 revlogv1 = self.revlogv1
276 for f, f2, size in repo.store.datafiles():
277 for f, f2, size in repo.store.datafiles():
277 if not f:
278 if not f:
278 self.err(None, _("cannot decode filename '%s'") % f2)
279 self.err(None, _("cannot decode filename '%s'") % f2)
279 elif (size > 0 or not revlogv1) and f.startswith('meta/'):
280 elif (size > 0 or not revlogv1) and f.startswith('meta/'):
280 storefiles.add(_normpath(f))
281 storefiles.add(_normpath(f))
281 subdirs.add(os.path.dirname(f))
282 subdirs.add(os.path.dirname(f))
282 subdircount = len(subdirs)
283 subdircount = len(subdirs)
283 currentsubdir = [0]
284 currentsubdir = [0]
284 def progress():
285 def progress():
285 currentsubdir[0] += 1
286 currentsubdir[0] += 1
286 ui.progress(_('checking'), currentsubdir[0], total=subdircount,
287 ui.progress(_('checking'), currentsubdir[0], total=subdircount,
287 unit=_('manifests'))
288 unit=_('manifests'))
288
289
289 for subdir, linkrevs in subdirnodes.iteritems():
290 for subdir, linkrevs in subdirnodes.iteritems():
290 subdirfilenodes = self._verifymanifest(linkrevs, subdir, storefiles,
291 subdirfilenodes = self._verifymanifest(linkrevs, subdir, storefiles,
291 progress)
292 progress)
292 for f, onefilenodes in subdirfilenodes.iteritems():
293 for f, onefilenodes in subdirfilenodes.iteritems():
293 filenodes.setdefault(f, {}).update(onefilenodes)
294 filenodes.setdefault(f, {}).update(onefilenodes)
294
295
295 if not dir and subdirnodes:
296 if not dir and subdirnodes:
296 ui.progress(_('checking'), None)
297 ui.progress(_('checking'), None)
297 for f in sorted(storefiles):
298 if self.warnorphanstorefiles:
298 self.warn(_("warning: orphan data file '%s'") % f)
299 for f in sorted(storefiles):
300 self.warn(_("warning: orphan data file '%s'") % f)
299
301
300 return filenodes
302 return filenodes
301
303
302 def _crosscheckfiles(self, filelinkrevs, filenodes):
304 def _crosscheckfiles(self, filelinkrevs, filenodes):
303 repo = self.repo
305 repo = self.repo
304 ui = self.ui
306 ui = self.ui
305 ui.status(_("crosschecking files in changesets and manifests\n"))
307 ui.status(_("crosschecking files in changesets and manifests\n"))
306
308
307 total = len(filelinkrevs) + len(filenodes)
309 total = len(filelinkrevs) + len(filenodes)
308 count = 0
310 count = 0
309 if self.havemf:
311 if self.havemf:
310 for f in sorted(filelinkrevs):
312 for f in sorted(filelinkrevs):
311 count += 1
313 count += 1
312 ui.progress(_('crosschecking'), count, total=total)
314 ui.progress(_('crosschecking'), count, total=total)
313 if f not in filenodes:
315 if f not in filenodes:
314 lr = filelinkrevs[f][0]
316 lr = filelinkrevs[f][0]
315 self.err(lr, _("in changeset but not in manifest"), f)
317 self.err(lr, _("in changeset but not in manifest"), f)
316
318
317 if self.havecl:
319 if self.havecl:
318 for f in sorted(filenodes):
320 for f in sorted(filenodes):
319 count += 1
321 count += 1
320 ui.progress(_('crosschecking'), count, total=total)
322 ui.progress(_('crosschecking'), count, total=total)
321 if f not in filelinkrevs:
323 if f not in filelinkrevs:
322 try:
324 try:
323 fl = repo.file(f)
325 fl = repo.file(f)
324 lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
326 lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
325 except Exception:
327 except Exception:
326 lr = None
328 lr = None
327 self.err(lr, _("in manifest but not in changeset"), f)
329 self.err(lr, _("in manifest but not in changeset"), f)
328
330
329 ui.progress(_('crosschecking'), None)
331 ui.progress(_('crosschecking'), None)
330
332
331 def _verifyfiles(self, filenodes, filelinkrevs):
333 def _verifyfiles(self, filenodes, filelinkrevs):
332 repo = self.repo
334 repo = self.repo
333 ui = self.ui
335 ui = self.ui
334 lrugetctx = self.lrugetctx
336 lrugetctx = self.lrugetctx
335 revlogv1 = self.revlogv1
337 revlogv1 = self.revlogv1
336 havemf = self.havemf
338 havemf = self.havemf
337 ui.status(_("checking files\n"))
339 ui.status(_("checking files\n"))
338
340
339 storefiles = set()
341 storefiles = set()
340 for f, f2, size in repo.store.datafiles():
342 for f, f2, size in repo.store.datafiles():
341 if not f:
343 if not f:
342 self.err(None, _("cannot decode filename '%s'") % f2)
344 self.err(None, _("cannot decode filename '%s'") % f2)
343 elif (size > 0 or not revlogv1) and f.startswith('data/'):
345 elif (size > 0 or not revlogv1) and f.startswith('data/'):
344 storefiles.add(_normpath(f))
346 storefiles.add(_normpath(f))
345
347
346 files = sorted(set(filenodes) | set(filelinkrevs))
348 files = sorted(set(filenodes) | set(filelinkrevs))
347 total = len(files)
349 total = len(files)
348 revisions = 0
350 revisions = 0
349 for i, f in enumerate(files):
351 for i, f in enumerate(files):
350 ui.progress(_('checking'), i, item=f, total=total, unit=_('files'))
352 ui.progress(_('checking'), i, item=f, total=total, unit=_('files'))
351 try:
353 try:
352 linkrevs = filelinkrevs[f]
354 linkrevs = filelinkrevs[f]
353 except KeyError:
355 except KeyError:
354 # in manifest but not in changelog
356 # in manifest but not in changelog
355 linkrevs = []
357 linkrevs = []
356
358
357 if linkrevs:
359 if linkrevs:
358 lr = linkrevs[0]
360 lr = linkrevs[0]
359 else:
361 else:
360 lr = None
362 lr = None
361
363
362 try:
364 try:
363 fl = repo.file(f)
365 fl = repo.file(f)
364 except error.RevlogError as e:
366 except error.RevlogError as e:
365 self.err(lr, _("broken revlog! (%s)") % e, f)
367 self.err(lr, _("broken revlog! (%s)") % e, f)
366 continue
368 continue
367
369
368 for ff in fl.files():
370 for ff in fl.files():
369 try:
371 try:
370 storefiles.remove(ff)
372 storefiles.remove(ff)
371 except KeyError:
373 except KeyError:
372 self.warn(_(" warning: revlog '%s' not in fncache!") % ff)
374 if self.warnorphanstorefiles:
373 self.fncachewarned = True
375 self.warn(_(" warning: revlog '%s' not in fncache!") %
376 ff)
377 self.fncachewarned = True
374
378
375 self.checklog(fl, f, lr)
379 self.checklog(fl, f, lr)
376 seen = {}
380 seen = {}
377 rp = None
381 rp = None
378 for i in fl:
382 for i in fl:
379 revisions += 1
383 revisions += 1
380 n = fl.node(i)
384 n = fl.node(i)
381 lr = self.checkentry(fl, i, n, seen, linkrevs, f)
385 lr = self.checkentry(fl, i, n, seen, linkrevs, f)
382 if f in filenodes:
386 if f in filenodes:
383 if havemf and n not in filenodes[f]:
387 if havemf and n not in filenodes[f]:
384 self.err(lr, _("%s not in manifests") % (short(n)), f)
388 self.err(lr, _("%s not in manifests") % (short(n)), f)
385 else:
389 else:
386 del filenodes[f][n]
390 del filenodes[f][n]
387
391
388 # Verify contents. 4 cases to care about:
392 # Verify contents. 4 cases to care about:
389 #
393 #
390 # common: the most common case
394 # common: the most common case
391 # rename: with a rename
395 # rename: with a rename
392 # meta: file content starts with b'\1\n', the metadata
396 # meta: file content starts with b'\1\n', the metadata
393 # header defined in filelog.py, but without a rename
397 # header defined in filelog.py, but without a rename
394 # ext: content stored externally
398 # ext: content stored externally
395 #
399 #
396 # More formally, their differences are shown below:
400 # More formally, their differences are shown below:
397 #
401 #
398 # | common | rename | meta | ext
402 # | common | rename | meta | ext
399 # -------------------------------------------------------
403 # -------------------------------------------------------
400 # flags() | 0 | 0 | 0 | not 0
404 # flags() | 0 | 0 | 0 | not 0
401 # renamed() | False | True | False | ?
405 # renamed() | False | True | False | ?
402 # rawtext[0:2]=='\1\n'| False | True | True | ?
406 # rawtext[0:2]=='\1\n'| False | True | True | ?
403 #
407 #
404 # "rawtext" means the raw text stored in revlog data, which
408 # "rawtext" means the raw text stored in revlog data, which
405 # could be retrieved by "revision(rev, raw=True)". "text"
409 # could be retrieved by "revision(rev, raw=True)". "text"
406 # mentioned below is "revision(rev, raw=False)".
410 # mentioned below is "revision(rev, raw=False)".
407 #
411 #
408 # There are 3 different lengths stored physically:
412 # There are 3 different lengths stored physically:
409 # 1. L1: rawsize, stored in revlog index
413 # 1. L1: rawsize, stored in revlog index
410 # 2. L2: len(rawtext), stored in revlog data
414 # 2. L2: len(rawtext), stored in revlog data
411 # 3. L3: len(text), stored in revlog data if flags==0, or
415 # 3. L3: len(text), stored in revlog data if flags==0, or
412 # possibly somewhere else if flags!=0
416 # possibly somewhere else if flags!=0
413 #
417 #
414 # L1 should be equal to L2. L3 could be different from them.
418 # L1 should be equal to L2. L3 could be different from them.
415 # "text" may or may not affect commit hash depending on flag
419 # "text" may or may not affect commit hash depending on flag
416 # processors (see revlog.addflagprocessor).
420 # processors (see revlog.addflagprocessor).
417 #
421 #
418 # | common | rename | meta | ext
422 # | common | rename | meta | ext
419 # -------------------------------------------------
423 # -------------------------------------------------
420 # rawsize() | L1 | L1 | L1 | L1
424 # rawsize() | L1 | L1 | L1 | L1
421 # size() | L1 | L2-LM | L1(*) | L1 (?)
425 # size() | L1 | L2-LM | L1(*) | L1 (?)
422 # len(rawtext) | L2 | L2 | L2 | L2
426 # len(rawtext) | L2 | L2 | L2 | L2
423 # len(text) | L2 | L2 | L2 | L3
427 # len(text) | L2 | L2 | L2 | L3
424 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
428 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
425 #
429 #
426 # LM: length of metadata, depending on rawtext
430 # LM: length of metadata, depending on rawtext
427 # (*): not ideal, see comment in filelog.size
431 # (*): not ideal, see comment in filelog.size
428 # (?): could be "- len(meta)" if the resolved content has
432 # (?): could be "- len(meta)" if the resolved content has
429 # rename metadata
433 # rename metadata
430 #
434 #
431 # Checks needed to be done:
435 # Checks needed to be done:
432 # 1. length check: L1 == L2, in all cases.
436 # 1. length check: L1 == L2, in all cases.
433 # 2. hash check: depending on flag processor, we may need to
437 # 2. hash check: depending on flag processor, we may need to
434 # use either "text" (external), or "rawtext" (in revlog).
438 # use either "text" (external), or "rawtext" (in revlog).
435 try:
439 try:
436 skipflags = self.skipflags
440 skipflags = self.skipflags
437 if skipflags:
441 if skipflags:
438 skipflags &= fl.flags(i)
442 skipflags &= fl.flags(i)
439 if not skipflags:
443 if not skipflags:
440 fl.read(n) # side effect: read content and do checkhash
444 fl.read(n) # side effect: read content and do checkhash
441 rp = fl.renamed(n)
445 rp = fl.renamed(n)
442 # the "L1 == L2" check
446 # the "L1 == L2" check
443 l1 = fl.rawsize(i)
447 l1 = fl.rawsize(i)
444 l2 = len(fl.revision(n, raw=True))
448 l2 = len(fl.revision(n, raw=True))
445 if l1 != l2:
449 if l1 != l2:
446 self.err(lr, _("unpacked size is %s, %s expected") %
450 self.err(lr, _("unpacked size is %s, %s expected") %
447 (l2, l1), f)
451 (l2, l1), f)
448 except error.CensoredNodeError:
452 except error.CensoredNodeError:
449 # experimental config: censor.policy
453 # experimental config: censor.policy
450 if ui.config("censor", "policy") == "abort":
454 if ui.config("censor", "policy") == "abort":
451 self.err(lr, _("censored file data"), f)
455 self.err(lr, _("censored file data"), f)
452 except Exception as inst:
456 except Exception as inst:
453 self.exc(lr, _("unpacking %s") % short(n), inst, f)
457 self.exc(lr, _("unpacking %s") % short(n), inst, f)
454
458
455 # check renames
459 # check renames
456 try:
460 try:
457 if rp:
461 if rp:
458 if lr is not None and ui.verbose:
462 if lr is not None and ui.verbose:
459 ctx = lrugetctx(lr)
463 ctx = lrugetctx(lr)
460 if not any(rp[0] in pctx for pctx in ctx.parents()):
464 if not any(rp[0] in pctx for pctx in ctx.parents()):
461 self.warn(_("warning: copy source of '%s' not"
465 self.warn(_("warning: copy source of '%s' not"
462 " in parents of %s") % (f, ctx))
466 " in parents of %s") % (f, ctx))
463 fl2 = repo.file(rp[0])
467 fl2 = repo.file(rp[0])
464 if not len(fl2):
468 if not len(fl2):
465 self.err(lr, _("empty or missing copy source "
469 self.err(lr, _("empty or missing copy source "
466 "revlog %s:%s") % (rp[0], short(rp[1])), f)
470 "revlog %s:%s") % (rp[0], short(rp[1])), f)
467 elif rp[1] == nullid:
471 elif rp[1] == nullid:
468 ui.note(_("warning: %s@%s: copy source"
472 ui.note(_("warning: %s@%s: copy source"
469 " revision is nullid %s:%s\n")
473 " revision is nullid %s:%s\n")
470 % (f, lr, rp[0], short(rp[1])))
474 % (f, lr, rp[0], short(rp[1])))
471 else:
475 else:
472 fl2.rev(rp[1])
476 fl2.rev(rp[1])
473 except Exception as inst:
477 except Exception as inst:
474 self.exc(lr, _("checking rename of %s") % short(n), inst, f)
478 self.exc(lr, _("checking rename of %s") % short(n), inst, f)
475
479
476 # cross-check
480 # cross-check
477 if f in filenodes:
481 if f in filenodes:
478 fns = [(v, k) for k, v in filenodes[f].iteritems()]
482 fns = [(v, k) for k, v in filenodes[f].iteritems()]
479 for lr, node in sorted(fns):
483 for lr, node in sorted(fns):
480 self.err(lr, _("manifest refers to unknown revision %s") %
484 self.err(lr, _("manifest refers to unknown revision %s") %
481 short(node), f)
485 short(node), f)
482 ui.progress(_('checking'), None)
486 ui.progress(_('checking'), None)
483
487
484 for f in sorted(storefiles):
488 if self.warnorphanstorefiles:
485 self.warn(_("warning: orphan data file '%s'") % f)
489 for f in sorted(storefiles):
490 self.warn(_("warning: orphan data file '%s'") % f)
486
491
487 return len(files), revisions
492 return len(files), revisions
@@ -1,664 +1,673 b''
1 # simplestorerepo.py - Extension that swaps in alternate repository storage.
1 # simplestorerepo.py - Extension that swaps in alternate repository storage.
2 #
2 #
3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 # To use this with the test suite:
8 # To use this with the test suite:
9 #
9 #
10 # $ HGREPOFEATURES="simplestore" ./run-tests.py \
10 # $ HGREPOFEATURES="simplestore" ./run-tests.py \
11 # --extra-config-opt extensions.simplestore=`pwd`/simplestorerepo.py
11 # --extra-config-opt extensions.simplestore=`pwd`/simplestorerepo.py
12
12
13 from __future__ import absolute_import
13 from __future__ import absolute_import
14
14
15 import stat
15 import stat
16
16
17 from mercurial.i18n import _
17 from mercurial.i18n import _
18 from mercurial.node import (
18 from mercurial.node import (
19 bin,
19 bin,
20 hex,
20 hex,
21 nullid,
21 nullid,
22 nullrev,
22 nullrev,
23 )
23 )
24 from mercurial.thirdparty import (
24 from mercurial.thirdparty import (
25 cbor,
25 cbor,
26 )
26 )
27 from mercurial import (
27 from mercurial import (
28 ancestor,
28 ancestor,
29 bundlerepo,
29 bundlerepo,
30 error,
30 error,
31 extensions,
31 extensions,
32 filelog,
32 filelog,
33 localrepo,
33 localrepo,
34 mdiff,
34 mdiff,
35 pycompat,
35 pycompat,
36 revlog,
36 revlog,
37 store,
37 store,
38 verify,
38 )
39 )
39
40
40 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
41 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
41 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
42 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
42 # be specifying the version(s) of Mercurial they are tested with, or
43 # be specifying the version(s) of Mercurial they are tested with, or
43 # leave the attribute unspecified.
44 # leave the attribute unspecified.
44 testedwith = 'ships-with-hg-core'
45 testedwith = 'ships-with-hg-core'
45
46
46 REQUIREMENT = 'testonly-simplestore'
47 REQUIREMENT = 'testonly-simplestore'
47
48
48 def validatenode(node):
49 def validatenode(node):
49 if isinstance(node, int):
50 if isinstance(node, int):
50 raise ValueError('expected node; got int')
51 raise ValueError('expected node; got int')
51
52
52 if len(node) != 20:
53 if len(node) != 20:
53 raise ValueError('expected 20 byte node')
54 raise ValueError('expected 20 byte node')
54
55
55 def validaterev(rev):
56 def validaterev(rev):
56 if not isinstance(rev, int):
57 if not isinstance(rev, int):
57 raise ValueError('expected int')
58 raise ValueError('expected int')
58
59
59 class filestorage(object):
60 class filestorage(object):
60 """Implements storage for a tracked path.
61 """Implements storage for a tracked path.
61
62
62 Data is stored in the VFS in a directory corresponding to the tracked
63 Data is stored in the VFS in a directory corresponding to the tracked
63 path.
64 path.
64
65
65 Index data is stored in an ``index`` file using CBOR.
66 Index data is stored in an ``index`` file using CBOR.
66
67
67 Fulltext data is stored in files having names of the node.
68 Fulltext data is stored in files having names of the node.
68 """
69 """
69
70
70 def __init__(self, svfs, path):
71 def __init__(self, svfs, path):
71 self._svfs = svfs
72 self._svfs = svfs
72 self._path = path
73 self._path = path
73
74
74 self._storepath = b'/'.join([b'data', path])
75 self._storepath = b'/'.join([b'data', path])
75 self._indexpath = b'/'.join([self._storepath, b'index'])
76 self._indexpath = b'/'.join([self._storepath, b'index'])
76
77
77 indexdata = self._svfs.tryread(self._indexpath)
78 indexdata = self._svfs.tryread(self._indexpath)
78 if indexdata:
79 if indexdata:
79 indexdata = cbor.loads(indexdata)
80 indexdata = cbor.loads(indexdata)
80
81
81 self._indexdata = indexdata or []
82 self._indexdata = indexdata or []
82 self._indexbynode = {}
83 self._indexbynode = {}
83 self._indexbyrev = {}
84 self._indexbyrev = {}
84 self.index = []
85 self.index = []
85 self._refreshindex()
86 self._refreshindex()
86
87
87 # This is used by changegroup code :/
88 # This is used by changegroup code :/
88 self._generaldelta = True
89 self._generaldelta = True
89 self.storedeltachains = False
90 self.storedeltachains = False
90
91
91 self.version = 1
92 self.version = 1
92
93
93 def _refreshindex(self):
94 def _refreshindex(self):
94 self._indexbynode.clear()
95 self._indexbynode.clear()
95 self._indexbyrev.clear()
96 self._indexbyrev.clear()
96 self.index = []
97 self.index = []
97
98
98 for i, entry in enumerate(self._indexdata):
99 for i, entry in enumerate(self._indexdata):
99 self._indexbynode[entry[b'node']] = entry
100 self._indexbynode[entry[b'node']] = entry
100 self._indexbyrev[i] = entry
101 self._indexbyrev[i] = entry
101
102
102 self._indexbynode[nullid] = {
103 self._indexbynode[nullid] = {
103 b'node': nullid,
104 b'node': nullid,
104 b'p1': nullid,
105 b'p1': nullid,
105 b'p2': nullid,
106 b'p2': nullid,
106 b'linkrev': nullrev,
107 b'linkrev': nullrev,
107 b'flags': 0,
108 b'flags': 0,
108 }
109 }
109
110
110 self._indexbyrev[nullrev] = {
111 self._indexbyrev[nullrev] = {
111 b'node': nullid,
112 b'node': nullid,
112 b'p1': nullid,
113 b'p1': nullid,
113 b'p2': nullid,
114 b'p2': nullid,
114 b'linkrev': nullrev,
115 b'linkrev': nullrev,
115 b'flags': 0,
116 b'flags': 0,
116 }
117 }
117
118
118 for i, entry in enumerate(self._indexdata):
119 for i, entry in enumerate(self._indexdata):
119 p1rev, p2rev = self.parentrevs(self.rev(entry[b'node']))
120 p1rev, p2rev = self.parentrevs(self.rev(entry[b'node']))
120
121
121 # start, length, rawsize, chainbase, linkrev, p1, p2, node
122 # start, length, rawsize, chainbase, linkrev, p1, p2, node
122 self.index.append((0, 0, 0, -1, entry[b'linkrev'], p1rev, p2rev,
123 self.index.append((0, 0, 0, -1, entry[b'linkrev'], p1rev, p2rev,
123 entry[b'node']))
124 entry[b'node']))
124
125
125 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
126 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
126
127
127 def __len__(self):
128 def __len__(self):
128 return len(self._indexdata)
129 return len(self._indexdata)
129
130
130 def __iter__(self):
131 def __iter__(self):
131 return iter(range(len(self)))
132 return iter(range(len(self)))
132
133
133 def revs(self, start=0, stop=None):
134 def revs(self, start=0, stop=None):
134 step = 1
135 step = 1
135 if stop is not None:
136 if stop is not None:
136 if start > stop:
137 if start > stop:
137 step = -1
138 step = -1
138
139
139 stop += step
140 stop += step
140 else:
141 else:
141 stop = len(self)
142 stop = len(self)
142
143
143 return range(start, stop, step)
144 return range(start, stop, step)
144
145
145 def parents(self, node):
146 def parents(self, node):
146 validatenode(node)
147 validatenode(node)
147
148
148 if node not in self._indexbynode:
149 if node not in self._indexbynode:
149 raise KeyError('unknown node')
150 raise KeyError('unknown node')
150
151
151 entry = self._indexbynode[node]
152 entry = self._indexbynode[node]
152
153
153 return entry[b'p1'], entry[b'p2']
154 return entry[b'p1'], entry[b'p2']
154
155
155 def parentrevs(self, rev):
156 def parentrevs(self, rev):
156 p1, p2 = self.parents(self._indexbyrev[rev][b'node'])
157 p1, p2 = self.parents(self._indexbyrev[rev][b'node'])
157 return self.rev(p1), self.rev(p2)
158 return self.rev(p1), self.rev(p2)
158
159
159 def rev(self, node):
160 def rev(self, node):
160 validatenode(node)
161 validatenode(node)
161
162
162 try:
163 try:
163 self._indexbynode[node]
164 self._indexbynode[node]
164 except KeyError:
165 except KeyError:
165 raise error.LookupError(node, self._indexpath, _('no node'))
166 raise error.LookupError(node, self._indexpath, _('no node'))
166
167
167 for rev, entry in self._indexbyrev.items():
168 for rev, entry in self._indexbyrev.items():
168 if entry[b'node'] == node:
169 if entry[b'node'] == node:
169 return rev
170 return rev
170
171
171 raise error.ProgrammingError('this should not occur')
172 raise error.ProgrammingError('this should not occur')
172
173
173 def node(self, rev):
174 def node(self, rev):
174 validaterev(rev)
175 validaterev(rev)
175
176
176 return self._indexbyrev[rev][b'node']
177 return self._indexbyrev[rev][b'node']
177
178
178 def lookup(self, node):
179 def lookup(self, node):
179 if isinstance(node, int):
180 if isinstance(node, int):
180 return self.node(node)
181 return self.node(node)
181
182
182 if len(node) == 20:
183 if len(node) == 20:
183 self.rev(node)
184 self.rev(node)
184 return node
185 return node
185
186
186 try:
187 try:
187 rev = int(node)
188 rev = int(node)
188 if '%d' % rev != node:
189 if '%d' % rev != node:
189 raise ValueError
190 raise ValueError
190
191
191 if rev < 0:
192 if rev < 0:
192 rev = len(self) + rev
193 rev = len(self) + rev
193 if rev < 0 or rev >= len(self):
194 if rev < 0 or rev >= len(self):
194 raise ValueError
195 raise ValueError
195
196
196 return self.node(rev)
197 return self.node(rev)
197 except (ValueError, OverflowError):
198 except (ValueError, OverflowError):
198 pass
199 pass
199
200
200 if len(node) == 40:
201 if len(node) == 40:
201 try:
202 try:
202 rawnode = bin(node)
203 rawnode = bin(node)
203 self.rev(rawnode)
204 self.rev(rawnode)
204 return rawnode
205 return rawnode
205 except TypeError:
206 except TypeError:
206 pass
207 pass
207
208
208 raise error.LookupError(node, self._path, _('invalid lookup input'))
209 raise error.LookupError(node, self._path, _('invalid lookup input'))
209
210
210 def linkrev(self, rev):
211 def linkrev(self, rev):
211 validaterev(rev)
212 validaterev(rev)
212
213
213 return self._indexbyrev[rev][b'linkrev']
214 return self._indexbyrev[rev][b'linkrev']
214
215
215 def flags(self, rev):
216 def flags(self, rev):
216 validaterev(rev)
217 validaterev(rev)
217
218
218 return self._indexbyrev[rev][b'flags']
219 return self._indexbyrev[rev][b'flags']
219
220
220 def deltaparent(self, rev):
221 def deltaparent(self, rev):
221 validaterev(rev)
222 validaterev(rev)
222
223
223 p1node = self.parents(self.node(rev))[0]
224 p1node = self.parents(self.node(rev))[0]
224 return self.rev(p1node)
225 return self.rev(p1node)
225
226
226 def candelta(self, baserev, rev):
227 def candelta(self, baserev, rev):
227 validaterev(baserev)
228 validaterev(baserev)
228 validaterev(rev)
229 validaterev(rev)
229
230
230 if ((self.flags(baserev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS)
231 if ((self.flags(baserev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS)
231 or (self.flags(rev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS)):
232 or (self.flags(rev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS)):
232 return False
233 return False
233
234
234 return True
235 return True
235
236
236 def rawsize(self, rev):
237 def rawsize(self, rev):
237 validaterev(rev)
238 validaterev(rev)
238 node = self.node(rev)
239 node = self.node(rev)
239 return len(self.revision(node, raw=True))
240 return len(self.revision(node, raw=True))
240
241
241 def _processflags(self, text, flags, operation, raw=False):
242 def _processflags(self, text, flags, operation, raw=False):
242 if flags == 0:
243 if flags == 0:
243 return text, True
244 return text, True
244
245
245 validatehash = True
246 validatehash = True
246 # Depending on the operation (read or write), the order might be
247 # Depending on the operation (read or write), the order might be
247 # reversed due to non-commutative transforms.
248 # reversed due to non-commutative transforms.
248 orderedflags = revlog.REVIDX_FLAGS_ORDER
249 orderedflags = revlog.REVIDX_FLAGS_ORDER
249 if operation == 'write':
250 if operation == 'write':
250 orderedflags = reversed(orderedflags)
251 orderedflags = reversed(orderedflags)
251
252
252 for flag in orderedflags:
253 for flag in orderedflags:
253 # If a flagprocessor has been registered for a known flag, apply the
254 # If a flagprocessor has been registered for a known flag, apply the
254 # related operation transform and update result tuple.
255 # related operation transform and update result tuple.
255 if flag & flags:
256 if flag & flags:
256 vhash = True
257 vhash = True
257
258
258 if flag not in revlog._flagprocessors:
259 if flag not in revlog._flagprocessors:
259 message = _("missing processor for flag '%#x'") % (flag)
260 message = _("missing processor for flag '%#x'") % (flag)
260 raise revlog.RevlogError(message)
261 raise revlog.RevlogError(message)
261
262
262 processor = revlog._flagprocessors[flag]
263 processor = revlog._flagprocessors[flag]
263 if processor is not None:
264 if processor is not None:
264 readtransform, writetransform, rawtransform = processor
265 readtransform, writetransform, rawtransform = processor
265
266
266 if raw:
267 if raw:
267 vhash = rawtransform(self, text)
268 vhash = rawtransform(self, text)
268 elif operation == 'read':
269 elif operation == 'read':
269 text, vhash = readtransform(self, text)
270 text, vhash = readtransform(self, text)
270 else: # write operation
271 else: # write operation
271 text, vhash = writetransform(self, text)
272 text, vhash = writetransform(self, text)
272 validatehash = validatehash and vhash
273 validatehash = validatehash and vhash
273
274
274 return text, validatehash
275 return text, validatehash
275
276
276 def checkhash(self, text, node, p1=None, p2=None, rev=None):
277 def checkhash(self, text, node, p1=None, p2=None, rev=None):
277 if p1 is None and p2 is None:
278 if p1 is None and p2 is None:
278 p1, p2 = self.parents(node)
279 p1, p2 = self.parents(node)
279 if node != revlog.hash(text, p1, p2):
280 if node != revlog.hash(text, p1, p2):
280 raise error.RevlogError(_("integrity check failed on %s") %
281 raise error.RevlogError(_("integrity check failed on %s") %
281 self._path)
282 self._path)
282
283
283 def revision(self, node, raw=False):
284 def revision(self, node, raw=False):
284 validatenode(node)
285 validatenode(node)
285
286
286 if node == nullid:
287 if node == nullid:
287 return b''
288 return b''
288
289
289 rev = self.rev(node)
290 rev = self.rev(node)
290 flags = self.flags(rev)
291 flags = self.flags(rev)
291
292
292 path = b'/'.join([self._storepath, hex(node)])
293 path = b'/'.join([self._storepath, hex(node)])
293 rawtext = self._svfs.read(path)
294 rawtext = self._svfs.read(path)
294
295
295 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
296 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
296 if validatehash:
297 if validatehash:
297 self.checkhash(text, node, rev=rev)
298 self.checkhash(text, node, rev=rev)
298
299
299 return text
300 return text
300
301
301 def read(self, node):
302 def read(self, node):
302 validatenode(node)
303 validatenode(node)
303
304
304 revision = self.revision(node)
305 revision = self.revision(node)
305
306
306 if not revision.startswith(b'\1\n'):
307 if not revision.startswith(b'\1\n'):
307 return revision
308 return revision
308
309
309 start = revision.index(b'\1\n', 2)
310 start = revision.index(b'\1\n', 2)
310 return revision[start + 2:]
311 return revision[start + 2:]
311
312
312 def renamed(self, node):
313 def renamed(self, node):
313 validatenode(node)
314 validatenode(node)
314
315
315 if self.parents(node)[0] != nullid:
316 if self.parents(node)[0] != nullid:
316 return False
317 return False
317
318
318 fulltext = self.revision(node)
319 fulltext = self.revision(node)
319 m = filelog.parsemeta(fulltext)[0]
320 m = filelog.parsemeta(fulltext)[0]
320
321
321 if m and 'copy' in m:
322 if m and 'copy' in m:
322 return m['copy'], bin(m['copyrev'])
323 return m['copy'], bin(m['copyrev'])
323
324
324 return False
325 return False
325
326
326 def cmp(self, node, text):
327 def cmp(self, node, text):
327 validatenode(node)
328 validatenode(node)
328
329
329 t = text
330 t = text
330
331
331 if text.startswith(b'\1\n'):
332 if text.startswith(b'\1\n'):
332 t = b'\1\n\1\n' + text
333 t = b'\1\n\1\n' + text
333
334
334 p1, p2 = self.parents(node)
335 p1, p2 = self.parents(node)
335
336
336 if revlog.hash(t, p1, p2) == node:
337 if revlog.hash(t, p1, p2) == node:
337 return False
338 return False
338
339
339 if self.iscensored(self.rev(node)):
340 if self.iscensored(self.rev(node)):
340 return text != b''
341 return text != b''
341
342
342 if self.renamed(node):
343 if self.renamed(node):
343 t2 = self.read(node)
344 t2 = self.read(node)
344 return t2 != text
345 return t2 != text
345
346
346 return True
347 return True
347
348
348 def size(self, rev):
349 def size(self, rev):
349 validaterev(rev)
350 validaterev(rev)
350
351
351 node = self._indexbyrev[rev][b'node']
352 node = self._indexbyrev[rev][b'node']
352
353
353 if self.renamed(node):
354 if self.renamed(node):
354 return len(self.read(node))
355 return len(self.read(node))
355
356
356 if self.iscensored(rev):
357 if self.iscensored(rev):
357 return 0
358 return 0
358
359
359 return len(self.revision(node))
360 return len(self.revision(node))
360
361
361 def iscensored(self, rev):
362 def iscensored(self, rev):
362 validaterev(rev)
363 validaterev(rev)
363
364
364 return self.flags(rev) & revlog.REVIDX_ISCENSORED
365 return self.flags(rev) & revlog.REVIDX_ISCENSORED
365
366
366 def commonancestorsheads(self, a, b):
367 def commonancestorsheads(self, a, b):
367 validatenode(a)
368 validatenode(a)
368 validatenode(b)
369 validatenode(b)
369
370
370 a = self.rev(a)
371 a = self.rev(a)
371 b = self.rev(b)
372 b = self.rev(b)
372
373
373 ancestors = ancestor.commonancestorsheads(self.parentrevs, a, b)
374 ancestors = ancestor.commonancestorsheads(self.parentrevs, a, b)
374 return pycompat.maplist(self.node, ancestors)
375 return pycompat.maplist(self.node, ancestors)
375
376
376 def descendants(self, revs):
377 def descendants(self, revs):
377 # This is a copy of revlog.descendants()
378 # This is a copy of revlog.descendants()
378 first = min(revs)
379 first = min(revs)
379 if first == nullrev:
380 if first == nullrev:
380 for i in self:
381 for i in self:
381 yield i
382 yield i
382 return
383 return
383
384
384 seen = set(revs)
385 seen = set(revs)
385 for i in self.revs(start=first + 1):
386 for i in self.revs(start=first + 1):
386 for x in self.parentrevs(i):
387 for x in self.parentrevs(i):
387 if x != nullrev and x in seen:
388 if x != nullrev and x in seen:
388 seen.add(i)
389 seen.add(i)
389 yield i
390 yield i
390 break
391 break
391
392
392 # Required by verify.
393 # Required by verify.
393 def files(self):
394 def files(self):
394 entries = self._svfs.listdir(self._storepath)
395 entries = self._svfs.listdir(self._storepath)
395
396
396 # Strip out undo.backup.* files created as part of transaction
397 # Strip out undo.backup.* files created as part of transaction
397 # recording.
398 # recording.
398 entries = [f for f in entries if not f.startswith('undo.backup.')]
399 entries = [f for f in entries if not f.startswith('undo.backup.')]
399
400
400 return [b'/'.join((self._storepath, f)) for f in entries]
401 return [b'/'.join((self._storepath, f)) for f in entries]
401
402
402 # Required by verify.
403 # Required by verify.
403 def checksize(self):
404 def checksize(self):
404 return 0, 0
405 return 0, 0
405
406
406 def add(self, text, meta, transaction, linkrev, p1, p2):
407 def add(self, text, meta, transaction, linkrev, p1, p2):
407 if meta or text.startswith(b'\1\n'):
408 if meta or text.startswith(b'\1\n'):
408 text = filelog.packmeta(meta, text)
409 text = filelog.packmeta(meta, text)
409
410
410 return self.addrevision(text, transaction, linkrev, p1, p2)
411 return self.addrevision(text, transaction, linkrev, p1, p2)
411
412
412 def addrevision(self, text, transaction, linkrev, p1, p2, node=None,
413 def addrevision(self, text, transaction, linkrev, p1, p2, node=None,
413 flags=0):
414 flags=0):
414 validatenode(p1)
415 validatenode(p1)
415 validatenode(p2)
416 validatenode(p2)
416
417
417 if flags:
418 if flags:
418 node = node or revlog.hash(text, p1, p2)
419 node = node or revlog.hash(text, p1, p2)
419
420
420 rawtext, validatehash = self._processflags(text, flags, 'write')
421 rawtext, validatehash = self._processflags(text, flags, 'write')
421
422
422 node = node or revlog.hash(text, p1, p2)
423 node = node or revlog.hash(text, p1, p2)
423
424
424 if node in self._indexbynode:
425 if node in self._indexbynode:
425 return node
426 return node
426
427
427 if validatehash:
428 if validatehash:
428 self.checkhash(rawtext, node, p1=p1, p2=p2)
429 self.checkhash(rawtext, node, p1=p1, p2=p2)
429
430
430 path = b'/'.join([self._storepath, hex(node)])
431 path = b'/'.join([self._storepath, hex(node)])
431
432
432 self._svfs.write(path, text)
433 self._svfs.write(path, text)
433
434
434 self._indexdata.append({
435 self._indexdata.append({
435 b'node': node,
436 b'node': node,
436 b'p1': p1,
437 b'p1': p1,
437 b'p2': p2,
438 b'p2': p2,
438 b'linkrev': linkrev,
439 b'linkrev': linkrev,
439 b'flags': flags,
440 b'flags': flags,
440 })
441 })
441
442
442 self._reflectindexupdate()
443 self._reflectindexupdate()
443
444
444 return node
445 return node
445
446
446 def _reflectindexupdate(self):
447 def _reflectindexupdate(self):
447 self._refreshindex()
448 self._refreshindex()
448 self._svfs.write(self._indexpath, cbor.dumps(self._indexdata))
449 self._svfs.write(self._indexpath, cbor.dumps(self._indexdata))
449
450
450 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
451 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
451 nodes = []
452 nodes = []
452
453
453 transaction.addbackup(self._indexpath)
454 transaction.addbackup(self._indexpath)
454
455
455 for node, p1, p2, linknode, deltabase, delta, flags in deltas:
456 for node, p1, p2, linknode, deltabase, delta, flags in deltas:
456 linkrev = linkmapper(linknode)
457 linkrev = linkmapper(linknode)
457
458
458 nodes.append(node)
459 nodes.append(node)
459
460
460 if node in self._indexbynode:
461 if node in self._indexbynode:
461 continue
462 continue
462
463
463 # Need to resolve the fulltext from the delta base.
464 # Need to resolve the fulltext from the delta base.
464 if deltabase == nullid:
465 if deltabase == nullid:
465 text = mdiff.patch(b'', delta)
466 text = mdiff.patch(b'', delta)
466 else:
467 else:
467 text = mdiff.patch(self.revision(deltabase), delta)
468 text = mdiff.patch(self.revision(deltabase), delta)
468
469
469 self.addrevision(text, transaction, linkrev, p1, p2, flags)
470 self.addrevision(text, transaction, linkrev, p1, p2, flags)
470
471
471 if addrevisioncb:
472 if addrevisioncb:
472 addrevisioncb(self, node)
473 addrevisioncb(self, node)
473
474
474 return nodes
475 return nodes
475
476
476 def revdiff(self, rev1, rev2):
477 def revdiff(self, rev1, rev2):
477 validaterev(rev1)
478 validaterev(rev1)
478 validaterev(rev2)
479 validaterev(rev2)
479
480
480 node1 = self.node(rev1)
481 node1 = self.node(rev1)
481 node2 = self.node(rev2)
482 node2 = self.node(rev2)
482
483
483 return mdiff.textdiff(self.revision(node1, raw=True),
484 return mdiff.textdiff(self.revision(node1, raw=True),
484 self.revision(node2, raw=True))
485 self.revision(node2, raw=True))
485
486
486 def headrevs(self):
487 def headrevs(self):
487 # Assume all revisions are heads by default.
488 # Assume all revisions are heads by default.
488 revishead = {rev: True for rev in self._indexbyrev}
489 revishead = {rev: True for rev in self._indexbyrev}
489
490
490 for rev, entry in self._indexbyrev.items():
491 for rev, entry in self._indexbyrev.items():
491 # Unset head flag for all seen parents.
492 # Unset head flag for all seen parents.
492 revishead[self.rev(entry[b'p1'])] = False
493 revishead[self.rev(entry[b'p1'])] = False
493 revishead[self.rev(entry[b'p2'])] = False
494 revishead[self.rev(entry[b'p2'])] = False
494
495
495 return [rev for rev, ishead in sorted(revishead.items())
496 return [rev for rev, ishead in sorted(revishead.items())
496 if ishead]
497 if ishead]
497
498
498 def heads(self, start=None, stop=None):
499 def heads(self, start=None, stop=None):
499 # This is copied from revlog.py.
500 # This is copied from revlog.py.
500 if start is None and stop is None:
501 if start is None and stop is None:
501 if not len(self):
502 if not len(self):
502 return [nullid]
503 return [nullid]
503 return [self.node(r) for r in self.headrevs()]
504 return [self.node(r) for r in self.headrevs()]
504
505
505 if start is None:
506 if start is None:
506 start = nullid
507 start = nullid
507 if stop is None:
508 if stop is None:
508 stop = []
509 stop = []
509 stoprevs = set([self.rev(n) for n in stop])
510 stoprevs = set([self.rev(n) for n in stop])
510 startrev = self.rev(start)
511 startrev = self.rev(start)
511 reachable = {startrev}
512 reachable = {startrev}
512 heads = {startrev}
513 heads = {startrev}
513
514
514 parentrevs = self.parentrevs
515 parentrevs = self.parentrevs
515 for r in self.revs(start=startrev + 1):
516 for r in self.revs(start=startrev + 1):
516 for p in parentrevs(r):
517 for p in parentrevs(r):
517 if p in reachable:
518 if p in reachable:
518 if r not in stoprevs:
519 if r not in stoprevs:
519 reachable.add(r)
520 reachable.add(r)
520 heads.add(r)
521 heads.add(r)
521 if p in heads and p not in stoprevs:
522 if p in heads and p not in stoprevs:
522 heads.remove(p)
523 heads.remove(p)
523
524
524 return [self.node(r) for r in heads]
525 return [self.node(r) for r in heads]
525
526
526 def children(self, node):
527 def children(self, node):
527 validatenode(node)
528 validatenode(node)
528
529
529 # This is a copy of revlog.children().
530 # This is a copy of revlog.children().
530 c = []
531 c = []
531 p = self.rev(node)
532 p = self.rev(node)
532 for r in self.revs(start=p + 1):
533 for r in self.revs(start=p + 1):
533 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
534 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
534 if prevs:
535 if prevs:
535 for pr in prevs:
536 for pr in prevs:
536 if pr == p:
537 if pr == p:
537 c.append(self.node(r))
538 c.append(self.node(r))
538 elif p == nullrev:
539 elif p == nullrev:
539 c.append(self.node(r))
540 c.append(self.node(r))
540 return c
541 return c
541
542
542 def getstrippoint(self, minlink):
543 def getstrippoint(self, minlink):
543
544
544 # This is largely a copy of revlog.getstrippoint().
545 # This is largely a copy of revlog.getstrippoint().
545 brokenrevs = set()
546 brokenrevs = set()
546 strippoint = len(self)
547 strippoint = len(self)
547
548
548 heads = {}
549 heads = {}
549 futurelargelinkrevs = set()
550 futurelargelinkrevs = set()
550 for head in self.headrevs():
551 for head in self.headrevs():
551 headlinkrev = self.linkrev(head)
552 headlinkrev = self.linkrev(head)
552 heads[head] = headlinkrev
553 heads[head] = headlinkrev
553 if headlinkrev >= minlink:
554 if headlinkrev >= minlink:
554 futurelargelinkrevs.add(headlinkrev)
555 futurelargelinkrevs.add(headlinkrev)
555
556
556 # This algorithm involves walking down the rev graph, starting at the
557 # This algorithm involves walking down the rev graph, starting at the
557 # heads. Since the revs are topologically sorted according to linkrev,
558 # heads. Since the revs are topologically sorted according to linkrev,
558 # once all head linkrevs are below the minlink, we know there are
559 # once all head linkrevs are below the minlink, we know there are
559 # no more revs that could have a linkrev greater than minlink.
560 # no more revs that could have a linkrev greater than minlink.
560 # So we can stop walking.
561 # So we can stop walking.
561 while futurelargelinkrevs:
562 while futurelargelinkrevs:
562 strippoint -= 1
563 strippoint -= 1
563 linkrev = heads.pop(strippoint)
564 linkrev = heads.pop(strippoint)
564
565
565 if linkrev < minlink:
566 if linkrev < minlink:
566 brokenrevs.add(strippoint)
567 brokenrevs.add(strippoint)
567 else:
568 else:
568 futurelargelinkrevs.remove(linkrev)
569 futurelargelinkrevs.remove(linkrev)
569
570
570 for p in self.parentrevs(strippoint):
571 for p in self.parentrevs(strippoint):
571 if p != nullrev:
572 if p != nullrev:
572 plinkrev = self.linkrev(p)
573 plinkrev = self.linkrev(p)
573 heads[p] = plinkrev
574 heads[p] = plinkrev
574 if plinkrev >= minlink:
575 if plinkrev >= minlink:
575 futurelargelinkrevs.add(plinkrev)
576 futurelargelinkrevs.add(plinkrev)
576
577
577 return strippoint, brokenrevs
578 return strippoint, brokenrevs
578
579
579 def strip(self, minlink, transaction):
580 def strip(self, minlink, transaction):
580 if not len(self):
581 if not len(self):
581 return
582 return
582
583
583 rev, _ignored = self.getstrippoint(minlink)
584 rev, _ignored = self.getstrippoint(minlink)
584 if rev == len(self):
585 if rev == len(self):
585 return
586 return
586
587
587 # Purge index data starting at the requested revision.
588 # Purge index data starting at the requested revision.
588 self._indexdata[rev:] = []
589 self._indexdata[rev:] = []
589 self._reflectindexupdate()
590 self._reflectindexupdate()
590
591
591 def issimplestorefile(f, kind, st):
592 def issimplestorefile(f, kind, st):
592 if kind != stat.S_IFREG:
593 if kind != stat.S_IFREG:
593 return False
594 return False
594
595
595 if store.isrevlog(f, kind, st):
596 if store.isrevlog(f, kind, st):
596 return False
597 return False
597
598
598 # Ignore transaction undo files.
599 # Ignore transaction undo files.
599 if f.startswith('undo.'):
600 if f.startswith('undo.'):
600 return False
601 return False
601
602
602 # Otherwise assume it belongs to the simple store.
603 # Otherwise assume it belongs to the simple store.
603 return True
604 return True
604
605
605 class simplestore(store.encodedstore):
606 class simplestore(store.encodedstore):
606 def datafiles(self):
607 def datafiles(self):
607 for x in super(simplestore, self).datafiles():
608 for x in super(simplestore, self).datafiles():
608 yield x
609 yield x
609
610
610 # Supplement with non-revlog files.
611 # Supplement with non-revlog files.
611 extrafiles = self._walk('data', True, filefilter=issimplestorefile)
612 extrafiles = self._walk('data', True, filefilter=issimplestorefile)
612
613
613 for unencoded, encoded, size in extrafiles:
614 for unencoded, encoded, size in extrafiles:
614 try:
615 try:
615 unencoded = store.decodefilename(unencoded)
616 unencoded = store.decodefilename(unencoded)
616 except KeyError:
617 except KeyError:
617 unencoded = None
618 unencoded = None
618
619
619 yield unencoded, encoded, size
620 yield unencoded, encoded, size
620
621
621 def reposetup(ui, repo):
622 def reposetup(ui, repo):
622 if not repo.local():
623 if not repo.local():
623 return
624 return
624
625
625 if isinstance(repo, bundlerepo.bundlerepository):
626 if isinstance(repo, bundlerepo.bundlerepository):
626 raise error.Abort(_('cannot use simple store with bundlerepo'))
627 raise error.Abort(_('cannot use simple store with bundlerepo'))
627
628
628 class simplestorerepo(repo.__class__):
629 class simplestorerepo(repo.__class__):
629 def file(self, f):
630 def file(self, f):
630 return filestorage(self.svfs, f)
631 return filestorage(self.svfs, f)
631
632
632 repo.__class__ = simplestorerepo
633 repo.__class__ = simplestorerepo
633
634
634 def featuresetup(ui, supported):
635 def featuresetup(ui, supported):
635 supported.add(REQUIREMENT)
636 supported.add(REQUIREMENT)
636
637
637 def newreporequirements(orig, repo):
638 def newreporequirements(orig, repo):
638 """Modifies default requirements for new repos to use the simple store."""
639 """Modifies default requirements for new repos to use the simple store."""
639 requirements = orig(repo)
640 requirements = orig(repo)
640
641
641 # These requirements are only used to affect creation of the store
642 # These requirements are only used to affect creation of the store
642 # object. We have our own store. So we can remove them.
643 # object. We have our own store. So we can remove them.
643 # TODO do this once we feel like taking the test hit.
644 # TODO do this once we feel like taking the test hit.
644 #if 'fncache' in requirements:
645 #if 'fncache' in requirements:
645 # requirements.remove('fncache')
646 # requirements.remove('fncache')
646 #if 'dotencode' in requirements:
647 #if 'dotencode' in requirements:
647 # requirements.remove('dotencode')
648 # requirements.remove('dotencode')
648
649
649 requirements.add(REQUIREMENT)
650 requirements.add(REQUIREMENT)
650
651
651 return requirements
652 return requirements
652
653
653 def makestore(orig, requirements, path, vfstype):
654 def makestore(orig, requirements, path, vfstype):
654 if REQUIREMENT not in requirements:
655 if REQUIREMENT not in requirements:
655 return orig(requirements, path, vfstype)
656 return orig(requirements, path, vfstype)
656
657
657 return simplestore(path, vfstype)
658 return simplestore(path, vfstype)
658
659
660 def verifierinit(orig, self, *args, **kwargs):
661 orig(self, *args, **kwargs)
662
663 # We don't care that files in the store don't align with what is
664 # advertised. So suppress these warnings.
665 self.warnorphanstorefiles = False
666
659 def extsetup(ui):
667 def extsetup(ui):
660 localrepo.featuresetupfuncs.add(featuresetup)
668 localrepo.featuresetupfuncs.add(featuresetup)
661
669
662 extensions.wrapfunction(localrepo, 'newreporequirements',
670 extensions.wrapfunction(localrepo, 'newreporequirements',
663 newreporequirements)
671 newreporequirements)
664 extensions.wrapfunction(store, 'store', makestore)
672 extensions.wrapfunction(store, 'store', makestore)
673 extensions.wrapfunction(verify.verifier, '__init__', verifierinit)
General Comments 0
You need to be logged in to leave comments. Login now