##// END OF EJS Templates
verify: replace "output parameters" by return values...
Martin von Zweigbergk -
r27695:fb0cc863 default
parent child Browse files
Show More
@@ -1,386 +1,387
1 1 # verify.py - repository integrity checking for Mercurial
2 2 #
3 3 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11
12 12 from .i18n import _
13 13 from .node import (
14 14 nullid,
15 15 short,
16 16 )
17 17
18 18 from . import (
19 19 error,
20 20 revlog,
21 21 util,
22 22 )
23 23
def verify(repo):
    """Check the integrity of ``repo`` while holding its lock.

    The lock obtained from ``repo.lock()`` is released even when the
    check raises.  The return value is whatever ``verifier.verify()``
    returns.
    """
    repolock = repo.lock()
    try:
        checker = verifier(repo)
        return checker.verify()
    finally:
        repolock.release()
30 30
31 31 def _normpath(f):
32 32 # under hg < 2.4, convert didn't sanitize paths properly, so a
33 33 # converted repo may contain repeated slashes
34 34 while '//' in f:
35 35 f = f.replace('//', '/')
36 36 return f
37 37
38 38 def _validpath(repo, path):
39 39 """Returns False if a path should NOT be treated as part of a repo.
40 40
41 41 For all in-core cases, this returns True, as we have no way for a
42 42 path to be mentioned in the history but not actually be
43 43 relevant. For narrow clones, this is important because many
44 44 filelogs will be missing, and changelog entries may mention
45 45 modified files that are outside the narrow scope.
46 46 """
47 47 return True
48 48
class verifier(object):
    """Run the repository integrity checks and tally what they find.

    Problems are reported through ``repo.ui``.  The instance keeps
    running counts of errors and warnings, plus the set of changelog
    revisions that errors were attributed to (``badrevs``).
    """

    def __init__(self, repo):
        """Capture ``repo`` (unfiltered) and reset all counters."""
        self.repo = repo.unfiltered()
        self.ui = repo.ui
        self.badrevs = set()
        self.errors = 0
        self.warnings = 0
        self.havecl = len(repo.changelog) > 0
        self.havemf = len(repo.manifest) > 0
        self.revlogv1 = repo.changelog.version != revlog.REVLOGV0
        self.lrugetctx = util.lrucachefunc(repo.changectx)
        # set once a changeset references a non-null manifest, or once a
        # changeset could not be unpacked (see _verifychangelog)
        self.refersmf = False
        # set once a revlog is found that the store did not list
        self.fncachewarned = False

    def warn(self, msg):
        """Emit ``msg`` as a warning line and count it."""
        self.ui.warn(msg + "\n")
        self.warnings += 1

    def err(self, linkrev, msg, filename=None):
        """Report an integrity error and count it.

        ``linkrev`` is recorded in ``badrevs`` unless it is None, in
        which case '?' is printed in its place.  When ``filename`` is
        given the message is prefixed with ``filename@``.
        """
        if linkrev is not None:
            self.badrevs.add(linkrev)
        else:
            linkrev = '?'
        msg = "%s: %s" % (linkrev, msg)
        if filename:
            msg = "%s@%s" % (filename, msg)
        self.ui.warn(" " + msg + "\n")
        self.errors += 1

    def exc(self, linkrev, msg, inst, filename=None):
        """Report exception ``inst`` as an error prefixed by ``msg``.

        Exceptions whose ``str()`` is empty are shown via ``repr()`` so
        the report is never blank.
        """
        if not str(inst):
            inst = repr(inst)
        self.err(linkrev, "%s: %s" % (msg, inst), filename)

    def checklog(self, obj, name, linkrev):
        """Run the whole-revlog sanity checks on ``obj``.

        ``name`` labels the revlog in messages; ``linkrev`` is the
        revision an "empty or missing" error is attributed to.
        """
        if not len(obj) and (self.havecl or self.havemf):
            self.err(linkrev, _("empty or missing %s") % name)
            return

        d = obj.checksize()
        if d[0]:
            self.err(None, _("data length off by %d bytes") % d[0], name)
        if d[1]:
            self.err(None, _("index contains %d extra bytes") % d[1], name)

        # warn when this revlog's format differs from the changelog's
        if obj.version != revlog.REVLOGV0:
            if not self.revlogv1:
                self.warn(_("warning: `%s' uses revlog format 1") % name)
        elif self.revlogv1:
            self.warn(_("warning: `%s' uses revlog format 0") % name)

    def checkentry(self, obj, i, node, seen, linkrevs, f):
        """Check linkrev, parents and uniqueness of one revlog entry.

        ``obj`` is the revlog being checked, ``i`` the revision number
        used in messages and ``node`` the entry's node.  ``seen`` maps
        already-visited nodes to their revision and is updated in place.
        ``linkrevs`` lists the changelog revisions the entry is expected
        to link to, and ``f`` names the revlog in messages.

        Returns the entry's linkrev, or None when the linkrev was found
        to be bogus and cannot be trusted.
        """
        lr = obj.linkrev(obj.rev(node))
        if lr < 0 or (self.havecl and lr not in linkrevs):
            if lr < 0 or lr >= len(self.repo.changelog):
                msg = _("rev %d points to nonexistent changeset %d")
            else:
                msg = _("rev %d points to unexpected changeset %d")
            self.err(None, msg % (i, lr), f)
            if linkrevs:
                if f and len(linkrevs) > 1:
                    try:
                        # attempt to filter down to real linkrevs
                        linkrevs = [l for l in linkrevs
                                    if self.lrugetctx(l)[f].filenode() == node]
                    except Exception:
                        # best effort only: fall back to the full list
                        pass
                self.warn(_(" (expected %s)") % " ".join(map(str, linkrevs)))
            lr = None # can't be trusted

        try:
            p1, p2 = obj.parents(node)
            if p1 not in seen and p1 != nullid:
                self.err(lr, _("unknown parent 1 %s of %s") %
                         (short(p1), short(node)), f)
            if p2 not in seen and p2 != nullid:
                self.err(lr, _("unknown parent 2 %s of %s") %
                         (short(p2), short(node)), f)
        except Exception as inst:
            self.exc(lr, _("checking parents of %s") % short(node), inst, f)

        if node in seen:
            self.err(lr, _("duplicate revision %d (%d)") % (i, seen[node]), f)
        seen[node] = i
        return lr

    def verify(self):
        """Run every verification phase and print a summary.

        Raises ``error.Abort`` when the repository URL does not start
        with 'file:'.  Returns 1 when integrity errors were found and
        None otherwise.
        """
        repo = self.repo

        ui = repo.ui

        if not repo.url().startswith('file:'):
            raise error.Abort(_("cannot verify bundle or remote repos"))

        if os.path.exists(repo.sjoin("journal")):
            ui.warn(_("abandoned transaction found - run hg recover\n"))

        if ui.verbose or not self.revlogv1:
            ui.status(_("repository uses revlog format %d\n") %
                      (1 if self.revlogv1 else 0))

        mflinkrevs, filelinkrevs = self._verifychangelog()

        # note: _verifymanifest() also removes from mflinkrevs every
        # manifest node it finds, so only unknown ones are left for the
        # cross-check below
        filenodes = self._verifymanifest(mflinkrevs)

        self._crosscheckfiles(mflinkrevs, filelinkrevs, filenodes)

        totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)

        ui.status(_("%d files, %d changesets, %d total revisions\n") %
                  (totalfiles, len(repo.changelog), filerevisions))
        if self.warnings:
            ui.warn(_("%d warnings encountered!\n") % self.warnings)
        if self.fncachewarned:
            ui.warn(_('hint: run "hg debugrebuildfncache" to recover from '
                      'corrupt fncache\n'))
        if self.errors:
            ui.warn(_("%d integrity errors encountered!\n") % self.errors)
            if self.badrevs:
                ui.warn(_("(first damaged changeset appears to be %d)\n")
                        % min(self.badrevs))
            return 1

    def _verifychangelog(self):
        """Check every changelog entry and collect what it references.

        Returns ``(mflinkrevs, filelinkrevs)``: the first maps each
        non-null manifest node to the changelog revisions naming it, the
        second maps each normalized file path to the changelog revisions
        listing it.
        """
        ui = self.ui
        repo = self.repo
        cl = repo.changelog

        ui.status(_("checking changesets\n"))
        mflinkrevs = {}
        filelinkrevs = {}
        seen = {}
        self.checklog(cl, "changelog", 0)
        total = len(repo)
        for i in repo:
            ui.progress(_('checking'), i, total=total, unit=_('changesets'))
            n = cl.node(i)
            self.checkentry(cl, i, n, seen, [i], "changelog")

            try:
                changes = cl.read(n)
                if changes[0] != nullid:
                    mflinkrevs.setdefault(changes[0], []).append(i)
                    self.refersmf = True
                for f in changes[3]:
                    if _validpath(repo, f):
                        filelinkrevs.setdefault(_normpath(f), []).append(i)
            except Exception as inst:
                # the changeset could not be read, so it may well have
                # referenced a manifest
                self.refersmf = True
                self.exc(i, _("unpacking changeset %s") % short(n), inst)
        ui.progress(_('checking'), None)
        return mflinkrevs, filelinkrevs

    def _verifymanifest(self, mflinkrevs):
        """Check every manifest entry and collect the file nodes it names.

        ``mflinkrevs`` is modified in place: each manifest node found in
        the manifest log is removed from it.

        Returns ``filenodes``, mapping each normalized file path to a
        dict of ``{filenode: linkrev}``.
        """
        repo = self.repo
        ui = self.ui
        mf = self.repo.manifest

        ui.status(_("checking manifests\n"))
        filenodes = {}
        seen = {}
        if self.refersmf:
            # Do not check manifest if there are only changelog entries with
            # null manifests.
            self.checklog(mf, "manifest", 0)
        total = len(mf)
        for i in mf:
            ui.progress(_('checking'), i, total=total, unit=_('manifests'))
            n = mf.node(i)
            lr = self.checkentry(mf, i, n, seen, mflinkrevs.get(n, []),
                                 "manifest")
            if n in mflinkrevs:
                del mflinkrevs[n]
            else:
                self.err(lr, _("%s not in changesets") % short(n), "manifest")

            try:
                for f, fn in mf.readdelta(n).iteritems():
                    if not f:
                        self.err(lr, _("file without name in manifest"))
                    elif f != "/dev/null": # ignore this in very old repos
                        if _validpath(repo, f):
                            filenodes.setdefault(
                                _normpath(f), {}).setdefault(fn, lr)
            except Exception as inst:
                self.exc(lr, _("reading manifest delta %s") % short(n), inst)
        ui.progress(_('checking'), None)

        return filenodes

    def _crosscheckfiles(self, mflinkrevs, filelinkrevs, filenodes):
        """Report mismatches between changelog and manifest contents.

        Errors are raised for manifest nodes still present in
        ``mflinkrevs``, for files listed in changesets but absent from
        ``filenodes``, and for files in ``filenodes`` that no changeset
        lists.
        """
        repo = self.repo
        ui = self.ui
        ui.status(_("crosschecking files in changesets and manifests\n"))

        total = len(mflinkrevs) + len(filelinkrevs) + len(filenodes)
        count = 0
        if self.havemf:
            for c, m in sorted([(c, m) for m in mflinkrevs
                                for c in mflinkrevs[m]]):
                count += 1
                if m == nullid:
                    continue
                ui.progress(_('crosschecking'), count, total=total)
                self.err(c, _("changeset refers to unknown manifest %s") %
                         short(m))
            mflinkrevs = None # del is bad here due to scope issues

            for f in sorted(filelinkrevs):
                count += 1
                ui.progress(_('crosschecking'), count, total=total)
                if f not in filenodes:
                    lr = filelinkrevs[f][0]
                    self.err(lr, _("in changeset but not in manifest"), f)

        if self.havecl:
            for f in sorted(filenodes):
                count += 1
                ui.progress(_('crosschecking'), count, total=total)
                if f not in filelinkrevs:
                    try:
                        fl = repo.file(f)
                        lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
                    except Exception:
                        # the filelog is unusable; report without a linkrev
                        lr = None
                    self.err(lr, _("in manifest but not in changeset"), f)

        ui.progress(_('crosschecking'), None)

    def _verifyfiles(self, filenodes, filelinkrevs):
        """Check every filelog named by the manifests or changesets.

        ``filenodes`` is modified in place: each file node found in its
        filelog is removed, so whatever remains for a file is reported
        as missing.

        Returns ``(number of files checked, total file revisions seen)``.
        """
        repo = self.repo
        ui = self.ui
        lrugetctx = self.lrugetctx
        revlogv1 = self.revlogv1
        havemf = self.havemf
        ui.status(_("checking files\n"))

        # every store file is expected to be claimed by some filelog
        # below; the leftovers are reported as orphans at the end
        storefiles = set()
        for f, f2, size in repo.store.datafiles():
            if not f:
                self.err(None, _("cannot decode filename '%s'") % f2)
            elif size > 0 or not revlogv1:
                storefiles.add(_normpath(f))

        files = sorted(set(filenodes) | set(filelinkrevs))
        total = len(files)
        revisions = 0
        for i, f in enumerate(files):
            ui.progress(_('checking'), i, item=f, total=total)
            try:
                linkrevs = filelinkrevs[f]
            except KeyError:
                # in manifest but not in changelog
                linkrevs = []

            if linkrevs:
                lr = linkrevs[0]
            else:
                lr = None

            try:
                fl = repo.file(f)
            except error.RevlogError as e:
                self.err(lr, _("broken revlog! (%s)") % e, f)
                continue

            for ff in fl.files():
                try:
                    storefiles.remove(ff)
                except KeyError:
                    self.warn(_(" warning: revlog '%s' not in fncache!") % ff)
                    self.fncachewarned = True

            self.checklog(fl, f, lr)
            seen = {}
            # a distinct name keeps the file index ``i`` above intact
            for rev in fl:
                revisions += 1
                n = fl.node(rev)
                lr = self.checkentry(fl, rev, n, seen, linkrevs, f)
                if f in filenodes:
                    if havemf and n not in filenodes[f]:
                        self.err(lr, _("%s not in manifests") % (short(n)), f)
                    else:
                        del filenodes[f][n]

                # reset per revision so a failed read below cannot make
                # the rename check reuse the previous revision's data
                rp = None

                # verify contents
                try:
                    l = len(fl.read(n))
                    rp = fl.renamed(n)
                    if l != fl.size(rev):
                        if len(fl.revision(n)) != fl.size(rev):
                            self.err(lr, _("unpacked size is %s, %s expected") %
                                     (l, fl.size(rev)), f)
                except error.CensoredNodeError:
                    # experimental config: censor.policy
                    if ui.config("censor", "policy", "abort") == "abort":
                        self.err(lr, _("censored file data"), f)
                except Exception as inst:
                    self.exc(lr, _("unpacking %s") % short(n), inst, f)

                # check renames
                try:
                    if rp:
                        if lr is not None and ui.verbose:
                            ctx = lrugetctx(lr)
                            found = False
                            for pctx in ctx.parents():
                                if rp[0] in pctx:
                                    found = True
                                    break
                            if not found:
                                self.warn(_("warning: copy source of '%s' not"
                                            " in parents of %s") % (f, ctx))
                        fl2 = repo.file(rp[0])
                        if not len(fl2):
                            self.err(lr, _("empty or missing copy source "
                                           "revlog %s:%s")
                                     % (rp[0], short(rp[1])), f)
                        elif rp[1] == nullid:
                            ui.note(_("warning: %s@%s: copy source"
                                      " revision is nullid %s:%s\n")
                                    % (f, lr, rp[0], short(rp[1])))
                        else:
                            # raises if the copy source revision is missing
                            fl2.rev(rp[1])
                except Exception as inst:
                    self.exc(lr, _("checking rename of %s") % short(n), inst, f)

            # cross-check
            if f in filenodes:
                # anything still listed was named by a manifest but never
                # found in the filelog
                fns = [(mlr, fnode) for fnode, mlr in filenodes[f].items()]
                for mlr, fnode in sorted(fns):
                    self.err(mlr, _("%s in manifests not found") % short(fnode),
                             f)
        ui.progress(_('checking'), None)

        for f in storefiles:
            self.warn(_("warning: orphan revlog '%s'") % f)

        return len(files), revisions
General Comments 0
You need to be logged in to leave comments. Login now