##// END OF EJS Templates
verify: fix exception formatting bug in Python 3...
Augie Fackler -
r36595:d85ef895 default
parent child Browse files
Show More
@@ -1,486 +1,487
1 1 # verify.py - repository integrity checking for Mercurial
2 2 #
3 3 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11
12 12 from .i18n import _
13 13 from .node import (
14 14 nullid,
15 15 short,
16 16 )
17 17
18 18 from . import (
19 19 error,
20 20 pycompat,
21 21 revlog,
22 22 scmutil,
23 23 util,
24 24 )
25 25
def verify(repo):
    """Run a full integrity check of *repo* while holding its lock.

    Returns whatever ``verifier.verify()`` returns.
    """
    lock = repo.lock()
    with lock:
        checker = verifier(repo)
        return checker.verify()
29 29
30 30 def _normpath(f):
31 31 # under hg < 2.4, convert didn't sanitize paths properly, so a
32 32 # converted repo may contain repeated slashes
33 33 while '//' in f:
34 34 f = f.replace('//', '/')
35 35 return f
36 36
class verifier(object):
    """Check the changelog, manifests and filelogs of a repository.

    Problems are reported through the repository's ``ui``; errors and
    warnings are counted on the instance (``errors`` / ``warnings``) and
    the changelog revisions associated with errors are collected in
    ``badrevs``.
    """

    # The match argument is always None in hg core, but e.g. the narrowhg
    # extension will pass in a matcher here.
    def __init__(self, repo, match=None):
        """Set up verification state for *repo*.

        ``match`` restricts which files/directories are checked; when it
        is falsy a match-everything matcher is used.
        """
        self.repo = repo.unfiltered()
        self.ui = repo.ui
        self.match = match or scmutil.matchall(repo)
        # changelog revisions that errors were attributed to
        self.badrevs = set()
        self.errors = 0
        self.warnings = 0
        self.havecl = len(repo.changelog) > 0
        self.havemf = len(repo.manifestlog._revlog) > 0
        self.revlogv1 = repo.changelog.version != revlog.REVLOGV0
        self.lrugetctx = util.lrucachefunc(repo.changectx)
        # set once any changeset refers to a non-null manifest (or fails
        # to unpack), see _verifychangelog()
        self.refersmf = False
        self.fncachewarned = False
        # developer config: verify.skipflags
        self.skipflags = repo.ui.configint('verify', 'skipflags')

    def warn(self, msg):
        """Emit *msg* (newline appended) as a warning and count it."""
        self.ui.warn(msg + "\n")
        self.warnings += 1

    def err(self, linkrev, msg, filename=None):
        """Report an integrity error and count it.

        ``linkrev`` is the changelog revision the problem is attributed
        to, or None when unknown (printed as ``?``). A known linkrev is
        also recorded in ``self.badrevs``. When ``filename`` is given the
        message is prefixed with ``filename@``.
        """
        if linkrev is not None:
            self.badrevs.add(linkrev)
            linkrev = "%d" % linkrev
        else:
            linkrev = '?'
        msg = "%s: %s" % (linkrev, msg)
        if filename:
            msg = "%s@%s" % (filename, msg)
        self.ui.warn(" " + msg + "\n")
        self.errors += 1

    def exc(self, linkrev, msg, inst, filename=None):
        """Report the exception *inst* as an integrity error.

        The exception is formatted via ``pycompat.bytestr``; if that
        yields an empty value, ``pycompat.byterepr`` is used instead so
        the report is never blank. The error is reported exactly once.
        """
        fmsg = pycompat.bytestr(inst)
        if not fmsg:
            fmsg = pycompat.byterepr(inst)
        self.err(linkrev, "%s: %s" % (msg, fmsg), filename)

    def checklog(self, obj, name, linkrev):
        """Run basic sanity checks on the revlog *obj* labelled *name*.

        Reports an empty/missing revlog (when the repo has a changelog or
        manifest), size inconsistencies returned by ``obj.checksize()``,
        and a revlog format that disagrees with the changelog's.
        """
        if not len(obj) and (self.havecl or self.havemf):
            self.err(linkrev, _("empty or missing %s") % name)
            return

        d = obj.checksize()
        if d[0]:
            self.err(None, _("data length off by %d bytes") % d[0], name)
        if d[1]:
            self.err(None, _("index contains %d extra bytes") % d[1], name)

        if obj.version != revlog.REVLOGV0:
            if not self.revlogv1:
                self.warn(_("warning: `%s' uses revlog format 1") % name)
        elif self.revlogv1:
            self.warn(_("warning: `%s' uses revlog format 0") % name)

    def checkentry(self, obj, i, node, seen, linkrevs, f):
        """Check revision *i* (*node*) of revlog *obj*, labelled *f*.

        Verifies the linkrev against the expected ``linkrevs``, that both
        parents were already seen (or are null), and that the node is not
        a duplicate. ``seen`` maps node -> revision and is updated.

        Returns the linkrev, or None when it cannot be trusted.
        """
        lr = obj.linkrev(obj.rev(node))
        if lr < 0 or (self.havecl and lr not in linkrevs):
            if lr < 0 or lr >= len(self.repo.changelog):
                msg = _("rev %d points to nonexistent changeset %d")
            else:
                msg = _("rev %d points to unexpected changeset %d")
            self.err(None, msg % (i, lr), f)
            if linkrevs:
                if f and len(linkrevs) > 1:
                    try:
                        # attempt to filter down to real linkrevs
                        linkrevs = [l for l in linkrevs
                                    if self.lrugetctx(l)[f].filenode() == node]
                    except Exception:
                        # best effort only: keep the unfiltered list
                        pass
                self.warn(_(" (expected %s)") % " ".join
                          (map(pycompat.bytestr, linkrevs)))
            lr = None  # can't be trusted

        try:
            p1, p2 = obj.parents(node)
            if p1 not in seen and p1 != nullid:
                self.err(lr, _("unknown parent 1 %s of %s") %
                         (short(p1), short(node)), f)
            if p2 not in seen and p2 != nullid:
                self.err(lr, _("unknown parent 2 %s of %s") %
                         (short(p2), short(node)), f)
        except Exception as inst:
            self.exc(lr, _("checking parents of %s") % short(node), inst, f)

        if node in seen:
            self.err(lr, _("duplicate revision %d (%d)") % (i, seen[node]), f)
        seen[node] = i
        return lr

    def verify(self):
        """Verify the whole repository and print a summary.

        Returns 1 when integrity errors were found, otherwise None.
        Raises ``error.Abort`` when the repository URL is not ``file:``.
        """
        repo = self.repo

        ui = repo.ui

        if not repo.url().startswith('file:'):
            raise error.Abort(_("cannot verify bundle or remote repos"))

        if os.path.exists(repo.sjoin("journal")):
            ui.warn(_("abandoned transaction found - run hg recover\n"))

        if ui.verbose or not self.revlogv1:
            ui.status(_("repository uses revlog format %d\n") %
                      (1 if self.revlogv1 else 0))

        mflinkrevs, filelinkrevs = self._verifychangelog()

        filenodes = self._verifymanifest(mflinkrevs)
        del mflinkrevs

        self._crosscheckfiles(filelinkrevs, filenodes)

        totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)

        ui.status(_("%d files, %d changesets, %d total revisions\n") %
                  (totalfiles, len(repo.changelog), filerevisions))
        if self.warnings:
            ui.warn(_("%d warnings encountered!\n") % self.warnings)
        if self.fncachewarned:
            ui.warn(_('hint: run "hg debugrebuildfncache" to recover from '
                      'corrupt fncache\n'))
        if self.errors:
            ui.warn(_("%d integrity errors encountered!\n") % self.errors)
            if self.badrevs:
                ui.warn(_("(first damaged changeset appears to be %d)\n")
                        % min(self.badrevs))
            return 1

    def _verifychangelog(self):
        """Check every changeset and collect what it refers to.

        Returns ``(mflinkrevs, filelinkrevs)``: a dict mapping manifest
        node -> list of changelog revs referring to it, and a dict
        mapping normalized file path -> list of changelog revs touching
        it (restricted to files accepted by ``self.match``).
        """
        ui = self.ui
        repo = self.repo
        match = self.match
        cl = repo.changelog

        ui.status(_("checking changesets\n"))
        mflinkrevs = {}
        filelinkrevs = {}
        seen = {}
        self.checklog(cl, "changelog", 0)
        total = len(repo)
        for i in repo:
            ui.progress(_('checking'), i, total=total, unit=_('changesets'))
            n = cl.node(i)
            self.checkentry(cl, i, n, seen, [i], "changelog")

            try:
                changes = cl.read(n)
                if changes[0] != nullid:
                    mflinkrevs.setdefault(changes[0], []).append(i)
                    self.refersmf = True
                for f in changes[3]:
                    if match(f):
                        filelinkrevs.setdefault(_normpath(f), []).append(i)
            except Exception as inst:
                self.refersmf = True
                self.exc(i, _("unpacking changeset %s") % short(n), inst)
        ui.progress(_('checking'), None)
        return mflinkrevs, filelinkrevs

    def _verifymanifest(self, mflinkrevs, dir="", storefiles=None,
                        progress=None):
        """Check the manifest revlog for *dir* and recurse into subdirs.

        ``mflinkrevs`` maps manifest node -> expected linkrevs; entries
        are removed as the corresponding manifest revisions are found.
        ``storefiles`` and ``progress`` are only supplied on recursive
        calls for a sub-directory (non-empty ``dir``).

        Returns a dict mapping file path -> {filenode: linkrev}.
        """
        repo = self.repo
        ui = self.ui
        match = self.match
        mfl = self.repo.manifestlog
        mf = mfl._revlog.dirlog(dir)

        if not dir:
            self.ui.status(_("checking manifests\n"))

        filenodes = {}
        subdirnodes = {}
        seen = {}
        label = "manifest"
        if dir:
            label = dir
            revlogfiles = mf.files()
            # these store files are accounted for, so they are not orphans
            storefiles.difference_update(revlogfiles)
            if progress:  # should be true since we're in a subdirectory
                progress()
        if self.refersmf:
            # Do not check manifest if there are only changelog entries with
            # null manifests.
            self.checklog(mf, label, 0)
        total = len(mf)
        for i in mf:
            if not dir:
                ui.progress(_('checking'), i, total=total, unit=_('manifests'))
            n = mf.node(i)
            lr = self.checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
            if n in mflinkrevs:
                del mflinkrevs[n]
            elif dir:
                self.err(lr, _("%s not in parent-directory manifest") %
                         short(n), label)
            else:
                self.err(lr, _("%s not in changesets") % short(n), label)

            try:
                mfdelta = mfl.get(dir, n).readdelta(shallow=True)
                for f, fn, fl in mfdelta.iterentries():
                    if not f:
                        self.err(lr, _("entry without name in manifest"))
                    elif f == "/dev/null":  # ignore this in very old repos
                        continue
                    fullpath = dir + _normpath(f)
                    if fl == 't':
                        if not match.visitdir(fullpath):
                            continue
                        subdirnodes.setdefault(fullpath + '/', {}).setdefault(
                            fn, []).append(lr)
                    else:
                        if not match(fullpath):
                            continue
                        filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
            except Exception as inst:
                self.exc(lr, _("reading delta %s") % short(n), inst, label)
        if not dir:
            ui.progress(_('checking'), None)

        if self.havemf:
            # whatever is left in mflinkrevs was referenced but never found
            for c, m in sorted([(c, m) for m in mflinkrevs
                                for c in mflinkrevs[m]]):
                if dir:
                    self.err(c, _("parent-directory manifest refers to unknown "
                                  "revision %s") % short(m), label)
                else:
                    self.err(c, _("changeset refers to unknown revision %s") %
                             short(m), label)

        if not dir and subdirnodes:
            self.ui.status(_("checking directory manifests\n"))
            storefiles = set()
            subdirs = set()
            revlogv1 = self.revlogv1
            for f, f2, size in repo.store.datafiles():
                if not f:
                    self.err(None, _("cannot decode filename '%s'") % f2)
                elif (size > 0 or not revlogv1) and f.startswith('meta/'):
                    storefiles.add(_normpath(f))
                    subdirs.add(os.path.dirname(f))
            subdircount = len(subdirs)
            # one-element list so the closure below can mutate the counter
            currentsubdir = [0]
            def progress():
                currentsubdir[0] += 1
                ui.progress(_('checking'), currentsubdir[0], total=subdircount,
                            unit=_('manifests'))

        for subdir, linkrevs in subdirnodes.items():
            subdirfilenodes = self._verifymanifest(linkrevs, subdir, storefiles,
                                                   progress)
            for f, onefilenodes in subdirfilenodes.items():
                filenodes.setdefault(f, {}).update(onefilenodes)

        if not dir and subdirnodes:
            ui.progress(_('checking'), None)
            for f in sorted(storefiles):
                self.warn(_("warning: orphan revlog '%s'") % f)

        return filenodes

    def _crosscheckfiles(self, filelinkrevs, filenodes):
        """Report files known to changesets but not manifests, or vice versa.

        ``filelinkrevs`` comes from the changelog pass, ``filenodes`` from
        the manifest pass.
        """
        repo = self.repo
        ui = self.ui
        ui.status(_("crosschecking files in changesets and manifests\n"))

        total = len(filelinkrevs) + len(filenodes)
        count = 0
        if self.havemf:
            for f in sorted(filelinkrevs):
                count += 1
                ui.progress(_('crosschecking'), count, total=total)
                if f not in filenodes:
                    lr = filelinkrevs[f][0]
                    self.err(lr, _("in changeset but not in manifest"), f)

        if self.havecl:
            for f in sorted(filenodes):
                count += 1
                ui.progress(_('crosschecking'), count, total=total)
                if f not in filelinkrevs:
                    try:
                        fl = repo.file(f)
                        lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
                    except Exception:
                        # the filelog may be unreadable; report without a rev
                        lr = None
                    self.err(lr, _("in manifest but not in changeset"), f)

        ui.progress(_('crosschecking'), None)

    def _verifyfiles(self, filenodes, filelinkrevs):
        """Check every filelog referenced by manifests or changesets.

        Entries of ``filenodes[f]`` are deleted as the matching filelog
        revisions are found; leftovers are reported as unknown revisions.

        Returns ``(number of files, number of file revisions checked)``.
        """
        repo = self.repo
        ui = self.ui
        lrugetctx = self.lrugetctx
        revlogv1 = self.revlogv1
        havemf = self.havemf
        ui.status(_("checking files\n"))

        storefiles = set()
        for f, f2, size in repo.store.datafiles():
            if not f:
                self.err(None, _("cannot decode filename '%s'") % f2)
            elif (size > 0 or not revlogv1) and f.startswith('data/'):
                storefiles.add(_normpath(f))

        files = sorted(set(filenodes) | set(filelinkrevs))
        total = len(files)
        revisions = 0
        for i, f in enumerate(files):
            ui.progress(_('checking'), i, item=f, total=total, unit=_('files'))
            try:
                linkrevs = filelinkrevs[f]
            except KeyError:
                # in manifest but not in changelog
                linkrevs = []

            if linkrevs:
                lr = linkrevs[0]
            else:
                lr = None

            try:
                fl = repo.file(f)
            except error.RevlogError as e:
                self.err(lr, _("broken revlog! (%s)") % e, f)
                continue

            for ff in fl.files():
                try:
                    storefiles.remove(ff)
                except KeyError:
                    self.warn(_(" warning: revlog '%s' not in fncache!") % ff)
                    self.fncachewarned = True

            self.checklog(fl, f, lr)
            seen = {}
            # NOTE(review): rp is initialised once per file, not once per
            # revision, so when a revision is skipped (skipflags) or
            # fl.read() raises, the rename check below appears to reuse
            # the previous revision's value -- confirm this is intended.
            rp = None
            for rev in fl:
                revisions += 1
                n = fl.node(rev)
                lr = self.checkentry(fl, rev, n, seen, linkrevs, f)
                if f in filenodes:
                    if havemf and n not in filenodes[f]:
                        self.err(lr, _("%s not in manifests") % (short(n)), f)
                    else:
                        del filenodes[f][n]

                # Verify contents. 4 cases to care about:
                #
                #   common: the most common case
                #   rename: with a rename
                #   meta: file content starts with b'\1\n', the metadata
                #         header defined in filelog.py, but without a rename
                #   ext: content stored externally
                #
                # More formally, their differences are shown below:
                #
                #                       | common | rename | meta  | ext
                #  -------------------------------------------------------
                #   flags()             | 0      | 0      | 0     | not 0
                #   renamed()           | False  | True   | False | ?
                #   rawtext[0:2]=='\1\n'| False  | True   | True  | ?
                #
                # "rawtext" means the raw text stored in revlog data, which
                # could be retrieved by "revision(rev, raw=True)". "text"
                # mentioned below is "revision(rev, raw=False)".
                #
                # There are 3 different lengths stored physically:
                #  1. L1: rawsize, stored in revlog index
                #  2. L2: len(rawtext), stored in revlog data
                #  3. L3: len(text), stored in revlog data if flags==0, or
                #     possibly somewhere else if flags!=0
                #
                # L1 should be equal to L2. L3 could be different from them.
                # "text" may or may not affect commit hash depending on flag
                # processors (see revlog.addflagprocessor).
                #
                #              | common  | rename | meta  | ext
                # -------------------------------------------------
                # rawsize()    | L1      | L1     | L1    | L1
                # size()       | L1      | L2-LM  | L1(*) | L1 (?)
                # len(rawtext) | L2      | L2     | L2    | L2
                # len(text)    | L2      | L2     | L2    | L3
                # len(read())  | L2      | L2-LM  | L2-LM | L3 (?)
                #
                # LM:  length of metadata, depending on rawtext
                # (*): not ideal, see comment in filelog.size
                # (?): could be "- len(meta)" if the resolved content has
                #      rename metadata
                #
                # Checks needed to be done:
                #  1. length check: L1 == L2, in all cases.
                #  2. hash check: depending on flag processor, we may need to
                #     use either "text" (external), or "rawtext" (in revlog).
                try:
                    skipflags = self.skipflags
                    if skipflags:
                        skipflags &= fl.flags(rev)
                    if not skipflags:
                        fl.read(n)  # side effect: read content and do checkhash
                        rp = fl.renamed(n)
                    # the "L1 == L2" check
                    l1 = fl.rawsize(rev)
                    l2 = len(fl.revision(n, raw=True))
                    if l1 != l2:
                        self.err(lr, _("unpacked size is %s, %s expected") %
                                 (l2, l1), f)
                except error.CensoredNodeError:
                    # experimental config: censor.policy
                    if ui.config("censor", "policy") == "abort":
                        self.err(lr, _("censored file data"), f)
                except Exception as inst:
                    self.exc(lr, _("unpacking %s") % short(n), inst, f)

                # check renames
                try:
                    if rp:
                        if lr is not None and ui.verbose:
                            ctx = lrugetctx(lr)
                            if not any(rp[0] in pctx for pctx in ctx.parents()):
                                self.warn(_("warning: copy source of '%s' not"
                                            " in parents of %s") % (f, ctx))
                        fl2 = repo.file(rp[0])
                        if not len(fl2):
                            self.err(lr, _("empty or missing copy source "
                                           "revlog %s:%s")
                                     % (rp[0], short(rp[1])), f)
                        elif rp[1] == nullid:
                            ui.note(_("warning: %s@%s: copy source"
                                      " revision is nullid %s:%s\n")
                                    % (f, lr, rp[0], short(rp[1])))
                        else:
                            # lookup only; raises if the source is unknown
                            fl2.rev(rp[1])
                except Exception as inst:
                    self.exc(lr, _("checking rename of %s") % short(n), inst, f)

            # cross-check
            if f in filenodes:
                fns = [(v, k) for k, v in filenodes[f].items()]
                for lr, node in sorted(fns):
                    self.err(lr, _("manifest refers to unknown revision %s") %
                             short(node), f)
        ui.progress(_('checking'), None)

        for f in sorted(storefiles):
            self.warn(_("warning: orphan revlog '%s'") % f)

        return len(files), revisions
General Comments 0
You need to be logged in to leave comments. Login now