##// END OF EJS Templates
verify: avoid shadowing two variables with a list comprehension...
Augie Fackler -
r30393:b667b780 default
parent child Browse files
Show More
@@ -1,437 +1,437
# verify.py - repository integrity checking for Mercurial
#
# Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11
12 12 from .i18n import _
13 13 from .node import (
14 14 nullid,
15 15 short,
16 16 )
17 17
18 18 from . import (
19 19 error,
20 20 revlog,
21 21 util,
22 22 )
23 23
def verify(repo):
    """Check the integrity of ``repo`` while holding its lock.

    The repository lock is taken for the whole run and released when the
    ``with`` block exits, whether verification finishes or raises.

    Returns whatever ``verifier(repo).verify()`` returns: 1 when integrity
    errors were reported, otherwise None.
    """
    with repo.lock():
        return verifier(repo).verify()
27 27
28 28 def _normpath(f):
29 29 # under hg < 2.4, convert didn't sanitize paths properly, so a
30 30 # converted repo may contain repeated slashes
31 31 while '//' in f:
32 32 f = f.replace('//', '/')
33 33 return f
34 34
35 35 def _validpath(repo, path):
36 36 """Returns False if a path should NOT be treated as part of a repo.
37 37
38 38 For all in-core cases, this returns True, as we have no way for a
39 39 path to be mentioned in the history but not actually be
40 40 relevant. For narrow clones, this is important because many
41 41 filelogs will be missing, and changelog entries may mention
42 42 modified files that are outside the narrow scope.
43 43 """
44 44 return True
45 45
class verifier(object):
    """Walks the changelog, manifests and filelogs of a repository and
    reports every inconsistency through ``repo.ui``.

    Counters kept on the instance:
      errors        - number of integrity errors reported via err()/exc()
      warnings      - number of warnings reported via warn()
      badrevs       - set of linkrevs that errors were attributed to
      fncachewarned - True once a revlog missing from fncache was seen
      refersmf      - True once a changeset referencing a non-null manifest
                      was seen, or a changeset could not be unpacked
    """

    def __init__(self, repo):
        # All checks run against the unfiltered repository.
        self.repo = repo.unfiltered()
        self.ui = repo.ui
        self.badrevs = set()
        self.errors = 0
        self.warnings = 0
        self.havecl = len(repo.changelog) > 0
        self.havemf = len(repo.manifestlog._revlog) > 0
        self.revlogv1 = repo.changelog.version != revlog.REVLOGV0
        self.lrugetctx = util.lrucachefunc(repo.changectx)
        self.refersmf = False
        self.fncachewarned = False

    def warn(self, msg):
        """Emit ``msg`` (newline appended) as a warning and count it."""
        self.ui.warn(msg + "\n")
        self.warnings += 1

    def err(self, linkrev, msg, filename=None):
        """Report one integrity error and count it.

        ``linkrev`` is the changeset revision the error is attributed to;
        it is remembered in ``badrevs``. When it is None the message shows
        '?' instead and nothing is added to ``badrevs``. A truthy
        ``filename`` is prefixed to the message as ``filename@``.
        """
        if linkrev is not None:
            self.badrevs.add(linkrev)
        else:
            linkrev = '?'
        msg = "%s: %s" % (linkrev, msg)
        if filename:
            msg = "%s@%s" % (filename, msg)
        self.ui.warn(" " + msg + "\n")
        self.errors += 1

    def exc(self, linkrev, msg, inst, filename=None):
        """Report exception ``inst`` as an integrity error.

        An exception whose str() is empty is shown by its repr() so the
        message still says what was raised.
        """
        if not str(inst):
            inst = repr(inst)
        self.err(linkrev, "%s: %s" % (msg, inst), filename)

    def checklog(self, obj, name, linkrev):
        """Run the whole-revlog sanity checks on ``obj``.

        Reports an empty revlog (when the repo has a changelog or a
        manifest), size mismatches returned by ``obj.checksize()``, and a
        revlog format that differs from the changelog's.
        """
        if not len(obj) and (self.havecl or self.havemf):
            self.err(linkrev, _("empty or missing %s") % name)
            return

        d = obj.checksize()
        if d[0]:
            self.err(None, _("data length off by %d bytes") % d[0], name)
        if d[1]:
            self.err(None, _("index contains %d extra bytes") % d[1], name)

        # Warn when this revlog's format disagrees with the changelog's.
        if obj.version != revlog.REVLOGV0:
            if not self.revlogv1:
                self.warn(_("warning: `%s' uses revlog format 1") % name)
        elif self.revlogv1:
            self.warn(_("warning: `%s' uses revlog format 0") % name)

    def checkentry(self, obj, i, node, seen, linkrevs, f):
        """Check revision ``i`` (``node``) of revlog ``obj``.

        Verifies that the revision's linkrev is one of ``linkrevs``, that
        both parents were already seen (or are null), and that the node
        was not seen before. ``seen`` maps node -> revision number and is
        updated in place. ``f`` is the name used in error messages.

        Returns the linkrev, or None when the stored linkrev cannot be
        trusted.
        """
        lr = obj.linkrev(obj.rev(node))
        if lr < 0 or (self.havecl and lr not in linkrevs):
            if lr < 0 or lr >= len(self.repo.changelog):
                msg = _("rev %d points to nonexistent changeset %d")
            else:
                msg = _("rev %d points to unexpected changeset %d")
            self.err(None, msg % (i, lr), f)
            if linkrevs:
                if f and len(linkrevs) > 1:
                    try:
                        # attempt to filter down to real linkrevs
                        linkrevs = [l for l in linkrevs
                                    if self.lrugetctx(l)[f].filenode() == node]
                    except Exception:
                        # best effort only: keep the unfiltered candidates
                        pass
                self.warn(_(" (expected %s)") % " ".join(map(str, linkrevs)))
            lr = None # can't be trusted

        try:
            p1, p2 = obj.parents(node)
            if p1 not in seen and p1 != nullid:
                self.err(lr, _("unknown parent 1 %s of %s") %
                         (short(p1), short(node)), f)
            if p2 not in seen and p2 != nullid:
                self.err(lr, _("unknown parent 2 %s of %s") %
                         (short(p2), short(node)), f)
        except Exception as inst:
            self.exc(lr, _("checking parents of %s") % short(node), inst, f)

        if node in seen:
            self.err(lr, _("duplicate revision %d (%d)") % (i, seen[node]), f)
        seen[node] = i
        return lr

    def verify(self):
        """Run every verification pass and print a summary.

        Raises error.Abort when the repository URL does not start with
        'file:'. Returns 1 when integrity errors were reported, otherwise
        None.
        """
        repo = self.repo

        ui = repo.ui

        if not repo.url().startswith('file:'):
            raise error.Abort(_("cannot verify bundle or remote repos"))

        if os.path.exists(repo.sjoin("journal")):
            ui.warn(_("abandoned transaction found - run hg recover\n"))

        if ui.verbose or not self.revlogv1:
            ui.status(_("repository uses revlog format %d\n") %
                      (1 if self.revlogv1 else 0))

        mflinkrevs, filelinkrevs = self._verifychangelog()

        filenodes = self._verifymanifest(mflinkrevs)
        del mflinkrevs

        self._crosscheckfiles(filelinkrevs, filenodes)

        totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)

        ui.status(_("%d files, %d changesets, %d total revisions\n") %
                  (totalfiles, len(repo.changelog), filerevisions))
        if self.warnings:
            ui.warn(_("%d warnings encountered!\n") % self.warnings)
        if self.fncachewarned:
            ui.warn(_('hint: run "hg debugrebuildfncache" to recover from '
                      'corrupt fncache\n'))
        if self.errors:
            ui.warn(_("%d integrity errors encountered!\n") % self.errors)
            if self.badrevs:
                ui.warn(_("(first damaged changeset appears to be %d)\n")
                        % min(self.badrevs))
            return 1

    def _verifychangelog(self):
        """Check every changeset and collect what it references.

        Returns ``(mflinkrevs, filelinkrevs)``:
          mflinkrevs   - manifest node -> list of changeset revs using it
          filelinkrevs - normalized file path -> list of changeset revs
                         that list the file
        """
        ui = self.ui
        repo = self.repo
        cl = repo.changelog

        ui.status(_("checking changesets\n"))
        mflinkrevs = {}
        filelinkrevs = {}
        seen = {}
        self.checklog(cl, "changelog", 0)
        total = len(repo)
        for i in repo:
            ui.progress(_('checking'), i, total=total, unit=_('changesets'))
            n = cl.node(i)
            # a changeset's only valid linkrev is its own revision number
            self.checkentry(cl, i, n, seen, [i], "changelog")

            try:
                changes = cl.read(n)
                # changes[0] is used as the manifest node, changes[3] as
                # the list of files touched by the changeset
                if changes[0] != nullid:
                    mflinkrevs.setdefault(changes[0], []).append(i)
                    self.refersmf = True
                for f in changes[3]:
                    if _validpath(repo, f):
                        filelinkrevs.setdefault(_normpath(f), []).append(i)
            except Exception as inst:
                # the changeset is unreadable, so it may have referenced a
                # manifest: keep manifest checking enabled
                self.refersmf = True
                self.exc(i, _("unpacking changeset %s") % short(n), inst)
        ui.progress(_('checking'), None)
        return mflinkrevs, filelinkrevs

    def _verifymanifest(self, mflinkrevs, dir="", storefiles=None,
                        progress=None):
        """Check the manifest revlog for ``dir`` and recurse into subdirs.

        ``mflinkrevs`` maps manifest node -> expected linkrevs; entries are
        removed as their nodes are found, and whatever is left is reported
        as unknown. ``dir`` is "" for the root manifest. ``storefiles`` and
        ``progress`` are supplied by the root call when it recurses into
        directory manifests: ``storefiles`` is the set of 'meta/' store
        files not yet accounted for, ``progress`` a callback ticking the
        progress bar once per subdirectory.

        Returns a dict mapping file path -> {file node: linkrev}.
        """
        repo = self.repo
        ui = self.ui
        mfl = self.repo.manifestlog
        mf = mfl._revlog.dirlog(dir)

        if not dir:
            self.ui.status(_("checking manifests\n"))

        filenodes = {}
        subdirnodes = {}
        seen = {}
        label = "manifest"
        if dir:
            label = dir
            revlogfiles = mf.files()
            # these store files belong to a known directory manifest
            storefiles.difference_update(revlogfiles)
            if progress: # should be true since we're in a subdirectory
                progress()
        if self.refersmf:
            # Do not check manifest if there are only changelog entries with
            # null manifests.
            self.checklog(mf, label, 0)
        total = len(mf)
        for i in mf:
            if not dir:
                ui.progress(_('checking'), i, total=total, unit=_('manifests'))
            n = mf.node(i)
            lr = self.checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
            if n in mflinkrevs:
                del mflinkrevs[n]
            elif dir:
                self.err(lr, _("%s not in parent-directory manifest") %
                         short(n), label)
            else:
                self.err(lr, _("%s not in changesets") % short(n), label)

            try:
                mfdelta = mfl.get(dir, n).readdelta(shallow=True)
                for f, fn, fl in mfdelta.iterentries():
                    if not f:
                        self.err(lr, _("entry without name in manifest"))
                    elif f == "/dev/null": # ignore this in very old repos
                        continue
                    fullpath = dir + _normpath(f)
                    if not _validpath(repo, fullpath):
                        continue
                    if fl == 't':
                        # flag 't' entries are collected as subdirectory
                        # manifests and verified by the recursion below
                        subdirnodes.setdefault(fullpath + '/', {}).setdefault(
                            fn, []).append(lr)
                    else:
                        filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
            except Exception as inst:
                self.exc(lr, _("reading delta %s") % short(n), inst, label)
        if not dir:
            ui.progress(_('checking'), None)

        if self.havemf:
            # anything still in mflinkrevs was referenced but never found
            for c, m in sorted([(c, m) for m in mflinkrevs
                                for c in mflinkrevs[m]]):
                if dir:
                    self.err(c, _("parent-directory manifest refers to unknown "
                                  "revision %s") % short(m), label)
                else:
                    self.err(c, _("changeset refers to unknown revision %s") %
                             short(m), label)

        if not dir and subdirnodes:
            self.ui.status(_("checking directory manifests\n"))
            storefiles = set()
            subdirs = set()
            revlogv1 = self.revlogv1
            for f, f2, size in repo.store.datafiles():
                if not f:
                    self.err(None, _("cannot decode filename '%s'") % f2)
                elif (size > 0 or not revlogv1) and f.startswith('meta/'):
                    storefiles.add(_normpath(f))
                    subdirs.add(os.path.dirname(f))
            subdircount = len(subdirs)
            # one-element list so the nested function can update the counter
            currentsubdir = [0]
            def progress():
                currentsubdir[0] += 1
                ui.progress(_('checking'), currentsubdir[0], total=subdircount,
                            unit=_('manifests'))

        for subdir, linkrevs in subdirnodes.iteritems():
            subdirfilenodes = self._verifymanifest(linkrevs, subdir, storefiles,
                                                   progress)
            for f, onefilenodes in subdirfilenodes.iteritems():
                filenodes.setdefault(f, {}).update(onefilenodes)

        if not dir and subdirnodes:
            ui.progress(_('checking'), None)
            # store files no directory manifest claimed
            for f in sorted(storefiles):
                self.warn(_("warning: orphan revlog '%s'") % f)

        return filenodes

    def _crosscheckfiles(self, filelinkrevs, filenodes):
        """Report files known to the changelog but not to the manifests,
        and the other way round.

        ``filelinkrevs`` maps path -> changeset revs, ``filenodes`` maps
        path -> {file node: linkrev}. Neither is modified.
        """
        repo = self.repo
        ui = self.ui
        ui.status(_("crosschecking files in changesets and manifests\n"))

        total = len(filelinkrevs) + len(filenodes)
        count = 0
        if self.havemf:
            for f in sorted(filelinkrevs):
                count += 1
                ui.progress(_('crosschecking'), count, total=total)
                if f not in filenodes:
                    lr = filelinkrevs[f][0]
                    self.err(lr, _("in changeset but not in manifest"), f)

        if self.havecl:
            for f in sorted(filenodes):
                count += 1
                ui.progress(_('crosschecking'), count, total=total)
                if f not in filelinkrevs:
                    try:
                        # blame the earliest changeset any of the file's
                        # nodes links to
                        fl = repo.file(f)
                        lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
                    except Exception:
                        # filelog unusable: report without a linkrev
                        lr = None
                    self.err(lr, _("in manifest but not in changeset"), f)

        ui.progress(_('crosschecking'), None)

    def _verifyfiles(self, filenodes, filelinkrevs):
        """Check every filelog named by the manifests or the changelog.

        ``filenodes`` maps path -> {file node: linkrev} and is consumed:
        nodes found in a filelog are deleted from it, and the leftovers are
        reported as unknown revisions. ``filelinkrevs`` maps path ->
        changeset revs.

        Returns ``(number of files, total number of file revisions)``.
        """
        repo = self.repo
        ui = self.ui
        lrugetctx = self.lrugetctx
        revlogv1 = self.revlogv1
        havemf = self.havemf
        ui.status(_("checking files\n"))

        # every 'data/' store file; entries are removed as filelogs claim
        # them, leaving the orphans at the end
        storefiles = set()
        for f, f2, size in repo.store.datafiles():
            if not f:
                self.err(None, _("cannot decode filename '%s'") % f2)
            elif (size > 0 or not revlogv1) and f.startswith('data/'):
                storefiles.add(_normpath(f))

        files = sorted(set(filenodes) | set(filelinkrevs))
        total = len(files)
        revisions = 0
        for i, f in enumerate(files):
            ui.progress(_('checking'), i, item=f, total=total, unit=_('files'))
            try:
                linkrevs = filelinkrevs[f]
            except KeyError:
                # in manifest but not in changelog
                linkrevs = []

            lr = linkrevs[0] if linkrevs else None

            try:
                fl = repo.file(f)
            except error.RevlogError as e:
                self.err(lr, _("broken revlog! (%s)") % e, f)
                continue

            for ff in fl.files():
                try:
                    storefiles.remove(ff)
                except KeyError:
                    self.warn(_(" warning: revlog '%s' not in fncache!") % ff)
                    self.fncachewarned = True

            self.checklog(fl, f, lr)
            seen = {}
            # NOTE(review): rp is reset per file, not per revision, so when
            # reading a revision fails before fl.renamed() runs, the rename
            # check below sees the previous revision's value - confirm this
            # is intended.
            rp = None
            for rev in fl:
                revisions += 1
                n = fl.node(rev)
                lr = self.checkentry(fl, rev, n, seen, linkrevs, f)
                if f in filenodes:
                    if havemf and n not in filenodes[f]:
                        self.err(lr, _("%s not in manifests") % (short(n)), f)
                    else:
                        del filenodes[f][n]

                # verify contents
                try:
                    l = len(fl.read(n))
                    rp = fl.renamed(n)
                    if l != fl.size(rev):
                        # only an error if the full revision text disagrees
                        # with the recorded size as well
                        if len(fl.revision(n)) != fl.size(rev):
                            self.err(lr, _("unpacked size is %s, %s expected") %
                                     (l, fl.size(rev)), f)
                except error.CensoredNodeError:
                    # experimental config: censor.policy
                    if ui.config("censor", "policy", "abort") == "abort":
                        self.err(lr, _("censored file data"), f)
                except Exception as inst:
                    self.exc(lr, _("unpacking %s") % short(n), inst, f)

                # check renames
                try:
                    if rp:
                        # rp[0] is the copy source path, rp[1] its node
                        if lr is not None and ui.verbose:
                            ctx = lrugetctx(lr)
                            if not any(rp[0] in pctx
                                       for pctx in ctx.parents()):
                                self.warn(_("warning: copy source of '%s' not"
                                            " in parents of %s") % (f, ctx))
                        fl2 = repo.file(rp[0])
                        if not len(fl2):
                            self.err(lr, _("empty or missing copy source "
                                           "revlog %s:%s")
                                     % (rp[0], short(rp[1])), f)
                        elif rp[1] == nullid:
                            ui.note(_("warning: %s@%s: copy source"
                                      " revision is nullid %s:%s\n")
                                    % (f, lr, rp[0], short(rp[1])))
                        else:
                            # raises if the source node is unknown; the
                            # handler below turns that into an error
                            fl2.rev(rp[1])
                except Exception as inst:
                    self.exc(lr, _("checking rename of %s") % short(n), inst, f)

            # cross-check
            if f in filenodes:
                # Distinct names (k, v) keep the comprehension from
                # rebinding lr/n, which Python 2 list comprehensions would
                # leak into this scope.
                fns = [(v, k) for k, v in filenodes[f].iteritems()]
                for lr, node in sorted(fns):
                    self.err(lr, _("manifest refers to unknown revision %s") %
                             short(node), f)
        ui.progress(_('checking'), None)

        for f in sorted(storefiles):
            self.warn(_("warning: orphan revlog '%s'") % f)

        return len(files), revisions
General Comments 0
You need to be logged in to leave comments. Login now