##// END OF EJS Templates
verify: specify unit for ui.progress when checking files
av6 -
r28467:bd37f0d5 default
parent child Browse files
Show More
@@ -1,435 +1,435
1 1 # verify.py - repository integrity checking for Mercurial
2 2 #
3 3 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11
12 12 from .i18n import _
13 13 from .node import (
14 14 nullid,
15 15 short,
16 16 )
17 17
18 18 from . import (
19 19 error,
20 20 revlog,
21 21 util,
22 22 )
23 23
def verify(repo):
    """Run a full integrity check of ``repo`` while holding its lock.

    The repository lock is held for the duration of the check, and whatever
    ``verifier.verify()`` returns is handed back to the caller.
    """
    repolock = repo.lock()
    with repolock:
        checker = verifier(repo)
        return checker.verify()
27 27
28 28 def _normpath(f):
29 29 # under hg < 2.4, convert didn't sanitize paths properly, so a
30 30 # converted repo may contain repeated slashes
31 31 while '//' in f:
32 32 f = f.replace('//', '/')
33 33 return f
34 34
35 35 def _validpath(repo, path):
36 36 """Returns False if a path should NOT be treated as part of a repo.
37 37
38 38 For all in-core cases, this returns True, as we have no way for a
39 39 path to be mentioned in the history but not actually be
40 40 relevant. For narrow clones, this is important because many
41 41 filelogs will be missing, and changelog entries may mention
42 42 modified files that are outside the narrow scope.
43 43 """
44 44 return True
45 45
class verifier(object):
    """Integrity checker for one local repository.

    A verifier walks the changelog, the manifests and the filelogs, reports
    every inconsistency through the ui, and keeps running counts of the
    warnings and errors it emitted.  ``verify()`` is the entry point; the
    ``_verify*``/``_crosscheckfiles`` methods are its individual phases and
    ``warn``/``err``/``exc``/``checklog``/``checkentry`` are shared helpers.
    """

    def __init__(self, repo):
        """Capture ``repo`` and initialise the bookkeeping state."""
        self.repo = repo.unfiltered()
        self.ui = repo.ui
        # linkrevs that err() was called with (None excluded)
        self.badrevs = set()
        # number of err() and warn() calls so far
        self.errors = 0
        self.warnings = 0
        # NOTE(review): the values below are read from the repo as passed
        # in, not from self.repo (the unfiltered one) - confirm intended
        self.havecl = len(repo.changelog) > 0
        self.havemf = len(repo.manifest) > 0
        self.revlogv1 = repo.changelog.version != revlog.REVLOGV0
        # changectx lookup wrapped by util.lrucachefunc
        self.lrugetctx = util.lrucachefunc(repo.changectx)
        # set by _verifychangelog() once a changeset refers to a non-null
        # manifest, or cannot be unpacked at all
        self.refersmf = False
        # set by _verifyfiles() when a revlog file is missing from the
        # store's file list; triggers the debugrebuildfncache hint
        self.fncachewarned = False

    def warn(self, msg):
        """Emit ``msg`` plus a newline as a warning and count it."""
        self.ui.warn(msg + "\n")
        self.warnings += 1

    def err(self, linkrev, msg, filename=None):
        """Report an integrity error and count it.

        The message is written as `` [filename@]linkrev: msg``.  A linkrev
        of None is displayed as ``?`` and is not recorded in ``badrevs``.
        """
        if linkrev is not None:
            self.badrevs.add(linkrev)
        else:
            linkrev = '?'
        msg = "%s: %s" % (linkrev, msg)
        if filename:
            msg = "%s@%s" % (filename, msg)
        self.ui.warn(" " + msg + "\n")
        self.errors += 1

    def exc(self, linkrev, msg, inst, filename=None):
        """Report exception ``inst`` as an error prefixed with ``msg``.

        When the exception has an empty string form, its repr() is shown
        instead so the message still names the exception.
        """
        if not str(inst):
            inst = repr(inst)
        self.err(linkrev, "%s: %s" % (msg, inst), filename)

    def checklog(self, obj, name, linkrev):
        """Run revlog-level sanity checks on ``obj``.

        ``name`` labels the revlog in messages and ``linkrev`` is used when
        reporting an empty or missing revlog.
        """
        # an empty revlog is only an error when the repository has history
        if not len(obj) and (self.havecl or self.havemf):
            self.err(linkrev, _("empty or missing %s") % name)
            return

        # d[0] and d[1] are reported as data and index size discrepancies
        d = obj.checksize()
        if d[0]:
            self.err(None, _("data length off by %d bytes") % d[0], name)
        if d[1]:
            self.err(None, _("index contains %d extra bytes") % d[1], name)

        # warn when this revlog's format disagrees with the changelog's
        if obj.version != revlog.REVLOGV0:
            if not self.revlogv1:
                self.warn(_("warning: `%s' uses revlog format 1") % name)
        elif self.revlogv1:
            self.warn(_("warning: `%s' uses revlog format 0") % name)

    def checkentry(self, obj, i, node, seen, linkrevs, f):
        """Check revision ``i`` (``node``) of revlog ``obj``.

        ``seen`` maps the nodes visited so far to their revision number and
        is updated with this node.  ``linkrevs`` lists the changelog
        revisions this node is expected to link to and ``f`` is the label
        used in messages.

        Returns the linkrev of the revision, or None when the stored
        linkrev cannot be trusted.
        """
        lr = obj.linkrev(obj.rev(node))
        if lr < 0 or (self.havecl and lr not in linkrevs):
            if lr < 0 or lr >= len(self.repo.changelog):
                msg = _("rev %d points to nonexistent changeset %d")
            else:
                msg = _("rev %d points to unexpected changeset %d")
            self.err(None, msg % (i, lr), f)
            if linkrevs:
                if f and len(linkrevs) > 1:
                    try:
                        # attempt to filter down to real linkrevs
                        linkrevs = [l for l in linkrevs
                                    if self.lrugetctx(l)[f].filenode() == node]
                    except Exception:
                        # best effort only: keep the unfiltered list
                        pass
                self.warn(_(" (expected %s)") % " ".join(map(str, linkrevs)))
            lr = None # can't be trusted

        try:
            p1, p2 = obj.parents(node)
            # a non-null parent must have been visited before its child
            if p1 not in seen and p1 != nullid:
                self.err(lr, _("unknown parent 1 %s of %s") %
                         (short(p1), short(node)), f)
            if p2 not in seen and p2 != nullid:
                self.err(lr, _("unknown parent 2 %s of %s") %
                         (short(p2), short(node)), f)
        except Exception as inst:
            self.exc(lr, _("checking parents of %s") % short(node), inst, f)

        if node in seen:
            self.err(lr, _("duplicate revision %d (%d)") % (i, seen[node]), f)
        seen[node] = i
        return lr

    def verify(self):
        """Run every verification phase and print a summary.

        Raises error.Abort when the repository URL does not start with
        ``file:``.  Returns 1 when integrity errors were recorded; otherwise
        the method falls off the end and returns None.
        """
        repo = self.repo

        ui = repo.ui

        if not repo.url().startswith('file:'):
            raise error.Abort(_("cannot verify bundle or remote repos"))

        if os.path.exists(repo.sjoin("journal")):
            ui.warn(_("abandoned transaction found - run hg recover\n"))

        if ui.verbose or not self.revlogv1:
            ui.status(_("repository uses revlog format %d\n") %
                      (1 if self.revlogv1 else 0))

        mflinkrevs, filelinkrevs = self._verifychangelog()

        filenodes = self._verifymanifest(mflinkrevs)
        del mflinkrevs

        self._crosscheckfiles(filelinkrevs, filenodes)

        totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)

        ui.status(_("%d files, %d changesets, %d total revisions\n") %
                  (totalfiles, len(repo.changelog), filerevisions))
        if self.warnings:
            ui.warn(_("%d warnings encountered!\n") % self.warnings)
        if self.fncachewarned:
            ui.warn(_('hint: run "hg debugrebuildfncache" to recover from '
                      'corrupt fncache\n'))
        if self.errors:
            ui.warn(_("%d integrity errors encountered!\n") % self.errors)
            if self.badrevs:
                ui.warn(_("(first damaged changeset appears to be %d)\n")
                        % min(self.badrevs))
            return 1

    def _verifychangelog(self):
        """Check every changelog revision and collect what it refers to.

        Returns a ``(mflinkrevs, filelinkrevs)`` pair:

        - ``mflinkrevs`` maps each non-null manifest node (``changes[0]``)
          to the list of changelog revisions referring to it;
        - ``filelinkrevs`` maps each normalized file path (from
          ``changes[3]``) to the list of changelog revisions naming it.
        """
        ui = self.ui
        repo = self.repo
        cl = repo.changelog

        ui.status(_("checking changesets\n"))
        mflinkrevs = {}
        filelinkrevs = {}
        seen = {}
        self.checklog(cl, "changelog", 0)
        total = len(repo)
        for i in repo:
            ui.progress(_('checking'), i, total=total, unit=_('changesets'))
            n = cl.node(i)
            # a changelog revision is expected to link to itself
            self.checkentry(cl, i, n, seen, [i], "changelog")

            try:
                changes = cl.read(n)
                if changes[0] != nullid:
                    mflinkrevs.setdefault(changes[0], []).append(i)
                    self.refersmf = True
                for f in changes[3]:
                    if _validpath(repo, f):
                        filelinkrevs.setdefault(_normpath(f), []).append(i)
            except Exception as inst:
                # an unreadable changeset may have referred to a manifest,
                # so the manifest revlog is checked as well
                self.refersmf = True
                self.exc(i, _("unpacking changeset %s") % short(n), inst)
        ui.progress(_('checking'), None)
        return mflinkrevs, filelinkrevs

    def _verifymanifest(self, mflinkrevs, dir="", storefiles=None,
                        progress=None):
        """Check the manifest revlog for ``dir`` and, recursively, the
        manifests of its subdirectories.

        ``mflinkrevs`` maps manifest nodes to the linkrevs expected to refer
        to them; entries are deleted from it as the nodes are found.
        ``dir`` is the directory prefix, empty for the root manifest.
        ``storefiles`` is the set of store files not yet accounted for and
        ``progress`` a callback invoked once per subdirectory; the root call
        creates both and hands them to the recursive calls, which need
        ``storefiles`` to be a set.

        Returns a dict mapping each file path to ``{filenode: linkrev}``.
        """
        repo = self.repo
        ui = self.ui
        mf = self.repo.manifest.dirlog(dir)

        if not dir:
            self.ui.status(_("checking manifests\n"))

        filenodes = {}
        subdirnodes = {}
        seen = {}
        label = "manifest"
        if dir:
            label = dir
            # this directory's revlog files are accounted for
            revlogfiles = mf.files()
            storefiles.difference_update(revlogfiles)
            if progress: # should be true since we're in a subdirectory
                progress()
        if self.refersmf:
            # Do not check manifest if there are only changelog entries with
            # null manifests.
            self.checklog(mf, label, 0)
        total = len(mf)
        for i in mf:
            if not dir:
                ui.progress(_('checking'), i, total=total, unit=_('manifests'))
            n = mf.node(i)
            lr = self.checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
            if n in mflinkrevs:
                # found: whatever remains afterwards is reported as unknown
                del mflinkrevs[n]
            elif dir:
                self.err(lr, _("%s not in parent-directory manifest") %
                         short(n), label)
            else:
                self.err(lr, _("%s not in changesets") % short(n), label)

            try:
                for f, fn, fl in mf.readshallowdelta(n).iterentries():
                    # NOTE(review): an entry without a name is reported but
                    # still processed below - confirm that is intended
                    if not f:
                        self.err(lr, _("entry without name in manifest"))
                    elif f == "/dev/null":  # ignore this in very old repos
                        continue
                    fullpath = dir + _normpath(f)
                    if not _validpath(repo, fullpath):
                        continue
                    if fl == 't':
                        # entry flagged 't': collect its linkrevs for the
                        # recursive subdirectory check
                        subdirnodes.setdefault(fullpath + '/', {}).setdefault(
                            fn, []).append(lr)
                    else:
                        # setdefault keeps the first linkrev seen for fn
                        filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
            except Exception as inst:
                self.exc(lr, _("reading delta %s") % short(n), inst, label)
        if not dir:
            ui.progress(_('checking'), None)

        if self.havemf:
            # nodes still in mflinkrevs were referenced but never found
            for c, m in sorted([(c, m) for m in mflinkrevs
                                for c in mflinkrevs[m]]):
                if dir:
                    self.err(c, _("parent-directory manifest refers to unknown "
                                  "revision %s") % short(m), label)
                else:
                    self.err(c, _("changeset refers to unknown revision %s") %
                             short(m), label)

        if not dir and subdirnodes:
            self.ui.status(_("checking directory manifests\n"))
            # collect the 'meta/' store files; the recursive calls remove
            # the ones that belong to a directory manifest
            storefiles = set()
            subdirs = set()
            revlogv1 = self.revlogv1
            for f, f2, size in repo.store.datafiles():
                if not f:
                    self.err(None, _("cannot decode filename '%s'") % f2)
                elif (size > 0 or not revlogv1) and f.startswith('meta/'):
                    storefiles.add(_normpath(f))
                    subdirs.add(os.path.dirname(f))
            subdircount = len(subdirs)
            # one-element list so the closure below can update the counter
            currentsubdir = [0]
            def progress():
                currentsubdir[0] += 1
                ui.progress(_('checking'), currentsubdir[0], total=subdircount,
                            unit=_('manifests'))

        for subdir, linkrevs in subdirnodes.iteritems():
            subdirfilenodes = self._verifymanifest(linkrevs, subdir, storefiles,
                                                   progress)
            for f, onefilenodes in subdirfilenodes.iteritems():
                filenodes.setdefault(f, {}).update(onefilenodes)

        if not dir and subdirnodes:
            ui.progress(_('checking'), None)
            # store files no directory manifest claimed
            for f in sorted(storefiles):
                self.warn(_("warning: orphan revlog '%s'") % f)

        return filenodes

    def _crosscheckfiles(self, filelinkrevs, filenodes):
        """Report files known to only one of changelog and manifests.

        ``filelinkrevs`` is keyed by the file paths named in changesets and
        ``filenodes`` by the paths found in manifests.
        """
        repo = self.repo
        ui = self.ui
        ui.status(_("crosschecking files in changesets and manifests\n"))

        total = len(filelinkrevs) + len(filenodes)
        count = 0
        # NOTE(review): these progress calls pass no unit, unlike the other
        # phases - confirm whether that is intended
        if self.havemf:
            for f in sorted(filelinkrevs):
                count += 1
                ui.progress(_('crosschecking'), count, total=total)
                if f not in filenodes:
                    lr = filelinkrevs[f][0]
                    self.err(lr, _("in changeset but not in manifest"), f)

        if self.havecl:
            for f in sorted(filenodes):
                count += 1
                ui.progress(_('crosschecking'), count, total=total)
                if f not in filelinkrevs:
                    try:
                        # blame the lowest linkrev among the known filenodes
                        fl = repo.file(f)
                        lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
                    except Exception:
                        lr = None
                    self.err(lr, _("in manifest but not in changeset"), f)

        ui.progress(_('crosschecking'), None)

    def _verifyfiles(self, filenodes, filelinkrevs):
        """Check every filelog named by the manifests or the changelog.

        ``filenodes`` maps file paths to ``{filenode: linkrev}``; entries
        are deleted from the inner dicts as the nodes are found in the
        filelog.  ``filelinkrevs`` maps file paths to the changelog
        revisions naming them.

        Returns ``(number of files, number of file revisions)``.
        """
        repo = self.repo
        ui = self.ui
        lrugetctx = self.lrugetctx
        revlogv1 = self.revlogv1
        havemf = self.havemf
        ui.status(_("checking files\n"))

        # 'data/' store files; each one claimed by a filelog is removed
        storefiles = set()
        for f, f2, size in repo.store.datafiles():
            if not f:
                self.err(None, _("cannot decode filename '%s'") % f2)
            elif (size > 0 or not revlogv1) and f.startswith('data/'):
                storefiles.add(_normpath(f))

        files = sorted(set(filenodes) | set(filelinkrevs))
        total = len(files)
        revisions = 0
        for i, f in enumerate(files):
            ui.progress(_('checking'), i, item=f, total=total, unit=_('files'))
            try:
                linkrevs = filelinkrevs[f]
            except KeyError:
                # in manifest but not in changelog
                linkrevs = []

            if linkrevs:
                lr = linkrevs[0]
            else:
                lr = None

            try:
                fl = repo.file(f)
            except error.RevlogError as e:
                self.err(lr, _("broken revlog! (%s)") % e, f)
                continue

            for ff in fl.files():
                try:
                    storefiles.remove(ff)
                except KeyError:
                    self.warn(_(" warning: revlog '%s' not in fncache!") % ff)
                    self.fncachewarned = True

            self.checklog(fl, f, lr)
            seen = {}
            # a distinct name keeps the file index 'i' above intact
            for rev in fl:
                revisions += 1
                n = fl.node(rev)
                lr = self.checkentry(fl, rev, n, seen, linkrevs, f)
                if f in filenodes:
                    if havemf and n not in filenodes[f]:
                        self.err(lr, _("%s not in manifests") % (short(n)), f)
                    else:
                        del filenodes[f][n]

                # verify contents
                # reset for every revision so that a failed read cannot
                # leave the previous revision's copy information behind for
                # the rename check below
                rp = None
                try:
                    l = len(fl.read(n))
                    rp = fl.renamed(n)
                    if l != fl.size(rev):
                        # only an error when the length of fl.revision()
                        # disagrees with the recorded size as well
                        if len(fl.revision(n)) != fl.size(rev):
                            self.err(lr, _("unpacked size is %s, %s expected") %
                                     (l, fl.size(rev)), f)
                except error.CensoredNodeError:
                    # experimental config: censor.policy
                    if ui.config("censor", "policy", "abort") == "abort":
                        self.err(lr, _("censored file data"), f)
                except Exception as inst:
                    self.exc(lr, _("unpacking %s") % short(n), inst, f)

                # check renames
                try:
                    if rp:
                        # rp[0] is the copy source path, rp[1] its node
                        if lr is not None and ui.verbose:
                            ctx = lrugetctx(lr)
                            found = False
                            for pctx in ctx.parents():
                                if rp[0] in pctx:
                                    found = True
                                    break
                            if not found:
                                self.warn(_("warning: copy source of '%s' not"
                                            " in parents of %s") % (f, ctx))
                        fl2 = repo.file(rp[0])
                        if not len(fl2):
                            self.err(lr, _("empty or missing copy source "
                                           "revlog %s:%s")
                                     % (rp[0], short(rp[1])), f)
                        elif rp[1] == nullid:
                            ui.note(_("warning: %s@%s: copy source"
                                      " revision is nullid %s:%s\n")
                                    % (f, lr, rp[0], short(rp[1])))
                        else:
                            # lookup for its side effect: an unknown source
                            # node raises and is reported below
                            fl2.rev(rp[1])
                except Exception as inst:
                    self.exc(lr, _("checking rename of %s") % short(n), inst, f)

            # cross-check
            if f in filenodes:
                # nodes left over were in a manifest but not in the filelog
                fns = [(lr, n) for n, lr in filenodes[f].iteritems()]
                for lr, node in sorted(fns):
                    self.err(lr, _("manifest refers to unknown revision %s") %
                             short(node), f)
        ui.progress(_('checking'), None)

        for f in sorted(storefiles):
            self.warn(_("warning: orphan revlog '%s'") % f)

        return len(files), revisions
General Comments 0
You need to be logged in to leave comments. Login now