##// END OF EJS Templates
verify: provide unit to ui.makeprogress()
av6 -
r40670:d2ff0af6 stable
parent child Browse files
Show More
@@ -1,453 +1,454 b''
1 1 # verify.py - repository integrity checking for Mercurial
2 2 #
3 3 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11
12 12 from .i18n import _
13 13 from .node import (
14 14 nullid,
15 15 short,
16 16 )
17 17
18 18 from . import (
19 19 error,
20 20 pycompat,
21 21 revlog,
22 22 util,
23 23 )
24 24
def verify(repo):
    """Run an integrity check of ``repo`` while holding the repository lock.

    Builds a ``verifier`` for ``repo`` and returns whatever its ``verify()``
    method returns.
    """
    with repo.lock():
        checker = verifier(repo)
        return checker.verify()
28 28
29 29 def _normpath(f):
30 30 # under hg < 2.4, convert didn't sanitize paths properly, so a
31 31 # converted repo may contain repeated slashes
32 32 while '//' in f:
33 33 f = f.replace('//', '/')
34 34 return f
35 35
class verifier(object):
    """Walk a repository's changelog, manifests and filelogs looking for
    inconsistencies, reporting them through the repository's ui.

    Problems are counted in ``self.errors`` / ``self.warnings``; changelog
    revisions associated with an error are collected in ``self.badrevs``.
    """

    def __init__(self, repo):
        self.repo = repo.unfiltered()
        self.ui = repo.ui
        self.match = repo.narrowmatch()
        # linkrevs passed to err(); used to report the first damaged changeset
        self.badrevs = set()
        self.errors = 0
        self.warnings = 0
        self.havecl = len(repo.changelog) > 0
        self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
        self.revlogv1 = repo.changelog.version != revlog.REVLOGV0
        self.lrugetctx = util.lrucachefunc(repo.__getitem__)
        # set once a changeset refers to a non-null manifest (or cannot be
        # unpacked); gates the manifest checklog() call
        self.refersmf = False
        self.fncachewarned = False
        # developer config: verify.skipflags
        self.skipflags = repo.ui.configint('verify', 'skipflags')
        self.warnorphanstorefiles = True

    @staticmethod
    def _linkrevsortkey(pair):
        """Sort key for ``(linkrev, node)`` pairs whose linkrev may be None.

        checkentry() returns None for linkrevs it cannot trust, so such pairs
        can sit next to integer ones. Python 3 refuses to order None against
        an int; this key places None first (the Python 2 ordering) without
        ever comparing the two.
        """
        linkrev, node = pair
        if linkrev is None:
            return (False, 0, node)
        return (True, linkrev, node)

    def warn(self, msg):
        """Emit ``msg`` (newline appended) as a warning and count it."""
        self.ui.warn(msg + "\n")
        self.warnings += 1

    def err(self, linkrev, msg, filename=None):
        """Report an integrity error and count it.

        ``linkrev`` is recorded in ``self.badrevs`` unless it is None, in
        which case ``?`` is printed in its place. When ``filename`` is given
        the message is prefixed with ``filename@``.
        """
        if linkrev is None:
            revtext = '?'
        else:
            self.badrevs.add(linkrev)
            revtext = "%d" % linkrev
        msg = "%s: %s" % (revtext, msg)
        if filename:
            msg = "%s@%s" % (filename, msg)
        self.ui.warn(" " + msg + "\n")
        self.errors += 1

    def exc(self, linkrev, msg, inst, filename=None):
        """Report exception ``inst`` as an error, prefixed by ``msg``."""
        fmsg = pycompat.bytestr(inst)
        if not fmsg:
            # fall back to the repr when the exception has no message
            fmsg = pycompat.byterepr(inst)
        self.err(linkrev, "%s: %s" % (msg, fmsg), filename)

    def checklog(self, obj, name, linkrev):
        """Check storage ``obj`` (labelled ``name``) for emptiness, size
        mismatches and a revlog format differing from the changelog's."""
        if not len(obj) and (self.havecl or self.havemf):
            self.err(linkrev, _("empty or missing %s") % name)
            return

        d = obj.checksize()
        if d[0]:
            self.err(None, _("data length off by %d bytes") % d[0], name)
        if d[1]:
            self.err(None, _("index contains %d extra bytes") % d[1], name)

        if obj.version != revlog.REVLOGV0:
            if not self.revlogv1:
                self.warn(_("warning: `%s' uses revlog format 1") % name)
        elif self.revlogv1:
            self.warn(_("warning: `%s' uses revlog format 0") % name)

    def checkentry(self, obj, i, node, seen, linkrevs, f):
        """Check revision ``i`` (``node``) of ``obj``: linkrev, parents and
        duplicates.

        ``seen`` maps already-visited nodes to their revision number and is
        updated in place. ``linkrevs`` holds the acceptable linkrevs and
        ``f`` is the label used in messages.

        Returns the entry's linkrev, or None when it cannot be trusted.
        """
        lr = obj.linkrev(obj.rev(node))
        if lr < 0 or (self.havecl and lr not in linkrevs):
            if lr < 0 or lr >= len(self.repo.changelog):
                msg = _("rev %d points to nonexistent changeset %d")
            else:
                msg = _("rev %d points to unexpected changeset %d")
            self.err(None, msg % (i, lr), f)
            if linkrevs:
                if f and len(linkrevs) > 1:
                    try:
                        # attempt to filter down to real linkrevs
                        linkrevs = [
                            rev for rev in linkrevs
                            if self.lrugetctx(rev)[f].filenode() == node]
                    except Exception:
                        # deliberate best effort: keep the unfiltered list
                        pass
                self.warn(_(" (expected %s)") % " ".join
                          (map(pycompat.bytestr, linkrevs)))
            lr = None # can't be trusted

        try:
            p1, p2 = obj.parents(node)
            if p1 not in seen and p1 != nullid:
                self.err(lr, _("unknown parent 1 %s of %s") %
                         (short(p1), short(node)), f)
            if p2 not in seen and p2 != nullid:
                self.err(lr, _("unknown parent 2 %s of %s") %
                         (short(p2), short(node)), f)
        except Exception as inst:
            self.exc(lr, _("checking parents of %s") % short(node), inst, f)

        if node in seen:
            self.err(lr, _("duplicate revision %d (%d)") % (i, seen[node]), f)
        seen[node] = i
        return lr

    def verify(self):
        """Run every check and print a summary.

        Raises error.Abort when the repository URL does not start with
        ``file:``. Returns 1 when integrity errors were found; otherwise
        falls off the end (returns None).
        """
        repo = self.repo

        ui = repo.ui

        if not repo.url().startswith('file:'):
            raise error.Abort(_("cannot verify bundle or remote repos"))

        if os.path.exists(repo.sjoin("journal")):
            ui.warn(_("abandoned transaction found - run hg recover\n"))

        if ui.verbose or not self.revlogv1:
            ui.status(_("repository uses revlog format %d\n") %
                      (1 if self.revlogv1 else 0))

        mflinkrevs, filelinkrevs = self._verifychangelog()

        filenodes = self._verifymanifest(mflinkrevs)
        del mflinkrevs

        self._crosscheckfiles(filelinkrevs, filenodes)

        totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)

        ui.status(_("checked %d changesets with %d changes to %d files\n") %
                  (len(repo.changelog), filerevisions, totalfiles))
        if self.warnings:
            ui.warn(_("%d warnings encountered!\n") % self.warnings)
        if self.fncachewarned:
            ui.warn(_('hint: run "hg debugrebuildfncache" to recover from '
                      'corrupt fncache\n'))
        if self.errors:
            ui.warn(_("%d integrity errors encountered!\n") % self.errors)
            if self.badrevs:
                ui.warn(_("(first damaged changeset appears to be %d)\n")
                        % min(self.badrevs))
            return 1

    def _verifychangelog(self):
        """Check every changelog entry.

        Returns ``(mflinkrevs, filelinkrevs)``: a dict mapping each
        referenced manifest node to the changelog revisions referring to it,
        and a dict mapping each matched (normalized) file path to the
        changelog revisions touching it.
        """
        ui = self.ui
        repo = self.repo
        match = self.match
        cl = repo.changelog

        ui.status(_("checking changesets\n"))
        mflinkrevs = {}
        filelinkrevs = {}
        seen = {}
        self.checklog(cl, "changelog", 0)
        progress = ui.makeprogress(_('checking'), unit=_('changesets'),
                                   total=len(repo))
        for i in repo:
            progress.update(i)
            n = cl.node(i)
            self.checkentry(cl, i, n, seen, [i], "changelog")

            try:
                changes = cl.read(n)
                if changes[0] != nullid:
                    mflinkrevs.setdefault(changes[0], []).append(i)
                    self.refersmf = True
                for f in changes[3]:
                    if match(f):
                        filelinkrevs.setdefault(_normpath(f), []).append(i)
            except Exception as inst:
                self.refersmf = True
                self.exc(i, _("unpacking changeset %s") % short(n), inst)
        progress.complete()
        return mflinkrevs, filelinkrevs

    def _verifymanifest(self, mflinkrevs, dir="", storefiles=None,
                        subdirprogress=None):
        """Check the manifest storage for ``dir`` ("" is the root manifest)
        and recurse into the directory manifests it references.

        ``mflinkrevs`` maps expected manifest nodes to the linkrevs that
        refer to them; entries are deleted as they are found, so leftovers
        are reported as unknown revisions. ``storefiles`` and
        ``subdirprogress`` are only supplied on recursive calls.

        Returns a dict mapping file path to ``{filenode: linkrev}``.
        """
        repo = self.repo
        ui = self.ui
        match = self.match
        mfl = self.repo.manifestlog
        mf = mfl.getstorage(dir)

        if not dir:
            self.ui.status(_("checking manifests\n"))

        filenodes = {}
        subdirnodes = {}
        seen = {}
        label = "manifest"
        if dir:
            label = dir
            revlogfiles = mf.files()
            storefiles.difference_update(revlogfiles)
            if subdirprogress: # should be true since we're in a subdirectory
                subdirprogress.increment()
        if self.refersmf:
            # Do not check manifest if there are only changelog entries with
            # null manifests.
            self.checklog(mf, label, 0)
        progress = ui.makeprogress(_('checking'), unit=_('manifests'),
                                   total=len(mf))
        for i in mf:
            if not dir:
                progress.update(i)
            n = mf.node(i)
            lr = self.checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
            if n in mflinkrevs:
                del mflinkrevs[n]
            elif dir:
                self.err(lr, _("%s not in parent-directory manifest") %
                         short(n), label)
            else:
                self.err(lr, _("%s not in changesets") % short(n), label)

            try:
                mfdelta = mfl.get(dir, n).readdelta(shallow=True)
                for f, fn, fl in mfdelta.iterentries():
                    if not f:
                        self.err(lr, _("entry without name in manifest"))
                    elif f == "/dev/null": # ignore this in very old repos
                        continue
                    fullpath = dir + _normpath(f)
                    if fl == 't':
                        if not match.visitdir(fullpath):
                            continue
                        subdirnodes.setdefault(fullpath + '/', {}).setdefault(
                            fn, []).append(lr)
                    else:
                        if not match(fullpath):
                            continue
                        filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
            except Exception as inst:
                self.exc(lr, _("reading delta %s") % short(n), inst, label)
        if not dir:
            progress.complete()

        if self.havemf:
            # Linkrevs collected for directory manifests may be None, hence
            # the explicit sort key.
            leftovers = sorted(((c, m) for m in mflinkrevs
                                for c in mflinkrevs[m]),
                               key=self._linkrevsortkey)
            for c, m in leftovers:
                if dir:
                    self.err(c, _("parent-directory manifest refers to unknown "
                                  "revision %s") % short(m), label)
                else:
                    self.err(c, _("changeset refers to unknown revision %s") %
                             short(m), label)

        if not dir and subdirnodes:
            self.ui.status(_("checking directory manifests\n"))
            storefiles = set()
            subdirs = set()
            revlogv1 = self.revlogv1
            for f, f2, size in repo.store.datafiles():
                if not f:
                    self.err(None, _("cannot decode filename '%s'") % f2)
                elif (size > 0 or not revlogv1) and f.startswith('meta/'):
                    storefiles.add(_normpath(f))
                    subdirs.add(os.path.dirname(f))
            subdirprogress = ui.makeprogress(_('checking'), unit=_('manifests'),
                                             total=len(subdirs))

        for subdir, linkrevs in subdirnodes.iteritems():
            subdirfilenodes = self._verifymanifest(linkrevs, subdir, storefiles,
                                                   subdirprogress)
            for f, onefilenodes in subdirfilenodes.iteritems():
                filenodes.setdefault(f, {}).update(onefilenodes)

        if not dir and subdirnodes:
            subdirprogress.complete()
            if self.warnorphanstorefiles:
                # whatever no directory manifest claimed is an orphan
                for f in sorted(storefiles):
                    self.warn(_("warning: orphan data file '%s'") % f)

        return filenodes

    def _crosscheckfiles(self, filelinkrevs, filenodes):
        """Report files named in changesets but absent from manifests, and
        files in manifests but absent from changesets."""
        repo = self.repo
        ui = self.ui
        ui.status(_("crosschecking files in changesets and manifests\n"))

        total = len(filelinkrevs) + len(filenodes)
        progress = ui.makeprogress(_('crosschecking'), unit=_('files'),
                                   total=total)
        if self.havemf:
            for f in sorted(filelinkrevs):
                progress.increment()
                if f not in filenodes:
                    lr = filelinkrevs[f][0]
                    self.err(lr, _("in changeset but not in manifest"), f)

        if self.havecl:
            for f in sorted(filenodes):
                progress.increment()
                if f not in filelinkrevs:
                    try:
                        fl = repo.file(f)
                        lr = min(fl.linkrev(fl.rev(n)) for n in filenodes[f])
                    except Exception:
                        # no usable linkrev; err() will print '?'
                        lr = None
                    self.err(lr, _("in manifest but not in changeset"), f)

        progress.complete()

    def _verifyfiles(self, filenodes, filelinkrevs):
        """Check the storage of every file named in ``filenodes`` or
        ``filelinkrevs``.

        Entries of ``filenodes[f]`` are deleted as the matching file
        revisions are found; leftovers are reported as unknown revisions.

        Returns ``(number of files, number of file revisions visited)``.
        """
        repo = self.repo
        ui = self.ui
        lrugetctx = self.lrugetctx
        revlogv1 = self.revlogv1
        havemf = self.havemf
        ui.status(_("checking files\n"))

        storefiles = set()
        for f, f2, size in repo.store.datafiles():
            if not f:
                self.err(None, _("cannot decode filename '%s'") % f2)
            elif (size > 0 or not revlogv1) and f.startswith('data/'):
                storefiles.add(_normpath(f))

        state = {
            # TODO this assumes revlog storage for changelog.
            'expectedversion': self.repo.changelog.version & 0xFFFF,
            'skipflags': self.skipflags,
            # experimental config: censor.policy
            'erroroncensored': ui.config('censor', 'policy') == 'abort',
        }

        files = sorted(set(filenodes) | set(filelinkrevs))
        revisions = 0
        progress = ui.makeprogress(_('checking'), unit=_('files'),
                                   total=len(files))
        for i, f in enumerate(files):
            progress.update(i, item=f)
            try:
                linkrevs = filelinkrevs[f]
            except KeyError:
                # in manifest but not in changelog
                linkrevs = []

            if linkrevs:
                lr = linkrevs[0]
            else:
                lr = None

            try:
                fl = repo.file(f)
            except error.StorageError as e:
                self.err(lr, _("broken revlog! (%s)") % e, f)
                continue

            for ff in fl.files():
                try:
                    storefiles.remove(ff)
                except KeyError:
                    if self.warnorphanstorefiles:
                        self.warn(_(" warning: revlog '%s' not in fncache!") %
                                  ff)
                        self.fncachewarned = True

            if not len(fl) and (self.havecl or self.havemf):
                self.err(lr, _("empty or missing %s") % f)
            else:
                # Guard against implementations not setting this.
                state['skipread'] = set()
                for problem in fl.verifyintegrity(state):
                    if problem.node is not None:
                        linkrev = fl.linkrev(fl.rev(problem.node))
                    else:
                        linkrev = None

                    if problem.warning:
                        self.warn(problem.warning)
                    elif problem.error:
                        self.err(linkrev if linkrev is not None else lr,
                                 problem.error, f)
                    else:
                        raise error.ProgrammingError(
                            'problem instance does not set warning or error '
                            'attribute: %s' % problem.msg)

            seen = {}
            for i in fl:
                revisions += 1
                n = fl.node(i)
                lr = self.checkentry(fl, i, n, seen, linkrevs, f)
                if f in filenodes:
                    if havemf and n not in filenodes[f]:
                        self.err(lr, _("%s not in manifests") % (short(n)), f)
                    else:
                        del filenodes[f][n]

                if n in state['skipread']:
                    continue

                # check renames
                try:
                    # This requires resolving fulltext (at least on revlogs). We
                    # may want ``verifyintegrity()`` to pass a set of nodes with
                    # rename metadata as an optimization.
                    rp = fl.renamed(n)
                    if rp:
                        if lr is not None and ui.verbose:
                            ctx = lrugetctx(lr)
                            if not any(rp[0] in pctx for pctx in ctx.parents()):
                                self.warn(_("warning: copy source of '%s' not"
                                            " in parents of %s") % (f, ctx))
                        fl2 = repo.file(rp[0])
                        if not len(fl2):
                            self.err(lr, _("empty or missing copy source "
                                           "revlog %s:%s") %
                                     (rp[0], short(rp[1])), f)
                        elif rp[1] == nullid:
                            ui.note(_("warning: %s@%s: copy source"
                                      " revision is nullid %s:%s\n")
                                    % (f, lr, rp[0], short(rp[1])))
                        else:
                            # raises if the copy source revision is unknown
                            fl2.rev(rp[1])
                except Exception as inst:
                    self.exc(lr, _("checking rename of %s") % short(n), inst, f)

            # cross-check
            if f in filenodes:
                # Linkrevs recorded from the manifests may be None, hence
                # the explicit sort key.
                fns = [(v, k) for k, v in filenodes[f].iteritems()]
                for lr, node in sorted(fns, key=self._linkrevsortkey):
                    self.err(lr, _("manifest refers to unknown revision %s") %
                             short(node), f)
        progress.complete()

        if self.warnorphanstorefiles:
            for f in sorted(storefiles):
                self.warn(_("warning: orphan data file '%s'") % f)

        return len(files), revisions
General Comments 0
You need to be logged in to leave comments. Login now