##// END OF EJS Templates
verify: move file cross checking to its own function...
Durham Goode -
r27645:df8973e1 default
parent child Browse files
Show More
@@ -1,373 +1,375 b''
1 1 # verify.py - repository integrity checking for Mercurial
2 2 #
3 3 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11
12 12 from .i18n import _
13 13 from .node import (
14 14 nullid,
15 15 short,
16 16 )
17 17
18 18 from . import (
19 19 error,
20 20 revlog,
21 21 util,
22 22 )
23 23
24 24 def verify(repo):
25 25 lock = repo.lock()
26 26 try:
27 27 return verifier(repo).verify()
28 28 finally:
29 29 lock.release()
30 30
31 31 def _normpath(f):
32 32 # under hg < 2.4, convert didn't sanitize paths properly, so a
33 33 # converted repo may contain repeated slashes
34 34 while '//' in f:
35 35 f = f.replace('//', '/')
36 36 return f
37 37
38 38 def _validpath(repo, path):
39 39 """Returns False if a path should NOT be treated as part of a repo.
40 40
41 41 For all in-core cases, this returns True, as we have no way for a
42 42 path to be mentioned in the history but not actually be
43 43 relevant. For narrow clones, this is important because many
44 44 filelogs will be missing, and changelog entries may mention
45 45 modified files that are outside the narrow scope.
46 46 """
47 47 return True
48 48
49 49 class verifier(object):
50 50 def __init__(self, repo):
51 51 self.repo = repo.unfiltered()
52 52 self.ui = repo.ui
53 53 self.badrevs = set()
54 54 self.errors = 0
55 55 self.warnings = 0
56 56 self.havecl = len(repo.changelog) > 0
57 57 self.havemf = len(repo.manifest) > 0
58 58 self.revlogv1 = repo.changelog.version != revlog.REVLOGV0
59 59 self.lrugetctx = util.lrucachefunc(repo.changectx)
60 60 self.refersmf = False
61 61 self.fncachewarned = False
62 62
63 63 def warn(self, msg):
64 64 self.ui.warn(msg + "\n")
65 65 self.warnings += 1
66 66
67 67 def err(self, linkrev, msg, filename=None):
68 68 if linkrev is not None:
69 69 self.badrevs.add(linkrev)
70 70 else:
71 71 linkrev = '?'
72 72 msg = "%s: %s" % (linkrev, msg)
73 73 if filename:
74 74 msg = "%s@%s" % (filename, msg)
75 75 self.ui.warn(" " + msg + "\n")
76 76 self.errors += 1
77 77
78 78 def exc(self, linkrev, msg, inst, filename=None):
79 79 if not str(inst):
80 80 inst = repr(inst)
81 81 self.err(linkrev, "%s: %s" % (msg, inst), filename)
82 82
83 83 def checklog(self, obj, name, linkrev):
84 84 if not len(obj) and (self.havecl or self.havemf):
85 85 self.err(linkrev, _("empty or missing %s") % name)
86 86 return
87 87
88 88 d = obj.checksize()
89 89 if d[0]:
90 90 self.err(None, _("data length off by %d bytes") % d[0], name)
91 91 if d[1]:
92 92 self.err(None, _("index contains %d extra bytes") % d[1], name)
93 93
94 94 if obj.version != revlog.REVLOGV0:
95 95 if not self.revlogv1:
96 96 self.warn(_("warning: `%s' uses revlog format 1") % name)
97 97 elif self.revlogv1:
98 98 self.warn(_("warning: `%s' uses revlog format 0") % name)
99 99
100 100 def checkentry(self, obj, i, node, seen, linkrevs, f):
101 101 lr = obj.linkrev(obj.rev(node))
102 102 if lr < 0 or (self.havecl and lr not in linkrevs):
103 103 if lr < 0 or lr >= len(self.repo.changelog):
104 104 msg = _("rev %d points to nonexistent changeset %d")
105 105 else:
106 106 msg = _("rev %d points to unexpected changeset %d")
107 107 self.err(None, msg % (i, lr), f)
108 108 if linkrevs:
109 109 if f and len(linkrevs) > 1:
110 110 try:
111 111 # attempt to filter down to real linkrevs
112 112 linkrevs = [l for l in linkrevs
113 113 if self.lrugetctx(l)[f].filenode() == node]
114 114 except Exception:
115 115 pass
116 116 self.warn(_(" (expected %s)") % " ".join(map(str, linkrevs)))
117 117 lr = None # can't be trusted
118 118
119 119 try:
120 120 p1, p2 = obj.parents(node)
121 121 if p1 not in seen and p1 != nullid:
122 122 self.err(lr, _("unknown parent 1 %s of %s") %
123 123 (short(p1), short(node)), f)
124 124 if p2 not in seen and p2 != nullid:
125 125 self.err(lr, _("unknown parent 2 %s of %s") %
126 126 (short(p2), short(node)), f)
127 127 except Exception as inst:
128 128 self.exc(lr, _("checking parents of %s") % short(node), inst, f)
129 129
130 130 if node in seen:
131 131 self.err(lr, _("duplicate revision %d (%d)") % (i, seen[node]), f)
132 132 seen[node] = i
133 133 return lr
134 134
135 135 def verify(self):
136 136 repo = self.repo
137 137 mflinkrevs = {}
138 138 filelinkrevs = {}
139 139 filenodes = {}
140 140 revisions = 0
141 141 badrevs = self.badrevs
142 142 ui = repo.ui
143 143 cl = repo.changelog
144 144 mf = repo.manifest
145 145
146 146 if not repo.url().startswith('file:'):
147 147 raise error.Abort(_("cannot verify bundle or remote repos"))
148 148
149 149 if os.path.exists(repo.sjoin("journal")):
150 150 ui.warn(_("abandoned transaction found - run hg recover\n"))
151 151
152 152 revlogv1 = self.revlogv1
153 153 if ui.verbose or not revlogv1:
154 154 ui.status(_("repository uses revlog format %d\n") %
155 155 (revlogv1 and 1 or 0))
156 156
157 havecl = self.havecl
158 havemf = self.havemf
159
160 157 ui.status(_("checking changesets\n"))
161 158 seen = {}
162 159 self.checklog(cl, "changelog", 0)
163 160 total = len(repo)
164 161 for i in repo:
165 162 ui.progress(_('checking'), i, total=total, unit=_('changesets'))
166 163 n = cl.node(i)
167 164 self.checkentry(cl, i, n, seen, [i], "changelog")
168 165
169 166 try:
170 167 changes = cl.read(n)
171 168 if changes[0] != nullid:
172 169 mflinkrevs.setdefault(changes[0], []).append(i)
173 170 self.refersmf = True
174 171 for f in changes[3]:
175 172 if _validpath(repo, f):
176 173 filelinkrevs.setdefault(_normpath(f), []).append(i)
177 174 except Exception as inst:
178 175 self.refersmf = True
179 176 self.exc(i, _("unpacking changeset %s") % short(n), inst)
180 177 ui.progress(_('checking'), None)
181 178
182 179 ui.status(_("checking manifests\n"))
183 180 seen = {}
184 181 if self.refersmf:
185 182 # Do not check manifest if there are only changelog entries with
186 183 # null manifests.
187 184 self.checklog(mf, "manifest", 0)
188 185 total = len(mf)
189 186 for i in mf:
190 187 ui.progress(_('checking'), i, total=total, unit=_('manifests'))
191 188 n = mf.node(i)
192 189 lr = self.checkentry(mf, i, n, seen, mflinkrevs.get(n, []),
193 190 "manifest")
194 191 if n in mflinkrevs:
195 192 del mflinkrevs[n]
196 193 else:
197 194 self.err(lr, _("%s not in changesets") % short(n), "manifest")
198 195
199 196 try:
200 197 for f, fn in mf.readdelta(n).iteritems():
201 198 if not f:
202 199 self.err(lr, _("file without name in manifest"))
203 200 elif f != "/dev/null": # ignore this in very old repos
204 201 if _validpath(repo, f):
205 202 filenodes.setdefault(
206 203 _normpath(f), {}).setdefault(fn, lr)
207 204 except Exception as inst:
208 205 self.exc(lr, _("reading manifest delta %s") % short(n), inst)
209 206 ui.progress(_('checking'), None)
210 207
208 self._crosscheckfiles(mflinkrevs, filelinkrevs, filenodes)
209
210 totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)
211 revisions += filerevisions
212
213 ui.status(_("%d files, %d changesets, %d total revisions\n") %
214 (totalfiles, len(cl), revisions))
215 if self.warnings:
216 ui.warn(_("%d warnings encountered!\n") % self.warnings)
217 if self.fncachewarned:
218 ui.warn(_('hint: run "hg debugrebuildfncache" to recover from '
219 'corrupt fncache\n'))
220 if self.errors:
221 ui.warn(_("%d integrity errors encountered!\n") % self.errors)
222 if badrevs:
223 ui.warn(_("(first damaged changeset appears to be %d)\n")
224 % min(badrevs))
225 return 1
226
227 def _crosscheckfiles(self, mflinkrevs, filelinkrevs, filenodes):
228 repo = self.repo
229 ui = self.ui
211 230 ui.status(_("crosschecking files in changesets and manifests\n"))
212 231
213 232 total = len(mflinkrevs) + len(filelinkrevs) + len(filenodes)
214 233 count = 0
215 if havemf:
234 if self.havemf:
216 235 for c, m in sorted([(c, m) for m in mflinkrevs
217 236 for c in mflinkrevs[m]]):
218 237 count += 1
219 238 if m == nullid:
220 239 continue
221 240 ui.progress(_('crosschecking'), count, total=total)
222 241 self.err(c, _("changeset refers to unknown manifest %s") %
223 242 short(m))
224 243 mflinkrevs = None # del is bad here due to scope issues
225 244
226 245 for f in sorted(filelinkrevs):
227 246 count += 1
228 247 ui.progress(_('crosschecking'), count, total=total)
229 248 if f not in filenodes:
230 249 lr = filelinkrevs[f][0]
231 250 self.err(lr, _("in changeset but not in manifest"), f)
232 251
233 if havecl:
252 if self.havecl:
234 253 for f in sorted(filenodes):
235 254 count += 1
236 255 ui.progress(_('crosschecking'), count, total=total)
237 256 if f not in filelinkrevs:
238 257 try:
239 258 fl = repo.file(f)
240 259 lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
241 260 except Exception:
242 261 lr = None
243 262 self.err(lr, _("in manifest but not in changeset"), f)
244 263
245 264 ui.progress(_('crosschecking'), None)
246 265
247 totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)
248 revisions += filerevisions
249
250 ui.status(_("%d files, %d changesets, %d total revisions\n") %
251 (totalfiles, len(cl), revisions))
252 if self.warnings:
253 ui.warn(_("%d warnings encountered!\n") % self.warnings)
254 if self.fncachewarned:
255 ui.warn(_('hint: run "hg debugrebuildfncache" to recover from '
256 'corrupt fncache\n'))
257 if self.errors:
258 ui.warn(_("%d integrity errors encountered!\n") % self.errors)
259 if badrevs:
260 ui.warn(_("(first damaged changeset appears to be %d)\n")
261 % min(badrevs))
262 return 1
263
264 266 def _verifyfiles(self, filenodes, filelinkrevs):
265 267 repo = self.repo
266 268 ui = self.ui
267 269 lrugetctx = self.lrugetctx
268 270 revlogv1 = self.revlogv1
269 271 havemf = self.havemf
270 272 ui.status(_("checking files\n"))
271 273
272 274 storefiles = set()
273 275 for f, f2, size in repo.store.datafiles():
274 276 if not f:
275 277 self.err(None, _("cannot decode filename '%s'") % f2)
276 278 elif size > 0 or not revlogv1:
277 279 storefiles.add(_normpath(f))
278 280
279 281 files = sorted(set(filenodes) | set(filelinkrevs))
280 282 total = len(files)
281 283 revisions = 0
282 284 for i, f in enumerate(files):
283 285 ui.progress(_('checking'), i, item=f, total=total)
284 286 try:
285 287 linkrevs = filelinkrevs[f]
286 288 except KeyError:
287 289 # in manifest but not in changelog
288 290 linkrevs = []
289 291
290 292 if linkrevs:
291 293 lr = linkrevs[0]
292 294 else:
293 295 lr = None
294 296
295 297 try:
296 298 fl = repo.file(f)
297 299 except error.RevlogError as e:
298 300 self.err(lr, _("broken revlog! (%s)") % e, f)
299 301 continue
300 302
301 303 for ff in fl.files():
302 304 try:
303 305 storefiles.remove(ff)
304 306 except KeyError:
305 307 self.warn(_(" warning: revlog '%s' not in fncache!") % ff)
306 308 self.fncachewarned = True
307 309
308 310 self.checklog(fl, f, lr)
309 311 seen = {}
310 312 rp = None
311 313 for i in fl:
312 314 revisions += 1
313 315 n = fl.node(i)
314 316 lr = self.checkentry(fl, i, n, seen, linkrevs, f)
315 317 if f in filenodes:
316 318 if havemf and n not in filenodes[f]:
317 319 self.err(lr, _("%s not in manifests") % (short(n)), f)
318 320 else:
319 321 del filenodes[f][n]
320 322
321 323 # verify contents
322 324 try:
323 325 l = len(fl.read(n))
324 326 rp = fl.renamed(n)
325 327 if l != fl.size(i):
326 328 if len(fl.revision(n)) != fl.size(i):
327 329 self.err(lr, _("unpacked size is %s, %s expected") %
328 330 (l, fl.size(i)), f)
329 331 except error.CensoredNodeError:
330 332 # experimental config: censor.policy
331 333 if ui.config("censor", "policy", "abort") == "abort":
332 334 self.err(lr, _("censored file data"), f)
333 335 except Exception as inst:
334 336 self.exc(lr, _("unpacking %s") % short(n), inst, f)
335 337
336 338 # check renames
337 339 try:
338 340 if rp:
339 341 if lr is not None and ui.verbose:
340 342 ctx = lrugetctx(lr)
341 343 found = False
342 344 for pctx in ctx.parents():
343 345 if rp[0] in pctx:
344 346 found = True
345 347 break
346 348 if not found:
347 349 self.warn(_("warning: copy source of '%s' not"
348 350 " in parents of %s") % (f, ctx))
349 351 fl2 = repo.file(rp[0])
350 352 if not len(fl2):
351 353 self.err(lr, _("empty or missing copy source "
352 354 "revlog %s:%s") % (rp[0], short(rp[1])), f)
353 355 elif rp[1] == nullid:
354 356 ui.note(_("warning: %s@%s: copy source"
355 357 " revision is nullid %s:%s\n")
356 358 % (f, lr, rp[0], short(rp[1])))
357 359 else:
358 360 fl2.rev(rp[1])
359 361 except Exception as inst:
360 362 self.exc(lr, _("checking rename of %s") % short(n), inst, f)
361 363
362 364 # cross-check
363 365 if f in filenodes:
364 366 fns = [(lr, n) for n, lr in filenodes[f].iteritems()]
365 367 for lr, node in sorted(fns):
366 368 self.err(lr, _("%s in manifests not found") % short(node),
367 369 f)
368 370 ui.progress(_('checking'), None)
369 371
370 372 for f in storefiles:
371 373 self.warn(_("warning: orphan revlog '%s'") % f)
372 374
373 375 return len(files), revisions
General Comments 0
You need to be logged in to leave comments. Login now