##// END OF EJS Templates
verify: move cross-checking of changeset/manifest out of _crosscheckfiles()...
Martin von Zweigbergk -
r28111:06205989 default
parent child Browse files
Show More
@@ -1,384 +1,383
1 1 # verify.py - repository integrity checking for Mercurial
2 2 #
3 3 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11
12 12 from .i18n import _
13 13 from .node import (
14 14 nullid,
15 15 short,
16 16 )
17 17
18 18 from . import (
19 19 error,
20 20 revlog,
21 21 util,
22 22 )
23 23
def verify(repo):
    """Check the integrity of ``repo`` while holding its lock.

    Returns whatever ``verifier.verify()`` returns: 1 when integrity errors
    were found, otherwise None.
    """
    with repo.lock():
        checker = verifier(repo)
        return checker.verify()
27 27
28 28 def _normpath(f):
29 29 # under hg < 2.4, convert didn't sanitize paths properly, so a
30 30 # converted repo may contain repeated slashes
31 31 while '//' in f:
32 32 f = f.replace('//', '/')
33 33 return f
34 34
35 35 def _validpath(repo, path):
36 36 """Returns False if a path should NOT be treated as part of a repo.
37 37
38 38 For all in-core cases, this returns True, as we have no way for a
39 39 path to be mentioned in the history but not actually be
40 40 relevant. For narrow clones, this is important because many
41 41 filelogs will be missing, and changelog entries may mention
42 42 modified files that are outside the narrow scope.
43 43 """
44 44 return True
45 45
class verifier(object):
    """Check the changelog, manifest and filelogs of a repository.

    Problems are reported through ``ui.warn`` and tallied in ``errors`` and
    ``warnings``.  ``badrevs`` collects the changeset revisions that errors
    were attributed to, so the earliest damaged changeset can be reported.
    """

    def __init__(self, repo):
        # All checks run against the unfiltered view of the repository.
        self.repo = repo.unfiltered()
        self.ui = repo.ui
        self.badrevs = set()
        self.errors = 0
        self.warnings = 0
        self.havecl = len(repo.changelog) > 0
        self.havemf = len(repo.manifest) > 0
        self.revlogv1 = repo.changelog.version != revlog.REVLOGV0
        self.lrugetctx = util.lrucachefunc(repo.changectx)
        # Set by _verifychangelog() once a changeset names a non-null
        # manifest (or cannot be read at all).
        self.refersmf = False
        self.fncachewarned = False

    def warn(self, msg):
        """Emit ``msg`` as a warning line and count it."""
        self.ui.warn(msg + "\n")
        self.warnings += 1

    def err(self, linkrev, msg, filename=None):
        """Report an integrity error and count it.

        ``linkrev`` is the changeset revision the error is attributed to; it
        is remembered in ``badrevs``.  Pass None when the revision is not
        known, in which case '?' is printed instead.  When ``filename`` is
        given the message is prefixed with ``filename@``.
        """
        if linkrev is not None:
            self.badrevs.add(linkrev)
        else:
            linkrev = '?'
        msg = "%s: %s" % (linkrev, msg)
        if filename:
            msg = "%s@%s" % (filename, msg)
        self.ui.warn(" " + msg + "\n")
        self.errors += 1

    def exc(self, linkrev, msg, inst, filename=None):
        """Report exception ``inst`` as an integrity error.

        Falls back to ``repr(inst)`` when the exception has an empty string
        form, so the message never ends in a bare colon.
        """
        if not str(inst):
            inst = repr(inst)
        self.err(linkrev, "%s: %s" % (msg, inst), filename)

    def checklog(self, obj, name, linkrev):
        """Run size and format-version sanity checks on revlog ``obj``.

        ``name`` labels the revlog in messages; ``linkrev`` is the revision
        blamed when the revlog is empty or missing.
        """
        if not len(obj) and (self.havecl or self.havemf):
            self.err(linkrev, _("empty or missing %s") % name)
            return

        d = obj.checksize()
        if d[0]:
            self.err(None, _("data length off by %d bytes") % d[0], name)
        if d[1]:
            self.err(None, _("index contains %d extra bytes") % d[1], name)

        # Warn when this revlog's format differs from the changelog's.
        if obj.version != revlog.REVLOGV0:
            if not self.revlogv1:
                self.warn(_("warning: `%s' uses revlog format 1") % name)
        elif self.revlogv1:
            self.warn(_("warning: `%s' uses revlog format 0") % name)

    def checkentry(self, obj, i, node, seen, linkrevs, f):
        """Check linkrev, parents and uniqueness of one revlog entry.

        ``obj`` is the revlog, ``i``/``node`` identify the entry, ``seen``
        maps already visited nodes to their revision (and is updated here),
        ``linkrevs`` lists the changeset revisions expected to introduce the
        entry and ``f`` names the revlog in messages.

        Returns the entry's linkrev, or None when it cannot be trusted.
        """
        lr = obj.linkrev(obj.rev(node))
        if lr < 0 or (self.havecl and lr not in linkrevs):
            if lr < 0 or lr >= len(self.repo.changelog):
                msg = _("rev %d points to nonexistent changeset %d")
            else:
                msg = _("rev %d points to unexpected changeset %d")
            self.err(None, msg % (i, lr), f)
            if linkrevs:
                if f and len(linkrevs) > 1:
                    try:
                        # attempt to filter down to real linkrevs
                        linkrevs = [r for r in linkrevs
                                    if self.lrugetctx(r)[f].filenode() == node]
                    except Exception:
                        # Best effort only: fall back to the unfiltered
                        # candidates if the contexts cannot be read.
                        pass
                self.warn(_(" (expected %s)") % " ".join(map(str, linkrevs)))
            lr = None # can't be trusted

        try:
            p1, p2 = obj.parents(node)
            if p1 not in seen and p1 != nullid:
                self.err(lr, _("unknown parent 1 %s of %s") %
                         (short(p1), short(node)), f)
            if p2 not in seen and p2 != nullid:
                self.err(lr, _("unknown parent 2 %s of %s") %
                         (short(p2), short(node)), f)
        except Exception as inst:
            self.exc(lr, _("checking parents of %s") % short(node), inst, f)

        if node in seen:
            self.err(lr, _("duplicate revision %d (%d)") % (i, seen[node]), f)
        seen[node] = i
        return lr

    def verify(self):
        """Run every verification phase and print a summary.

        Raises error.Abort when the repository URL is not a ``file:`` URL.
        Returns 1 when integrity errors were found, otherwise None.
        """
        repo = self.repo

        ui = repo.ui

        if not repo.url().startswith('file:'):
            raise error.Abort(_("cannot verify bundle or remote repos"))

        if os.path.exists(repo.sjoin("journal")):
            ui.warn(_("abandoned transaction found - run hg recover\n"))

        if ui.verbose or not self.revlogv1:
            ui.status(_("repository uses revlog format %d\n") %
                      (1 if self.revlogv1 else 0))

        mflinkrevs, filelinkrevs = self._verifychangelog()

        filenodes = self._verifymanifest(mflinkrevs)
        # The manifest phase is the last user of mflinkrevs.
        del mflinkrevs

        self._crosscheckfiles(filelinkrevs, filenodes)

        totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)

        ui.status(_("%d files, %d changesets, %d total revisions\n") %
                  (totalfiles, len(repo.changelog), filerevisions))
        if self.warnings:
            ui.warn(_("%d warnings encountered!\n") % self.warnings)
        if self.fncachewarned:
            ui.warn(_('hint: run "hg debugrebuildfncache" to recover from '
                      'corrupt fncache\n'))
        if self.errors:
            ui.warn(_("%d integrity errors encountered!\n") % self.errors)
            if self.badrevs:
                ui.warn(_("(first damaged changeset appears to be %d)\n")
                        % min(self.badrevs))
            return 1

    def _verifychangelog(self):
        """Check every changelog entry.

        Returns ``(mflinkrevs, filelinkrevs)``: dicts mapping each referenced
        manifest node, respectively each normalized file path, to the list
        of changeset revisions that mention it.
        """
        ui = self.ui
        repo = self.repo
        cl = repo.changelog

        ui.status(_("checking changesets\n"))
        mflinkrevs = {}
        filelinkrevs = {}
        seen = {}
        self.checklog(cl, "changelog", 0)
        total = len(repo)
        for i in repo:
            ui.progress(_('checking'), i, total=total, unit=_('changesets'))
            n = cl.node(i)
            self.checkentry(cl, i, n, seen, [i], "changelog")

            try:
                changes = cl.read(n)
                if changes[0] != nullid:
                    mflinkrevs.setdefault(changes[0], []).append(i)
                    self.refersmf = True
                for f in changes[3]:
                    if _validpath(repo, f):
                        filelinkrevs.setdefault(_normpath(f), []).append(i)
            except Exception as inst:
                # An unreadable changeset might reference a manifest, so
                # keep the manifest checks enabled.
                self.refersmf = True
                self.exc(i, _("unpacking changeset %s") % short(n), inst)
        ui.progress(_('checking'), None)
        return mflinkrevs, filelinkrevs

    def _verifymanifest(self, mflinkrevs):
        """Check every manifest entry against the changelog's references.

        ``mflinkrevs`` is consumed: every manifest node that is found gets
        removed, and whatever remains afterwards is reported as a changeset
        referring to an unknown manifest.

        Returns ``filenodes``, mapping each normalized file path to a dict
        of ``{filenode: linkrev}`` collected from the manifest deltas.
        """
        repo = self.repo
        ui = self.ui
        mf = self.repo.manifest

        ui.status(_("checking manifests\n"))
        filenodes = {}
        seen = {}
        if self.refersmf:
            # Do not check manifest if there are only changelog entries with
            # null manifests.
            self.checklog(mf, "manifest", 0)
        total = len(mf)
        for i in mf:
            ui.progress(_('checking'), i, total=total, unit=_('manifests'))
            n = mf.node(i)
            lr = self.checkentry(mf, i, n, seen, mflinkrevs.get(n, []),
                                 "manifest")
            if n in mflinkrevs:
                del mflinkrevs[n]
            else:
                self.err(lr, _("%s not in changesets") % short(n), "manifest")

            try:
                for f, fn in mf.readdelta(n).iteritems():
                    if not f:
                        self.err(lr, _("file without name in manifest"))
                    elif f != "/dev/null": # ignore this in very old repos
                        if _validpath(repo, f):
                            filenodes.setdefault(
                                _normpath(f), {}).setdefault(fn, lr)
            except Exception as inst:
                self.exc(lr, _("reading manifest delta %s") % short(n), inst)
        ui.progress(_('checking'), None)

        if self.havemf:
            # Anything still in mflinkrevs was never found in the manifest.
            for c, m in sorted((c, m) for m in mflinkrevs
                               for c in mflinkrevs[m]):
                if m == nullid:
                    continue
                self.err(c, _("changeset refers to unknown manifest %s") %
                         short(m))

        return filenodes

    def _crosscheckfiles(self, filelinkrevs, filenodes):
        """Report files known to only one of the changelog and manifest.

        ``filelinkrevs`` holds the files named by changesets and
        ``filenodes`` the files named by manifests.
        """
        repo = self.repo
        ui = self.ui
        ui.status(_("crosschecking files in changesets and manifests\n"))

        total = len(filelinkrevs) + len(filenodes)
        count = 0
        if self.havemf:
            for f in sorted(filelinkrevs):
                count += 1
                ui.progress(_('crosschecking'), count, total=total)
                if f not in filenodes:
                    lr = filelinkrevs[f][0]
                    self.err(lr, _("in changeset but not in manifest"), f)

        if self.havecl:
            for f in sorted(filenodes):
                count += 1
                ui.progress(_('crosschecking'), count, total=total)
                if f not in filelinkrevs:
                    try:
                        fl = repo.file(f)
                        lr = min(fl.linkrev(fl.rev(n)) for n in filenodes[f])
                    except Exception:
                        # The filelog is unusable; report without a revision.
                        lr = None
                    self.err(lr, _("in manifest but not in changeset"), f)

        ui.progress(_('crosschecking'), None)

    def _verifyfiles(self, filenodes, filelinkrevs):
        """Check every filelog named by the changelog or the manifest.

        Entries of ``filenodes`` are deleted as the matching file revisions
        are found; whatever is left for a file is reported as missing from
        its filelog.  Store files that no checked filelog claims are
        reported as orphans.

        Returns ``(number of files, number of file revisions)``.
        """
        repo = self.repo
        ui = self.ui
        lrugetctx = self.lrugetctx
        revlogv1 = self.revlogv1
        havemf = self.havemf
        ui.status(_("checking files\n"))

        storefiles = set()
        for f, f2, size in repo.store.datafiles():
            if not f:
                self.err(None, _("cannot decode filename '%s'") % f2)
            elif (size > 0 or not revlogv1) and f.startswith('data/'):
                storefiles.add(_normpath(f))

        files = sorted(set(filenodes) | set(filelinkrevs))
        total = len(files)
        revisions = 0
        for i, f in enumerate(files):
            ui.progress(_('checking'), i, item=f, total=total)
            try:
                linkrevs = filelinkrevs[f]
            except KeyError:
                # in manifest but not in changelog
                linkrevs = []

            if linkrevs:
                lr = linkrevs[0]
            else:
                lr = None

            try:
                fl = repo.file(f)
            except error.RevlogError as e:
                self.err(lr, _("broken revlog! (%s)") % e, f)
                continue

            for ff in fl.files():
                try:
                    storefiles.remove(ff)
                except KeyError:
                    self.warn(_(" warning: revlog '%s' not in fncache!") % ff)
                    self.fncachewarned = True

            self.checklog(fl, f, lr)
            seen = {}
            for rev in fl:
                revisions += 1
                n = fl.node(rev)
                lr = self.checkentry(fl, rev, n, seen, linkrevs, f)
                if f in filenodes:
                    if havemf and n not in filenodes[f]:
                        self.err(lr, _("%s not in manifests") % (short(n)), f)
                    else:
                        del filenodes[f][n]

                # Reset per revision: if reading this revision fails, the
                # rename check below must not reuse the copy data of the
                # previous revision.
                rp = None

                # verify contents
                try:
                    l = len(fl.read(n))
                    rp = fl.renamed(n)
                    if l != fl.size(rev):
                        if len(fl.revision(n)) != fl.size(rev):
                            self.err(lr, _("unpacked size is %s, %s expected") %
                                     (l, fl.size(rev)), f)
                except error.CensoredNodeError:
                    # experimental config: censor.policy
                    if ui.config("censor", "policy", "abort") == "abort":
                        self.err(lr, _("censored file data"), f)
                except Exception as inst:
                    self.exc(lr, _("unpacking %s") % short(n), inst, f)

                # check renames
                try:
                    if rp:
                        if lr is not None and ui.verbose:
                            ctx = lrugetctx(lr)
                            found = False
                            for pctx in ctx.parents():
                                if rp[0] in pctx:
                                    found = True
                                    break
                            if not found:
                                self.warn(_("warning: copy source of '%s' not"
                                            " in parents of %s") % (f, ctx))
                        fl2 = repo.file(rp[0])
                        if not len(fl2):
                            self.err(lr, _("empty or missing copy source "
                                     "revlog %s:%s") % (rp[0], short(rp[1])), f)
                        elif rp[1] == nullid:
                            ui.note(_("warning: %s@%s: copy source"
                                      " revision is nullid %s:%s\n")
                                % (f, lr, rp[0], short(rp[1])))
                        else:
                            # Raises if the copy source revision is missing.
                            fl2.rev(rp[1])
                except Exception as inst:
                    self.exc(lr, _("checking rename of %s") % short(n), inst, f)

            # cross-check
            if f in filenodes:
                fns = [(lr, n) for n, lr in filenodes[f].iteritems()]
                for lr, node in sorted(fns):
                    self.err(lr, _("%s in manifests not found") % short(node),
                             f)
        ui.progress(_('checking'), None)

        for f in storefiles:
            self.warn(_("warning: orphan revlog '%s'") % f)

        return len(files), revisions
General Comments 0
You need to be logged in to leave comments. Login now