##// END OF EJS Templates
verify: move changelog verification to its own function...
Durham Goode -
r27647:2c2858f3 default
parent child Browse files
Show More
@@ -1,383 +1,390
1 1 # verify.py - repository integrity checking for Mercurial
2 2 #
3 3 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11
12 12 from .i18n import _
13 13 from .node import (
14 14 nullid,
15 15 short,
16 16 )
17 17
18 18 from . import (
19 19 error,
20 20 revlog,
21 21 util,
22 22 )
23 23
24 24 def verify(repo):
25 25 lock = repo.lock()
26 26 try:
27 27 return verifier(repo).verify()
28 28 finally:
29 29 lock.release()
30 30
31 31 def _normpath(f):
32 32 # under hg < 2.4, convert didn't sanitize paths properly, so a
33 33 # converted repo may contain repeated slashes
34 34 while '//' in f:
35 35 f = f.replace('//', '/')
36 36 return f
37 37
38 38 def _validpath(repo, path):
39 39 """Returns False if a path should NOT be treated as part of a repo.
40 40
41 41 For all in-core cases, this returns True, as we have no way for a
42 42 path to be mentioned in the history but not actually be
43 43 relevant. For narrow clones, this is important because many
44 44 filelogs will be missing, and changelog entries may mention
45 45 modified files that are outside the narrow scope.
46 46 """
47 47 return True
48 48
49 49 class verifier(object):
50 50 def __init__(self, repo):
51 51 self.repo = repo.unfiltered()
52 52 self.ui = repo.ui
53 53 self.badrevs = set()
54 54 self.errors = 0
55 55 self.warnings = 0
56 56 self.havecl = len(repo.changelog) > 0
57 57 self.havemf = len(repo.manifest) > 0
58 58 self.revlogv1 = repo.changelog.version != revlog.REVLOGV0
59 59 self.lrugetctx = util.lrucachefunc(repo.changectx)
60 60 self.refersmf = False
61 61 self.fncachewarned = False
62 62
63 63 def warn(self, msg):
64 64 self.ui.warn(msg + "\n")
65 65 self.warnings += 1
66 66
67 67 def err(self, linkrev, msg, filename=None):
68 68 if linkrev is not None:
69 69 self.badrevs.add(linkrev)
70 70 else:
71 71 linkrev = '?'
72 72 msg = "%s: %s" % (linkrev, msg)
73 73 if filename:
74 74 msg = "%s@%s" % (filename, msg)
75 75 self.ui.warn(" " + msg + "\n")
76 76 self.errors += 1
77 77
78 78 def exc(self, linkrev, msg, inst, filename=None):
79 79 if not str(inst):
80 80 inst = repr(inst)
81 81 self.err(linkrev, "%s: %s" % (msg, inst), filename)
82 82
83 83 def checklog(self, obj, name, linkrev):
84 84 if not len(obj) and (self.havecl or self.havemf):
85 85 self.err(linkrev, _("empty or missing %s") % name)
86 86 return
87 87
88 88 d = obj.checksize()
89 89 if d[0]:
90 90 self.err(None, _("data length off by %d bytes") % d[0], name)
91 91 if d[1]:
92 92 self.err(None, _("index contains %d extra bytes") % d[1], name)
93 93
94 94 if obj.version != revlog.REVLOGV0:
95 95 if not self.revlogv1:
96 96 self.warn(_("warning: `%s' uses revlog format 1") % name)
97 97 elif self.revlogv1:
98 98 self.warn(_("warning: `%s' uses revlog format 0") % name)
99 99
100 100 def checkentry(self, obj, i, node, seen, linkrevs, f):
101 101 lr = obj.linkrev(obj.rev(node))
102 102 if lr < 0 or (self.havecl and lr not in linkrevs):
103 103 if lr < 0 or lr >= len(self.repo.changelog):
104 104 msg = _("rev %d points to nonexistent changeset %d")
105 105 else:
106 106 msg = _("rev %d points to unexpected changeset %d")
107 107 self.err(None, msg % (i, lr), f)
108 108 if linkrevs:
109 109 if f and len(linkrevs) > 1:
110 110 try:
111 111 # attempt to filter down to real linkrevs
112 112 linkrevs = [l for l in linkrevs
113 113 if self.lrugetctx(l)[f].filenode() == node]
114 114 except Exception:
115 115 pass
116 116 self.warn(_(" (expected %s)") % " ".join(map(str, linkrevs)))
117 117 lr = None # can't be trusted
118 118
119 119 try:
120 120 p1, p2 = obj.parents(node)
121 121 if p1 not in seen and p1 != nullid:
122 122 self.err(lr, _("unknown parent 1 %s of %s") %
123 123 (short(p1), short(node)), f)
124 124 if p2 not in seen and p2 != nullid:
125 125 self.err(lr, _("unknown parent 2 %s of %s") %
126 126 (short(p2), short(node)), f)
127 127 except Exception as inst:
128 128 self.exc(lr, _("checking parents of %s") % short(node), inst, f)
129 129
130 130 if node in seen:
131 131 self.err(lr, _("duplicate revision %d (%d)") % (i, seen[node]), f)
132 132 seen[node] = i
133 133 return lr
134 134
135 135 def verify(self):
136 136 repo = self.repo
137 137 mflinkrevs = {}
138 138 filelinkrevs = {}
139 139 filenodes = {}
140 140 revisions = 0
141 141 badrevs = self.badrevs
142 142 ui = repo.ui
143 143 cl = repo.changelog
144 144
145 145 if not repo.url().startswith('file:'):
146 146 raise error.Abort(_("cannot verify bundle or remote repos"))
147 147
148 148 if os.path.exists(repo.sjoin("journal")):
149 149 ui.warn(_("abandoned transaction found - run hg recover\n"))
150 150
151 151 revlogv1 = self.revlogv1
152 152 if ui.verbose or not revlogv1:
153 153 ui.status(_("repository uses revlog format %d\n") %
154 154 (revlogv1 and 1 or 0))
155 155
156 self._verifychangelog(mflinkrevs, filelinkrevs)
157
158 self._verifymanifest(mflinkrevs, filenodes)
159
160 self._crosscheckfiles(mflinkrevs, filelinkrevs, filenodes)
161
162 totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)
163 revisions += filerevisions
164
165 ui.status(_("%d files, %d changesets, %d total revisions\n") %
166 (totalfiles, len(cl), revisions))
167 if self.warnings:
168 ui.warn(_("%d warnings encountered!\n") % self.warnings)
169 if self.fncachewarned:
170 ui.warn(_('hint: run "hg debugrebuildfncache" to recover from '
171 'corrupt fncache\n'))
172 if self.errors:
173 ui.warn(_("%d integrity errors encountered!\n") % self.errors)
174 if badrevs:
175 ui.warn(_("(first damaged changeset appears to be %d)\n")
176 % min(badrevs))
177 return 1
178
179 def _verifychangelog(self, mflinkrevs, filelinkrevs):
180 ui = self.ui
181 repo = self.repo
182 cl = repo.changelog
183
156 184 ui.status(_("checking changesets\n"))
157 185 seen = {}
158 186 self.checklog(cl, "changelog", 0)
159 187 total = len(repo)
160 188 for i in repo:
161 189 ui.progress(_('checking'), i, total=total, unit=_('changesets'))
162 190 n = cl.node(i)
163 191 self.checkentry(cl, i, n, seen, [i], "changelog")
164 192
165 193 try:
166 194 changes = cl.read(n)
167 195 if changes[0] != nullid:
168 196 mflinkrevs.setdefault(changes[0], []).append(i)
169 197 self.refersmf = True
170 198 for f in changes[3]:
171 199 if _validpath(repo, f):
172 200 filelinkrevs.setdefault(_normpath(f), []).append(i)
173 201 except Exception as inst:
174 202 self.refersmf = True
175 203 self.exc(i, _("unpacking changeset %s") % short(n), inst)
176 204 ui.progress(_('checking'), None)
177 205
178 self._verifymanifest(mflinkrevs, filenodes)
179
180 self._crosscheckfiles(mflinkrevs, filelinkrevs, filenodes)
181
182 totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)
183 revisions += filerevisions
184
185 ui.status(_("%d files, %d changesets, %d total revisions\n") %
186 (totalfiles, len(cl), revisions))
187 if self.warnings:
188 ui.warn(_("%d warnings encountered!\n") % self.warnings)
189 if self.fncachewarned:
190 ui.warn(_('hint: run "hg debugrebuildfncache" to recover from '
191 'corrupt fncache\n'))
192 if self.errors:
193 ui.warn(_("%d integrity errors encountered!\n") % self.errors)
194 if badrevs:
195 ui.warn(_("(first damaged changeset appears to be %d)\n")
196 % min(badrevs))
197 return 1
198
199 206 def _verifymanifest(self, mflinkrevs, filenodes):
200 207 repo = self.repo
201 208 ui = self.ui
202 209 mf = self.repo.manifest
203 210
204 211 ui.status(_("checking manifests\n"))
205 212 seen = {}
206 213 if self.refersmf:
207 214 # Do not check manifest if there are only changelog entries with
208 215 # null manifests.
209 216 self.checklog(mf, "manifest", 0)
210 217 total = len(mf)
211 218 for i in mf:
212 219 ui.progress(_('checking'), i, total=total, unit=_('manifests'))
213 220 n = mf.node(i)
214 221 lr = self.checkentry(mf, i, n, seen, mflinkrevs.get(n, []),
215 222 "manifest")
216 223 if n in mflinkrevs:
217 224 del mflinkrevs[n]
218 225 else:
219 226 self.err(lr, _("%s not in changesets") % short(n), "manifest")
220 227
221 228 try:
222 229 for f, fn in mf.readdelta(n).iteritems():
223 230 if not f:
224 231 self.err(lr, _("file without name in manifest"))
225 232 elif f != "/dev/null": # ignore this in very old repos
226 233 if _validpath(repo, f):
227 234 filenodes.setdefault(
228 235 _normpath(f), {}).setdefault(fn, lr)
229 236 except Exception as inst:
230 237 self.exc(lr, _("reading manifest delta %s") % short(n), inst)
231 238 ui.progress(_('checking'), None)
232 239
233 240 return mflinkrevs
234 241
235 242 def _crosscheckfiles(self, mflinkrevs, filelinkrevs, filenodes):
236 243 repo = self.repo
237 244 ui = self.ui
238 245 ui.status(_("crosschecking files in changesets and manifests\n"))
239 246
240 247 total = len(mflinkrevs) + len(filelinkrevs) + len(filenodes)
241 248 count = 0
242 249 if self.havemf:
243 250 for c, m in sorted([(c, m) for m in mflinkrevs
244 251 for c in mflinkrevs[m]]):
245 252 count += 1
246 253 if m == nullid:
247 254 continue
248 255 ui.progress(_('crosschecking'), count, total=total)
249 256 self.err(c, _("changeset refers to unknown manifest %s") %
250 257 short(m))
251 258 mflinkrevs = None # del is bad here due to scope issues
252 259
253 260 for f in sorted(filelinkrevs):
254 261 count += 1
255 262 ui.progress(_('crosschecking'), count, total=total)
256 263 if f not in filenodes:
257 264 lr = filelinkrevs[f][0]
258 265 self.err(lr, _("in changeset but not in manifest"), f)
259 266
260 267 if self.havecl:
261 268 for f in sorted(filenodes):
262 269 count += 1
263 270 ui.progress(_('crosschecking'), count, total=total)
264 271 if f not in filelinkrevs:
265 272 try:
266 273 fl = repo.file(f)
267 274 lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
268 275 except Exception:
269 276 lr = None
270 277 self.err(lr, _("in manifest but not in changeset"), f)
271 278
272 279 ui.progress(_('crosschecking'), None)
273 280
274 281 def _verifyfiles(self, filenodes, filelinkrevs):
275 282 repo = self.repo
276 283 ui = self.ui
277 284 lrugetctx = self.lrugetctx
278 285 revlogv1 = self.revlogv1
279 286 havemf = self.havemf
280 287 ui.status(_("checking files\n"))
281 288
282 289 storefiles = set()
283 290 for f, f2, size in repo.store.datafiles():
284 291 if not f:
285 292 self.err(None, _("cannot decode filename '%s'") % f2)
286 293 elif size > 0 or not revlogv1:
287 294 storefiles.add(_normpath(f))
288 295
289 296 files = sorted(set(filenodes) | set(filelinkrevs))
290 297 total = len(files)
291 298 revisions = 0
292 299 for i, f in enumerate(files):
293 300 ui.progress(_('checking'), i, item=f, total=total)
294 301 try:
295 302 linkrevs = filelinkrevs[f]
296 303 except KeyError:
297 304 # in manifest but not in changelog
298 305 linkrevs = []
299 306
300 307 if linkrevs:
301 308 lr = linkrevs[0]
302 309 else:
303 310 lr = None
304 311
305 312 try:
306 313 fl = repo.file(f)
307 314 except error.RevlogError as e:
308 315 self.err(lr, _("broken revlog! (%s)") % e, f)
309 316 continue
310 317
311 318 for ff in fl.files():
312 319 try:
313 320 storefiles.remove(ff)
314 321 except KeyError:
315 322 self.warn(_(" warning: revlog '%s' not in fncache!") % ff)
316 323 self.fncachewarned = True
317 324
318 325 self.checklog(fl, f, lr)
319 326 seen = {}
320 327 rp = None
321 328 for i in fl:
322 329 revisions += 1
323 330 n = fl.node(i)
324 331 lr = self.checkentry(fl, i, n, seen, linkrevs, f)
325 332 if f in filenodes:
326 333 if havemf and n not in filenodes[f]:
327 334 self.err(lr, _("%s not in manifests") % (short(n)), f)
328 335 else:
329 336 del filenodes[f][n]
330 337
331 338 # verify contents
332 339 try:
333 340 l = len(fl.read(n))
334 341 rp = fl.renamed(n)
335 342 if l != fl.size(i):
336 343 if len(fl.revision(n)) != fl.size(i):
337 344 self.err(lr, _("unpacked size is %s, %s expected") %
338 345 (l, fl.size(i)), f)
339 346 except error.CensoredNodeError:
340 347 # experimental config: censor.policy
341 348 if ui.config("censor", "policy", "abort") == "abort":
342 349 self.err(lr, _("censored file data"), f)
343 350 except Exception as inst:
344 351 self.exc(lr, _("unpacking %s") % short(n), inst, f)
345 352
346 353 # check renames
347 354 try:
348 355 if rp:
349 356 if lr is not None and ui.verbose:
350 357 ctx = lrugetctx(lr)
351 358 found = False
352 359 for pctx in ctx.parents():
353 360 if rp[0] in pctx:
354 361 found = True
355 362 break
356 363 if not found:
357 364 self.warn(_("warning: copy source of '%s' not"
358 365 " in parents of %s") % (f, ctx))
359 366 fl2 = repo.file(rp[0])
360 367 if not len(fl2):
361 368 self.err(lr, _("empty or missing copy source "
362 369 "revlog %s:%s") % (rp[0], short(rp[1])), f)
363 370 elif rp[1] == nullid:
364 371 ui.note(_("warning: %s@%s: copy source"
365 372 " revision is nullid %s:%s\n")
366 373 % (f, lr, rp[0], short(rp[1])))
367 374 else:
368 375 fl2.rev(rp[1])
369 376 except Exception as inst:
370 377 self.exc(lr, _("checking rename of %s") % short(n), inst, f)
371 378
372 379 # cross-check
373 380 if f in filenodes:
374 381 fns = [(lr, n) for n, lr in filenodes[f].iteritems()]
375 382 for lr, node in sorted(fns):
376 383 self.err(lr, _("%s in manifests not found") % short(node),
377 384 f)
378 385 ui.progress(_('checking'), None)
379 386
380 387 for f in storefiles:
381 388 self.warn(_("warning: orphan revlog '%s'") % f)
382 389
383 390 return len(files), revisions
General Comments 0
You need to be logged in to leave comments. Login now