##// END OF EJS Templates
verify: drop unnecessary check for nullid...
Martin von Zweigbergk -
r28112:334a3aa6 default
parent child Browse files
Show More
@@ -1,383 +1,381 b''
1 1 # verify.py - repository integrity checking for Mercurial
2 2 #
3 3 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11
12 12 from .i18n import _
13 13 from .node import (
14 14 nullid,
15 15 short,
16 16 )
17 17
18 18 from . import (
19 19 error,
20 20 revlog,
21 21 util,
22 22 )
23 23
def verify(repo):
    """Run a full integrity check of ``repo`` while holding its lock.

    Returns whatever ``verifier(repo).verify()`` returns.
    """
    with repo.lock():
        checker = verifier(repo)
        return checker.verify()
27 27
28 28 def _normpath(f):
29 29 # under hg < 2.4, convert didn't sanitize paths properly, so a
30 30 # converted repo may contain repeated slashes
31 31 while '//' in f:
32 32 f = f.replace('//', '/')
33 33 return f
34 34
35 35 def _validpath(repo, path):
36 36 """Returns False if a path should NOT be treated as part of a repo.
37 37
38 38 For all in-core cases, this returns True, as we have no way for a
39 39 path to be mentioned in the history but not actually be
40 40 relevant. For narrow clones, this is important because many
41 41 filelogs will be missing, and changelog entries may mention
42 42 modified files that are outside the narrow scope.
43 43 """
44 44 return True
45 45
class verifier(object):
    """Walk a repository's changelog, manifest and filelogs looking for
    inconsistencies.

    Problems are reported through ``ui.warn`` and tallied in ``errors``
    and ``warnings``; changelog revisions implicated in an error are
    collected in ``badrevs``.
    """

    def __init__(self, repo):
        """Capture ``repo`` (unfiltered) and reset all counters."""
        self.repo = repo.unfiltered()
        self.ui = repo.ui
        # changelog revisions that errors have been attributed to
        self.badrevs = set()
        self.errors = 0
        self.warnings = 0
        self.havecl = len(repo.changelog) > 0
        self.havemf = len(repo.manifest) > 0
        self.revlogv1 = repo.changelog.version != revlog.REVLOGV0
        self.lrugetctx = util.lrucachefunc(repo.changectx)
        # set once any changeset refers to a non-null manifest (or cannot
        # be unpacked at all)
        self.refersmf = False
        # set once a revlog missing from the fncache has been reported
        self.fncachewarned = False

    def warn(self, msg):
        """Print ``msg`` (newline appended) and count one warning."""
        self.ui.warn(msg + "\n")
        self.warnings += 1

    def err(self, linkrev, msg, filename=None):
        """Print an integrity error and count it.

        ``linkrev`` is the changelog revision the error is attributed to;
        it is remembered in ``badrevs``. ``None`` means "unknown" and is
        printed as ``?``. When ``filename`` is given the message is
        prefixed with ``filename@``.
        """
        if linkrev is not None:
            self.badrevs.add(linkrev)
        else:
            linkrev = '?'
        msg = "%s: %s" % (linkrev, msg)
        if filename:
            msg = "%s@%s" % (filename, msg)
        self.ui.warn(" " + msg + "\n")
        self.errors += 1

    def exc(self, linkrev, msg, inst, filename=None):
        """Report exception ``inst`` as an error, prefixed by ``msg``.

        Exceptions whose ``str()`` is empty are shown via ``repr()`` so
        the report is never blank.
        """
        if not str(inst):
            inst = repr(inst)
        self.err(linkrev, "%s: %s" % (msg, inst), filename)

    def checklog(self, obj, name, linkrev):
        """Run whole-revlog sanity checks on ``obj`` (reported as ``name``).

        Reports an error when the log is empty although the repository
        has changelog or manifest entries, reports size discrepancies
        returned by ``obj.checksize()``, and warns when the log's format
        version disagrees with the changelog's.
        """
        if not len(obj) and (self.havecl or self.havemf):
            self.err(linkrev, _("empty or missing %s") % name)
            return

        d = obj.checksize()
        if d[0]:
            self.err(None, _("data length off by %d bytes") % d[0], name)
        if d[1]:
            self.err(None, _("index contains %d extra bytes") % d[1], name)

        if obj.version != revlog.REVLOGV0:
            if not self.revlogv1:
                self.warn(_("warning: `%s' uses revlog format 1") % name)
        elif self.revlogv1:
            self.warn(_("warning: `%s' uses revlog format 0") % name)

    def checkentry(self, obj, i, node, seen, linkrevs, f):
        """Check revision ``i`` (``node``) of revlog ``obj``.

        Verifies that the entry's linkrev is one of the expected
        ``linkrevs``, that both parents are either null or already in
        ``seen``, and that ``node`` has not been seen before. ``seen``
        is updated to map ``node`` to ``i``. ``f`` is the name used in
        error reports.

        Returns the entry's linkrev, or ``None`` when it cannot be
        trusted.
        """
        lr = obj.linkrev(obj.rev(node))
        if lr < 0 or (self.havecl and lr not in linkrevs):
            if lr < 0 or lr >= len(self.repo.changelog):
                msg = _("rev %d points to nonexistent changeset %d")
            else:
                msg = _("rev %d points to unexpected changeset %d")
            self.err(None, msg % (i, lr), f)
            if linkrevs:
                if f and len(linkrevs) > 1:
                    try:
                        # attempt to filter down to real linkrevs
                        linkrevs = [l for l in linkrevs
                                    if self.lrugetctx(l)[f].filenode() == node]
                    except Exception:
                        # deliberately best-effort: on any failure fall
                        # back to showing the unfiltered candidates
                        pass
                self.warn(_(" (expected %s)") % " ".join(map(str, linkrevs)))
            lr = None # can't be trusted

        try:
            p1, p2 = obj.parents(node)
            if p1 not in seen and p1 != nullid:
                self.err(lr, _("unknown parent 1 %s of %s") %
                         (short(p1), short(node)), f)
            if p2 not in seen and p2 != nullid:
                self.err(lr, _("unknown parent 2 %s of %s") %
                         (short(p2), short(node)), f)
        except Exception as inst:
            self.exc(lr, _("checking parents of %s") % short(node), inst, f)

        if node in seen:
            self.err(lr, _("duplicate revision %d (%d)") % (i, seen[node]), f)
        seen[node] = i
        return lr

    def verify(self):
        """Run every check and print a summary.

        Returns 1 when integrity errors were found and ``None``
        otherwise. Raises ``error.Abort`` when the repository URL does
        not start with ``file:``.
        """
        repo = self.repo

        ui = repo.ui

        if not repo.url().startswith('file:'):
            raise error.Abort(_("cannot verify bundle or remote repos"))

        if os.path.exists(repo.sjoin("journal")):
            ui.warn(_("abandoned transaction found - run hg recover\n"))

        if ui.verbose or not self.revlogv1:
            ui.status(_("repository uses revlog format %d\n") %
                      (1 if self.revlogv1 else 0))

        mflinkrevs, filelinkrevs = self._verifychangelog()

        filenodes = self._verifymanifest(mflinkrevs)
        del mflinkrevs

        self._crosscheckfiles(filelinkrevs, filenodes)

        totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)

        ui.status(_("%d files, %d changesets, %d total revisions\n") %
                  (totalfiles, len(repo.changelog), filerevisions))
        if self.warnings:
            ui.warn(_("%d warnings encountered!\n") % self.warnings)
        if self.fncachewarned:
            ui.warn(_('hint: run "hg debugrebuildfncache" to recover from '
                      'corrupt fncache\n'))
        if self.errors:
            ui.warn(_("%d integrity errors encountered!\n") % self.errors)
            if self.badrevs:
                ui.warn(_("(first damaged changeset appears to be %d)\n")
                        % min(self.badrevs))
            return 1

    def _verifychangelog(self):
        """Check every changelog entry.

        Returns ``(mflinkrevs, filelinkrevs)``: the first maps each
        non-null manifest node to the changelog revisions referring to
        it, the second maps each normalized file path to the changelog
        revisions that list it.
        """
        ui = self.ui
        repo = self.repo
        cl = repo.changelog

        ui.status(_("checking changesets\n"))
        mflinkrevs = {}
        filelinkrevs = {}
        seen = {}
        self.checklog(cl, "changelog", 0)
        total = len(repo)
        for i in repo:
            ui.progress(_('checking'), i, total=total, unit=_('changesets'))
            n = cl.node(i)
            self.checkentry(cl, i, n, seen, [i], "changelog")

            try:
                changes = cl.read(n)
                # changes[0] is used as the manifest node and changes[3]
                # as the list of touched files
                if changes[0] != nullid:
                    mflinkrevs.setdefault(changes[0], []).append(i)
                    self.refersmf = True
                for f in changes[3]:
                    if _validpath(repo, f):
                        filelinkrevs.setdefault(_normpath(f), []).append(i)
            except Exception as inst:
                # an unreadable changeset may have referred to a manifest,
                # so the manifest log must still be checked
                self.refersmf = True
                self.exc(i, _("unpacking changeset %s") % short(n), inst)
        ui.progress(_('checking'), None)
        return mflinkrevs, filelinkrevs

    def _verifymanifest(self, mflinkrevs):
        """Check every manifest entry against ``mflinkrevs``.

        Entries of ``mflinkrevs`` that are found in the manifest log are
        removed from it; whatever remains is reported as an unknown
        manifest. Returns ``filenodes``, mapping each normalized file
        path to a dict of ``{filenode: linkrev}``.
        """
        repo = self.repo
        ui = self.ui
        mf = self.repo.manifest

        ui.status(_("checking manifests\n"))
        filenodes = {}
        seen = {}
        if self.refersmf:
            # Do not check manifest if there are only changelog entries with
            # null manifests.
            self.checklog(mf, "manifest", 0)
        total = len(mf)
        for i in mf:
            ui.progress(_('checking'), i, total=total, unit=_('manifests'))
            n = mf.node(i)
            lr = self.checkentry(mf, i, n, seen, mflinkrevs.get(n, []),
                                 "manifest")
            if n in mflinkrevs:
                del mflinkrevs[n]
            else:
                self.err(lr, _("%s not in changesets") % short(n), "manifest")

            try:
                for f, fn in mf.readdelta(n).iteritems():
                    if not f:
                        self.err(lr, _("file without name in manifest"))
                    elif f != "/dev/null": # ignore this in very old repos
                        if _validpath(repo, f):
                            filenodes.setdefault(
                                _normpath(f), {}).setdefault(fn, lr)
            except Exception as inst:
                self.exc(lr, _("reading manifest delta %s") % short(n), inst)
        ui.progress(_('checking'), None)

        if self.havemf:
            # No special-casing of nullid is needed here: _verifychangelog
            # only records non-null manifest nodes in mflinkrevs.
            for c, m in sorted([(c, m) for m in mflinkrevs
                                for c in mflinkrevs[m]]):
                self.err(c, _("changeset refers to unknown manifest %s") %
                         short(m))

        return filenodes

    def _crosscheckfiles(self, filelinkrevs, filenodes):
        """Report files known to only one of changelog and manifest.

        ``filelinkrevs`` holds the paths mentioned by changesets and
        ``filenodes`` the paths mentioned by manifests.
        """
        repo = self.repo
        ui = self.ui
        ui.status(_("crosschecking files in changesets and manifests\n"))

        total = len(filelinkrevs) + len(filenodes)
        count = 0
        if self.havemf:
            for f in sorted(filelinkrevs):
                count += 1
                ui.progress(_('crosschecking'), count, total=total)
                if f not in filenodes:
                    lr = filelinkrevs[f][0]
                    self.err(lr, _("in changeset but not in manifest"), f)

        if self.havecl:
            for f in sorted(filenodes):
                count += 1
                ui.progress(_('crosschecking'), count, total=total)
                if f not in filelinkrevs:
                    try:
                        fl = repo.file(f)
                        lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
                    except Exception:
                        # cannot work out a linkrev; report as unknown
                        lr = None
                    self.err(lr, _("in manifest but not in changeset"), f)

        ui.progress(_('crosschecking'), None)

    def _verifyfiles(self, filenodes, filelinkrevs):
        """Check every filelog named in ``filenodes`` or ``filelinkrevs``.

        For each revision this checks the revlog entry, that the node is
        known to the manifests, that the contents unpack to the recorded
        size, and that any copy source exists. File nodes that
        manifests mention but the filelog lacks are reported, as are
        store files not claimed by any checked filelog. Entries of
        ``filenodes`` are consumed (deleted) as they are matched.

        Returns ``(number of files, number of file revisions)``.
        """
        repo = self.repo
        ui = self.ui
        lrugetctx = self.lrugetctx
        revlogv1 = self.revlogv1
        havemf = self.havemf
        ui.status(_("checking files\n"))

        storefiles = set()
        for f, f2, size in repo.store.datafiles():
            if not f:
                self.err(None, _("cannot decode filename '%s'") % f2)
            elif (size > 0 or not revlogv1) and f.startswith('data/'):
                storefiles.add(_normpath(f))

        files = sorted(set(filenodes) | set(filelinkrevs))
        total = len(files)
        revisions = 0
        # 'idx' rather than 'i': the revision loop below uses 'i'
        for idx, f in enumerate(files):
            ui.progress(_('checking'), idx, item=f, total=total)
            try:
                linkrevs = filelinkrevs[f]
            except KeyError:
                # in manifest but not in changelog
                linkrevs = []

            if linkrevs:
                lr = linkrevs[0]
            else:
                lr = None

            try:
                fl = repo.file(f)
            except error.RevlogError as e:
                self.err(lr, _("broken revlog! (%s)") % e, f)
                continue

            for ff in fl.files():
                try:
                    storefiles.remove(ff)
                except KeyError:
                    self.warn(_(" warning: revlog '%s' not in fncache!") % ff)
                    self.fncachewarned = True

            self.checklog(fl, f, lr)
            seen = {}
            for i in fl:
                revisions += 1
                n = fl.node(i)
                lr = self.checkentry(fl, i, n, seen, linkrevs, f)
                if f in filenodes:
                    if havemf and n not in filenodes[f]:
                        self.err(lr, _("%s not in manifests") % (short(n)), f)
                    else:
                        del filenodes[f][n]

                # Reset for every revision: if reading this revision fails
                # below, the rename check must not run against the copy
                # information of a previous revision.
                rp = None

                # verify contents
                try:
                    l = len(fl.read(n))
                    rp = fl.renamed(n)
                    if l != fl.size(i):
                        # only an error if the raw revision length
                        # disagrees with the recorded size as well
                        if len(fl.revision(n)) != fl.size(i):
                            self.err(lr, _("unpacked size is %s, %s expected") %
                                     (l, fl.size(i)), f)
                except error.CensoredNodeError:
                    # experimental config: censor.policy
                    if ui.config("censor", "policy", "abort") == "abort":
                        self.err(lr, _("censored file data"), f)
                except Exception as inst:
                    self.exc(lr, _("unpacking %s") % short(n), inst, f)

                # check renames
                try:
                    if rp:
                        # rp[0] is used as the copy source path and rp[1]
                        # as the copy source file node
                        if lr is not None and ui.verbose:
                            ctx = lrugetctx(lr)
                            found = False
                            for pctx in ctx.parents():
                                if rp[0] in pctx:
                                    found = True
                                    break
                            if not found:
                                self.warn(_("warning: copy source of '%s' not"
                                            " in parents of %s") % (f, ctx))
                        fl2 = repo.file(rp[0])
                        if not len(fl2):
                            self.err(lr, _("empty or missing copy source "
                                           "revlog %s:%s")
                                     % (rp[0], short(rp[1])), f)
                        elif rp[1] == nullid:
                            ui.note(_("warning: %s@%s: copy source"
                                      " revision is nullid %s:%s\n")
                                    % (f, lr, rp[0], short(rp[1])))
                        else:
                            # lookup only; a failure is reported below
                            fl2.rev(rp[1])
                except Exception as inst:
                    self.exc(lr, _("checking rename of %s") % short(n), inst, f)

            # cross-check: anything left was named by a manifest but is
            # absent from the filelog (filenodes[f] is a plain dict)
            if f in filenodes:
                fns = [(lr, n) for n, lr in filenodes[f].items()]
                for lr, node in sorted(fns):
                    self.err(lr, _("%s in manifests not found") % short(node),
                             f)
        ui.progress(_('checking'), None)

        for f in storefiles:
            self.warn(_("warning: orphan revlog '%s'") % f)

        return len(files), revisions
General Comments 0
You need to be logged in to leave comments. Login now