##// END OF EJS Templates
verify: recover lost freeing of memory...
Martin von Zweigbergk -
r27964:ac5057d5 stable
parent child Browse files
Show More
@@ -1,384 +1,384 b''
1 1 # verify.py - repository integrity checking for Mercurial
2 2 #
3 3 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11
12 12 from .i18n import _
13 13 from .node import (
14 14 nullid,
15 15 short,
16 16 )
17 17
18 18 from . import (
19 19 error,
20 20 revlog,
21 21 util,
22 22 )
23 23
def verify(repo):
    """Run a full integrity check of ``repo`` while holding its lock.

    The lock is acquired through ``repo.lock()`` used as a context manager
    and the result of ``verifier(repo).verify()`` is returned unchanged.
    """
    with repo.lock():
        checker = verifier(repo)
        return checker.verify()
27 27
28 28 def _normpath(f):
29 29 # under hg < 2.4, convert didn't sanitize paths properly, so a
30 30 # converted repo may contain repeated slashes
31 31 while '//' in f:
32 32 f = f.replace('//', '/')
33 33 return f
34 34
35 35 def _validpath(repo, path):
36 36 """Returns False if a path should NOT be treated as part of a repo.
37 37
38 38 For all in-core cases, this returns True, as we have no way for a
39 39 path to be mentioned in the history but not actually be
40 40 relevant. For narrow clones, this is important because many
41 41 filelogs will be missing, and changelog entries may mention
42 42 modified files that are outside the narrow scope.
43 43 """
44 44 return True
45 45
class verifier(object):
    """Stateful integrity checker for a single repository.

    The instance accumulates error and warning counts and the set of
    changelog revisions implicated in errors while ``verify()`` walks the
    changelog, the manifest and every filelog.
    """

    def __init__(self, repo):
        """Capture ``repo`` (unfiltered) and initialise the counters."""
        self.repo = repo.unfiltered()
        self.ui = repo.ui
        # linkrevs of changesets that were implicated in an error
        self.badrevs = set()
        self.errors = 0
        self.warnings = 0
        self.havecl = len(repo.changelog) > 0
        self.havemf = len(repo.manifest) > 0
        self.revlogv1 = repo.changelog.version != revlog.REVLOGV0
        self.lrugetctx = util.lrucachefunc(repo.changectx)
        # set once any changeset refers to a non-null manifest (or cannot
        # be unpacked at all)
        self.refersmf = False
        # set when a revlog turns out to be missing from the store file list
        self.fncachewarned = False

    def warn(self, msg):
        """Emit ``msg`` through ``ui.warn`` and count it as a warning."""
        self.ui.warn(msg + "\n")
        self.warnings += 1

    def err(self, linkrev, msg, filename=None):
        """Report an integrity error and count it.

        ``linkrev`` is remembered in ``badrevs`` unless it is None, in
        which case '?' is shown in its place. When ``filename`` is given
        the message is prefixed with ``filename@``.
        """
        if linkrev is not None:
            self.badrevs.add(linkrev)
        else:
            linkrev = '?'
        msg = "%s: %s" % (linkrev, msg)
        if filename:
            msg = "%s@%s" % (filename, msg)
        self.ui.warn(" " + msg + "\n")
        self.errors += 1

    def exc(self, linkrev, msg, inst, filename=None):
        """Report exception ``inst`` as an error via ``err()``.

        When the exception has an empty string form its repr is shown
        instead, so the report is never blank.
        """
        if not str(inst):
            inst = repr(inst)
        self.err(linkrev, "%s: %s" % (msg, inst), filename)

    def checklog(self, obj, name, linkrev):
        """Run whole-revlog sanity checks on ``obj``, reported as ``name``.

        Reports an empty/missing log (only when the repository has a
        changelog or manifest), size mismatches returned by
        ``obj.checksize()``, and warns when the revlog format of ``obj``
        differs from the changelog's.
        """
        if not len(obj) and (self.havecl or self.havemf):
            self.err(linkrev, _("empty or missing %s") % name)
            return

        d = obj.checksize()
        if d[0]:
            self.err(None, _("data length off by %d bytes") % d[0], name)
        if d[1]:
            self.err(None, _("index contains %d extra bytes") % d[1], name)

        if obj.version != revlog.REVLOGV0:
            if not self.revlogv1:
                self.warn(_("warning: `%s' uses revlog format 1") % name)
        elif self.revlogv1:
            self.warn(_("warning: `%s' uses revlog format 0") % name)

    def checkentry(self, obj, i, node, seen, linkrevs, f):
        """Check the linkrev and parents of revision ``i`` (``node``).

        ``seen`` maps already visited nodes to their revision number and
        is updated with ``node``. ``linkrevs`` lists the changelog
        revisions expected to introduce this node; ``f`` is the name used
        in reports.

        Returns the linkrev stored in ``obj``, or None when that linkrev
        was found to be bogus and therefore cannot be trusted.
        """
        lr = obj.linkrev(obj.rev(node))
        if lr < 0 or (self.havecl and lr not in linkrevs):
            if lr < 0 or lr >= len(self.repo.changelog):
                msg = _("rev %d points to nonexistent changeset %d")
            else:
                msg = _("rev %d points to unexpected changeset %d")
            self.err(None, msg % (i, lr), f)
            if linkrevs:
                if f and len(linkrevs) > 1:
                    try:
                        # attempt to filter down to real linkrevs
                        linkrevs = [l for l in linkrevs
                                    if self.lrugetctx(l)[f].filenode() == node]
                    except Exception:
                        # deliberate best effort: fall back to the
                        # unfiltered candidates for the hint below
                        pass
                self.warn(_(" (expected %s)") % " ".join(map(str, linkrevs)))
            lr = None # can't be trusted

        try:
            p1, p2 = obj.parents(node)
            if p1 not in seen and p1 != nullid:
                self.err(lr, _("unknown parent 1 %s of %s") %
                         (short(p1), short(node)), f)
            if p2 not in seen and p2 != nullid:
                self.err(lr, _("unknown parent 2 %s of %s") %
                         (short(p2), short(node)), f)
        except Exception as inst:
            self.exc(lr, _("checking parents of %s") % short(node), inst, f)

        if node in seen:
            self.err(lr, _("duplicate revision %d (%d)") % (i, seen[node]), f)
        seen[node] = i
        return lr

    def verify(self):
        """Verify the whole repository and print a summary.

        Raises ``error.Abort`` when the repository URL does not start
        with 'file:'. Returns 1 when integrity errors were found and
        None otherwise.
        """
        repo = self.repo

        ui = repo.ui

        if not repo.url().startswith('file:'):
            raise error.Abort(_("cannot verify bundle or remote repos"))

        if os.path.exists(repo.sjoin("journal")):
            ui.warn(_("abandoned transaction found - run hg recover\n"))

        if ui.verbose or not self.revlogv1:
            ui.status(_("repository uses revlog format %d\n") %
                      (1 if self.revlogv1 else 0))

        mflinkrevs, filelinkrevs = self._verifychangelog()

        filenodes = self._verifymanifest(mflinkrevs)

        self._crosscheckfiles(mflinkrevs, filelinkrevs, filenodes)
        # Drop the reference held by this frame so the mapping can be
        # released before the filelogs are checked. Rebinding the name
        # inside _crosscheckfiles() would not achieve that, because this
        # frame would still keep the dict alive.
        del mflinkrevs

        totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)

        ui.status(_("%d files, %d changesets, %d total revisions\n") %
                  (totalfiles, len(repo.changelog), filerevisions))
        if self.warnings:
            ui.warn(_("%d warnings encountered!\n") % self.warnings)
        if self.fncachewarned:
            ui.warn(_('hint: run "hg debugrebuildfncache" to recover from '
                      'corrupt fncache\n'))
        if self.errors:
            ui.warn(_("%d integrity errors encountered!\n") % self.errors)
            if self.badrevs:
                ui.warn(_("(first damaged changeset appears to be %d)\n")
                        % min(self.badrevs))
            return 1

    def _verifychangelog(self):
        """Check every changeset and collect what it refers to.

        Returns ``(mflinkrevs, filelinkrevs)``: the first maps each
        non-null manifest node to the changelog revisions naming it, the
        second maps each normalized file path to the changelog revisions
        listing it.
        """
        ui = self.ui
        repo = self.repo
        cl = repo.changelog

        ui.status(_("checking changesets\n"))
        mflinkrevs = {}
        filelinkrevs = {}
        seen = {}
        self.checklog(cl, "changelog", 0)
        total = len(repo)
        for i in repo:
            ui.progress(_('checking'), i, total=total, unit=_('changesets'))
            n = cl.node(i)
            self.checkentry(cl, i, n, seen, [i], "changelog")

            try:
                changes = cl.read(n)
                if changes[0] != nullid:
                    mflinkrevs.setdefault(changes[0], []).append(i)
                    self.refersmf = True
                for f in changes[3]:
                    if _validpath(repo, f):
                        filelinkrevs.setdefault(_normpath(f), []).append(i)
            except Exception as inst:
                # an unreadable changeset may have referred to a manifest
                self.refersmf = True
                self.exc(i, _("unpacking changeset %s") % short(n), inst)
        ui.progress(_('checking'), None)
        return mflinkrevs, filelinkrevs

    def _verifymanifest(self, mflinkrevs):
        """Check every manifest revision against ``mflinkrevs``.

        Manifest nodes that are found are removed from ``mflinkrevs``, so
        afterwards it only holds nodes named by changesets but absent
        from the manifest log.

        Returns ``filenodes``, mapping each normalized file path to a
        dict of ``{filenode: linkrev}`` gathered from manifest deltas.
        """
        repo = self.repo
        ui = self.ui
        mf = self.repo.manifest

        ui.status(_("checking manifests\n"))
        filenodes = {}
        seen = {}
        if self.refersmf:
            # Do not check manifest if there are only changelog entries with
            # null manifests.
            self.checklog(mf, "manifest", 0)
        total = len(mf)
        for i in mf:
            ui.progress(_('checking'), i, total=total, unit=_('manifests'))
            n = mf.node(i)
            lr = self.checkentry(mf, i, n, seen, mflinkrevs.get(n, []),
                                 "manifest")
            if n in mflinkrevs:
                del mflinkrevs[n]
            else:
                self.err(lr, _("%s not in changesets") % short(n), "manifest")

            try:
                for f, fn in mf.readdelta(n).iteritems():
                    if not f:
                        self.err(lr, _("file without name in manifest"))
                    elif f != "/dev/null": # ignore this in very old repos
                        if _validpath(repo, f):
                            filenodes.setdefault(
                                _normpath(f), {}).setdefault(fn, lr)
            except Exception as inst:
                self.exc(lr, _("reading manifest delta %s") % short(n), inst)
        ui.progress(_('checking'), None)

        return filenodes

    def _crosscheckfiles(self, mflinkrevs, filelinkrevs, filenodes):
        """Cross-check files named in changesets against the manifests.

        Reports manifests that changesets refer to but that are still
        left in ``mflinkrevs``, files listed in changesets but absent
        from ``filenodes``, and files present in ``filenodes`` but never
        listed by a changeset. None of the arguments is modified.
        """
        repo = self.repo
        ui = self.ui
        ui.status(_("crosschecking files in changesets and manifests\n"))

        total = len(mflinkrevs) + len(filelinkrevs) + len(filenodes)
        count = 0
        if self.havemf:
            for c, m in sorted([(c, m) for m in mflinkrevs
                                for c in mflinkrevs[m]]):
                count += 1
                if m == nullid:
                    continue
                ui.progress(_('crosschecking'), count, total=total)
                self.err(c, _("changeset refers to unknown manifest %s") %
                         short(m))

            for f in sorted(filelinkrevs):
                count += 1
                ui.progress(_('crosschecking'), count, total=total)
                if f not in filenodes:
                    lr = filelinkrevs[f][0]
                    self.err(lr, _("in changeset but not in manifest"), f)

        if self.havecl:
            for f in sorted(filenodes):
                count += 1
                ui.progress(_('crosschecking'), count, total=total)
                if f not in filelinkrevs:
                    try:
                        fl = repo.file(f)
                        lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
                    except Exception:
                        # no usable linkrev; err() will display '?'
                        lr = None
                    self.err(lr, _("in manifest but not in changeset"), f)

        ui.progress(_('crosschecking'), None)

    def _verifyfiles(self, filenodes, filelinkrevs):
        """Check every filelog named by ``filenodes`` or ``filelinkrevs``.

        File nodes found in a filelog are deleted from ``filenodes``;
        whatever is left for a file afterwards is reported as present in
        manifests but not found. Store files that no filelog claims are
        reported as orphan revlogs.

        Returns ``(number of files, number of file revisions)``.
        """
        repo = self.repo
        ui = self.ui
        lrugetctx = self.lrugetctx
        revlogv1 = self.revlogv1
        havemf = self.havemf
        ui.status(_("checking files\n"))

        storefiles = set()
        for f, f2, size in repo.store.datafiles():
            if not f:
                self.err(None, _("cannot decode filename '%s'") % f2)
            elif size > 0 or not revlogv1:
                storefiles.add(_normpath(f))

        files = sorted(set(filenodes) | set(filelinkrevs))
        total = len(files)
        revisions = 0
        for i, f in enumerate(files):
            ui.progress(_('checking'), i, item=f, total=total)
            try:
                linkrevs = filelinkrevs[f]
            except KeyError:
                # in manifest but not in changelog
                linkrevs = []

            if linkrevs:
                lr = linkrevs[0]
            else:
                lr = None

            try:
                fl = repo.file(f)
            except error.RevlogError as e:
                self.err(lr, _("broken revlog! (%s)") % e, f)
                continue

            for ff in fl.files():
                try:
                    storefiles.remove(ff)
                except KeyError:
                    self.warn(_(" warning: revlog '%s' not in fncache!") % ff)
                    self.fncachewarned = True

            self.checklog(fl, f, lr)
            seen = {}
            # NOTE: rp deliberately persists across revisions of this file;
            # it is only refreshed when reading the revision succeeds.
            rp = None
            # 'rev' rather than 'i' so the file index above is not shadowed
            for rev in fl:
                revisions += 1
                n = fl.node(rev)
                lr = self.checkentry(fl, rev, n, seen, linkrevs, f)
                if f in filenodes:
                    if havemf and n not in filenodes[f]:
                        self.err(lr, _("%s not in manifests") % (short(n)), f)
                    else:
                        del filenodes[f][n]

                # verify contents
                try:
                    l = len(fl.read(n))
                    rp = fl.renamed(n)
                    if l != fl.size(rev):
                        if len(fl.revision(n)) != fl.size(rev):
                            self.err(lr, _("unpacked size is %s, %s expected") %
                                     (l, fl.size(rev)), f)
                except error.CensoredNodeError:
                    # experimental config: censor.policy
                    if ui.config("censor", "policy", "abort") == "abort":
                        self.err(lr, _("censored file data"), f)
                except Exception as inst:
                    self.exc(lr, _("unpacking %s") % short(n), inst, f)

                # check renames
                try:
                    if rp:
                        if lr is not None and ui.verbose:
                            ctx = lrugetctx(lr)
                            found = False
                            for pctx in ctx.parents():
                                if rp[0] in pctx:
                                    found = True
                                    break
                            if not found:
                                self.warn(_("warning: copy source of '%s' not"
                                            " in parents of %s") % (f, ctx))
                        fl2 = repo.file(rp[0])
                        if not len(fl2):
                            self.err(lr, _("empty or missing copy source "
                                           "revlog %s:%s") %
                                     (rp[0], short(rp[1])), f)
                        elif rp[1] == nullid:
                            ui.note(_("warning: %s@%s: copy source"
                                      " revision is nullid %s:%s\n")
                                    % (f, lr, rp[0], short(rp[1])))
                        else:
                            # raises if the copy source revision is unknown
                            fl2.rev(rp[1])
                except Exception as inst:
                    self.exc(lr, _("checking rename of %s") % short(n), inst, f)

            # cross-check
            if f in filenodes:
                # own names here: a Python 2 list comprehension would
                # otherwise rebind lr and n in the enclosing scope
                leftover = [(mlr, mnode)
                            for mnode, mlr in filenodes[f].iteritems()]
                for mlr, mnode in sorted(leftover):
                    self.err(mlr, _("%s in manifests not found") % short(mnode),
                             f)
        ui.progress(_('checking'), None)

        for f in storefiles:
            self.warn(_("warning: orphan revlog '%s'") % f)

        return len(files), revisions
General Comments 0
You need to be logged in to leave comments. Login now