##// END OF EJS Templates
verify: do not choke on valid changelog without manifest...
Patrick Mezard -
r17385:b32a30da stable
parent child Browse files
Show More
@@ -1,303 +1,310 b''
1 1 # verify.py - repository integrity checking for Mercurial
2 2 #
3 3 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from node import nullid, short
9 9 from i18n import _
10 10 import os
11 11 import revlog, util, error
12 12
13 13 def verify(repo):
14 14 lock = repo.lock()
15 15 try:
16 16 return _verify(repo)
17 17 finally:
18 18 lock.release()
19 19
20 20 def _verify(repo):
21 21 mflinkrevs = {}
22 22 filelinkrevs = {}
23 23 filenodes = {}
24 24 revisions = 0
25 25 badrevs = set()
26 26 errors = [0]
27 27 warnings = [0]
28 28 ui = repo.ui
29 29 cl = repo.changelog
30 30 mf = repo.manifest
31 31 lrugetctx = util.lrucachefunc(repo.changectx)
32 32
33 33 if not repo.cancopy():
34 34 raise util.Abort(_("cannot verify bundle or remote repos"))
35 35
36 36 def err(linkrev, msg, filename=None):
37 37 if linkrev is not None:
38 38 badrevs.add(linkrev)
39 39 else:
40 40 linkrev = '?'
41 41 msg = "%s: %s" % (linkrev, msg)
42 42 if filename:
43 43 msg = "%s@%s" % (filename, msg)
44 44 ui.warn(" " + msg + "\n")
45 45 errors[0] += 1
46 46
47 47 def exc(linkrev, msg, inst, filename=None):
48 48 if isinstance(inst, KeyboardInterrupt):
49 49 ui.warn(_("interrupted"))
50 50 raise
51 51 if not str(inst):
52 52 inst = repr(inst)
53 53 err(linkrev, "%s: %s" % (msg, inst), filename)
54 54
55 55 def warn(msg):
56 56 ui.warn(msg + "\n")
57 57 warnings[0] += 1
58 58
59 59 def checklog(obj, name, linkrev):
60 60 if not len(obj) and (havecl or havemf):
61 61 err(linkrev, _("empty or missing %s") % name)
62 62 return
63 63
64 64 d = obj.checksize()
65 65 if d[0]:
66 66 err(None, _("data length off by %d bytes") % d[0], name)
67 67 if d[1]:
68 68 err(None, _("index contains %d extra bytes") % d[1], name)
69 69
70 70 if obj.version != revlog.REVLOGV0:
71 71 if not revlogv1:
72 72 warn(_("warning: `%s' uses revlog format 1") % name)
73 73 elif revlogv1:
74 74 warn(_("warning: `%s' uses revlog format 0") % name)
75 75
76 76 def checkentry(obj, i, node, seen, linkrevs, f):
77 77 lr = obj.linkrev(obj.rev(node))
78 78 if lr < 0 or (havecl and lr not in linkrevs):
79 79 if lr < 0 or lr >= len(cl):
80 80 msg = _("rev %d points to nonexistent changeset %d")
81 81 else:
82 82 msg = _("rev %d points to unexpected changeset %d")
83 83 err(None, msg % (i, lr), f)
84 84 if linkrevs:
85 85 if f and len(linkrevs) > 1:
86 86 try:
87 87 # attempt to filter down to real linkrevs
88 88 linkrevs = [l for l in linkrevs
89 89 if lrugetctx(l)[f].filenode() == node]
90 90 except Exception:
91 91 pass
92 92 warn(_(" (expected %s)") % " ".join(map(str, linkrevs)))
93 93 lr = None # can't be trusted
94 94
95 95 try:
96 96 p1, p2 = obj.parents(node)
97 97 if p1 not in seen and p1 != nullid:
98 98 err(lr, _("unknown parent 1 %s of %s") %
99 99 (short(p1), short(n)), f)
100 100 if p2 not in seen and p2 != nullid:
101 101 err(lr, _("unknown parent 2 %s of %s") %
102 102 (short(p2), short(p1)), f)
103 103 except Exception, inst:
104 104 exc(lr, _("checking parents of %s") % short(node), inst, f)
105 105
106 106 if node in seen:
107 107 err(lr, _("duplicate revision %d (%d)") % (i, seen[n]), f)
108 108 seen[n] = i
109 109 return lr
110 110
111 111 if os.path.exists(repo.sjoin("journal")):
112 112 ui.warn(_("abandoned transaction found - run hg recover\n"))
113 113
114 114 revlogv1 = cl.version != revlog.REVLOGV0
115 115 if ui.verbose or not revlogv1:
116 116 ui.status(_("repository uses revlog format %d\n") %
117 117 (revlogv1 and 1 or 0))
118 118
119 119 havecl = len(cl) > 0
120 120 havemf = len(mf) > 0
121 121
122 122 ui.status(_("checking changesets\n"))
123 hasmanifest = False
123 124 seen = {}
124 125 checklog(cl, "changelog", 0)
125 126 total = len(repo)
126 127 for i in repo:
127 128 ui.progress(_('checking'), i, total=total, unit=_('changesets'))
128 129 n = cl.node(i)
129 130 checkentry(cl, i, n, seen, [i], "changelog")
130 131
131 132 try:
132 133 changes = cl.read(n)
133 mflinkrevs.setdefault(changes[0], []).append(i)
134 if changes[0] != nullid:
135 mflinkrevs.setdefault(changes[0], []).append(i)
136 hasmanifest = True
134 137 for f in changes[3]:
135 138 filelinkrevs.setdefault(f, []).append(i)
136 139 except Exception, inst:
140 hasmanifest = True
137 141 exc(i, _("unpacking changeset %s") % short(n), inst)
138 142 ui.progress(_('checking'), None)
139 143
140 144 ui.status(_("checking manifests\n"))
141 145 seen = {}
142 checklog(mf, "manifest", 0)
146 if hasmanifest:
147 # Do not check manifest if there are only changelog entries with
148 # null manifests.
149 checklog(mf, "manifest", 0)
143 150 total = len(mf)
144 151 for i in mf:
145 152 ui.progress(_('checking'), i, total=total, unit=_('manifests'))
146 153 n = mf.node(i)
147 154 lr = checkentry(mf, i, n, seen, mflinkrevs.get(n, []), "manifest")
148 155 if n in mflinkrevs:
149 156 del mflinkrevs[n]
150 157 else:
151 158 err(lr, _("%s not in changesets") % short(n), "manifest")
152 159
153 160 try:
154 161 for f, fn in mf.readdelta(n).iteritems():
155 162 if not f:
156 163 err(lr, _("file without name in manifest"))
157 164 elif f != "/dev/null":
158 165 filenodes.setdefault(f, {}).setdefault(fn, lr)
159 166 except Exception, inst:
160 167 exc(lr, _("reading manifest delta %s") % short(n), inst)
161 168 ui.progress(_('checking'), None)
162 169
163 170 ui.status(_("crosschecking files in changesets and manifests\n"))
164 171
165 172 total = len(mflinkrevs) + len(filelinkrevs) + len(filenodes)
166 173 count = 0
167 174 if havemf:
168 175 for c, m in sorted([(c, m) for m in mflinkrevs
169 176 for c in mflinkrevs[m]]):
170 177 count += 1
171 178 if m == nullid:
172 179 continue
173 180 ui.progress(_('crosschecking'), count, total=total)
174 181 err(c, _("changeset refers to unknown manifest %s") % short(m))
175 182 mflinkrevs = None # del is bad here due to scope issues
176 183
177 184 for f in sorted(filelinkrevs):
178 185 count += 1
179 186 ui.progress(_('crosschecking'), count, total=total)
180 187 if f not in filenodes:
181 188 lr = filelinkrevs[f][0]
182 189 err(lr, _("in changeset but not in manifest"), f)
183 190
184 191 if havecl:
185 192 for f in sorted(filenodes):
186 193 count += 1
187 194 ui.progress(_('crosschecking'), count, total=total)
188 195 if f not in filelinkrevs:
189 196 try:
190 197 fl = repo.file(f)
191 198 lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
192 199 except Exception:
193 200 lr = None
194 201 err(lr, _("in manifest but not in changeset"), f)
195 202
196 203 ui.progress(_('crosschecking'), None)
197 204
198 205 ui.status(_("checking files\n"))
199 206
200 207 storefiles = set()
201 208 for f, f2, size in repo.store.datafiles():
202 209 if not f:
203 210 err(None, _("cannot decode filename '%s'") % f2)
204 211 elif size > 0 or not revlogv1:
205 212 storefiles.add(f)
206 213
207 214 files = sorted(set(filenodes) | set(filelinkrevs))
208 215 total = len(files)
209 216 for i, f in enumerate(files):
210 217 ui.progress(_('checking'), i, item=f, total=total)
211 218 try:
212 219 linkrevs = filelinkrevs[f]
213 220 except KeyError:
214 221 # in manifest but not in changelog
215 222 linkrevs = []
216 223
217 224 if linkrevs:
218 225 lr = linkrevs[0]
219 226 else:
220 227 lr = None
221 228
222 229 try:
223 230 fl = repo.file(f)
224 231 except error.RevlogError, e:
225 232 err(lr, _("broken revlog! (%s)") % e, f)
226 233 continue
227 234
228 235 for ff in fl.files():
229 236 try:
230 237 storefiles.remove(ff)
231 238 except KeyError:
232 239 err(lr, _("missing revlog!"), ff)
233 240
234 241 checklog(fl, f, lr)
235 242 seen = {}
236 243 rp = None
237 244 for i in fl:
238 245 revisions += 1
239 246 n = fl.node(i)
240 247 lr = checkentry(fl, i, n, seen, linkrevs, f)
241 248 if f in filenodes:
242 249 if havemf and n not in filenodes[f]:
243 250 err(lr, _("%s not in manifests") % (short(n)), f)
244 251 else:
245 252 del filenodes[f][n]
246 253
247 254 # verify contents
248 255 try:
249 256 l = len(fl.read(n))
250 257 rp = fl.renamed(n)
251 258 if l != fl.size(i):
252 259 if len(fl.revision(n)) != fl.size(i):
253 260 err(lr, _("unpacked size is %s, %s expected") %
254 261 (l, fl.size(i)), f)
255 262 except Exception, inst:
256 263 exc(lr, _("unpacking %s") % short(n), inst, f)
257 264
258 265 # check renames
259 266 try:
260 267 if rp:
261 268 if lr is not None and ui.verbose:
262 269 ctx = lrugetctx(lr)
263 270 found = False
264 271 for pctx in ctx.parents():
265 272 if rp[0] in pctx:
266 273 found = True
267 274 break
268 275 if not found:
269 276 warn(_("warning: copy source of '%s' not"
270 277 " in parents of %s") % (f, ctx))
271 278 fl2 = repo.file(rp[0])
272 279 if not len(fl2):
273 280 err(lr, _("empty or missing copy source revlog %s:%s")
274 281 % (rp[0], short(rp[1])), f)
275 282 elif rp[1] == nullid:
276 283 ui.note(_("warning: %s@%s: copy source"
277 284 " revision is nullid %s:%s\n")
278 285 % (f, lr, rp[0], short(rp[1])))
279 286 else:
280 287 fl2.rev(rp[1])
281 288 except Exception, inst:
282 289 exc(lr, _("checking rename of %s") % short(n), inst, f)
283 290
284 291 # cross-check
285 292 if f in filenodes:
286 293 fns = [(lr, n) for n, lr in filenodes[f].iteritems()]
287 294 for lr, node in sorted(fns):
288 295 err(lr, _("%s in manifests not found") % short(node), f)
289 296 ui.progress(_('checking'), None)
290 297
291 298 for f in storefiles:
292 299 warn(_("warning: orphan revlog '%s'") % f)
293 300
294 301 ui.status(_("%d files, %d changesets, %d total revisions\n") %
295 302 (len(files), len(cl), revisions))
296 303 if warnings[0]:
297 304 ui.warn(_("%d warnings encountered!\n") % warnings[0])
298 305 if errors[0]:
299 306 ui.warn(_("%d integrity errors encountered!\n") % errors[0])
300 307 if badrevs:
301 308 ui.warn(_("(first damaged changeset appears to be %d)\n")
302 309 % min(badrevs))
303 310 return 1
@@ -1,103 +1,115 b''
1 1 prepare repo
2 2
3 3 $ hg init a
4 4 $ cd a
5 5 $ echo "some text" > FOO.txt
6 6 $ echo "another text" > bar.txt
7 7 $ echo "more text" > QUICK.txt
8 8 $ hg add
9 9 adding FOO.txt
10 10 adding QUICK.txt
11 11 adding bar.txt
12 12 $ hg ci -mtest1
13 13
14 14 verify
15 15
16 16 $ hg verify
17 17 checking changesets
18 18 checking manifests
19 19 crosschecking files in changesets and manifests
20 20 checking files
21 21 3 files, 1 changesets, 3 total revisions
22 22
23 23 verify with journal
24 24
25 25 $ touch .hg/store/journal
26 26 $ hg verify
27 27 abandoned transaction found - run hg recover
28 28 checking changesets
29 29 checking manifests
30 30 crosschecking files in changesets and manifests
31 31 checking files
32 32 3 files, 1 changesets, 3 total revisions
33 33 $ rm .hg/store/journal
34 34
35 35 introduce some bugs in repo
36 36
37 37 $ cd .hg/store/data
38 38 $ mv _f_o_o.txt.i X_f_o_o.txt.i
39 39 $ mv bar.txt.i xbar.txt.i
40 40 $ rm _q_u_i_c_k.txt.i
41 41
42 42 $ hg verify
43 43 checking changesets
44 44 checking manifests
45 45 crosschecking files in changesets and manifests
46 46 checking files
47 47 data/FOO.txt.i@0: missing revlog!
48 48 0: empty or missing FOO.txt
49 49 FOO.txt@0: f62022d3d590 in manifests not found
50 50 data/QUICK.txt.i@0: missing revlog!
51 51 0: empty or missing QUICK.txt
52 52 QUICK.txt@0: 88b857db8eba in manifests not found
53 53 data/bar.txt.i@0: missing revlog!
54 54 0: empty or missing bar.txt
55 55 bar.txt@0: 256559129457 in manifests not found
56 56 3 files, 1 changesets, 0 total revisions
57 57 9 integrity errors encountered!
58 58 (first damaged changeset appears to be 0)
59 59 [1]
60 60
61 61 $ cd ../../..
62 62 $ cd ..
63 63
64 test revlog corruption
64 test changelog without a manifest
65 65
66 66 $ hg init b
67 67 $ cd b
68 $ hg branch foo
69 marked working directory as branch foo
70 (branches are permanent and global, did you want a bookmark?)
71 $ hg ci -m branchfoo
72 $ hg verify
73 checking changesets
74 checking manifests
75 crosschecking files in changesets and manifests
76 checking files
77 0 files, 1 changesets, 0 total revisions
78
79 test revlog corruption
68 80
69 81 $ touch a
70 82 $ hg add a
71 83 $ hg ci -m a
72 84
73 85 $ echo 'corrupted' > b
74 86 $ dd if=.hg/store/data/a.i of=start bs=1 count=20 2>/dev/null
75 87 $ cat start b > .hg/store/data/a.i
76 88
77 89 $ hg verify
78 90 checking changesets
79 91 checking manifests
80 92 crosschecking files in changesets and manifests
81 93 checking files
82 a@0: broken revlog! (index data/a.i is corrupted)
94 a@1: broken revlog! (index data/a.i is corrupted)
83 95 warning: orphan revlog 'data/a.i'
84 1 files, 1 changesets, 0 total revisions
96 1 files, 2 changesets, 0 total revisions
85 97 1 warnings encountered!
86 98 1 integrity errors encountered!
87 (first damaged changeset appears to be 0)
99 (first damaged changeset appears to be 1)
88 100 [1]
89 101
90 102 $ cd ..
91 103
92 104 test revlog format 0
93 105
94 106 $ "$TESTDIR/revlog-formatv0.py"
95 107 $ cd formatv0
96 108 $ hg verify
97 109 repository uses revlog format 0
98 110 checking changesets
99 111 checking manifests
100 112 crosschecking files in changesets and manifests
101 113 checking files
102 114 1 files, 1 changesets, 1 total revisions
103 115 $ cd ..
General Comments 0
You need to be logged in to leave comments. Login now