##// END OF EJS Templates
censor: mark experimental option
Matt Mackall -
r25846:c55eac3f default
parent child Browse files
Show More
@@ -1,326 +1,327 b''
1 1 # verify.py - repository integrity checking for Mercurial
2 2 #
3 3 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from node import nullid, short
9 9 from i18n import _
10 10 import os
11 11 import revlog, util, error
12 12
def verify(repo):
    """Run the integrity check on ``repo`` while holding its lock.

    Acquires ``repo.lock()``, delegates to ``_verify`` and releases the
    lock again whether the check returns or raises.  The return value is
    whatever ``_verify`` returns.
    """
    lock = repo.lock()
    try:
        return _verify(repo)
    finally:
        lock.release()
19 19
20 20 def _normpath(f):
21 21 # under hg < 2.4, convert didn't sanitize paths properly, so a
22 22 # converted repo may contain repeated slashes
23 23 while '//' in f:
24 24 f = f.replace('//', '/')
25 25 return f
26 26
def _verify(repo):
    """Check the integrity of ``repo`` and report problems through its ui.

    Works on ``repo.unfiltered()`` and runs four passes in order:
    changesets, manifests, a crosscheck of the file names referenced by
    both, and finally every file revlog.  Each problem is written with
    ``ui.warn`` and counted either as an integrity error or as a warning.

    Raises ``util.Abort`` when ``repo.url()`` does not start with
    ``'file:'``.

    Returns 1 when at least one integrity error was counted; otherwise the
    function falls off the end and returns None.
    """
    repo = repo.unfiltered()
    mflinkrevs = {}    # manifest node -> changelog revs that reference it
    filelinkrevs = {}  # normalized file name -> changelog revs listing it
    filenodes = {}     # normalized file name -> {file node: manifest linkrev}
    revisions = 0      # number of file revisions visited
    badrevs = set()    # every non-None linkrev handed to err()
    # one-element lists so the nested helpers can bump the counters in place
    errors = [0]
    warnings = [0]
    ui = repo.ui
    cl = repo.changelog
    mf = repo.manifest
    lrugetctx = util.lrucachefunc(repo.changectx)

    if not repo.url().startswith('file:'):
        raise util.Abort(_("cannot verify bundle or remote repos"))

    def err(linkrev, msg, filename=None):
        """Print an integrity error and count it.

        A non-None ``linkrev`` is remembered in ``badrevs``; None is shown
        as '?' in the message.
        """
        if linkrev is not None:
            badrevs.add(linkrev)
        else:
            linkrev = '?'
        msg = "%s: %s" % (linkrev, msg)
        if filename:
            msg = "%s@%s" % (filename, msg)
        ui.warn(" " + msg + "\n")
        errors[0] += 1

    def exc(linkrev, msg, inst, filename=None):
        """Report exception ``inst`` as an integrity error via err().

        Meant to be called from inside an ``except`` block: a
        KeyboardInterrupt is re-raised with a bare ``raise`` instead of
        being recorded.
        """
        if isinstance(inst, KeyboardInterrupt):
            ui.warn(_("interrupted"))
            raise
        if not str(inst):
            # fall back to repr() when the exception has an empty message
            inst = repr(inst)
        err(linkrev, "%s: %s" % (msg, inst), filename)

    def warn(msg):
        """Print a warning line and count it."""
        ui.warn(msg + "\n")
        warnings[0] += 1

    def checklog(obj, name, linkrev):
        """Run the whole-revlog checks on ``obj``.

        Reports an empty revlog (when the repo has changesets or
        manifests), the two size discrepancies returned by
        ``obj.checksize()``, and a revlog format that differs from the
        changelog's.  ``havecl``, ``havemf`` and ``revlogv1`` are bound
        in the enclosing function before the first call.
        """
        if not len(obj) and (havecl or havemf):
            err(linkrev, _("empty or missing %s") % name)
            return

        d = obj.checksize()
        if d[0]:
            err(None, _("data length off by %d bytes") % d[0], name)
        if d[1]:
            err(None, _("index contains %d extra bytes") % d[1], name)

        if obj.version != revlog.REVLOGV0:
            if not revlogv1:
                warn(_("warning: `%s' uses revlog format 1") % name)
        elif revlogv1:
            warn(_("warning: `%s' uses revlog format 0") % name)

    def checkentry(obj, i, node, seen, linkrevs, f):
        """Check revision ``i`` (``node``) of revlog ``obj``.

        Verifies the linkrev against ``linkrevs``, that both parents were
        already seen (or are nullid), and that ``node`` is not a
        duplicate.  Records ``node`` in ``seen`` and returns the linkrev,
        or None when the linkrev was found to be bad.
        """
        lr = obj.linkrev(obj.rev(node))
        if lr < 0 or (havecl and lr not in linkrevs):
            if lr < 0 or lr >= len(cl):
                msg = _("rev %d points to nonexistent changeset %d")
            else:
                msg = _("rev %d points to unexpected changeset %d")
            err(None, msg % (i, lr), f)
            if linkrevs:
                if f and len(linkrevs) > 1:
                    try:
                        # attempt to filter down to real linkrevs
                        linkrevs = [l for l in linkrevs
                                    if lrugetctx(l)[f].filenode() == node]
                    except Exception:
                        # best effort only: keep the unfiltered list
                        pass
                warn(_(" (expected %s)") % " ".join(map(str, linkrevs)))
            lr = None # can't be trusted

        try:
            p1, p2 = obj.parents(node)
            if p1 not in seen and p1 != nullid:
                err(lr, _("unknown parent 1 %s of %s") %
                    (short(p1), short(node)), f)
            if p2 not in seen and p2 != nullid:
                err(lr, _("unknown parent 2 %s of %s") %
                    (short(p2), short(node)), f)
        except Exception as inst:
            exc(lr, _("checking parents of %s") % short(node), inst, f)

        if node in seen:
            err(lr, _("duplicate revision %d (%d)") % (i, seen[node]), f)
        seen[node] = i
        return lr

    if os.path.exists(repo.sjoin("journal")):
        ui.warn(_("abandoned transaction found - run hg recover\n"))

    revlogv1 = cl.version != revlog.REVLOGV0
    if ui.verbose or not revlogv1:
        ui.status(_("repository uses revlog format %d\n") %
                  (1 if revlogv1 else 0))

    havecl = len(cl) > 0
    havemf = len(mf) > 0

    # pass 1: changelog
    ui.status(_("checking changesets\n"))
    refersmf = False
    seen = {}
    checklog(cl, "changelog", 0)
    total = len(repo)
    for i in repo:
        ui.progress(_('checking'), i, total=total, unit=_('changesets'))
        n = cl.node(i)
        checkentry(cl, i, n, seen, [i], "changelog")

        try:
            changes = cl.read(n)
            # changes[0] is used as the manifest node and changes[3] as
            # the list of file names
            if changes[0] != nullid:
                mflinkrevs.setdefault(changes[0], []).append(i)
                refersmf = True
            for f in changes[3]:
                filelinkrevs.setdefault(_normpath(f), []).append(i)
        except Exception as inst:
            # an unreadable changeset might have referenced a manifest
            refersmf = True
            exc(i, _("unpacking changeset %s") % short(n), inst)
    ui.progress(_('checking'), None)

    # pass 2: manifest
    ui.status(_("checking manifests\n"))
    seen = {}
    if refersmf:
        # Do not check manifest if there are only changelog entries with
        # null manifests.
        checklog(mf, "manifest", 0)
    total = len(mf)
    for i in mf:
        ui.progress(_('checking'), i, total=total, unit=_('manifests'))
        n = mf.node(i)
        lr = checkentry(mf, i, n, seen, mflinkrevs.get(n, []), "manifest")
        if n in mflinkrevs:
            # accounted for; whatever stays in mflinkrevs is reported as
            # an unknown manifest during the crosscheck
            del mflinkrevs[n]
        else:
            err(lr, _("%s not in changesets") % short(n), "manifest")

        try:
            for f, fn in mf.readdelta(n).iteritems():
                if not f:
                    err(lr, _("file without name in manifest"))
                elif f != "/dev/null": # ignore this in very old repos
                    filenodes.setdefault(_normpath(f), {}).setdefault(fn, lr)
        except Exception as inst:
            exc(lr, _("reading manifest delta %s") % short(n), inst)
    ui.progress(_('checking'), None)

    # pass 3: crosscheck changelog and manifest contents
    ui.status(_("crosschecking files in changesets and manifests\n"))

    total = len(mflinkrevs) + len(filelinkrevs) + len(filenodes)
    count = 0
    if havemf:
        for c, m in sorted((c, m) for m in mflinkrevs
                           for c in mflinkrevs[m]):
            count += 1
            if m == nullid:
                continue
            ui.progress(_('crosschecking'), count, total=total)
            err(c, _("changeset refers to unknown manifest %s") % short(m))
        mflinkrevs = None # del is bad here due to scope issues

        for f in sorted(filelinkrevs):
            count += 1
            ui.progress(_('crosschecking'), count, total=total)
            if f not in filenodes:
                lr = filelinkrevs[f][0]
                err(lr, _("in changeset but not in manifest"), f)

    if havecl:
        for f in sorted(filenodes):
            count += 1
            ui.progress(_('crosschecking'), count, total=total)
            if f not in filelinkrevs:
                try:
                    fl = repo.file(f)
                    lr = min(fl.linkrev(fl.rev(n)) for n in filenodes[f])
                except Exception:
                    # no usable linkrev; err() will print '?'
                    lr = None
                err(lr, _("in manifest but not in changeset"), f)

    ui.progress(_('crosschecking'), None)

    # pass 4: file revlogs
    ui.status(_("checking files\n"))

    # store files not yet claimed by a checked revlog; leftovers are
    # reported as orphans below
    storefiles = set()
    for f, f2, size in repo.store.datafiles():
        if not f:
            err(None, _("cannot decode filename '%s'") % f2)
        elif size > 0 or not revlogv1:
            storefiles.add(_normpath(f))

    fncachewarned = False
    files = sorted(set(filenodes) | set(filelinkrevs))
    total = len(files)
    for i, f in enumerate(files):
        ui.progress(_('checking'), i, item=f, total=total)
        # an absent entry means: in manifest but not in changelog
        linkrevs = filelinkrevs.get(f, [])
        lr = linkrevs[0] if linkrevs else None

        try:
            fl = repo.file(f)
        except error.RevlogError as e:
            err(lr, _("broken revlog! (%s)") % e, f)
            continue

        for ff in fl.files():
            try:
                storefiles.remove(ff)
            except KeyError:
                warn(_(" warning: revlog '%s' not in fncache!") % ff)
                fncachewarned = True

        checklog(fl, f, lr)
        seen = {}
        rp = None
        # 'rev' rather than 'i' so the enumerate() index above is not
        # shadowed
        for rev in fl:
            revisions += 1
            n = fl.node(rev)
            lr = checkentry(fl, rev, n, seen, linkrevs, f)
            if f in filenodes:
                if havemf and n not in filenodes[f]:
                    err(lr, _("%s not in manifests") % (short(n)), f)
                else:
                    del filenodes[f][n]

            # verify contents
            try:
                l = len(fl.read(n))
                rp = fl.renamed(n)
                if l != fl.size(rev):
                    # only an error if the full revision text also
                    # disagrees with the recorded size
                    if len(fl.revision(n)) != fl.size(rev):
                        err(lr, _("unpacked size is %s, %s expected") %
                            (l, fl.size(rev)), f)
            except error.CensoredNodeError:
                # experimental config: censor.policy
                if ui.config("censor", "policy", "abort") == "abort":
                    err(lr, _("censored file data"), f)
            except Exception as inst:
                exc(lr, _("unpacking %s") % short(n), inst, f)

            # check renames
            try:
                if rp:
                    if lr is not None and ui.verbose:
                        ctx = lrugetctx(lr)
                        if not any(rp[0] in pctx for pctx in ctx.parents()):
                            warn(_("warning: copy source of '%s' not"
                                   " in parents of %s") % (f, ctx))
                    fl2 = repo.file(rp[0])
                    if not len(fl2):
                        err(lr, _("empty or missing copy source revlog %s:%s")
                            % (rp[0], short(rp[1])), f)
                    elif rp[1] == nullid:
                        ui.note(_("warning: %s@%s: copy source"
                                  " revision is nullid %s:%s\n")
                                % (f, lr, rp[0], short(rp[1])))
                    else:
                        # called only for its exception when the copy
                        # source revision cannot be looked up
                        fl2.rev(rp[1])
            except Exception as inst:
                exc(lr, _("checking rename of %s") % short(n), inst, f)

        # cross-check: nodes still listed for f were named by a manifest
        # but never deleted in the loop above
        if f in filenodes:
            fns = [(lr, n) for n, lr in filenodes[f].iteritems()]
            for lr, node in sorted(fns):
                err(lr, _("%s in manifests not found") % short(node), f)
    ui.progress(_('checking'), None)

    for f in storefiles:
        warn(_("warning: orphan revlog '%s'") % f)

    ui.status(_("%d files, %d changesets, %d total revisions\n") %
              (len(files), len(cl), revisions))
    if warnings[0]:
        ui.warn(_("%d warnings encountered!\n") % warnings[0])
    if fncachewarned:
        ui.warn(_('hint: run "hg debugrebuildfncache" to recover from '
                  'corrupt fncache\n'))
    if errors[0]:
        ui.warn(_("%d integrity errors encountered!\n") % errors[0])
        if badrevs:
            ui.warn(_("(first damaged changeset appears to be %d)\n")
                    % min(badrevs))
        return 1
General Comments 0
You need to be logged in to leave comments. Login now