##// END OF EJS Templates
verify: check repo.store
Adrian Buehlmann -
r6892:dab95717 default
parent child Browse files
Show More
@@ -1,127 +1,132 b''
1 # store.py - repository store handling for Mercurial
1 # store.py - repository store handling for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms
5 # This software may be used and distributed according to the terms
6 # of the GNU General Public License, incorporated herein by reference.
6 # of the GNU General Public License, incorporated herein by reference.
7
7
8 from i18n import _
8 import os, stat, osutil, util
9 import os, stat, osutil, util
9
10
10 def _buildencodefun():
11 def _buildencodefun():
11 e = '_'
12 e = '_'
12 win_reserved = [ord(x) for x in '\\:*?"<>|']
13 win_reserved = [ord(x) for x in '\\:*?"<>|']
13 cmap = dict([ (chr(x), chr(x)) for x in xrange(127) ])
14 cmap = dict([ (chr(x), chr(x)) for x in xrange(127) ])
14 for x in (range(32) + range(126, 256) + win_reserved):
15 for x in (range(32) + range(126, 256) + win_reserved):
15 cmap[chr(x)] = "~%02x" % x
16 cmap[chr(x)] = "~%02x" % x
16 for x in range(ord("A"), ord("Z")+1) + [ord(e)]:
17 for x in range(ord("A"), ord("Z")+1) + [ord(e)]:
17 cmap[chr(x)] = e + chr(x).lower()
18 cmap[chr(x)] = e + chr(x).lower()
18 dmap = {}
19 dmap = {}
19 for k, v in cmap.iteritems():
20 for k, v in cmap.iteritems():
20 dmap[v] = k
21 dmap[v] = k
21 def decode(s):
22 def decode(s):
22 i = 0
23 i = 0
23 while i < len(s):
24 while i < len(s):
24 for l in xrange(1, 4):
25 for l in xrange(1, 4):
25 try:
26 try:
26 yield dmap[s[i:i+l]]
27 yield dmap[s[i:i+l]]
27 i += l
28 i += l
28 break
29 break
29 except KeyError:
30 except KeyError:
30 pass
31 pass
31 else:
32 else:
32 raise KeyError
33 raise KeyError
33 return (lambda s: "".join([cmap[c] for c in s]),
34 return (lambda s: "".join([cmap[c] for c in s]),
34 lambda s: "".join(list(decode(s))))
35 lambda s: "".join(list(decode(s))))
35
36
36 encodefilename, decodefilename = _buildencodefun()
37 encodefilename, decodefilename = _buildencodefun()
37
38
38 def _dirwalk(path, recurse):
39 def _dirwalk(path, recurse):
39 '''yields (filename, size)'''
40 '''yields (filename, size)'''
40 for e, kind, st in osutil.listdir(path, stat=True):
41 for e, kind, st in osutil.listdir(path, stat=True):
41 pe = os.path.join(path, e)
42 pe = os.path.join(path, e)
42 if kind == stat.S_IFDIR:
43 if kind == stat.S_IFDIR:
43 if recurse:
44 if recurse:
44 for x in _dirwalk(pe, True):
45 for x in _dirwalk(pe, True):
45 yield x
46 yield x
46 elif kind == stat.S_IFREG:
47 elif kind == stat.S_IFREG:
47 yield pe, st.st_size
48 yield pe, st.st_size
48
49
49 class _store:
50 class _store:
50 '''base class for local repository stores'''
51 '''base class for local repository stores'''
51 def __init__(self, path):
52 def __init__(self, path):
52 self.path = path
53 self.path = path
53 try:
54 try:
54 # files in .hg/ will be created using this mode
55 # files in .hg/ will be created using this mode
55 mode = os.stat(self.path).st_mode
56 mode = os.stat(self.path).st_mode
56 # avoid some useless chmods
57 # avoid some useless chmods
57 if (0777 & ~util._umask) == (0777 & mode):
58 if (0777 & ~util._umask) == (0777 & mode):
58 mode = None
59 mode = None
59 except OSError:
60 except OSError:
60 mode = None
61 mode = None
61 self.createmode = mode
62 self.createmode = mode
62
63
63 def join(self, f):
64 def join(self, f):
64 return os.path.join(self.path, f)
65 return os.path.join(self.path, f)
65
66
66 def _revlogfiles(self, relpath='', recurse=False):
67 def _revlogfiles(self, relpath='', recurse=False):
67 '''yields (filename, size)'''
68 '''yields (filename, size)'''
68 if relpath:
69 if relpath:
69 path = os.path.join(self.path, relpath)
70 path = os.path.join(self.path, relpath)
70 else:
71 else:
71 path = self.path
72 path = self.path
72 if not os.path.isdir(path):
73 if not os.path.isdir(path):
73 return
74 return
74 striplen = len(self.path) + len(os.sep)
75 striplen = len(self.path) + len(os.sep)
75 filetypes = ('.d', '.i')
76 filetypes = ('.d', '.i')
76 for f, size in _dirwalk(path, recurse):
77 for f, size in _dirwalk(path, recurse):
77 if (len(f) > 2) and f[-2:] in filetypes:
78 if (len(f) > 2) and f[-2:] in filetypes:
78 yield util.pconvert(f[striplen:]), size
79 yield util.pconvert(f[striplen:]), size
79
80
80 def _datafiles(self):
81 def datafiles(self, reporterror=None):
81 for x in self._revlogfiles('data', True):
82 for x in self._revlogfiles('data', True):
82 yield x
83 yield x
83
84
84 def walk(self):
85 def walk(self):
85 '''yields (direncoded filename, size)'''
86 '''yields (direncoded filename, size)'''
86 # yield data files first
87 # yield data files first
87 for x in self._datafiles():
88 for x in self.datafiles():
88 yield x
89 yield x
89 # yield manifest before changelog
90 # yield manifest before changelog
90 meta = util.sort(self._revlogfiles())
91 meta = util.sort(self._revlogfiles())
91 meta.reverse()
92 meta.reverse()
92 for x in meta:
93 for x in meta:
93 yield x
94 yield x
94
95
95 class directstore(_store):
96 class directstore(_store):
96 def __init__(self, path):
97 def __init__(self, path):
97 _store.__init__(self, path)
98 _store.__init__(self, path)
98 self.encodefn = lambda x: x
99 self.opener = util.opener(self.path)
99 self.opener = util.opener(self.path)
100 self.opener.createmode = self.createmode
100 self.opener.createmode = self.createmode
101
101
102 class encodedstore(_store):
102 class encodedstore(_store):
103 def __init__(self, path):
103 def __init__(self, path):
104 _store.__init__(self, os.path.join(path, 'store'))
104 _store.__init__(self, os.path.join(path, 'store'))
105 self.encodefn = encodefilename
105 self.encodefn = encodefilename
106 op = util.opener(self.path)
106 op = util.opener(self.path)
107 op.createmode = self.createmode
107 op.createmode = self.createmode
108 self.opener = lambda f, *args, **kw: op(self.encodefn(f), *args, **kw)
108 self.opener = lambda f, *args, **kw: op(self.encodefn(f), *args, **kw)
109
109
110 def _datafiles(self):
110 def datafiles(self, reporterror=None):
111 for f, size in self._revlogfiles('data', True):
111 for f, size in self._revlogfiles('data', True):
112 yield decodefilename(f), size
112 try:
113 yield decodefilename(f), size
114 except KeyError:
115 if not reporterror:
116 raise
117 reporterror(_("cannot decode filename '%s'") % f)
113
118
114 def join(self, f):
119 def join(self, f):
115 return os.path.join(self.path, self.encodefn(f))
120 return os.path.join(self.path, self.encodefn(f))
116
121
117 def encodefn(requirements):
122 def encodefn(requirements):
118 if 'store' not in requirements:
123 if 'store' not in requirements:
119 return lambda x: x
124 return lambda x: x
120 else:
125 else:
121 return encodefilename
126 return encodefilename
122
127
123 def store(requirements, path):
128 def store(requirements, path):
124 if 'store' not in requirements:
129 if 'store' not in requirements:
125 return directstore(path)
130 return directstore(path)
126 else:
131 else:
127 return encodedstore(path)
132 return encodedstore(path)
@@ -1,217 +1,233 b''
1 # verify.py - repository integrity checking for Mercurial
1 # verify.py - repository integrity checking for Mercurial
2 #
2 #
3 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms
5 # This software may be used and distributed according to the terms
6 # of the GNU General Public License, incorporated herein by reference.
6 # of the GNU General Public License, incorporated herein by reference.
7
7
8 from node import nullid, short
8 from node import nullid, short
9 from i18n import _
9 from i18n import _
10 import revlog, util
10 import revlog, util
11
11
12 def verify(repo):
12 def verify(repo):
13 lock = repo.lock()
13 lock = repo.lock()
14 try:
14 try:
15 return _verify(repo)
15 return _verify(repo)
16 finally:
16 finally:
17 del lock
17 del lock
18
18
19 def _verify(repo):
19 def _verify(repo):
20 mflinkrevs = {}
20 mflinkrevs = {}
21 filelinkrevs = {}
21 filelinkrevs = {}
22 filenodes = {}
22 filenodes = {}
23 revisions = 0
23 revisions = 0
24 badrevs = {}
24 badrevs = {}
25 errors = [0]
25 errors = [0]
26 warnings = [0]
26 warnings = [0]
27 ui = repo.ui
27 ui = repo.ui
28 cl = repo.changelog
28 cl = repo.changelog
29 mf = repo.manifest
29 mf = repo.manifest
30
30
31 def err(linkrev, msg, filename=None):
31 def err(linkrev, msg, filename=None):
32 if linkrev != None:
32 if linkrev != None:
33 badrevs[linkrev] = True
33 badrevs[linkrev] = True
34 else:
34 else:
35 linkrev = '?'
35 linkrev = '?'
36 msg = "%s: %s" % (linkrev, msg)
36 msg = "%s: %s" % (linkrev, msg)
37 if filename:
37 if filename:
38 msg = "%s@%s" % (filename, msg)
38 msg = "%s@%s" % (filename, msg)
39 ui.warn(" " + msg + "\n")
39 ui.warn(" " + msg + "\n")
40 errors[0] += 1
40 errors[0] += 1
41
41
42 def exc(linkrev, msg, inst, filename=None):
42 def exc(linkrev, msg, inst, filename=None):
43 if isinstance(inst, KeyboardInterrupt):
43 if isinstance(inst, KeyboardInterrupt):
44 ui.warn(_("interrupted"))
44 ui.warn(_("interrupted"))
45 raise
45 raise
46 err(linkrev, "%s: %s" % (msg, inst), filename)
46 err(linkrev, "%s: %s" % (msg, inst), filename)
47
47
48 def warn(msg):
48 def warn(msg):
49 ui.warn(msg + "\n")
49 ui.warn(msg + "\n")
50 warnings[0] += 1
50 warnings[0] += 1
51
51
52 def checklog(obj, name):
52 def checklog(obj, name):
53 if not len(obj) and (havecl or havemf):
53 if not len(obj) and (havecl or havemf):
54 err(0, _("empty or missing %s") % name)
54 err(0, _("empty or missing %s") % name)
55 return
55 return
56
56
57 d = obj.checksize()
57 d = obj.checksize()
58 if d[0]:
58 if d[0]:
59 err(None, _("data length off by %d bytes") % d[0], name)
59 err(None, _("data length off by %d bytes") % d[0], name)
60 if d[1]:
60 if d[1]:
61 err(None, _("index contains %d extra bytes") % d[1], name)
61 err(None, _("index contains %d extra bytes") % d[1], name)
62
62
63 if obj.version != revlog.REVLOGV0:
63 if obj.version != revlog.REVLOGV0:
64 if not revlogv1:
64 if not revlogv1:
65 warn(_("warning: `%s' uses revlog format 1") % name)
65 warn(_("warning: `%s' uses revlog format 1") % name)
66 elif revlogv1:
66 elif revlogv1:
67 warn(_("warning: `%s' uses revlog format 0") % name)
67 warn(_("warning: `%s' uses revlog format 0") % name)
68
68
69 def checkentry(obj, i, node, seen, linkrevs, f):
69 def checkentry(obj, i, node, seen, linkrevs, f):
70 lr = obj.linkrev(node)
70 lr = obj.linkrev(node)
71 if lr < 0 or (havecl and lr not in linkrevs):
71 if lr < 0 or (havecl and lr not in linkrevs):
72 t = "unexpected"
72 t = "unexpected"
73 if lr < 0 or lr >= len(cl):
73 if lr < 0 or lr >= len(cl):
74 t = "nonexistent"
74 t = "nonexistent"
75 err(None, _("rev %d point to %s changeset %d") % (i, t, lr), f)
75 err(None, _("rev %d point to %s changeset %d") % (i, t, lr), f)
76 if linkrevs:
76 if linkrevs:
77 warn(_(" (expected %s)") % " ".join(map(str,linkrevs)))
77 warn(_(" (expected %s)") % " ".join(map(str,linkrevs)))
78 lr = None # can't be trusted
78 lr = None # can't be trusted
79
79
80 try:
80 try:
81 p1, p2 = obj.parents(node)
81 p1, p2 = obj.parents(node)
82 if p1 not in seen and p1 != nullid:
82 if p1 not in seen and p1 != nullid:
83 err(lr, _("unknown parent 1 %s of %s") %
83 err(lr, _("unknown parent 1 %s of %s") %
84 (short(p1), short(n)), f)
84 (short(p1), short(n)), f)
85 if p2 not in seen and p2 != nullid:
85 if p2 not in seen and p2 != nullid:
86 err(lr, _("unknown parent 2 %s of %s") %
86 err(lr, _("unknown parent 2 %s of %s") %
87 (short(p2), short(p1)), f)
87 (short(p2), short(p1)), f)
88 except Exception, inst:
88 except Exception, inst:
89 exc(lr, _("checking parents of %s") % short(node), inst, f)
89 exc(lr, _("checking parents of %s") % short(node), inst, f)
90
90
91 if node in seen:
91 if node in seen:
92 err(lr, _("duplicate revision %d (%d)") % (i, seen[n]), f)
92 err(lr, _("duplicate revision %d (%d)") % (i, seen[n]), f)
93 seen[n] = i
93 seen[n] = i
94 return lr
94 return lr
95
95
96 revlogv1 = cl.version != revlog.REVLOGV0
96 revlogv1 = cl.version != revlog.REVLOGV0
97 if ui.verbose or not revlogv1:
97 if ui.verbose or not revlogv1:
98 ui.status(_("repository uses revlog format %d\n") %
98 ui.status(_("repository uses revlog format %d\n") %
99 (revlogv1 and 1 or 0))
99 (revlogv1 and 1 or 0))
100
100
101 havecl = len(cl) > 0
101 havecl = len(cl) > 0
102 havemf = len(mf) > 0
102 havemf = len(mf) > 0
103
103
104 ui.status(_("checking changesets\n"))
104 ui.status(_("checking changesets\n"))
105 seen = {}
105 seen = {}
106 checklog(cl, "changelog")
106 checklog(cl, "changelog")
107 for i in repo:
107 for i in repo:
108 n = cl.node(i)
108 n = cl.node(i)
109 checkentry(cl, i, n, seen, [i], "changelog")
109 checkentry(cl, i, n, seen, [i], "changelog")
110
110
111 try:
111 try:
112 changes = cl.read(n)
112 changes = cl.read(n)
113 mflinkrevs.setdefault(changes[0], []).append(i)
113 mflinkrevs.setdefault(changes[0], []).append(i)
114 for f in changes[3]:
114 for f in changes[3]:
115 filelinkrevs.setdefault(f, []).append(i)
115 filelinkrevs.setdefault(f, []).append(i)
116 except Exception, inst:
116 except Exception, inst:
117 exc(i, _("unpacking changeset %s") % short(n), inst)
117 exc(i, _("unpacking changeset %s") % short(n), inst)
118
118
119 ui.status(_("checking manifests\n"))
119 ui.status(_("checking manifests\n"))
120 seen = {}
120 seen = {}
121 checklog(mf, "manifest")
121 checklog(mf, "manifest")
122 for i in mf:
122 for i in mf:
123 n = mf.node(i)
123 n = mf.node(i)
124 lr = checkentry(mf, i, n, seen, mflinkrevs.get(n, []), "manifest")
124 lr = checkentry(mf, i, n, seen, mflinkrevs.get(n, []), "manifest")
125 if n in mflinkrevs:
125 if n in mflinkrevs:
126 del mflinkrevs[n]
126 del mflinkrevs[n]
127
127
128 try:
128 try:
129 for f, fn in mf.readdelta(n).iteritems():
129 for f, fn in mf.readdelta(n).iteritems():
130 if not f:
130 if not f:
131 err(lr, _("file without name in manifest"))
131 err(lr, _("file without name in manifest"))
132 elif f != "/dev/null":
132 elif f != "/dev/null":
133 fns = filenodes.setdefault(f, {})
133 fns = filenodes.setdefault(f, {})
134 if fn not in fns:
134 if fn not in fns:
135 fns[fn] = n
135 fns[fn] = n
136 except Exception, inst:
136 except Exception, inst:
137 exc(lr, _("reading manifest delta %s") % short(n), inst)
137 exc(lr, _("reading manifest delta %s") % short(n), inst)
138
138
139 ui.status(_("crosschecking files in changesets and manifests\n"))
139 ui.status(_("crosschecking files in changesets and manifests\n"))
140
140
141 if havemf:
141 if havemf:
142 for c, m in util.sort([(c, m) for m in mflinkrevs for c in mflinkrevs[m]]):
142 for c, m in util.sort([(c, m) for m in mflinkrevs for c in mflinkrevs[m]]):
143 err(c, _("changeset refers to unknown manifest %s") % short(m))
143 err(c, _("changeset refers to unknown manifest %s") % short(m))
144 del mflinkrevs
144 del mflinkrevs
145
145
146 for f in util.sort(filelinkrevs):
146 for f in util.sort(filelinkrevs):
147 if f not in filenodes:
147 if f not in filenodes:
148 lr = filelinkrevs[f][0]
148 lr = filelinkrevs[f][0]
149 err(lr, _("in changeset but not in manifest"), f)
149 err(lr, _("in changeset but not in manifest"), f)
150
150
151 if havecl:
151 if havecl:
152 for f in util.sort(filenodes):
152 for f in util.sort(filenodes):
153 if f not in filelinkrevs:
153 if f not in filelinkrevs:
154 try:
154 try:
155 lr = min([repo.file(f).linkrev(n) for n in filenodes[f]])
155 lr = min([repo.file(f).linkrev(n) for n in filenodes[f]])
156 except:
156 except:
157 lr = None
157 lr = None
158 err(lr, _("in manifest but not in changeset"), f)
158 err(lr, _("in manifest but not in changeset"), f)
159
159
160 ui.status(_("checking files\n"))
160 ui.status(_("checking files\n"))
161
162 storefiles = {}
163 for f, size in repo.store.datafiles(lambda m: err(None, m)):
164 if size > 0:
165 storefiles[f] = True
166
161 files = util.sort(util.unique(filenodes.keys() + filelinkrevs.keys()))
167 files = util.sort(util.unique(filenodes.keys() + filelinkrevs.keys()))
162 for f in files:
168 for f in files:
163 fl = repo.file(f)
169 fl = repo.file(f)
170
171 for ff in fl.files():
172 try:
173 del storefiles[ff]
174 except KeyError:
175 err(0, _("missing revlog!"), ff)
176
164 checklog(fl, f)
177 checklog(fl, f)
165 seen = {}
178 seen = {}
166 for i in fl:
179 for i in fl:
167 revisions += 1
180 revisions += 1
168 n = fl.node(i)
181 n = fl.node(i)
169 lr = checkentry(fl, i, n, seen, filelinkrevs.get(f, []), f)
182 lr = checkentry(fl, i, n, seen, filelinkrevs.get(f, []), f)
170 if f in filenodes:
183 if f in filenodes:
171 if havemf and n not in filenodes[f]:
184 if havemf and n not in filenodes[f]:
172 err(lr, _("%s not in manifests") % (short(n)), f)
185 err(lr, _("%s not in manifests") % (short(n)), f)
173 else:
186 else:
174 del filenodes[f][n]
187 del filenodes[f][n]
175
188
176 # verify contents
189 # verify contents
177 try:
190 try:
178 t = fl.read(n)
191 t = fl.read(n)
179 rp = fl.renamed(n)
192 rp = fl.renamed(n)
180 if len(t) != fl.size(i):
193 if len(t) != fl.size(i):
181 if not fl._readmeta(n): # ancient copy?
194 if not fl._readmeta(n): # ancient copy?
182 err(lr, _("unpacked size is %s, %s expected") %
195 err(lr, _("unpacked size is %s, %s expected") %
183 (len(t), fl.size(i)), f)
196 (len(t), fl.size(i)), f)
184 except Exception, inst:
197 except Exception, inst:
185 exc(lr, _("unpacking %s") % short(n), inst, f)
198 exc(lr, _("unpacking %s") % short(n), inst, f)
186
199
187 # check renames
200 # check renames
188 try:
201 try:
189 if rp:
202 if rp:
190 fl2 = repo.file(rp[0])
203 fl2 = repo.file(rp[0])
191 if not len(fl2):
204 if not len(fl2):
192 err(lr, _("empty or missing copy source revlog %s:%s")
205 err(lr, _("empty or missing copy source revlog %s:%s")
193 % (rp[0], short(rp[1])), f)
206 % (rp[0], short(rp[1])), f)
194 elif rp[1] == nullid:
207 elif rp[1] == nullid:
195 warn(lr, _("copy source revision is nullid %s:%s")
208 warn(lr, _("copy source revision is nullid %s:%s")
196 % (rp[0], short(rp[1])), f)
209 % (rp[0], short(rp[1])), f)
197 else:
210 else:
198 rev = fl2.rev(rp[1])
211 rev = fl2.rev(rp[1])
199 except Exception, inst:
212 except Exception, inst:
200 exc(lr, _("checking rename of %s") % short(n), inst, f)
213 exc(lr, _("checking rename of %s") % short(n), inst, f)
201
214
202 # cross-check
215 # cross-check
203 if f in filenodes:
216 if f in filenodes:
204 fns = [(mf.linkrev(l), n) for n,l in filenodes[f].items()]
217 fns = [(mf.linkrev(l), n) for n,l in filenodes[f].items()]
205 for lr, node in util.sort(fns):
218 for lr, node in util.sort(fns):
206 err(lr, _("%s in manifests not found") % short(node), f)
219 err(lr, _("%s in manifests not found") % short(node), f)
207
220
221 for f in storefiles:
222 warn(_("warning: orphan revlog '%s'") % f)
223
208 ui.status(_("%d files, %d changesets, %d total revisions\n") %
224 ui.status(_("%d files, %d changesets, %d total revisions\n") %
209 (len(files), len(cl), revisions))
225 (len(files), len(cl), revisions))
210 if warnings[0]:
226 if warnings[0]:
211 ui.warn(_("%d warnings encountered!\n") % warnings[0])
227 ui.warn(_("%d warnings encountered!\n") % warnings[0])
212 if errors[0]:
228 if errors[0]:
213 ui.warn(_("%d integrity errors encountered!\n") % errors[0])
229 ui.warn(_("%d integrity errors encountered!\n") % errors[0])
214 if badrevs:
230 if badrevs:
215 ui.warn(_("(first damaged changeset appears to be %d)\n")
231 ui.warn(_("(first damaged changeset appears to be %d)\n")
216 % min(badrevs))
232 % min(badrevs))
217 return 1
233 return 1
General Comments 0
You need to be logged in to leave comments. Login now