##// END OF EJS Templates
Abstract manifest block parsing.
Brendan Cully -
r3196:f3b93944 default
parent child Browse files
Show More
@@ -1,199 +1,209
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms
6 6 # of the GNU General Public License, incorporated herein by reference.
7 7
8 8 from revlog import *
9 9 from i18n import gettext as _
10 10 from demandload import *
11 11 demandload(globals(), "array bisect struct")
12 demandload(globals(), "mdiff")
12 13
13 14 class manifestdict(dict):
14 15 def __init__(self, mapping=None, flags=None):
15 16 if mapping is None: mapping = {}
16 17 if flags is None: flags = {}
17 18 dict.__init__(self, mapping)
18 19 self._flags = flags
19 20 def flags(self, f):
20 21 return self._flags.get(f, "")
21 22 def execf(self, f):
22 23 "test for executable in manifest flags"
23 24 return "x" in self.flags(f)
24 25 def linkf(self, f):
25 26 "test for symlink in manifest flags"
26 27 return "l" in self.flags(f)
27 28 def rawset(self, f, entry):
28 29 self[f] = bin(entry[:40])
29 30 fl = entry[40:-1]
30 31 if fl: self._flags[f] = fl
31 32 def set(self, f, execf=False, linkf=False):
32 33 if linkf: self._flags[f] = "l"
33 34 elif execf: self._flags[f] = "x"
34 35 else: self._flags[f] = ""
35 36 def copy(self):
36 37 return manifestdict(dict.copy(self), dict.copy(self._flags))
37 38
38 39 class manifest(revlog):
39 40 def __init__(self, opener, defversion=REVLOGV0):
40 41 self.mapcache = None
41 42 self.listcache = None
42 43 revlog.__init__(self, opener, "00manifest.i", "00manifest.d",
43 44 defversion)
44 45
46 def parselines(self, lines):
47 for l in lines.splitlines(1):
48 yield l.split('\0')
49
50 def readdelta(self, node):
51 delta = mdiff.patchtext(self.delta(node))
52 deltamap = manifestdict()
53 for f, n in self.parselines(delta):
54 deltamap.rawset(f, n)
55 return deltamap
56
45 57 def read(self, node):
46 58 if node == nullid: return manifestdict() # don't upset local cache
47 59 if self.mapcache and self.mapcache[0] == node:
48 60 return self.mapcache[1]
49 61 text = self.revision(node)
50 62 self.listcache = array.array('c', text)
51 lines = text.splitlines(1)
52 63 mapping = manifestdict()
53 for l in lines:
54 (f, n) = l.split('\0')
64 for f, n in self.parselines(text):
55 65 mapping.rawset(f, n)
56 66 self.mapcache = (node, mapping)
57 67 return mapping
58 68
59 69 def _search(self, m, s, lo=0, hi=None):
60 70 '''return a tuple (start, end) that says where to find s within m.
61 71
62 72 If the string is found m[start:end] are the line containing
63 73 that string. If start == end the string was not found and
64 74 they indicate the proper sorted insertion point. This was
65 75 taken from bisect_left, and modified to find line start/end as
66 76 it goes along.
67 77
68 78 m should be a buffer or a string
69 79 s is a string'''
70 80 def advance(i, c):
71 81 while i < lenm and m[i] != c:
72 82 i += 1
73 83 return i
74 84 lenm = len(m)
75 85 if not hi:
76 86 hi = lenm
77 87 while lo < hi:
78 88 mid = (lo + hi) // 2
79 89 start = mid
80 90 while start > 0 and m[start-1] != '\n':
81 91 start -= 1
82 92 end = advance(start, '\0')
83 93 if m[start:end] < s:
84 94 # we know that after the null there are 40 bytes of sha1
85 95 # this translates to the bisect lo = mid + 1
86 96 lo = advance(end + 40, '\n') + 1
87 97 else:
88 98 # this translates to the bisect hi = mid
89 99 hi = start
90 100 end = advance(lo, '\0')
91 101 found = m[lo:end]
92 102 if cmp(s, found) == 0:
93 103 # we know that after the null there are 40 bytes of sha1
94 104 end = advance(end + 40, '\n')
95 105 return (lo, end+1)
96 106 else:
97 107 return (lo, lo)
98 108
99 109 def find(self, node, f):
100 110 '''look up entry for a single file efficiently.
101 111 return (node, flag) pair if found, (None, None) if not.'''
102 112 if self.mapcache and node == self.mapcache[0]:
103 113 return self.mapcache[1].get(f), self.mapcache[1].flags(f)
104 114 text = self.revision(node)
105 115 start, end = self._search(text, f)
106 116 if start == end:
107 117 return None, None
108 118 l = text[start:end]
109 119 f, n = l.split('\0')
110 120 return bin(n[:40]), n[40:-1] == 'x'
111 121
112 122 def add(self, map, transaction, link, p1=None, p2=None,
113 123 changed=None):
114 124 # apply the changes collected during the bisect loop to our addlist
115 125 # return a delta suitable for addrevision
116 126 def addlistdelta(addlist, x):
117 127 # start from the bottom up
118 128 # so changes to the offsets don't mess things up.
119 129 i = len(x)
120 130 while i > 0:
121 131 i -= 1
122 132 start = x[i][0]
123 133 end = x[i][1]
124 134 if x[i][2]:
125 135 addlist[start:end] = array.array('c', x[i][2])
126 136 else:
127 137 del addlist[start:end]
128 138 return "".join([struct.pack(">lll", d[0], d[1], len(d[2])) + d[2] \
129 139 for d in x ])
130 140
131 141 # if we're using the listcache, make sure it is valid and
132 142 # parented by the same node we're diffing against
133 143 if not changed or not self.listcache or not p1 or \
134 144 self.mapcache[0] != p1:
135 145 files = map.keys()
136 146 files.sort()
137 147
138 148 # if this is changed to support newlines in filenames,
139 149 # be sure to check the templates/ dir again (especially *-raw.tmpl)
140 150 text = ["%s\000%s%s\n" % (f, hex(map[f]), map.flags(f)) for f in files]
141 151 self.listcache = array.array('c', "".join(text))
142 152 cachedelta = None
143 153 else:
144 154 addlist = self.listcache
145 155
146 156 # combine the changed lists into one list for sorting
147 157 work = [[x, 0] for x in changed[0]]
148 158 work[len(work):] = [[x, 1] for x in changed[1]]
149 159 work.sort()
150 160
151 161 delta = []
152 162 dstart = None
153 163 dend = None
154 164 dline = [""]
155 165 start = 0
156 166 # zero copy representation of addlist as a buffer
157 167 addbuf = buffer(addlist)
158 168
159 169 # start with a readonly loop that finds the offset of
160 170 # each line and creates the deltas
161 171 for w in work:
162 172 f = w[0]
163 173 # bs will either be the index of the item or the insert point
164 174 start, end = self._search(addbuf, f, start)
165 175 if w[1] == 0:
166 176 l = "%s\000%s%s\n" % (f, hex(map[f]), map.flags(f))
167 177 else:
168 178 l = ""
169 179 if start == end and w[1] == 1:
170 180 # item we want to delete was not found, error out
171 181 raise AssertionError(
172 182 _("failed to remove %s from manifest") % f)
173 183 if dstart != None and dstart <= start and dend >= start:
174 184 if dend < end:
175 185 dend = end
176 186 if l:
177 187 dline.append(l)
178 188 else:
179 189 if dstart != None:
180 190 delta.append([dstart, dend, "".join(dline)])
181 191 dstart = start
182 192 dend = end
183 193 dline = [l]
184 194
185 195 if dstart != None:
186 196 delta.append([dstart, dend, "".join(dline)])
187 197 # apply the delta to the addlist, and get a delta for addrevision
188 198 cachedelta = addlistdelta(addlist, delta)
189 199
190 200 # the delta is only valid if we've been processing the tip revision
191 201 if self.mapcache[0] != self.tip():
192 202 cachedelta = None
193 203 self.listcache = addlist
194 204
195 205 n = self.addrevision(buffer(self.listcache), transaction, link, p1, \
196 206 p2, cachedelta)
197 207 self.mapcache = (n, map)
198 208
199 209 return n
@@ -1,200 +1,194
1 1 # verify.py - repository integrity checking for Mercurial
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms
6 6 # of the GNU General Public License, incorporated herein by reference.
7 7
8 8 from node import *
9 9 from i18n import gettext as _
10 10 import revlog, mdiff
11 11
12 12 def verify(repo):
13 13 filelinkrevs = {}
14 14 filenodes = {}
15 15 changesets = revisions = files = 0
16 16 errors = [0]
17 17 warnings = [0]
18 18 neededmanifests = {}
19 19
20 20 def err(msg):
21 21 repo.ui.warn(msg + "\n")
22 22 errors[0] += 1
23 23
24 24 def warn(msg):
25 25 repo.ui.warn(msg + "\n")
26 26 warnings[0] += 1
27 27
28 28 def checksize(obj, name):
29 29 d = obj.checksize()
30 30 if d[0]:
31 31 err(_("%s data length off by %d bytes") % (name, d[0]))
32 32 if d[1]:
33 33 err(_("%s index contains %d extra bytes") % (name, d[1]))
34 34
35 35 def checkversion(obj, name):
36 36 if obj.version != revlog.REVLOGV0:
37 37 if not revlogv1:
38 38 warn(_("warning: `%s' uses revlog format 1") % name)
39 39 elif revlogv1:
40 40 warn(_("warning: `%s' uses revlog format 0") % name)
41 41
42 42 revlogv1 = repo.revlogversion != revlog.REVLOGV0
43 43 if repo.ui.verbose or revlogv1 != repo.revlogv1:
44 44 repo.ui.status(_("repository uses revlog format %d\n") %
45 45 (revlogv1 and 1 or 0))
46 46
47 47 seen = {}
48 48 repo.ui.status(_("checking changesets\n"))
49 49 checksize(repo.changelog, "changelog")
50 50
51 51 for i in range(repo.changelog.count()):
52 52 changesets += 1
53 53 n = repo.changelog.node(i)
54 54 l = repo.changelog.linkrev(n)
55 55 if l != i:
56 56 err(_("incorrect link (%d) for changeset revision %d") %(l, i))
57 57 if n in seen:
58 58 err(_("duplicate changeset at revision %d") % i)
59 59 seen[n] = 1
60 60
61 61 for p in repo.changelog.parents(n):
62 62 if p not in repo.changelog.nodemap:
63 63 err(_("changeset %s has unknown parent %s") %
64 64 (short(n), short(p)))
65 65 try:
66 66 changes = repo.changelog.read(n)
67 67 except KeyboardInterrupt:
68 68 repo.ui.warn(_("interrupted"))
69 69 raise
70 70 except Exception, inst:
71 71 err(_("unpacking changeset %s: %s") % (short(n), inst))
72 72 continue
73 73
74 74 neededmanifests[changes[0]] = n
75 75
76 76 for f in changes[3]:
77 77 filelinkrevs.setdefault(f, []).append(i)
78 78
79 79 seen = {}
80 80 repo.ui.status(_("checking manifests\n"))
81 81 checkversion(repo.manifest, "manifest")
82 82 checksize(repo.manifest, "manifest")
83 83
84 84 for i in range(repo.manifest.count()):
85 85 n = repo.manifest.node(i)
86 86 l = repo.manifest.linkrev(n)
87 87
88 88 if l < 0 or l >= repo.changelog.count():
89 89 err(_("bad manifest link (%d) at revision %d") % (l, i))
90 90
91 91 if n in neededmanifests:
92 92 del neededmanifests[n]
93 93
94 94 if n in seen:
95 95 err(_("duplicate manifest at revision %d") % i)
96 96
97 97 seen[n] = 1
98 98
99 99 for p in repo.manifest.parents(n):
100 100 if p not in repo.manifest.nodemap:
101 101 err(_("manifest %s has unknown parent %s") %
102 102 (short(n), short(p)))
103 103
104 104 try:
105 delta = mdiff.patchtext(repo.manifest.delta(n))
105 for f, fn in repo.manifest.readdelta(n).iteritems():
106 filenodes.setdefault(f, {})[fn] = 1
106 107 except KeyboardInterrupt:
107 108 repo.ui.warn(_("interrupted"))
108 109 raise
109 110 except Exception, inst:
110 err(_("unpacking manifest %s: %s") % (short(n), inst))
111 err(_("reading delta for manifest %s: %s") % (short(n), inst))
111 112 continue
112 113
113 try:
114 ff = [ l.split('\0') for l in delta.splitlines() ]
115 for f, fn in ff:
116 filenodes.setdefault(f, {})[bin(fn[:40])] = 1
117 except (ValueError, TypeError), inst:
118 err(_("broken delta in manifest %s: %s") % (short(n), inst))
119
120 114 repo.ui.status(_("crosschecking files in changesets and manifests\n"))
121 115
122 116 for m, c in neededmanifests.items():
123 117 err(_("Changeset %s refers to unknown manifest %s") %
124 118 (short(m), short(c)))
125 119 del neededmanifests
126 120
127 121 for f in filenodes:
128 122 if f not in filelinkrevs:
129 123 err(_("file %s in manifest but not in changesets") % f)
130 124
131 125 for f in filelinkrevs:
132 126 if f not in filenodes:
133 127 err(_("file %s in changeset but not in manifest") % f)
134 128
135 129 repo.ui.status(_("checking files\n"))
136 130 ff = filenodes.keys()
137 131 ff.sort()
138 132 for f in ff:
139 133 if f == "/dev/null":
140 134 continue
141 135 files += 1
142 136 if not f:
143 137 err(_("file without name in manifest %s") % short(n))
144 138 continue
145 139 fl = repo.file(f)
146 140 checkversion(fl, f)
147 141 checksize(fl, f)
148 142
149 143 nodes = {nullid: 1}
150 144 seen = {}
151 145 for i in range(fl.count()):
152 146 revisions += 1
153 147 n = fl.node(i)
154 148
155 149 if n in seen:
156 150 err(_("%s: duplicate revision %d") % (f, i))
157 151 if n not in filenodes[f]:
158 152 err(_("%s: %d:%s not in manifests") % (f, i, short(n)))
159 153 else:
160 154 del filenodes[f][n]
161 155
162 156 flr = fl.linkrev(n)
163 157 if flr not in filelinkrevs.get(f, []):
164 158 err(_("%s:%s points to unexpected changeset %d")
165 159 % (f, short(n), flr))
166 160 else:
167 161 filelinkrevs[f].remove(flr)
168 162
169 163 # verify contents
170 164 try:
171 165 t = fl.read(n)
172 166 except KeyboardInterrupt:
173 167 repo.ui.warn(_("interrupted"))
174 168 raise
175 169 except Exception, inst:
176 170 err(_("unpacking file %s %s: %s") % (f, short(n), inst))
177 171
178 172 # verify parents
179 173 (p1, p2) = fl.parents(n)
180 174 if p1 not in nodes:
181 175 err(_("file %s:%s unknown parent 1 %s") %
182 176 (f, short(n), short(p1)))
183 177 if p2 not in nodes:
184 178 err(_("file %s:%s unknown parent 2 %s") %
185 179 (f, short(n), short(p1)))
186 180 nodes[n] = 1
187 181
188 182 # cross-check
189 183 for node in filenodes[f]:
190 184 err(_("node %s in manifests not in %s") % (hex(node), f))
191 185
192 186 repo.ui.status(_("%d files, %d changesets, %d total revisions\n") %
193 187 (files, changesets, revisions))
194 188
195 189 if warnings[0]:
196 190 repo.ui.warn(_("%d warnings encountered!\n") % warnings[0])
197 191 if errors[0]:
198 192 repo.ui.warn(_("%d integrity errors encountered!\n") % errors[0])
199 193 return 1
200 194
General Comments 0
You need to be logged in to leave comments. Login now