##// END OF EJS Templates
manifest: speed up creation of the manifestdict...
Matt Mackall -
r4995:e45fc5d0 default
parent child Browse files
Show More
@@ -1,215 +1,212 b''
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms
6 6 # of the GNU General Public License, incorporated herein by reference.
7 7
8 8 from revlog import *
9 9 from i18n import _
10 10 import array, bisect, struct, mdiff
11 11
12 12 class manifestdict(dict):
13 13 def __init__(self, mapping=None, flags=None):
14 14 if mapping is None: mapping = {}
15 15 if flags is None: flags = {}
16 16 dict.__init__(self, mapping)
17 17 self._flags = flags
18 18 def flags(self, f):
19 19 return self._flags.get(f, "")
20 20 def execf(self, f):
21 21 "test for executable in manifest flags"
22 22 return "x" in self.flags(f)
23 23 def linkf(self, f):
24 24 "test for symlink in manifest flags"
25 25 return "l" in self.flags(f)
26 def rawset(self, f, entry):
27 self[f] = bin(entry[:40])
28 fl = entry[40:-1]
29 if fl: self._flags[f] = fl
30 26 def set(self, f, execf=False, linkf=False):
31 27 if linkf: self._flags[f] = "l"
32 28 elif execf: self._flags[f] = "x"
33 29 else: self._flags[f] = ""
34 30 def copy(self):
35 31 return manifestdict(dict.copy(self), dict.copy(self._flags))
36 32
37 33 class manifest(revlog):
38 34 def __init__(self, opener):
39 35 self.mapcache = None
40 36 self.listcache = None
41 37 revlog.__init__(self, opener, "00manifest.i")
42 38
43 def parselines(self, lines):
44 for l in lines.splitlines(1):
45 yield l.split('\0')
39 def parse(self, lines):
40 mfdict = manifestdict()
41 for l in lines.splitlines():
42 f, n = l.split('\0')
43 if len(n) > 40:
44 mfdict._flags[f] = n[40:]
45 mfdict[f] = bin(n[:40])
46 else:
47 mfdict[f] = bin(n)
48 return mfdict
46 49
47 50 def readdelta(self, node):
48 delta = mdiff.patchtext(self.delta(node))
49 deltamap = manifestdict()
50 for f, n in self.parselines(delta):
51 deltamap.rawset(f, n)
52 return deltamap
51 return self.parse(mdiff.patchtext(self.delta(node)))
53 52
54 53 def read(self, node):
55 54 if node == nullid: return manifestdict() # don't upset local cache
56 55 if self.mapcache and self.mapcache[0] == node:
57 56 return self.mapcache[1]
58 57 text = self.revision(node)
59 58 self.listcache = array.array('c', text)
60 mapping = manifestdict()
61 for f, n in self.parselines(text):
62 mapping.rawset(f, n)
59 mapping = self.parse(text)
63 60 self.mapcache = (node, mapping)
64 61 return mapping
65 62
66 63 def _search(self, m, s, lo=0, hi=None):
67 64 '''return a tuple (start, end) that says where to find s within m.
68 65
69 66 If the string is found m[start:end] are the line containing
70 67 that string. If start == end the string was not found and
71 68 they indicate the proper sorted insertion point. This was
72 69 taken from bisect_left, and modified to find line start/end as
73 70 it goes along.
74 71
75 72 m should be a buffer or a string
76 73 s is a string'''
77 74 def advance(i, c):
78 75 while i < lenm and m[i] != c:
79 76 i += 1
80 77 return i
81 78 lenm = len(m)
82 79 if not hi:
83 80 hi = lenm
84 81 while lo < hi:
85 82 mid = (lo + hi) // 2
86 83 start = mid
87 84 while start > 0 and m[start-1] != '\n':
88 85 start -= 1
89 86 end = advance(start, '\0')
90 87 if m[start:end] < s:
91 88 # we know that after the null there are 40 bytes of sha1
92 89 # this translates to the bisect lo = mid + 1
93 90 lo = advance(end + 40, '\n') + 1
94 91 else:
95 92 # this translates to the bisect hi = mid
96 93 hi = start
97 94 end = advance(lo, '\0')
98 95 found = m[lo:end]
99 96 if cmp(s, found) == 0:
100 97 # we know that after the null there are 40 bytes of sha1
101 98 end = advance(end + 40, '\n')
102 99 return (lo, end+1)
103 100 else:
104 101 return (lo, lo)
105 102
106 103 def find(self, node, f):
107 104 '''look up entry for a single file efficiently.
108 105 return (node, flags) pair if found, (None, None) if not.'''
109 106 if self.mapcache and node == self.mapcache[0]:
110 107 return self.mapcache[1].get(f), self.mapcache[1].flags(f)
111 108 text = self.revision(node)
112 109 start, end = self._search(text, f)
113 110 if start == end:
114 111 return None, None
115 112 l = text[start:end]
116 113 f, n = l.split('\0')
117 114 return bin(n[:40]), n[40:-1]
118 115
119 116 def add(self, map, transaction, link, p1=None, p2=None,
120 117 changed=None):
121 118 # apply the changes collected during the bisect loop to our addlist
122 119 # return a delta suitable for addrevision
123 120 def addlistdelta(addlist, x):
124 121 # start from the bottom up
125 122 # so changes to the offsets don't mess things up.
126 123 i = len(x)
127 124 while i > 0:
128 125 i -= 1
129 126 start = x[i][0]
130 127 end = x[i][1]
131 128 if x[i][2]:
132 129 addlist[start:end] = array.array('c', x[i][2])
133 130 else:
134 131 del addlist[start:end]
135 132 return "".join([struct.pack(">lll", d[0], d[1], len(d[2])) + d[2]
136 133 for d in x ])
137 134
138 135 def checkforbidden(f):
139 136 if '\n' in f or '\r' in f:
140 137 raise RevlogError(_("'\\n' and '\\r' disallowed in filenames"))
141 138
142 139 # if we're using the listcache, make sure it is valid and
143 140 # parented by the same node we're diffing against
144 141 if not (changed and self.listcache and p1 and self.mapcache[0] == p1):
145 142 files = map.keys()
146 143 files.sort()
147 144
148 145 for f in files:
149 146 checkforbidden(f)
150 147
151 148 # if this is changed to support newlines in filenames,
152 149 # be sure to check the templates/ dir again (especially *-raw.tmpl)
153 150 text = ["%s\000%s%s\n" % (f, hex(map[f]), map.flags(f))
154 151 for f in files]
155 152 self.listcache = array.array('c', "".join(text))
156 153 cachedelta = None
157 154 else:
158 155 addlist = self.listcache
159 156
160 157 for f in changed[0]:
161 158 checkforbidden(f)
162 159 # combine the changed lists into one list for sorting
163 160 work = [[x, 0] for x in changed[0]]
164 161 work[len(work):] = [[x, 1] for x in changed[1]]
165 162 work.sort()
166 163
167 164 delta = []
168 165 dstart = None
169 166 dend = None
170 167 dline = [""]
171 168 start = 0
172 169 # zero copy representation of addlist as a buffer
173 170 addbuf = buffer(addlist)
174 171
175 172 # start with a readonly loop that finds the offset of
176 173 # each line and creates the deltas
177 174 for w in work:
178 175 f = w[0]
179 176 # bs will either be the index of the item or the insert point
180 177 start, end = self._search(addbuf, f, start)
181 178 if w[1] == 0:
182 179 l = "%s\000%s%s\n" % (f, hex(map[f]), map.flags(f))
183 180 else:
184 181 l = ""
185 182 if start == end and w[1] == 1:
186 183 # item we want to delete was not found, error out
187 184 raise AssertionError(
188 185 _("failed to remove %s from manifest") % f)
189 186 if dstart != None and dstart <= start and dend >= start:
190 187 if dend < end:
191 188 dend = end
192 189 if l:
193 190 dline.append(l)
194 191 else:
195 192 if dstart != None:
196 193 delta.append([dstart, dend, "".join(dline)])
197 194 dstart = start
198 195 dend = end
199 196 dline = [l]
200 197
201 198 if dstart != None:
202 199 delta.append([dstart, dend, "".join(dline)])
203 200 # apply the delta to the addlist, and get a delta for addrevision
204 201 cachedelta = addlistdelta(addlist, delta)
205 202
206 203 # the delta is only valid if we've been processing the tip revision
207 204 if self.mapcache[0] != self.tip():
208 205 cachedelta = None
209 206 self.listcache = addlist
210 207
211 208 n = self.addrevision(buffer(self.listcache), transaction, link,
212 209 p1, p2, cachedelta)
213 210 self.mapcache = (n, map)
214 211
215 212 return n
@@ -1,22 +1,20 b''
1 1 """
2 2 node.py - basic nodeid manipulation for mercurial
3 3
4 4 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5 5
6 6 This software may be used and distributed according to the terms
7 7 of the GNU General Public License, incorporated herein by reference.
8 8 """
9 9
10 10 import binascii
11 11
12 12 nullrev = -1
13 13 nullid = "\0" * 20
14 14
15 def hex(node):
16 return binascii.hexlify(node)
17
18 def bin(node):
19 return binascii.unhexlify(node)
15 # This ugly style has a noticeable effect in manifest parsing
16 hex = binascii.hexlify
17 bin = binascii.unhexlify
20 18
21 19 def short(node):
22 20 return hex(node[:6])
General Comments 0
You need to be logged in to leave comments. Login now