##// END OF EJS Templates
manifest: speed up creation of the manifestdict...
Matt Mackall -
r4995:e45fc5d0 default
parent child Browse files
Show More
@@ -1,215 +1,212 b''
1 # manifest.py - manifest revision class for mercurial
1 # manifest.py - manifest revision class for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms
5 # This software may be used and distributed according to the terms
6 # of the GNU General Public License, incorporated herein by reference.
6 # of the GNU General Public License, incorporated herein by reference.
7
7
8 from revlog import *
8 from revlog import *
9 from i18n import _
9 from i18n import _
10 import array, bisect, struct, mdiff
10 import array, bisect, struct, mdiff
11
11
12 class manifestdict(dict):
12 class manifestdict(dict):
13 def __init__(self, mapping=None, flags=None):
13 def __init__(self, mapping=None, flags=None):
14 if mapping is None: mapping = {}
14 if mapping is None: mapping = {}
15 if flags is None: flags = {}
15 if flags is None: flags = {}
16 dict.__init__(self, mapping)
16 dict.__init__(self, mapping)
17 self._flags = flags
17 self._flags = flags
18 def flags(self, f):
18 def flags(self, f):
19 return self._flags.get(f, "")
19 return self._flags.get(f, "")
20 def execf(self, f):
20 def execf(self, f):
21 "test for executable in manifest flags"
21 "test for executable in manifest flags"
22 return "x" in self.flags(f)
22 return "x" in self.flags(f)
23 def linkf(self, f):
23 def linkf(self, f):
24 "test for symlink in manifest flags"
24 "test for symlink in manifest flags"
25 return "l" in self.flags(f)
25 return "l" in self.flags(f)
26 def rawset(self, f, entry):
27 self[f] = bin(entry[:40])
28 fl = entry[40:-1]
29 if fl: self._flags[f] = fl
30 def set(self, f, execf=False, linkf=False):
26 def set(self, f, execf=False, linkf=False):
31 if linkf: self._flags[f] = "l"
27 if linkf: self._flags[f] = "l"
32 elif execf: self._flags[f] = "x"
28 elif execf: self._flags[f] = "x"
33 else: self._flags[f] = ""
29 else: self._flags[f] = ""
34 def copy(self):
30 def copy(self):
35 return manifestdict(dict.copy(self), dict.copy(self._flags))
31 return manifestdict(dict.copy(self), dict.copy(self._flags))
36
32
37 class manifest(revlog):
33 class manifest(revlog):
38 def __init__(self, opener):
34 def __init__(self, opener):
39 self.mapcache = None
35 self.mapcache = None
40 self.listcache = None
36 self.listcache = None
41 revlog.__init__(self, opener, "00manifest.i")
37 revlog.__init__(self, opener, "00manifest.i")
42
38
43 def parselines(self, lines):
39 def parse(self, lines):
44 for l in lines.splitlines(1):
40 mfdict = manifestdict()
45 yield l.split('\0')
41 for l in lines.splitlines():
42 f, n = l.split('\0')
43 if len(n) > 40:
44 mfdict._flags[f] = n[40:]
45 mfdict[f] = bin(n[:40])
46 else:
47 mfdict[f] = bin(n)
48 return mfdict
46
49
47 def readdelta(self, node):
50 def readdelta(self, node):
48 delta = mdiff.patchtext(self.delta(node))
51 return self.parse(mdiff.patchtext(self.delta(node)))
49 deltamap = manifestdict()
50 for f, n in self.parselines(delta):
51 deltamap.rawset(f, n)
52 return deltamap
53
52
54 def read(self, node):
53 def read(self, node):
55 if node == nullid: return manifestdict() # don't upset local cache
54 if node == nullid: return manifestdict() # don't upset local cache
56 if self.mapcache and self.mapcache[0] == node:
55 if self.mapcache and self.mapcache[0] == node:
57 return self.mapcache[1]
56 return self.mapcache[1]
58 text = self.revision(node)
57 text = self.revision(node)
59 self.listcache = array.array('c', text)
58 self.listcache = array.array('c', text)
60 mapping = manifestdict()
59 mapping = self.parse(text)
61 for f, n in self.parselines(text):
62 mapping.rawset(f, n)
63 self.mapcache = (node, mapping)
60 self.mapcache = (node, mapping)
64 return mapping
61 return mapping
65
62
66 def _search(self, m, s, lo=0, hi=None):
63 def _search(self, m, s, lo=0, hi=None):
67 '''return a tuple (start, end) that says where to find s within m.
64 '''return a tuple (start, end) that says where to find s within m.
68
65
69 If the string is found m[start:end] are the line containing
66 If the string is found m[start:end] are the line containing
70 that string. If start == end the string was not found and
67 that string. If start == end the string was not found and
71 they indicate the proper sorted insertion point. This was
68 they indicate the proper sorted insertion point. This was
72 taken from bisect_left, and modified to find line start/end as
69 taken from bisect_left, and modified to find line start/end as
73 it goes along.
70 it goes along.
74
71
75 m should be a buffer or a string
72 m should be a buffer or a string
76 s is a string'''
73 s is a string'''
77 def advance(i, c):
74 def advance(i, c):
78 while i < lenm and m[i] != c:
75 while i < lenm and m[i] != c:
79 i += 1
76 i += 1
80 return i
77 return i
81 lenm = len(m)
78 lenm = len(m)
82 if not hi:
79 if not hi:
83 hi = lenm
80 hi = lenm
84 while lo < hi:
81 while lo < hi:
85 mid = (lo + hi) // 2
82 mid = (lo + hi) // 2
86 start = mid
83 start = mid
87 while start > 0 and m[start-1] != '\n':
84 while start > 0 and m[start-1] != '\n':
88 start -= 1
85 start -= 1
89 end = advance(start, '\0')
86 end = advance(start, '\0')
90 if m[start:end] < s:
87 if m[start:end] < s:
91 # we know that after the null there are 40 bytes of sha1
88 # we know that after the null there are 40 bytes of sha1
92 # this translates to the bisect lo = mid + 1
89 # this translates to the bisect lo = mid + 1
93 lo = advance(end + 40, '\n') + 1
90 lo = advance(end + 40, '\n') + 1
94 else:
91 else:
95 # this translates to the bisect hi = mid
92 # this translates to the bisect hi = mid
96 hi = start
93 hi = start
97 end = advance(lo, '\0')
94 end = advance(lo, '\0')
98 found = m[lo:end]
95 found = m[lo:end]
99 if cmp(s, found) == 0:
96 if cmp(s, found) == 0:
100 # we know that after the null there are 40 bytes of sha1
97 # we know that after the null there are 40 bytes of sha1
101 end = advance(end + 40, '\n')
98 end = advance(end + 40, '\n')
102 return (lo, end+1)
99 return (lo, end+1)
103 else:
100 else:
104 return (lo, lo)
101 return (lo, lo)
105
102
106 def find(self, node, f):
103 def find(self, node, f):
107 '''look up entry for a single file efficiently.
104 '''look up entry for a single file efficiently.
108 return (node, flags) pair if found, (None, None) if not.'''
105 return (node, flags) pair if found, (None, None) if not.'''
109 if self.mapcache and node == self.mapcache[0]:
106 if self.mapcache and node == self.mapcache[0]:
110 return self.mapcache[1].get(f), self.mapcache[1].flags(f)
107 return self.mapcache[1].get(f), self.mapcache[1].flags(f)
111 text = self.revision(node)
108 text = self.revision(node)
112 start, end = self._search(text, f)
109 start, end = self._search(text, f)
113 if start == end:
110 if start == end:
114 return None, None
111 return None, None
115 l = text[start:end]
112 l = text[start:end]
116 f, n = l.split('\0')
113 f, n = l.split('\0')
117 return bin(n[:40]), n[40:-1]
114 return bin(n[:40]), n[40:-1]
118
115
119 def add(self, map, transaction, link, p1=None, p2=None,
116 def add(self, map, transaction, link, p1=None, p2=None,
120 changed=None):
117 changed=None):
121 # apply the changes collected during the bisect loop to our addlist
118 # apply the changes collected during the bisect loop to our addlist
122 # return a delta suitable for addrevision
119 # return a delta suitable for addrevision
123 def addlistdelta(addlist, x):
120 def addlistdelta(addlist, x):
124 # start from the bottom up
121 # start from the bottom up
125 # so changes to the offsets don't mess things up.
122 # so changes to the offsets don't mess things up.
126 i = len(x)
123 i = len(x)
127 while i > 0:
124 while i > 0:
128 i -= 1
125 i -= 1
129 start = x[i][0]
126 start = x[i][0]
130 end = x[i][1]
127 end = x[i][1]
131 if x[i][2]:
128 if x[i][2]:
132 addlist[start:end] = array.array('c', x[i][2])
129 addlist[start:end] = array.array('c', x[i][2])
133 else:
130 else:
134 del addlist[start:end]
131 del addlist[start:end]
135 return "".join([struct.pack(">lll", d[0], d[1], len(d[2])) + d[2]
132 return "".join([struct.pack(">lll", d[0], d[1], len(d[2])) + d[2]
136 for d in x ])
133 for d in x ])
137
134
138 def checkforbidden(f):
135 def checkforbidden(f):
139 if '\n' in f or '\r' in f:
136 if '\n' in f or '\r' in f:
140 raise RevlogError(_("'\\n' and '\\r' disallowed in filenames"))
137 raise RevlogError(_("'\\n' and '\\r' disallowed in filenames"))
141
138
142 # if we're using the listcache, make sure it is valid and
139 # if we're using the listcache, make sure it is valid and
143 # parented by the same node we're diffing against
140 # parented by the same node we're diffing against
144 if not (changed and self.listcache and p1 and self.mapcache[0] == p1):
141 if not (changed and self.listcache and p1 and self.mapcache[0] == p1):
145 files = map.keys()
142 files = map.keys()
146 files.sort()
143 files.sort()
147
144
148 for f in files:
145 for f in files:
149 checkforbidden(f)
146 checkforbidden(f)
150
147
151 # if this is changed to support newlines in filenames,
148 # if this is changed to support newlines in filenames,
152 # be sure to check the templates/ dir again (especially *-raw.tmpl)
149 # be sure to check the templates/ dir again (especially *-raw.tmpl)
153 text = ["%s\000%s%s\n" % (f, hex(map[f]), map.flags(f))
150 text = ["%s\000%s%s\n" % (f, hex(map[f]), map.flags(f))
154 for f in files]
151 for f in files]
155 self.listcache = array.array('c', "".join(text))
152 self.listcache = array.array('c', "".join(text))
156 cachedelta = None
153 cachedelta = None
157 else:
154 else:
158 addlist = self.listcache
155 addlist = self.listcache
159
156
160 for f in changed[0]:
157 for f in changed[0]:
161 checkforbidden(f)
158 checkforbidden(f)
162 # combine the changed lists into one list for sorting
159 # combine the changed lists into one list for sorting
163 work = [[x, 0] for x in changed[0]]
160 work = [[x, 0] for x in changed[0]]
164 work[len(work):] = [[x, 1] for x in changed[1]]
161 work[len(work):] = [[x, 1] for x in changed[1]]
165 work.sort()
162 work.sort()
166
163
167 delta = []
164 delta = []
168 dstart = None
165 dstart = None
169 dend = None
166 dend = None
170 dline = [""]
167 dline = [""]
171 start = 0
168 start = 0
172 # zero copy representation of addlist as a buffer
169 # zero copy representation of addlist as a buffer
173 addbuf = buffer(addlist)
170 addbuf = buffer(addlist)
174
171
175 # start with a readonly loop that finds the offset of
172 # start with a readonly loop that finds the offset of
176 # each line and creates the deltas
173 # each line and creates the deltas
177 for w in work:
174 for w in work:
178 f = w[0]
175 f = w[0]
179 # bs will either be the index of the item or the insert point
176 # bs will either be the index of the item or the insert point
180 start, end = self._search(addbuf, f, start)
177 start, end = self._search(addbuf, f, start)
181 if w[1] == 0:
178 if w[1] == 0:
182 l = "%s\000%s%s\n" % (f, hex(map[f]), map.flags(f))
179 l = "%s\000%s%s\n" % (f, hex(map[f]), map.flags(f))
183 else:
180 else:
184 l = ""
181 l = ""
185 if start == end and w[1] == 1:
182 if start == end and w[1] == 1:
186 # item we want to delete was not found, error out
183 # item we want to delete was not found, error out
187 raise AssertionError(
184 raise AssertionError(
188 _("failed to remove %s from manifest") % f)
185 _("failed to remove %s from manifest") % f)
189 if dstart != None and dstart <= start and dend >= start:
186 if dstart != None and dstart <= start and dend >= start:
190 if dend < end:
187 if dend < end:
191 dend = end
188 dend = end
192 if l:
189 if l:
193 dline.append(l)
190 dline.append(l)
194 else:
191 else:
195 if dstart != None:
192 if dstart != None:
196 delta.append([dstart, dend, "".join(dline)])
193 delta.append([dstart, dend, "".join(dline)])
197 dstart = start
194 dstart = start
198 dend = end
195 dend = end
199 dline = [l]
196 dline = [l]
200
197
201 if dstart != None:
198 if dstart != None:
202 delta.append([dstart, dend, "".join(dline)])
199 delta.append([dstart, dend, "".join(dline)])
203 # apply the delta to the addlist, and get a delta for addrevision
200 # apply the delta to the addlist, and get a delta for addrevision
204 cachedelta = addlistdelta(addlist, delta)
201 cachedelta = addlistdelta(addlist, delta)
205
202
206 # the delta is only valid if we've been processing the tip revision
203 # the delta is only valid if we've been processing the tip revision
207 if self.mapcache[0] != self.tip():
204 if self.mapcache[0] != self.tip():
208 cachedelta = None
205 cachedelta = None
209 self.listcache = addlist
206 self.listcache = addlist
210
207
211 n = self.addrevision(buffer(self.listcache), transaction, link,
208 n = self.addrevision(buffer(self.listcache), transaction, link,
212 p1, p2, cachedelta)
209 p1, p2, cachedelta)
213 self.mapcache = (n, map)
210 self.mapcache = (n, map)
214
211
215 return n
212 return n
@@ -1,22 +1,20 b''
1 """
1 """
2 node.py - basic nodeid manipulation for mercurial
2 node.py - basic nodeid manipulation for mercurial
3
3
4 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5
5
6 This software may be used and distributed according to the terms
6 This software may be used and distributed according to the terms
7 of the GNU General Public License, incorporated herein by reference.
7 of the GNU General Public License, incorporated herein by reference.
8 """
8 """
9
9
10 import binascii
10 import binascii
11
11
12 nullrev = -1
12 nullrev = -1
13 nullid = "\0" * 20
13 nullid = "\0" * 20
14
14
15 def hex(node):
15 # This ugly style has a noticeable effect in manifest parsing
16 return binascii.hexlify(node)
16 hex = binascii.hexlify
17
17 bin = binascii.unhexlify
18 def bin(node):
19 return binascii.unhexlify(node)
20
18
21 def short(node):
19 def short(node):
22 return hex(node[:6])
20 return hex(node[:6])
General Comments 0
You need to be logged in to leave comments. Login now