##// END OF EJS Templates
changegroup: decompress GZ algorithm in larger chunks for better performance
Michael Tjørnemark -
r16557:9dba5536 stable
parent child Browse files
Show More
@@ -1,256 +1,256
1 # changegroup.py - Mercurial changegroup manipulation functions
1 # changegroup.py - Mercurial changegroup manipulation functions
2 #
2 #
3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from i18n import _
8 from i18n import _
9 from node import nullrev
9 from node import nullrev
10 import mdiff, util
10 import mdiff, util
11 import struct, os, bz2, zlib, tempfile
11 import struct, os, bz2, zlib, tempfile
12
12
13 _BUNDLE10_DELTA_HEADER = "20s20s20s20s"
13 _BUNDLE10_DELTA_HEADER = "20s20s20s20s"
14
14
15 def readexactly(stream, n):
15 def readexactly(stream, n):
16 '''read n bytes from stream.read and abort if less was available'''
16 '''read n bytes from stream.read and abort if less was available'''
17 s = stream.read(n)
17 s = stream.read(n)
18 if len(s) < n:
18 if len(s) < n:
19 raise util.Abort(_("stream ended unexpectedly"
19 raise util.Abort(_("stream ended unexpectedly"
20 " (got %d bytes, expected %d)")
20 " (got %d bytes, expected %d)")
21 % (len(s), n))
21 % (len(s), n))
22 return s
22 return s
23
23
24 def getchunk(stream):
24 def getchunk(stream):
25 """return the next chunk from stream as a string"""
25 """return the next chunk from stream as a string"""
26 d = readexactly(stream, 4)
26 d = readexactly(stream, 4)
27 l = struct.unpack(">l", d)[0]
27 l = struct.unpack(">l", d)[0]
28 if l <= 4:
28 if l <= 4:
29 if l:
29 if l:
30 raise util.Abort(_("invalid chunk length %d") % l)
30 raise util.Abort(_("invalid chunk length %d") % l)
31 return ""
31 return ""
32 return readexactly(stream, l - 4)
32 return readexactly(stream, l - 4)
33
33
34 def chunkheader(length):
34 def chunkheader(length):
35 """return a changegroup chunk header (string)"""
35 """return a changegroup chunk header (string)"""
36 return struct.pack(">l", length + 4)
36 return struct.pack(">l", length + 4)
37
37
38 def closechunk():
38 def closechunk():
39 """return a changegroup chunk header (string) for a zero-length chunk"""
39 """return a changegroup chunk header (string) for a zero-length chunk"""
40 return struct.pack(">l", 0)
40 return struct.pack(">l", 0)
41
41
42 class nocompress(object):
42 class nocompress(object):
43 def compress(self, x):
43 def compress(self, x):
44 return x
44 return x
45 def flush(self):
45 def flush(self):
46 return ""
46 return ""
47
47
48 bundletypes = {
48 bundletypes = {
49 "": ("", nocompress), # only when using unbundle on ssh and old http servers
49 "": ("", nocompress), # only when using unbundle on ssh and old http servers
50 # since the unification ssh accepts a header but there
50 # since the unification ssh accepts a header but there
51 # is no capability signaling it.
51 # is no capability signaling it.
52 "HG10UN": ("HG10UN", nocompress),
52 "HG10UN": ("HG10UN", nocompress),
53 "HG10BZ": ("HG10", lambda: bz2.BZ2Compressor()),
53 "HG10BZ": ("HG10", lambda: bz2.BZ2Compressor()),
54 "HG10GZ": ("HG10GZ", lambda: zlib.compressobj()),
54 "HG10GZ": ("HG10GZ", lambda: zlib.compressobj()),
55 }
55 }
56
56
57 # hgweb uses this list to communicate its preferred type
57 # hgweb uses this list to communicate its preferred type
58 bundlepriority = ['HG10GZ', 'HG10BZ', 'HG10UN']
58 bundlepriority = ['HG10GZ', 'HG10BZ', 'HG10UN']
59
59
60 def writebundle(cg, filename, bundletype):
60 def writebundle(cg, filename, bundletype):
61 """Write a bundle file and return its filename.
61 """Write a bundle file and return its filename.
62
62
63 Existing files will not be overwritten.
63 Existing files will not be overwritten.
64 If no filename is specified, a temporary file is created.
64 If no filename is specified, a temporary file is created.
65 bz2 compression can be turned off.
65 bz2 compression can be turned off.
66 The bundle file will be deleted in case of errors.
66 The bundle file will be deleted in case of errors.
67 """
67 """
68
68
69 fh = None
69 fh = None
70 cleanup = None
70 cleanup = None
71 try:
71 try:
72 if filename:
72 if filename:
73 fh = open(filename, "wb")
73 fh = open(filename, "wb")
74 else:
74 else:
75 fd, filename = tempfile.mkstemp(prefix="hg-bundle-", suffix=".hg")
75 fd, filename = tempfile.mkstemp(prefix="hg-bundle-", suffix=".hg")
76 fh = os.fdopen(fd, "wb")
76 fh = os.fdopen(fd, "wb")
77 cleanup = filename
77 cleanup = filename
78
78
79 header, compressor = bundletypes[bundletype]
79 header, compressor = bundletypes[bundletype]
80 fh.write(header)
80 fh.write(header)
81 z = compressor()
81 z = compressor()
82
82
83 # parse the changegroup data, otherwise we will block
83 # parse the changegroup data, otherwise we will block
84 # in case of sshrepo because we don't know the end of the stream
84 # in case of sshrepo because we don't know the end of the stream
85
85
86 # an empty chunkgroup is the end of the changegroup
86 # an empty chunkgroup is the end of the changegroup
87 # a changegroup has at least 2 chunkgroups (changelog and manifest).
87 # a changegroup has at least 2 chunkgroups (changelog and manifest).
88 # after that, an empty chunkgroup is the end of the changegroup
88 # after that, an empty chunkgroup is the end of the changegroup
89 empty = False
89 empty = False
90 count = 0
90 count = 0
91 while not empty or count <= 2:
91 while not empty or count <= 2:
92 empty = True
92 empty = True
93 count += 1
93 count += 1
94 while True:
94 while True:
95 chunk = getchunk(cg)
95 chunk = getchunk(cg)
96 if not chunk:
96 if not chunk:
97 break
97 break
98 empty = False
98 empty = False
99 fh.write(z.compress(chunkheader(len(chunk))))
99 fh.write(z.compress(chunkheader(len(chunk))))
100 pos = 0
100 pos = 0
101 while pos < len(chunk):
101 while pos < len(chunk):
102 next = pos + 2**20
102 next = pos + 2**20
103 fh.write(z.compress(chunk[pos:next]))
103 fh.write(z.compress(chunk[pos:next]))
104 pos = next
104 pos = next
105 fh.write(z.compress(closechunk()))
105 fh.write(z.compress(closechunk()))
106 fh.write(z.flush())
106 fh.write(z.flush())
107 cleanup = None
107 cleanup = None
108 return filename
108 return filename
109 finally:
109 finally:
110 if fh is not None:
110 if fh is not None:
111 fh.close()
111 fh.close()
112 if cleanup is not None:
112 if cleanup is not None:
113 os.unlink(cleanup)
113 os.unlink(cleanup)
114
114
115 def decompressor(fh, alg):
115 def decompressor(fh, alg):
116 if alg == 'UN':
116 if alg == 'UN':
117 return fh
117 return fh
118 elif alg == 'GZ':
118 elif alg == 'GZ':
119 def generator(f):
119 def generator(f):
120 zd = zlib.decompressobj()
120 zd = zlib.decompressobj()
121 for chunk in f:
121 for chunk in util.filechunkiter(f):
122 yield zd.decompress(chunk)
122 yield zd.decompress(chunk)
123 elif alg == 'BZ':
123 elif alg == 'BZ':
124 def generator(f):
124 def generator(f):
125 zd = bz2.BZ2Decompressor()
125 zd = bz2.BZ2Decompressor()
126 zd.decompress("BZ")
126 zd.decompress("BZ")
127 for chunk in util.filechunkiter(f, 4096):
127 for chunk in util.filechunkiter(f, 4096):
128 yield zd.decompress(chunk)
128 yield zd.decompress(chunk)
129 else:
129 else:
130 raise util.Abort("unknown bundle compression '%s'" % alg)
130 raise util.Abort("unknown bundle compression '%s'" % alg)
131 return util.chunkbuffer(generator(fh))
131 return util.chunkbuffer(generator(fh))
132
132
133 class unbundle10(object):
133 class unbundle10(object):
134 deltaheader = _BUNDLE10_DELTA_HEADER
134 deltaheader = _BUNDLE10_DELTA_HEADER
135 deltaheadersize = struct.calcsize(deltaheader)
135 deltaheadersize = struct.calcsize(deltaheader)
136 def __init__(self, fh, alg):
136 def __init__(self, fh, alg):
137 self._stream = decompressor(fh, alg)
137 self._stream = decompressor(fh, alg)
138 self._type = alg
138 self._type = alg
139 self.callback = None
139 self.callback = None
140 def compressed(self):
140 def compressed(self):
141 return self._type != 'UN'
141 return self._type != 'UN'
142 def read(self, l):
142 def read(self, l):
143 return self._stream.read(l)
143 return self._stream.read(l)
144 def seek(self, pos):
144 def seek(self, pos):
145 return self._stream.seek(pos)
145 return self._stream.seek(pos)
146 def tell(self):
146 def tell(self):
147 return self._stream.tell()
147 return self._stream.tell()
148 def close(self):
148 def close(self):
149 return self._stream.close()
149 return self._stream.close()
150
150
151 def chunklength(self):
151 def chunklength(self):
152 d = readexactly(self._stream, 4)
152 d = readexactly(self._stream, 4)
153 l = struct.unpack(">l", d)[0]
153 l = struct.unpack(">l", d)[0]
154 if l <= 4:
154 if l <= 4:
155 if l:
155 if l:
156 raise util.Abort(_("invalid chunk length %d") % l)
156 raise util.Abort(_("invalid chunk length %d") % l)
157 return 0
157 return 0
158 if self.callback:
158 if self.callback:
159 self.callback()
159 self.callback()
160 return l - 4
160 return l - 4
161
161
162 def changelogheader(self):
162 def changelogheader(self):
163 """v10 does not have a changelog header chunk"""
163 """v10 does not have a changelog header chunk"""
164 return {}
164 return {}
165
165
166 def manifestheader(self):
166 def manifestheader(self):
167 """v10 does not have a manifest header chunk"""
167 """v10 does not have a manifest header chunk"""
168 return {}
168 return {}
169
169
170 def filelogheader(self):
170 def filelogheader(self):
171 """return the header of the filelogs chunk, v10 only has the filename"""
171 """return the header of the filelogs chunk, v10 only has the filename"""
172 l = self.chunklength()
172 l = self.chunklength()
173 if not l:
173 if not l:
174 return {}
174 return {}
175 fname = readexactly(self._stream, l)
175 fname = readexactly(self._stream, l)
176 return dict(filename=fname)
176 return dict(filename=fname)
177
177
178 def _deltaheader(self, headertuple, prevnode):
178 def _deltaheader(self, headertuple, prevnode):
179 node, p1, p2, cs = headertuple
179 node, p1, p2, cs = headertuple
180 if prevnode is None:
180 if prevnode is None:
181 deltabase = p1
181 deltabase = p1
182 else:
182 else:
183 deltabase = prevnode
183 deltabase = prevnode
184 return node, p1, p2, deltabase, cs
184 return node, p1, p2, deltabase, cs
185
185
186 def deltachunk(self, prevnode):
186 def deltachunk(self, prevnode):
187 l = self.chunklength()
187 l = self.chunklength()
188 if not l:
188 if not l:
189 return {}
189 return {}
190 headerdata = readexactly(self._stream, self.deltaheadersize)
190 headerdata = readexactly(self._stream, self.deltaheadersize)
191 header = struct.unpack(self.deltaheader, headerdata)
191 header = struct.unpack(self.deltaheader, headerdata)
192 delta = readexactly(self._stream, l - self.deltaheadersize)
192 delta = readexactly(self._stream, l - self.deltaheadersize)
193 node, p1, p2, deltabase, cs = self._deltaheader(header, prevnode)
193 node, p1, p2, deltabase, cs = self._deltaheader(header, prevnode)
194 return dict(node=node, p1=p1, p2=p2, cs=cs,
194 return dict(node=node, p1=p1, p2=p2, cs=cs,
195 deltabase=deltabase, delta=delta)
195 deltabase=deltabase, delta=delta)
196
196
197 class headerlessfixup(object):
197 class headerlessfixup(object):
198 def __init__(self, fh, h):
198 def __init__(self, fh, h):
199 self._h = h
199 self._h = h
200 self._fh = fh
200 self._fh = fh
201 def read(self, n):
201 def read(self, n):
202 if self._h:
202 if self._h:
203 d, self._h = self._h[:n], self._h[n:]
203 d, self._h = self._h[:n], self._h[n:]
204 if len(d) < n:
204 if len(d) < n:
205 d += readexactly(self._fh, n - len(d))
205 d += readexactly(self._fh, n - len(d))
206 return d
206 return d
207 return readexactly(self._fh, n)
207 return readexactly(self._fh, n)
208
208
209 def readbundle(fh, fname):
209 def readbundle(fh, fname):
210 header = readexactly(fh, 6)
210 header = readexactly(fh, 6)
211
211
212 if not fname:
212 if not fname:
213 fname = "stream"
213 fname = "stream"
214 if not header.startswith('HG') and header.startswith('\0'):
214 if not header.startswith('HG') and header.startswith('\0'):
215 fh = headerlessfixup(fh, header)
215 fh = headerlessfixup(fh, header)
216 header = "HG10UN"
216 header = "HG10UN"
217
217
218 magic, version, alg = header[0:2], header[2:4], header[4:6]
218 magic, version, alg = header[0:2], header[2:4], header[4:6]
219
219
220 if magic != 'HG':
220 if magic != 'HG':
221 raise util.Abort(_('%s: not a Mercurial bundle') % fname)
221 raise util.Abort(_('%s: not a Mercurial bundle') % fname)
222 if version != '10':
222 if version != '10':
223 raise util.Abort(_('%s: unknown bundle version %s') % (fname, version))
223 raise util.Abort(_('%s: unknown bundle version %s') % (fname, version))
224 return unbundle10(fh, alg)
224 return unbundle10(fh, alg)
225
225
226 class bundle10(object):
226 class bundle10(object):
227 deltaheader = _BUNDLE10_DELTA_HEADER
227 deltaheader = _BUNDLE10_DELTA_HEADER
228 def __init__(self, lookup):
228 def __init__(self, lookup):
229 self._lookup = lookup
229 self._lookup = lookup
230 def close(self):
230 def close(self):
231 return closechunk()
231 return closechunk()
232 def fileheader(self, fname):
232 def fileheader(self, fname):
233 return chunkheader(len(fname)) + fname
233 return chunkheader(len(fname)) + fname
234 def revchunk(self, revlog, rev, prev):
234 def revchunk(self, revlog, rev, prev):
235 node = revlog.node(rev)
235 node = revlog.node(rev)
236 p1, p2 = revlog.parentrevs(rev)
236 p1, p2 = revlog.parentrevs(rev)
237 base = prev
237 base = prev
238
238
239 prefix = ''
239 prefix = ''
240 if base == nullrev:
240 if base == nullrev:
241 delta = revlog.revision(node)
241 delta = revlog.revision(node)
242 prefix = mdiff.trivialdiffheader(len(delta))
242 prefix = mdiff.trivialdiffheader(len(delta))
243 else:
243 else:
244 delta = revlog.revdiff(base, rev)
244 delta = revlog.revdiff(base, rev)
245 linknode = self._lookup(revlog, node)
245 linknode = self._lookup(revlog, node)
246 p1n, p2n = revlog.parents(node)
246 p1n, p2n = revlog.parents(node)
247 basenode = revlog.node(base)
247 basenode = revlog.node(base)
248 meta = self.builddeltaheader(node, p1n, p2n, basenode, linknode)
248 meta = self.builddeltaheader(node, p1n, p2n, basenode, linknode)
249 meta += prefix
249 meta += prefix
250 l = len(meta) + len(delta)
250 l = len(meta) + len(delta)
251 yield chunkheader(l)
251 yield chunkheader(l)
252 yield meta
252 yield meta
253 yield delta
253 yield delta
254 def builddeltaheader(self, node, p1n, p2n, basenode, linknode):
254 def builddeltaheader(self, node, p1n, p2n, basenode, linknode):
255 # do nothing with basenode, it is implicitly the previous one in HG10
255 # do nothing with basenode, it is implicitly the previous one in HG10
256 return struct.pack(self.deltaheader, node, p1n, p2n, linknode)
256 return struct.pack(self.deltaheader, node, p1n, p2n, linknode)
General Comments 0
You need to be logged in to leave comments. Login now