##// END OF EJS Templates
bundle2: support unbundling empty part...
Pierre-Yves David -
r20864:9a75d255 default
parent child Browse files
Show More
@@ -1,269 +1,292
1 1 # bundle2.py - generic container format to transmit arbitrary data.
2 2 #
3 3 # Copyright 2013 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7 """Handling of the new bundle2 format
8 8
9 9 The goal of bundle2 is to act as an atomic packet to transmit a set of
10 10 payloads in an application-agnostic way. It consists of a sequence of "parts"
11 11 that will be handed to and processed by the application layer.
12 12
13 13
14 14 General format architecture
15 15 ===========================
16 16
17 17 The format is structured as follows:
18 18
19 19 - magic string
20 20 - stream level parameters
21 21 - payload parts (any number)
22 22 - end of stream marker.
23 23
24 24 The binary format
25 25 ============================
26 26
27 27 All numbers are unsigned and big endian.
28 28
29 29 stream level parameters
30 30 ------------------------
31 31
32 32 The binary format is as follows:
33 33
34 34 :params size: (16-bit integer)
35 35
36 36 The total number of Bytes used by the parameters
37 37
38 38 :params value: arbitrary number of Bytes
39 39
40 40 A blob of `params size` bytes containing the serialized version of all stream
41 41 level parameters.
42 42
43 43 The blob contains a space-separated list of parameters. Parameters with a
44 44 value are stored in the form `<name>=<value>`. Both name and value are urlquoted.
45 45
46 46 Empty names are forbidden.
47 47
48 48 Names MUST start with a letter. If this first letter is lower case, the
49 49 parameter is advisory and can be safely ignored. However, when the first
50 50 letter is upper case, the parameter is mandatory and the bundling process MUST
51 51 stop if it is not able to process it.
52 52
53 53 Stream parameters use a simple textual format for two main reasons:
54 54
55 55 - Stream level parameters should remain simple and we want to discourage any
56 56 crazy usage.
57 57 - Textual data allow easy human inspection of the bundle2 header in case of
58 58 trouble.
59 59
60 60 Any application-level options MUST go into a bundle2 part instead.
61 61
62 62 Payload part
63 63 ------------------------
64 64
65 65 The binary format is as follows:
66 66
67 67 :header size: (16-bit integer)
68 68
69 69 The total number of Bytes used by the part headers. When the header is empty
70 70 (size = 0) this is interpreted as the end of stream marker.
71 71
72 72 :header:
73 73
74 74 The header defines how to interpret the part. It contains two pieces of
75 75 data: the part type, and the part parameters.
76 76
77 77 The part type is used to route the part to an application level handler
78 78 that can interpret the payload.
79 79
80 80 Part parameters are passed to the application level handler. They are
81 81 meant to convey information that will help the application level object to
82 82 interpret the part payload.
83 83
84 84 The binary format of the header is as follows:
85 85
86 86 :typesize: (one byte)
87 87 :typename: alphanumerical part name
88 88 :option: options are not supported yet; this is denoted by two zero bytes.
89 89
90 90 :payload:
91 91
92 92 The current payload is a 32bit integer with a value of 0. This is
93 93 considered an "empty" payload.
94 94 """
95 95
96 96 import util
97 97 import struct
98 98 import urllib
99 99 import string
100 100
101 101 import changegroup
102 102 from i18n import _
103 103
104 104 _pack = struct.pack
105 105 _unpack = struct.unpack
106 106
107 107 _magicstring = 'HG20'
108 108
109 109 _fstreamparamsize = '>H'
110 110 _fpartheadersize = '>H'
111 111 _fparttypesize = '>B'
112 112
113 113 class bundle20(object):
114 114 """represent an outgoing bundle2 container
115 115
116 116 Use the `addparam` method to add stream level parameters and `addpart` to
117 117 populate it with parts. Then call `getchunks` to retrieve all the binary
118 118 chunks of data that compose the bundle2 container."""
119 119
120 120 def __init__(self, ui):
121 121 self.ui = ui
122 122 self._params = []
123 123 self._parts = []
124 124
125 125 def addparam(self, name, value=None):
126 126 """add a stream level parameter"""
127 127 if not name:
128 128 raise ValueError('empty parameter name')
129 129 if name[0] not in string.letters:
130 130 raise ValueError('non letter first character: %r' % name)
131 131 self._params.append((name, value))
132 132
133 133 def addpart(self, part):
134 134 """add a new part to the bundle2 container
135 135
136 136 Parts contain the actual applicative payload."""
137 137 self._parts.append(part)
138 138
139 139 def getchunks(self):
140 140 self.ui.debug('start emission of %s stream\n' % _magicstring)
141 141 yield _magicstring
142 142 param = self._paramchunk()
143 143 self.ui.debug('bundle parameter: %s\n' % param)
144 144 yield _pack(_fstreamparamsize, len(param))
145 145 if param:
146 146 yield param
147 147
148 148 self.ui.debug('start of parts\n')
149 149 for part in self._parts:
150 150 self.ui.debug('bundle part: "%s"\n' % part.type)
151 151 for chunk in part.getchunks():
152 152 yield chunk
153 153 self.ui.debug('end of bundle\n')
154 154 yield '\0\0'
155 155
156 156 def _paramchunk(self):
157 157 """return an encoded version of all stream parameters"""
158 158 blocks = []
159 159 for par, value in self._params:
160 160 par = urllib.quote(par)
161 161 if value is not None:
162 162 value = urllib.quote(value)
163 163 par = '%s=%s' % (par, value)
164 164 blocks.append(par)
165 165 return ' '.join(blocks)
166 166
167 167 class unbundle20(object):
168 168 """interpret a bundle2 stream
169 169
170 170 (this will eventually yield parts)"""
171 171
172 172 def __init__(self, ui, fp):
173 173 self.ui = ui
174 174 self._fp = fp
175 175 header = self._readexact(4)
176 176 magic, version = header[0:2], header[2:4]
177 177 if magic != 'HG':
178 178 raise util.Abort(_('not a Mercurial bundle'))
179 179 if version != '20':
180 180 raise util.Abort(_('unknown bundle version %s') % version)
181 181 self.ui.debug('start processing of %s stream\n' % header)
182 182
183 183 def _unpack(self, format):
184 184 """unpack this struct format from the stream"""
185 185 data = self._readexact(struct.calcsize(format))
186 186 return _unpack(format, data)
187 187
188 188 def _readexact(self, size):
189 189 """read exactly <size> bytes from the stream"""
190 190 return changegroup.readexactly(self._fp, size)
191 191
192 192 @util.propertycache
193 193 def params(self):
194 194 """dictionary of stream level parameters"""
195 195 self.ui.debug('reading bundle2 stream parameters\n')
196 196 params = {}
197 197 paramssize = self._unpack(_fstreamparamsize)[0]
198 198 if paramssize:
199 199 for p in self._readexact(paramssize).split(' '):
200 200 p = p.split('=', 1)
201 201 p = [urllib.unquote(i) for i in p]
202 202 if len(p) < 2:
203 203 p.append(None)
204 204 self._processparam(*p)
205 205 params[p[0]] = p[1]
206 206 return params
207 207
208 208 def _processparam(self, name, value):
209 209 """process a parameter, applying its effect if needed
210 210
211 211 Parameters starting with a lower case letter are advisory and will be
212 212 ignored when unknown. Those starting with an upper case letter are
213 213 mandatory and this function will raise a KeyError when unknown.
214 214
215 215 Note: no options are currently supported. Any input will either be
216 216 ignored or fail.
217 217 """
218 218 if not name:
219 219 raise ValueError('empty parameter name')
220 220 if name[0] not in string.letters:
221 221 raise ValueError('non letter first character: %r' % name)
222 222 # Some logic will be later added here to try to process the option for
223 223 # a dict of known parameter.
224 224 if name[0].islower():
225 225 self.ui.debug("ignoring unknown parameter %r\n" % name)
226 226 else:
227 227 raise KeyError(name)
228 228
229 229
230 230 def __iter__(self):
231 231 """yield all parts contained in the stream"""
232 232 # make sure param have been loaded
233 233 self.params
234 234 self.ui.debug('start extraction of bundle2 parts\n')
235 235 part = self._readpart()
236 236 while part is not None:
237 237 yield part
238 238 part = self._readpart()
239 239 self.ui.debug('end of bundle2 stream\n')
240 240
241 241 def _readpart(self):
242 242 """return None when the end of stream marker is reached"""
243 headersize = self._readexact(2)
244 assert headersize == '\0\0'
243
244 headersize = self._unpack(_fpartheadersize)[0]
245 self.ui.debug('part header size: %i\n' % headersize)
246 if not headersize:
245 247 return None
248 headerblock = self._readexact(headersize)
249 # some utility to help reading from the header block
250 self._offset = 0 # layer violation to have something easy to understand
251 def fromheader(size):
252 """return the next <size> byte from the header"""
253 offset = self._offset
254 data = headerblock[offset:(offset + size)]
255 self._offset = offset + size
256 return data
257 typesize = _unpack(_fparttypesize, fromheader(1))[0]
258 parttype = fromheader(typesize)
259 self.ui.debug('part type: "%s"\n' % parttype)
260 current = part(parttype)
261 assert fromheader(2) == '\0\0' # no option for now
262 del self._offset # clean up layer, nobody saw anything.
263 self.ui.debug('part parameters: 0\n')
264 assert self._readexact(4) == '\0\0\0\0' #empty payload
265 self.ui.debug('payload chunk size: 0\n')
266 return current
267
246 268
247 269 class part(object):
248 270 """A bundle2 part contains application level payload
249 271
250 272 The part `type` is used to route the part to the application level
251 273 handler.
252 274 """
253 275
254 def __init__(self, parttype):
276 def __init__(self, parttype, data=''):
255 277 self.type = parttype
278 self.data = data
256 279
257 280 def getchunks(self):
258 281 ### header
259 282 header = [_pack(_fparttypesize, len(self.type)),
260 283 self.type,
261 284 '\0\0', # No option support for now.
262 285 ]
263 286 headerchunk = ''.join(header)
264 287 yield _pack(_fpartheadersize, len(headerchunk))
265 288 yield headerchunk
266 289 # force empty part for now
267 290 yield '\0\0\0\0'
268 291
269 292
@@ -1,245 +1,272
1 1
2 2 Create an extension to test bundle2 API
3 3
4 4 $ cat > bundle2.py << EOF
5 5 > """A small extension to test bundle2 implementation
6 6 >
7 7 > Current bundle2 implementation is far too limited to be used in any core
8 8 > code. We still need to be able to test it while it grows up.
9 9 > """
10 10 >
11 11 > import sys
12 12 > from mercurial import cmdutil
13 13 > from mercurial import util
14 14 > from mercurial import bundle2
15 15 > cmdtable = {}
16 16 > command = cmdutil.command(cmdtable)
17 17 >
18 18 > @command('bundle2',
19 19 > [('', 'param', [], 'stream level parameter'),
20 20 > ('', 'parts', False, 'include some arbitrary parts to the bundle'),],
21 21 > '[OUTPUTFILE]')
22 22 > def cmdbundle2(ui, repo, path=None, **opts):
23 23 > """write a bundle2 container on standard ouput"""
24 24 > bundler = bundle2.bundle20(ui)
25 25 > for p in opts['param']:
26 26 > p = p.split('=', 1)
27 27 > try:
28 28 > bundler.addparam(*p)
29 29 > except ValueError, exc:
30 30 > raise util.Abort('%s' % exc)
31 31 >
32 32 > if opts['parts']:
33 33 > part = bundle2.part('test:empty')
34 34 > bundler.addpart(part)
35 35 > # add a second one to make sure we handle multiple parts
36 36 > part = bundle2.part('test:empty')
37 37 > bundler.addpart(part)
38 38 >
39 39 > if path is None:
40 40 > file = sys.stdout
41 41 > else:
42 42 > file = open(path, 'w')
43 43 >
44 44 > for chunk in bundler.getchunks():
45 45 > file.write(chunk)
46 46 >
47 47 > @command('unbundle2', [], '')
48 48 > def cmdunbundle2(ui, repo):
49 49 > """read a bundle2 container from standard input"""
50 50 > unbundler = bundle2.unbundle20(ui, sys.stdin)
51 51 > try:
52 52 > params = unbundler.params
53 53 > except KeyError, exc:
54 54 > raise util.Abort('unknown parameters: %s' % exc)
55 55 > ui.write('options count: %i\n' % len(params))
56 56 > for key in sorted(params):
57 57 > ui.write('- %s\n' % key)
58 58 > value = params[key]
59 59 > if value is not None:
60 60 > ui.write(' %s\n' % value)
61 61 > parts = list(unbundler)
62 62 > ui.write('parts count: %i\n' % len(parts))
63 > for p in parts:
64 > ui.write(' :%s:\n' % p.type)
63 65 > EOF
64 66 $ cat >> $HGRCPATH << EOF
65 67 > [extensions]
66 68 > bundle2=$TESTTMP/bundle2.py
67 69 > EOF
68 70
69 71 The extension requires a repo (currently unused)
70 72
71 73 $ hg init main
72 74 $ cd main
73 75 $ touch a
74 76 $ hg add a
75 77 $ hg commit -m 'a'
76 78
77 79
78 80 Empty bundle
79 81 =================
80 82
81 83 - no option
82 84 - no parts
83 85
84 86 Test bundling
85 87
86 88 $ hg bundle2
87 89 HG20\x00\x00\x00\x00 (no-eol) (esc)
88 90
89 91 Test unbundling
90 92
91 93 $ hg bundle2 | hg unbundle2
92 94 options count: 0
93 95 parts count: 0
94 96
95 97 Test that old style bundles are detected and refused
96 98
97 99 $ hg bundle --all ../bundle.hg
98 100 1 changesets found
99 101 $ hg unbundle2 < ../bundle.hg
100 102 abort: unknown bundle version 10
101 103 [255]
102 104
103 105 Test parameters
104 106 =================
105 107
106 108 - some options
107 109 - no parts
108 110
109 111 advisory parameters, no value
110 112 -------------------------------
111 113
112 114 Simplest possible parameters form
113 115
114 116 Test generation simple option
115 117
116 118 $ hg bundle2 --param 'caution'
117 119 HG20\x00\x07caution\x00\x00 (no-eol) (esc)
118 120
119 121 Test unbundling
120 122
121 123 $ hg bundle2 --param 'caution' | hg unbundle2
122 124 options count: 1
123 125 - caution
124 126 parts count: 0
125 127
126 128 Test generation multiple option
127 129
128 130 $ hg bundle2 --param 'caution' --param 'meal'
129 131 HG20\x00\x0ccaution meal\x00\x00 (no-eol) (esc)
130 132
131 133 Test unbundling
132 134
133 135 $ hg bundle2 --param 'caution' --param 'meal' | hg unbundle2
134 136 options count: 2
135 137 - caution
136 138 - meal
137 139 parts count: 0
138 140
139 141 advisory parameters, with value
140 142 -------------------------------
141 143
142 144 Test generation
143 145
144 146 $ hg bundle2 --param 'caution' --param 'meal=vegan' --param 'elephants'
145 147 HG20\x00\x1ccaution meal=vegan elephants\x00\x00 (no-eol) (esc)
146 148
147 149 Test unbundling
148 150
149 151 $ hg bundle2 --param 'caution' --param 'meal=vegan' --param 'elephants' | hg unbundle2
150 152 options count: 3
151 153 - caution
152 154 - elephants
153 155 - meal
154 156 vegan
155 157 parts count: 0
156 158
157 159 parameter with special char in value
158 160 ---------------------------------------------------
159 161
160 162 Test generation
161 163
162 164 $ hg bundle2 --param 'e|! 7/=babar%#==tutu' --param simple
163 165 HG20\x00)e%7C%21%207/=babar%25%23%3D%3Dtutu simple\x00\x00 (no-eol) (esc)
164 166
165 167 Test unbundling
166 168
167 169 $ hg bundle2 --param 'e|! 7/=babar%#==tutu' --param simple | hg unbundle2
168 170 options count: 2
169 171 - e|! 7/
170 172 babar%#==tutu
171 173 - simple
172 174 parts count: 0
173 175
174 176 Test unknown mandatory option
175 177 ---------------------------------------------------
176 178
177 179 $ hg bundle2 --param 'Gravity' | hg unbundle2
178 180 abort: unknown parameters: 'Gravity'
179 181 [255]
180 182
181 183 Test debug output
182 184 ---------------------------------------------------
183 185
184 186 bundling debug
185 187
186 188 $ hg bundle2 --debug --param 'e|! 7/=babar%#==tutu' --param simple ../out.hg2
187 189 start emission of HG20 stream
188 190 bundle parameter: e%7C%21%207/=babar%25%23%3D%3Dtutu simple
189 191 start of parts
190 192 end of bundle
191 193
192 194 file content is ok
193 195
194 196 $ cat ../out.hg2
195 197 HG20\x00)e%7C%21%207/=babar%25%23%3D%3Dtutu simple\x00\x00 (no-eol) (esc)
196 198
197 199 unbundling debug
198 200
199 201 $ hg unbundle2 --debug < ../out.hg2
200 202 start processing of HG20 stream
201 203 reading bundle2 stream parameters
202 204 ignoring unknown parameter 'e|! 7/'
203 205 ignoring unknown parameter 'simple'
204 206 options count: 2
205 207 - e|! 7/
206 208 babar%#==tutu
207 209 - simple
208 210 start extraction of bundle2 parts
211 part header size: 0
209 212 end of bundle2 stream
210 213 parts count: 0
211 214
212 215
213 216 Test buggy input
214 217 ---------------------------------------------------
215 218
216 219 empty parameter name
217 220
218 221 $ hg bundle2 --param '' --quiet
219 222 abort: empty parameter name
220 223 [255]
221 224
222 225 bad parameter name
223 226
224 227 $ hg bundle2 --param 42babar
225 228 abort: non letter first character: '42babar'
226 229 [255]
227 230
228 231
229 232 Test part
230 233 =================
231 234
232 235 $ hg bundle2 --parts ../parts.hg2 --debug
233 236 start emission of HG20 stream
234 237 bundle parameter:
235 238 start of parts
236 239 bundle part: "test:empty"
237 240 bundle part: "test:empty"
238 241 end of bundle
239 242
240 243 $ cat ../parts.hg2
241 244 HG20\x00\x00\x00\r (esc)
242 245 test:empty\x00\x00\x00\x00\x00\x00\x00\r (esc)
243 246 test:empty\x00\x00\x00\x00\x00\x00\x00\x00 (no-eol) (esc)
244 247
245 248
249 $ hg unbundle2 < ../parts.hg2
250 options count: 0
251 parts count: 2
252 :test:empty:
253 :test:empty:
254
255 $ hg unbundle2 --debug < ../parts.hg2
256 start processing of HG20 stream
257 reading bundle2 stream parameters
258 options count: 0
259 start extraction of bundle2 parts
260 part header size: 13
261 part type: "test:empty"
262 part parameters: 0
263 payload chunk size: 0
264 part header size: 13
265 part type: "test:empty"
266 part parameters: 0
267 payload chunk size: 0
268 part header size: 0
269 end of bundle2 stream
270 parts count: 2
271 :test:empty:
272 :test:empty:
General Comments 0
You need to be logged in to leave comments. Login now