##// END OF EJS Templates
byterange: backport fix from upstream
Benoit Boissinot -
r9695:e4211db4 default
parent child Browse files
Show More
@@ -1,468 +1,470
1 1 # This library is free software; you can redistribute it and/or
2 2 # modify it under the terms of the GNU Lesser General Public
3 3 # License as published by the Free Software Foundation; either
4 4 # version 2.1 of the License, or (at your option) any later version.
5 5 #
6 6 # This library is distributed in the hope that it will be useful,
7 7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
8 8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9 9 # Lesser General Public License for more details.
10 10 #
11 11 # You should have received a copy of the GNU Lesser General Public
12 12 # License along with this library; if not, write to the
13 13 # Free Software Foundation, Inc.,
14 14 # 59 Temple Place, Suite 330,
15 15 # Boston, MA 02111-1307 USA
16 16
17 17 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
18 18 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
19 19
20 20 # $Id: byterange.py,v 1.9 2005/02/14 21:55:07 mstenner Exp $
21 21
22 22 import os
23 23 import stat
24 24 import urllib
25 25 import urllib2
26 26 import email.Utils
27 27
28 28 try:
29 29 from cStringIO import StringIO
30 30 except ImportError, msg:
31 31 from StringIO import StringIO
32 32
class RangeError(IOError):
    """Signals that a requested byte range cannot be satisfied.

    Derives from IOError, so callers that already handle IOError keep
    working without knowing about this subclass.
    """
36 36
class HTTPRangeHandler(urllib2.BaseHandler):
    """urllib2 handler that makes HTTP Range requests usable.

    Range is a native HTTP feature, so there is very little to do: this
    handler tells urllib2 that a "206 Partial Content" response is an
    expected answer rather than an error, and maps "416" responses to
    RangeError.

    Example:
        import urllib2
        import byterange

        range_handler = byterange.HTTPRangeHandler()
        opener = urllib2.build_opener(range_handler)

        # install it
        urllib2.install_opener(opener)

        # create Request and set Range header
        req = urllib2.Request('http://www.python.org/')
        req.headers['Range'] = 'bytes=30-50'
        f = urllib2.urlopen(req)
    """

    def http_error_206(self, req, fp, code, msg, hdrs):
        """Return the 206 Partial Content reply as an ordinary response.

        The response object wraps fp and hdrs and carries the status
        code and message on its 'code' and 'msg' attributes.
        """
        response = urllib.addinfourl(fp, hdrs, req.get_full_url())
        response.code = code
        response.msg = msg
        return response

    def http_error_416(self, req, fp, code, msg, hdrs):
        """Turn HTTP's Range Not Satisfiable status into a RangeError."""
        raise RangeError('Requested Range Not Satisfiable')
71 71
class RangeableFileObject:
    """File object wrapper that exposes only a byte range of a stream.

    This was implemented primarily to handle range specifications for
    file:// urls.  The wrapper makes a file object look as if it
    consisted only of the bytes inside the requested range.

    Only forward seeks are supported: _do_seek() asserts that the
    offset it is given is not negative.

    Examples:
        # expose 10 bytes, starting at byte position 20, from
        # /etc/passwd.
        >>> fo = RangeableFileObject(file('/etc/passwd', 'r'), (20,30))
        # seek seeks within the range (to position 23 in this case)
        >>> fo.seek(3)
        # tell tells where you are _within the range_ (position 3 in
        # this case)
        >>> fo.tell()
        # read EOFs if an attempt is made to read past the last
        # byte in the range. the following will return only 7 bytes.
        >>> fo.read(30)
    """

    def __init__(self, fo, rangetup):
        """Create a RangeableFileObject.

        fo       -- a file like object. only the read() method need be
                    supported but supporting an optimized seek() is
                    preferable.
        rangetup -- a (firstbyte,lastbyte) tuple specifying the range
                    to work over.  lastbyte is exclusive; '' means
                    "up to the end of the stream".

        The file object provided is assumed to be at byte offset 0.
        Raises RangeError if a non-seekable fo hits EOF before
        firstbyte is reached.
        """
        self.fo = fo
        normalized = range_tuple_normalize(rangetup)
        if normalized is None:
            # range_tuple_normalize() collapses the whole-file range to
            # None; treat that as "start at 0, no upper bound" instead
            # of failing on the tuple unpacking below.
            normalized = (0, '')
        (self.firstbyte, self.lastbyte) = normalized
        self.realpos = 0
        self._do_seek(self.firstbyte)

    def __getattr__(self, name):
        """Delegate unknown attributes (including methods) to self.fo.

        This effectively wraps the file object at the instance level:
        any attribute not found on this object is looked up on self.fo.
        """
        if name == 'fo':
            # 'fo' itself is missing (instance built without __init__,
            # e.g. while being copied or unpickled).  Looking it up via
            # self.fo would re-enter this method without end.
            raise AttributeError(name)
        if hasattr(self.fo, name):
            return getattr(self.fo, name)
        raise AttributeError(name)

    def tell(self):
        """Return the position within the range.

        Position 0 is the first byte of the range.  For example, if
        this object was created with a range tuple of (500,899),
        tell() will return 0 when at byte position 500 of the file.
        """
        return (self.realpos - self.firstbyte)

    def seek(self, offset, whence=0):
        """Seek within the byte range.

        Positioning is identical to that described under tell().
        whence 0 is relative to the start of the range and whence 1 is
        relative to the current position.  whence 2 (from end of file)
        raises IOError.  The target is clamped to the end of the range.
        """
        assert whence in (0, 1, 2)
        if whence == 0:   # absolute seek
            realoffset = self.firstbyte + offset
        elif whence == 1:   # relative seek
            realoffset = self.realpos + offset
        elif whence == 2:   # absolute from end of file
            # XXX: are we raising the right Error here?
            raise IOError('seek from end of file not supported.')

        # do not allow seek past lastbyte in range
        if self.lastbyte and (realoffset >= self.lastbyte):
            realoffset = self.lastbyte

        self._do_seek(realoffset - self.realpos)

    def read(self, size=-1):
        """Read at most size bytes, never going past the range end."""
        size = self._calc_read_size(size)
        rslt = self.fo.read(size)
        self.realpos += len(rslt)
        return rslt

    def readline(self, size=-1):
        """Read one line, never going past the range end."""
        size = self._calc_read_size(size)
        rslt = self.fo.readline(size)
        self.realpos += len(rslt)
        return rslt

    def _calc_read_size(self, size):
        """Return how many bytes may be read for a request of size.

        When the range has an upper bound, the request is capped to the
        bytes remaining in the range, and an unbounded request (a
        negative size or None) becomes exactly that remainder.  Without
        an upper bound, size is returned untouched.
        """
        if self.lastbyte:
            remaining = self.lastbyte - self.realpos
            # 'size is not None' spells out what Python 2's mixed-type
            # ordering did implicitly (None > -1 is False there).
            if size is not None and size > -1:
                if (self.realpos + size) >= self.lastbyte:
                    size = remaining
            else:
                size = remaining
        return size

    def _do_seek(self, offset):
        """Advance by offset bytes from the current position.

        offset is relative to the current position (self.realpos) and
        must not be negative.  Uses the wrapped object's seek() when it
        has one, and falls back to reading and discarding otherwise.
        """
        assert offset >= 0
        if not hasattr(self.fo, 'seek'):
            self._poor_mans_seek(offset)
        else:
            self.fo.seek(self.realpos + offset)
        self.realpos += offset

    def _poor_mans_seek(self, offset):
        """Seek by calling the wrapped file object's read() method.

        This is used for file like objects that do not have native
        seek support.  Data is read in chunks of at most 1024 bytes
        and thrown away.

        offset -- read this number of bytes from the wrapped
                  file object.

        Raises RangeError if a read returns fewer bytes than asked for,
        which is taken to mean EOF before the specified offset.
        """
        pos = 0
        bufsize = 1024
        while pos < offset:
            if (pos + bufsize) > offset:
                bufsize = offset - pos
            buf = self.fo.read(bufsize)
            if len(buf) != bufsize:
                raise RangeError('Requested Range Not Satisfiable')
            pos += bufsize
203 203
class FileRangeHandler(urllib2.FileHandler):
    """FileHandler subclass that adds Range support.

    A Range header on a file:// request is honoured by wrapping the
    opened file in a RangeableFileObject and reporting the length of
    the range as Content-Length.
    """
    def open_local_file(self, req):
        """Open the local file named by req, honouring a Range header.

        Returns an addinfourl object whose headers carry Content-Type,
        Content-Length and Last-Modified.

        Raises urllib2.URLError if the request names a host that is not
        the local machine, and RangeError if the requested range does
        not fit inside the file.  A Range header that does not match
        the expected pattern trips an assertion.
        """
        import mimetypes
        import email
        host = req.get_host()
        selector = req.get_selector()
        localfile = urllib.url2pathname(selector)
        stats = os.stat(localfile)
        size = stats[stat.ST_SIZE]
        modified = email.Utils.formatdate(stats[stat.ST_MTIME])
        mtype = mimetypes.guess_type(selector)[0]
        if host:
            host, port = urllib.splitport(host)
            if port or socket.gethostbyname(host) not in self.get_names():
                raise urllib2.URLError('file not on local host')
        raw = open(localfile, 'rb')
        try:
            fo = raw
            brange = req.headers.get('Range', None)
            brange = range_header_to_tuple(brange)
            assert brange != ()
            if brange:
                (fb, lb) = brange
                if lb == '':
                    # open-ended range: runs to the end of the file
                    lb = size
                if fb < 0 or fb > size or lb > size:
                    raise RangeError('Requested Range Not Satisfiable')
                size = (lb - fb)
                fo = RangeableFileObject(raw, (fb, lb))
        except:
            # Deliberately broad: whatever went wrong (RangeError, failed
            # assertion, seek error), do not leak the open file handle.
            raw.close()
            raise
        headers = email.message_from_string(
            'Content-Type: %s\nContent-Length: %d\nLast-Modified: %s\n' %
            (mtype or 'text/plain', size, modified))
        return urllib.addinfourl(fo, headers, 'file:'+selector)
239 239
240 240
241 241 # FTP Range Support
242 242 # Unfortunately, a large amount of base FTP code had to be copied
243 243 # from urllib and urllib2 in order to insert the FTP REST command.
244 244 # Code modifications for range support have been commented as
245 245 # follows:
246 246 # -- range support modifications start/end here
247 247
248 248 from urllib import splitport, splituser, splitpasswd, splitattr, \
249 249 unquote, addclosehook, addinfourl
250 250 import ftplib
251 251 import socket
252 252 import sys
253 253 import mimetypes
254 254 import email
255 255
256 256 class FTPRangeHandler(urllib2.FTPHandler):
257 257 def ftp_open(self, req):
258 258 host = req.get_host()
259 259 if not host:
260 260 raise IOError('ftp error', 'no host given')
261 261 host, port = splitport(host)
262 262 if port is None:
263 263 port = ftplib.FTP_PORT
264 else:
265 port = int(port)
264 266
265 267 # username/password handling
266 268 user, host = splituser(host)
267 269 if user:
268 270 user, passwd = splitpasswd(user)
269 271 else:
270 272 passwd = None
271 273 host = unquote(host)
272 274 user = unquote(user or '')
273 275 passwd = unquote(passwd or '')
274 276
275 277 try:
276 278 host = socket.gethostbyname(host)
277 279 except socket.error, msg:
278 280 raise urllib2.URLError(msg)
279 281 path, attrs = splitattr(req.get_selector())
280 282 dirs = path.split('/')
281 283 dirs = map(unquote, dirs)
282 284 dirs, file = dirs[:-1], dirs[-1]
283 285 if dirs and not dirs[0]:
284 286 dirs = dirs[1:]
285 287 try:
286 288 fw = self.connect_ftp(user, passwd, host, port, dirs)
287 289 type = file and 'I' or 'D'
288 290 for attr in attrs:
289 291 attr, value = splitattr(attr)
290 292 if attr.lower() == 'type' and \
291 293 value in ('a', 'A', 'i', 'I', 'd', 'D'):
292 294 type = value.upper()
293 295
294 296 # -- range support modifications start here
295 297 rest = None
296 298 range_tup = range_header_to_tuple(req.headers.get('Range', None))
297 299 assert range_tup != ()
298 300 if range_tup:
299 301 (fb, lb) = range_tup
300 302 if fb > 0:
301 303 rest = fb
302 304 # -- range support modifications end here
303 305
304 306 fp, retrlen = fw.retrfile(file, type, rest)
305 307
306 308 # -- range support modifications start here
307 309 if range_tup:
308 310 (fb, lb) = range_tup
309 311 if lb == '':
310 312 if retrlen is None or retrlen == 0:
311 313 raise RangeError('Requested Range Not Satisfiable due to unobtainable file length.')
312 314 lb = retrlen
313 315 retrlen = lb - fb
314 316 if retrlen < 0:
315 317 # beginning of range is larger than file
316 318 raise RangeError('Requested Range Not Satisfiable')
317 319 else:
318 320 retrlen = lb - fb
319 321 fp = RangeableFileObject(fp, (0, retrlen))
320 322 # -- range support modifications end here
321 323
322 324 headers = ""
323 325 mtype = mimetypes.guess_type(req.get_full_url())[0]
324 326 if mtype:
325 327 headers += "Content-Type: %s\n" % mtype
326 328 if retrlen is not None and retrlen >= 0:
327 329 headers += "Content-Length: %d\n" % retrlen
328 330 headers = email.message_from_string(headers)
329 331 return addinfourl(fp, headers, req.get_full_url())
330 332 except ftplib.all_errors, msg:
331 333 raise IOError('ftp error', msg), sys.exc_info()[2]
332 334
333 335 def connect_ftp(self, user, passwd, host, port, dirs):
334 336 fw = ftpwrapper(user, passwd, host, port, dirs)
335 337 return fw
336 338
337 339 class ftpwrapper(urllib.ftpwrapper):
338 340 # range support note:
339 341 # this ftpwrapper code is copied directly from
340 342 # urllib. The only enhancement is to add the rest
341 343 # argument and pass it on to ftp.ntransfercmd
342 344 def retrfile(self, file, type, rest=None):
343 345 self.endtransfer()
344 346 if type in ('d', 'D'):
345 347 cmd = 'TYPE A'
346 348 isdir = 1
347 349 else:
348 350 cmd = 'TYPE ' + type
349 351 isdir = 0
350 352 try:
351 353 self.ftp.voidcmd(cmd)
352 354 except ftplib.all_errors:
353 355 self.init()
354 356 self.ftp.voidcmd(cmd)
355 357 conn = None
356 358 if file and not isdir:
357 359 # Use nlst to see if the file exists at all
358 360 try:
359 361 self.ftp.nlst(file)
360 362 except ftplib.error_perm, reason:
361 363 raise IOError('ftp error', reason), sys.exc_info()[2]
362 364 # Restore the transfer mode!
363 365 self.ftp.voidcmd(cmd)
364 366 # Try to retrieve as a file
365 367 try:
366 368 cmd = 'RETR ' + file
367 369 conn = self.ftp.ntransfercmd(cmd, rest)
368 370 except ftplib.error_perm, reason:
369 371 if str(reason).startswith('501'):
370 372 # workaround for REST not supported error
371 373 fp, retrlen = self.retrfile(file, type)
372 374 fp = RangeableFileObject(fp, (rest,''))
373 375 return (fp, retrlen)
374 376 elif not str(reason).startswith('550'):
375 377 raise IOError('ftp error', reason), sys.exc_info()[2]
376 378 if not conn:
377 379 # Set transfer mode to ASCII!
378 380 self.ftp.voidcmd('TYPE A')
379 381 # Try a directory listing
380 382 if file:
381 383 cmd = 'LIST ' + file
382 384 else:
383 385 cmd = 'LIST'
384 386 conn = self.ftp.ntransfercmd(cmd)
385 387 self.busy = 1
386 388 # Pass back both a suitably decorated object and a retrieval length
387 389 return (addclosehook(conn[0].makefile('rb'),
388 390 self.endtransfer), conn[1])
389 391
390 392
391 393 ####################################################################
392 394 # Range Tuple Functions
393 395 # XXX: These range tuple functions might go better in a class.
394 396
_rangere = None
def range_header_to_tuple(range_header):
    """Get a (firstbyte,lastbyte) tuple from a Range header value.

    Range headers have the form "bytes=<firstbyte>-<lastbyte>", where
    lastbyte is inclusive.  The returned tuple uses an exclusive end,
    so a lastbyte given in the header comes back incremented by one.
    If lastbyte is not specified in the header value, it is returned
    as an empty string in the tuple.

    Return None if range_header is None, or if the header describes
    the entire file ("bytes=0-").
    Return () if range_header does not conform to the range spec
    pattern.
    Raises RangeError (from range_tuple_normalize) if lastbyte is
    smaller than firstbyte.
    """
    global _rangere
    if range_header is None:
        return None
    if _rangere is None:
        # compiled on first use and cached in the module global
        import re
        _rangere = re.compile(r'^bytes=(\d{1,})-(\d*)')
    match = _rangere.match(range_header)
    if not match:
        return ()
    tup = range_tuple_normalize(match.group(1, 2))
    # Compare against '' explicitly: a last byte of 0 ("bytes=0-0") is
    # falsy but still has to be converted to the exclusive end 1.
    if tup and tup[1] != '':
        tup = (tup[0], tup[1] + 1)
    return tup
423 425
def range_tuple_to_header(range_tup):
    """Build a Range header value from a range tuple.

    The result has the form "bytes=<firstbyte>-<lastbyte>".  A truthy
    last element of the normalized tuple is decremented by one before
    formatting; an empty one is formatted as is, giving an open-ended
    range.

    Return None if range_tup is None or normalizes to nothing, i.e. no
    range is needed.
    """
    if range_tup is None:
        return None
    normalized = range_tuple_normalize(range_tup)
    if not normalized:
        return None
    first, last = normalized
    if last:
        last = last - 1
    return 'bytes=%s-%s' % (first, last)
436 438
def range_tuple_normalize(range_tup):
    """Normalize a (first_byte,last_byte) range tuple.

    Return a tuple whose first element is guaranteed to be an int
    and whose second element will be '' (meaning: the last byte) or
    an int.  A first byte of None or '' counts as 0; a missing or None
    last byte counts as ''.

    Return None if range_tup is None or if the normalized tuple is
    (0,''), as that is equivalent to retrieving the entire file.

    Raises RangeError if the last byte is smaller than the first byte.
    """
    if range_tup is None:
        return None
    # handle first byte
    fb = range_tup[0]
    if fb in (None, ''):
        fb = 0
    else:
        fb = int(fb)
    # handle last byte
    try:
        lb = range_tup[1]
    except IndexError:
        lb = ''
    else:
        if lb is None:
            lb = ''
        elif lb != '':
            lb = int(lb)
    # check if range is over the entire file
    if (fb, lb) == (0, ''):
        return None
    # check that the range is valid; an open-ended range ('') is always
    # valid, so skip it explicitly instead of relying on how a str
    # happens to order against an int
    if lb != '' and lb < fb:
        raise RangeError('Invalid byte range: %s-%s' % (fb, lb))
    return (fb, lb)
General Comments 0
You need to be logged in to leave comments. Login now