##// END OF EJS Templates
byterange: fix import error...
Martin Geisler -
r8378:59acf649 default
parent child Browse files
Show More
@@ -1,469 +1,469 b''
1 1 # This library is free software; you can redistribute it and/or
2 2 # modify it under the terms of the GNU Lesser General Public
3 3 # License as published by the Free Software Foundation; either
4 4 # version 2.1 of the License, or (at your option) any later version.
5 5 #
6 6 # This library is distributed in the hope that it will be useful,
7 7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
8 8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9 9 # Lesser General Public License for more details.
10 10 #
11 11 # You should have received a copy of the GNU Lesser General Public
12 12 # License along with this library; if not, write to the
13 13 # Free Software Foundation, Inc.,
14 14 # 59 Temple Place, Suite 330,
15 15 # Boston, MA 02111-1307 USA
16 16
17 17 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
18 18 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
19 19
20 20 # $Id: byterange.py,v 1.9 2005/02/14 21:55:07 mstenner Exp $
21 21
22 22 import os
23 23 import stat
24 24 import urllib
25 25 import urllib2
26 import email.utils
26 import email.Utils
27 27
28 28 try:
29 29 from cStringIO import StringIO
30 30 except ImportError, msg:
31 31 from StringIO import StringIO
32 32
class RangeError(IOError):
    """Signal that a requested byte range cannot be satisfied."""
36 36
class HTTPRangeHandler(urllib2.BaseHandler):
    """urllib2 handler that accepts responses to HTTP Range requests.

    Range is a native HTTP feature, so the only work done here is to
    tell urllib2 that a "206 Partial Content" response from the server
    is the expected outcome, and to turn a 416 response into a
    RangeError.

    Example:
        import urllib2
        import byterange

        range_handler = byterange.HTTPRangeHandler()
        opener = urllib2.build_opener(range_handler)

        # install it
        urllib2.install_opener(opener)

        # create Request and set Range header
        req = urllib2.Request('http://www.python.org/')
        req.add_header('Range', 'bytes=30-50')
        f = urllib2.urlopen(req)
    """

    def http_error_206(self, req, fp, code, msg, hdrs):
        """Return the 206 Partial Content response as a regular response.

        The body stream fp and headers hdrs are wrapped in an addinfourl
        object carrying the request URL, status code and status message.
        """
        response = urllib.addinfourl(fp, hdrs, req.get_full_url())
        response.code, response.msg = code, msg
        return response

    def http_error_416(self, req, fp, code, msg, hdrs):
        """Raise RangeError for HTTP's Range Not Satisfiable status."""
        raise RangeError('Requested Range Not Satisfiable')
71 71
class RangeableFileObject:
    """File object wrapper to enable raw range handling.

    This was implemented primarily for handling range specifications
    for file:// urls. This object effectively makes a file object look
    like it consists only of a range of bytes in the stream.

    Examples:
        # expose 10 bytes, starting at byte position 20, from
        # /etc/passwd.
        >>> fo = RangeableFileObject(file('/etc/passwd', 'r'), (20,30))
        # seek seeks within the range (to position 23 in this case)
        >>> fo.seek(3)
        # tell tells where you are _within the range_ (position 3 in
        # this case)
        >>> fo.tell()
        # read EOFs if an attempt is made to read past the last
        # byte in the range. the following will return only 7 bytes.
        >>> fo.read(30)
    """

    def __init__(self, fo, rangetup):
        """Create a RangeableFileObject.

        fo       -- a file like object. only the read() method need be
                    supported but supporting an optimized seek() is
                    preferable.
        rangetup -- a (firstbyte,lastbyte) tuple specifying the range
                    to work over.

        The file object provided is assumed to be at byte offset 0.
        Raises RangeError if the range is invalid or if the start of
        the range cannot be reached on a non-seekable object.
        """
        self.fo = fo
        normalized = range_tuple_normalize(rangetup)
        if normalized is None:
            # range_tuple_normalize() reports "the entire file" as None;
            # treat that as an open-ended range starting at byte 0.
            normalized = (0, '')
        (self.firstbyte, self.lastbyte) = normalized
        self.realpos = 0
        self._do_seek(self.firstbyte)

    def __getattr__(self, name):
        """Delegate unknown attributes (including methods) to self.fo.

        This effectively allows us to wrap at the instance level.
        """
        if name == 'fo':
            # 'fo' itself is missing (instance not initialised yet);
            # looking it up through self.fo would recurse forever.
            raise AttributeError(name)
        if hasattr(self.fo, name):
            return getattr(self.fo, name)
        raise AttributeError(name)

    def tell(self):
        """Return the position within the range.

        This is different from fo.tell in that position 0 is the
        first byte position of the range tuple. For example, if
        this object was created with a range tuple of (500,899),
        tell() will return 0 when at byte position 500 of the file.
        """
        return (self.realpos - self.firstbyte)

    def seek(self, offset, whence=0):
        """Seek within the byte range.

        Positioning is identical to that described under tell().
        whence 0 is relative to the start of the range and whence 1 is
        relative to the current position; whence 2 is not supported and
        raises IOError. Targets past the end of the range are clamped
        to the end of the range; targets before the start of the range
        raise IOError.
        """
        assert whence in (0, 1, 2)
        if whence == 0:   # absolute seek
            realoffset = self.firstbyte + offset
        elif whence == 1:   # relative seek
            realoffset = self.realpos + offset
        elif whence == 2:   # absolute from end of file
            # XXX: are we raising the right Error here?
            raise IOError('seek from end of file not supported.')

        # do not allow seek past lastbyte in range
        if self.lastbyte and (realoffset >= self.lastbyte):
            realoffset = self.lastbyte
        # do not allow seek before firstbyte in range
        if realoffset < self.firstbyte:
            raise IOError('seek before start of range not supported.')

        self._do_seek(realoffset - self.realpos)

    def read(self, size=-1):
        """Read within the range.

        This method will limit the size read based on the range.
        """
        size = self._calc_read_size(size)
        rslt = self.fo.read(size)
        self.realpos += len(rslt)
        return rslt

    def readline(self, size=-1):
        """Read lines within the range.

        This method will limit the size read based on the range.
        """
        size = self._calc_read_size(size)
        rslt = self.fo.readline(size)
        self.realpos += len(rslt)
        return rslt

    def _calc_read_size(self, size):
        """Return the number of bytes that may be read for a request.

        size is the caller's requested size (-1 meaning "everything").
        When the range has an end, the result never reaches past it;
        an open-ended range (lastbyte == '') leaves size untouched.
        """
        if self.lastbyte:
            if size > -1:
                if ((self.realpos + size) >= self.lastbyte):
                    size = (self.lastbyte - self.realpos)
            else:
                size = (self.lastbyte - self.realpos)
        return size

    def _do_seek(self, offset):
        """Seek based on whether wrapped object supports seek().

        offset is relative to the current position (self.realpos).
        A negative offset is only possible when the wrapped object has
        its own seek(); otherwise IOError is raised, because seeking by
        reading can only move forward.
        """
        if hasattr(self.fo, 'seek'):
            self.fo.seek(self.realpos + offset)
        else:
            if offset < 0:
                raise IOError('backward seek not supported by '
                              'wrapped file object.')
            self._poor_mans_seek(offset)
        self.realpos += offset

    def _poor_mans_seek(self, offset):
        """Seek by calling the wrapped file objects read() method.

        This is used for file like objects that do not have native
        seek support. The wrapped objects read() method is called
        to manually seek to the desired position.

        offset -- read this number of bytes from the wrapped
                  file object.

        raise RangeError if we encounter EOF before reaching the
        specified offset.
        """
        pos = 0
        bufsize = 1024
        while pos < offset:
            if (pos + bufsize) > offset:
                # last chunk: read only what is left to skip
                bufsize = offset - pos
            buf = self.fo.read(bufsize)
            if len(buf) != bufsize:
                raise RangeError('Requested Range Not Satisfiable')
            pos += bufsize
203 203
class FileRangeHandler(urllib2.FileHandler):
    """FileHandler subclass that adds Range support.

    This class handles Range headers exactly like an HTTP
    server would.
    """
    def open_local_file(self, req):
        """Open the local file named by req, honouring a Range header.

        Returns an addinfourl object whose headers carry Content-Type,
        Content-Length (the length of the selected range, or of the
        whole file when no range is given) and Last-Modified.

        Raises urllib2.URLError if the URL names a non-local host and
        RangeError if the requested range lies outside the file.
        """
        import mimetypes
        import mimetools
        host = req.get_host()
        selector = req.get_selector()
        localfile = urllib.url2pathname(selector)
        stats = os.stat(localfile)
        size = stats[stat.ST_SIZE]
        modified = email.Utils.formatdate(stats[stat.ST_MTIME])
        mtype = mimetypes.guess_type(selector)[0]
        if host:
            host, port = urllib.splitport(host)
            if port or socket.gethostbyname(host) not in self.get_names():
                raise urllib2.URLError('file not on local host')
        # Validate the range before opening the file so that a bad
        # range does not leave an open file handle behind.
        brange = range_header_to_tuple(req.headers.get('Range', None))
        assert brange != ()
        if brange:
            (fb, lb) = brange
            if lb == '':
                # open-ended range: read up to the end of the file
                lb = size
            if fb < 0 or fb > size or lb > size:
                raise RangeError('Requested Range Not Satisfiable')
            size = (lb - fb)
        fo = open(localfile, 'rb')
        if brange:
            fo = RangeableFileObject(fo, (fb, lb))
        headers = mimetools.Message(StringIO(
            'Content-Type: %s\nContent-Length: %d\nLast-Modified: %s\n' %
            (mtype or 'text/plain', size, modified)))
        return urllib.addinfourl(fo, headers, 'file:'+selector)
239 239
240 240
241 241 # FTP Range Support
242 242 # Unfortunately, a large amount of base FTP code had to be copied
243 243 # from urllib and urllib2 in order to insert the FTP REST command.
244 244 # Code modifications for range support have been commented as
245 245 # follows:
246 246 # -- range support modifications start/end here
247 247
248 248 from urllib import splitport, splituser, splitpasswd, splitattr, \
249 249 unquote, addclosehook, addinfourl
250 250 import ftplib
251 251 import socket
252 252 import sys
253 253 import mimetypes
254 254 import mimetools
255 255
256 256 class FTPRangeHandler(urllib2.FTPHandler):
257 257 def ftp_open(self, req):
258 258 host = req.get_host()
259 259 if not host:
260 260 raise IOError('ftp error', 'no host given')
261 261 host, port = splitport(host)
262 262 if port is None:
263 263 port = ftplib.FTP_PORT
264 264
265 265 # username/password handling
266 266 user, host = splituser(host)
267 267 if user:
268 268 user, passwd = splitpasswd(user)
269 269 else:
270 270 passwd = None
271 271 host = unquote(host)
272 272 user = unquote(user or '')
273 273 passwd = unquote(passwd or '')
274 274
275 275 try:
276 276 host = socket.gethostbyname(host)
277 277 except socket.error, msg:
278 278 raise urllib2.URLError(msg)
279 279 path, attrs = splitattr(req.get_selector())
280 280 dirs = path.split('/')
281 281 dirs = map(unquote, dirs)
282 282 dirs, file = dirs[:-1], dirs[-1]
283 283 if dirs and not dirs[0]:
284 284 dirs = dirs[1:]
285 285 try:
286 286 fw = self.connect_ftp(user, passwd, host, port, dirs)
287 287 type = file and 'I' or 'D'
288 288 for attr in attrs:
289 289 attr, value = splitattr(attr)
290 290 if attr.lower() == 'type' and \
291 291 value in ('a', 'A', 'i', 'I', 'd', 'D'):
292 292 type = value.upper()
293 293
294 294 # -- range support modifications start here
295 295 rest = None
296 296 range_tup = range_header_to_tuple(req.headers.get('Range', None))
297 297 assert range_tup != ()
298 298 if range_tup:
299 299 (fb, lb) = range_tup
300 300 if fb > 0:
301 301 rest = fb
302 302 # -- range support modifications end here
303 303
304 304 fp, retrlen = fw.retrfile(file, type, rest)
305 305
306 306 # -- range support modifications start here
307 307 if range_tup:
308 308 (fb, lb) = range_tup
309 309 if lb == '':
310 310 if retrlen is None or retrlen == 0:
311 311 raise RangeError('Requested Range Not Satisfiable due to unobtainable file length.')
312 312 lb = retrlen
313 313 retrlen = lb - fb
314 314 if retrlen < 0:
315 315 # beginning of range is larger than file
316 316 raise RangeError('Requested Range Not Satisfiable')
317 317 else:
318 318 retrlen = lb - fb
319 319 fp = RangeableFileObject(fp, (0, retrlen))
320 320 # -- range support modifications end here
321 321
322 322 headers = ""
323 323 mtype = mimetypes.guess_type(req.get_full_url())[0]
324 324 if mtype:
325 325 headers += "Content-Type: %s\n" % mtype
326 326 if retrlen is not None and retrlen >= 0:
327 327 headers += "Content-Length: %d\n" % retrlen
328 328 sf = StringIO(headers)
329 329 headers = mimetools.Message(sf)
330 330 return addinfourl(fp, headers, req.get_full_url())
331 331 except ftplib.all_errors, msg:
332 332 raise IOError('ftp error', msg), sys.exc_info()[2]
333 333
334 334 def connect_ftp(self, user, passwd, host, port, dirs):
335 335 fw = ftpwrapper(user, passwd, host, port, dirs)
336 336 return fw
337 337
338 338 class ftpwrapper(urllib.ftpwrapper):
339 339 # range support note:
340 340 # this ftpwrapper code is copied directly from
341 341 # urllib. The only enhancement is to add the rest
342 342 # argument and pass it on to ftp.ntransfercmd
343 343 def retrfile(self, file, type, rest=None):
344 344 self.endtransfer()
345 345 if type in ('d', 'D'):
346 346 cmd = 'TYPE A'
347 347 isdir = 1
348 348 else:
349 349 cmd = 'TYPE ' + type
350 350 isdir = 0
351 351 try:
352 352 self.ftp.voidcmd(cmd)
353 353 except ftplib.all_errors:
354 354 self.init()
355 355 self.ftp.voidcmd(cmd)
356 356 conn = None
357 357 if file and not isdir:
358 358 # Use nlst to see if the file exists at all
359 359 try:
360 360 self.ftp.nlst(file)
361 361 except ftplib.error_perm, reason:
362 362 raise IOError('ftp error', reason), sys.exc_info()[2]
363 363 # Restore the transfer mode!
364 364 self.ftp.voidcmd(cmd)
365 365 # Try to retrieve as a file
366 366 try:
367 367 cmd = 'RETR ' + file
368 368 conn = self.ftp.ntransfercmd(cmd, rest)
369 369 except ftplib.error_perm, reason:
370 370 if str(reason).startswith('501'):
371 371 # workaround for REST not supported error
372 372 fp, retrlen = self.retrfile(file, type)
373 373 fp = RangeableFileObject(fp, (rest,''))
374 374 return (fp, retrlen)
375 375 elif not str(reason).startswith('550'):
376 376 raise IOError('ftp error', reason), sys.exc_info()[2]
377 377 if not conn:
378 378 # Set transfer mode to ASCII!
379 379 self.ftp.voidcmd('TYPE A')
380 380 # Try a directory listing
381 381 if file:
382 382 cmd = 'LIST ' + file
383 383 else:
384 384 cmd = 'LIST'
385 385 conn = self.ftp.ntransfercmd(cmd)
386 386 self.busy = 1
387 387 # Pass back both a suitably decorated object and a retrieval length
388 388 return (addclosehook(conn[0].makefile('rb'),
389 389 self.endtransfer), conn[1])
390 390
391 391
392 392 ####################################################################
393 393 # Range Tuple Functions
394 394 # XXX: These range tuple functions might go better in a class.
395 395
_rangere = None
def range_header_to_tuple(range_header):
    """Get a (firstbyte,lastbyte) tuple from a Range header value.

    Range headers have the form "bytes=<firstbyte>-<lastbyte>". This
    function pulls the firstbyte and lastbyte values and returns
    a (firstbyte,lastbyte) tuple. The lastbyte of the header is
    inclusive; the returned lastbyte is one larger, i.e. exclusive.
    If lastbyte is not specified in the header value, it is returned
    as an empty string in the tuple.

    Return None if range_header is None, or if the header selects the
    entire file ("bytes=0-").
    Return () if range_header does not conform to the range spec
    pattern.
    """
    global _rangere
    if range_header is None:
        return None
    if _rangere is None:
        # compiled lazily on first use and cached at module level
        import re
        _rangere = re.compile(r'^bytes=(\d{1,})-(\d*)')
    match = _rangere.match(range_header)
    if not match:
        return ()
    tup = range_tuple_normalize(match.group(1, 2))
    # Compare against '' rather than testing truthiness: a last byte
    # of 0 ("bytes=0-0") is a real value and must become (0, 1).
    if tup and tup[1] != '':
        tup = (tup[0], tup[1] + 1)
    return tup
424 424
def range_tuple_to_header(range_tup):
    """Build a Range header value from a range tuple.

    The tuple's last byte is exclusive, so it is decremented to give
    the inclusive form "bytes=<firstbyte>-<lastbyte>" used in headers.
    Return None when no range is needed, i.e. when range_tup is None
    or normalizes to the entire file.
    """
    if range_tup is None:
        return None
    normalized = range_tuple_normalize(range_tup)
    if not normalized:
        return None
    first, last = normalized
    if last:
        last = last - 1
    return 'bytes=%s-%s' % (first, last)
437 437
def range_tuple_normalize(range_tup):
    """Normalize a (first_byte,last_byte) range tuple.

    Return a tuple whose first element is guaranteed to be an int
    and whose second element will be '' (meaning: the last byte) or
    an int. A missing, None or '' first byte counts as 0; a missing
    or None last byte counts as ''.

    Return None if range_tup is None or if the normalized tuple is
    (0,''), as that is equivalent to retrieving the entire file.
    Raise RangeError if the last byte is smaller than the first byte.
    """
    if range_tup is None:
        return None
    # handle first byte
    fb = range_tup[0]
    if fb in (None, ''):
        fb = 0
    else:
        fb = int(fb)
    # handle last byte
    try:
        lb = range_tup[1]
    except IndexError:
        lb = ''
    else:
        if lb is None:
            lb = ''
        elif lb != '':
            lb = int(lb)
    # check if range is over the entire file
    if (fb, lb) == (0, ''):
        return None
    # check that the range is valid; an open-ended range ('') has no
    # upper bound to compare, so only test real integers
    if lb != '' and lb < fb:
        raise RangeError('Invalid byte range: %s-%s' % (fb, lb))
    return (fb, lb)
General Comments 0
You need to be logged in to leave comments. Login now