##// END OF EJS Templates
byterange: replace uses of hasattr with getattr
Augie Fackler -
r14947:3aa34005 default
parent child Browse files
Show More
@@ -1,466 +1,462 b''
1 1 # This library is free software; you can redistribute it and/or
2 2 # modify it under the terms of the GNU Lesser General Public
3 3 # License as published by the Free Software Foundation; either
4 4 # version 2.1 of the License, or (at your option) any later version.
5 5 #
6 6 # This library is distributed in the hope that it will be useful,
7 7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
8 8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9 9 # Lesser General Public License for more details.
10 10 #
11 11 # You should have received a copy of the GNU Lesser General Public
12 12 # License along with this library; if not, write to the
13 13 # Free Software Foundation, Inc.,
14 14 # 59 Temple Place, Suite 330,
15 15 # Boston, MA 02111-1307 USA
16 16
17 17 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
18 18 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
19 19
20 20 # $Id: byterange.py,v 1.9 2005/02/14 21:55:07 mstenner Exp $
21 21
22 22 import os
23 23 import stat
24 24 import urllib
25 25 import urllib2
26 26 import email.Utils
27 27
class RangeError(IOError):
    """Raised when a requested byte range cannot be satisfied."""
31 31
class HTTPRangeHandler(urllib2.BaseHandler):
    """urllib2 handler that accepts HTTP byte-range responses.

    Range is a native HTTP feature, so all this class has to do is
    tell urllib2 that a "206 Partial Content" response from the server
    is what we expected, and turn a 416 response into a RangeError.

    Example:
        import urllib2
        import byterange

        range_handler = byterange.HTTPRangeHandler()
        opener = urllib2.build_opener(range_handler)

        # install it
        urllib2.install_opener(opener)

        # create Request and set Range header
        req = urllib2.Request('http://www.python.org/')
        req.headers['Range'] = 'bytes=30-50'
        f = urllib2.urlopen(req)
    """

    def http_error_206(self, req, fp, code, msg, hdrs):
        # 206 Partial Content: hand the body back as an ordinary response
        response = urllib.addinfourl(fp, hdrs, req.get_full_url())
        response.code = code
        response.msg = msg
        return response

    def http_error_416(self, req, fp, code, msg, hdrs):
        # 416: the server could not satisfy the requested range
        raise RangeError('Requested Range Not Satisfiable')
66 66
class RangeableFileObject(object):
    """File object wrapper to enable raw range handling.

    This was implemented primarily for handling range specifications
    for file:// urls. This object effectively makes a file object look
    like it consists only of a range of bytes in the stream.

    Examples:
        # expose 10 bytes, starting at byte position 20, from
        # /etc/passwd.
        >>> fo = RangeableFileObject(file('/etc/passwd', 'r'), (20,30))
        # seek seeks within the range (to position 23 in this case)
        >>> fo.seek(3)
        # tell tells where you are _within the range_ (position 3 in
        # this case)
        >>> fo.tell()
        # read EOFs if an attempt is made to read past the last
        # byte in the range. the following will return only 7 bytes.
        >>> fo.read(30)
    """

    def __init__(self, fo, rangetup):
        """Create a RangeableFileObject.

        fo       -- a file like object. only the read() method need be
                    supported but supporting an optimized seek() is
                    preferable.
        rangetup -- a (firstbyte,lastbyte) tuple specifying the range
                    to work over.

        The file object provided is assumed to be at byte offset 0.
        """
        self.fo = fo
        normalized = range_tuple_normalize(rangetup)
        if normalized is None:
            # range_tuple_normalize() reports "the whole file" as None;
            # treat that as an unbounded range instead of failing to
            # unpack it.
            normalized = (0, '')
        (self.firstbyte, self.lastbyte) = normalized
        self.realpos = 0
        self._do_seek(self.firstbyte)

    def __getattr__(self, name):
        """This effectively allows us to wrap at the instance level.

        Any attribute not found in _this_ object will be searched for
        in self.fo. This includes methods.
        """
        if name == 'fo':
            # self.fo itself is missing (instance not initialised yet);
            # delegating would call __getattr__('fo') forever.
            raise AttributeError(name)
        return getattr(self.fo, name)

    def tell(self):
        """Return the position within the range.

        This is different from fo.seek in that position 0 is the
        first byte position of the range tuple. For example, if
        this object was created with a range tuple of (500,899),
        tell() will return 0 when at byte position 500 of the file.
        """
        return (self.realpos - self.firstbyte)

    def seek(self, offset, whence=0):
        """Seek within the byte range.

        Positioning is identical to that described under tell().
        Only forward movement is possible: _do_seek() asserts that
        the resulting relative offset is not negative.
        """
        assert whence in (0, 1, 2)
        if whence == 0:   # absolute seek
            realoffset = self.firstbyte + offset
        elif whence == 1: # relative seek
            realoffset = self.realpos + offset
        elif whence == 2: # absolute from end of file
            # XXX: are we raising the right Error here?
            raise IOError('seek from end of file not supported.')

        # do not allow seek past lastbyte in range
        if self.lastbyte and (realoffset >= self.lastbyte):
            realoffset = self.lastbyte

        self._do_seek(realoffset - self.realpos)

    def read(self, size=-1):
        """Read within the range.

        This method will limit the size read based on the range.
        """
        size = self._calc_read_size(size)
        rslt = self.fo.read(size)
        self.realpos += len(rslt)
        return rslt

    def readline(self, size=-1):
        """Read lines within the range.

        This method will limit the size read based on the range.
        """
        size = self._calc_read_size(size)
        rslt = self.fo.readline(size)
        self.realpos += len(rslt)
        return rslt

    def _calc_read_size(self, size):
        """Handles calculating the amount of data to read based on
        the range.

        Returns size unchanged when the range has no upper bound.
        """
        if self.lastbyte:
            if size > -1:
                if ((self.realpos + size) >= self.lastbyte):
                    size = (self.lastbyte - self.realpos)
            else:
                size = (self.lastbyte - self.realpos)
        return size

    def _do_seek(self, offset):
        """Seek based on whether wrapped object supports seek().

        offset is relative to the current position (self.realpos).
        """
        assert offset >= 0
        seek = getattr(self.fo, 'seek', None)
        if seek is None:
            # No native seek: skip forward by reading. The fallback
            # takes the *relative* distance, not an absolute position.
            self._poor_mans_seek(offset)
        else:
            # Native seek takes an absolute position in the stream.
            seek(self.realpos + offset)
        self.realpos += offset

    def _poor_mans_seek(self, offset):
        """Seek by calling the wrapped file objects read() method.

        This is used for file like objects that do not have native
        seek support. The wrapped objects read() method is called
        to manually seek to the desired position.

        offset -- read this number of bytes from the wrapped
                  file object.

        raise RangeError if we encounter EOF before reaching the
        specified offset.
        """
        pos = 0
        bufsize = 1024
        while pos < offset:
            if (pos + bufsize) > offset:
                # last chunk: read only what is left
                bufsize = offset - pos
            buf = self.fo.read(bufsize)
            if len(buf) != bufsize:
                raise RangeError('Requested Range Not Satisfiable')
            pos += bufsize
198 194
class FileRangeHandler(urllib2.FileHandler):
    """FileHandler subclass that adds Range support.

    This class handles Range headers exactly like an HTTP
    server would.
    """
    def open_local_file(self, req):
        """Open the local file named by req, honouring a Range header.

        Returns a urllib.addinfourl whose Content-Length header is the
        number of bytes exposed (the range length when a Range header
        is present, the file size otherwise).

        Raises urllib2.URLError when the URL names a non-local host and
        RangeError when the requested range lies outside the file.
        """
        import mimetypes
        import email
        host = req.get_host()
        selector = req.get_selector()
        localfile = urllib.url2pathname(selector)
        stats = os.stat(localfile)
        size = stats[stat.ST_SIZE]
        modified = email.Utils.formatdate(stats[stat.ST_MTIME])
        mtype = mimetypes.guess_type(selector)[0]
        if host:
            host, port = urllib.splitport(host)
            if port or socket.gethostbyname(host) not in self.get_names():
                raise urllib2.URLError('file not on local host')
        brange = req.headers.get('Range', None)
        brange = range_header_to_tuple(brange)
        assert brange != ()
        if brange:
            (fb, lb) = brange
            if lb == '':
                # open-ended range: runs to the end of the file
                lb = size
            if fb < 0 or fb > size or lb > size:
                raise RangeError('Requested Range Not Satisfiable')
            size = (lb - fb)
        # Open the file only after the range has been validated so that
        # a rejected range does not leave an open file handle behind.
        fo = open(localfile, 'rb')
        if brange:
            fo = RangeableFileObject(fo, (fb, lb))
        headers = email.message_from_string(
            'Content-Type: %s\nContent-Length: %d\nLast-Modified: %s\n' %
            (mtype or 'text/plain', size, modified))
        return urllib.addinfourl(fo, headers, 'file:'+selector)
234 230
235 231
236 232 # FTP Range Support
237 233 # Unfortunately, a large amount of base FTP code had to be copied
238 234 # from urllib and urllib2 in order to insert the FTP REST command.
239 235 # Code modifications for range support have been commented as
240 236 # follows:
241 237 # -- range support modifications start/end here
242 238
243 239 from urllib import splitport, splituser, splitpasswd, splitattr, \
244 240 unquote, addclosehook, addinfourl
245 241 import ftplib
246 242 import socket
247 243 import sys
248 244 import mimetypes
249 245 import email
250 246
class FTPRangeHandler(urllib2.FTPHandler):
    """FTPHandler subclass that honours a Range request header.

    The first byte of the range is passed to ftpwrapper.retrfile() as
    the restart offset; a bounded range is additionally capped by
    wrapping the data stream in a RangeableFileObject.
    """
    def ftp_open(self, req):
        """Open the FTP URL described by req and return an addinfourl.

        Raises IOError('ftp error', ...) when no host is given or when
        ftplib reports an error, and urllib2.URLError when the host
        name cannot be resolved.
        """
        host = req.get_host()
        if not host:
            raise IOError('ftp error', 'no host given')
        host, port = splitport(host)
        if port is None:
            port = ftplib.FTP_PORT
        else:
            port = int(port)

        # username/password handling
        user, host = splituser(host)
        if user:
            user, passwd = splitpasswd(user)
        else:
            passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')

        try:
            host = socket.gethostbyname(host)
        except socket.error, msg:
            raise urllib2.URLError(msg)
        # split the selector into directory components, file name and
        # ';attr=value' attributes
        path, attrs = splitattr(req.get_selector())
        dirs = path.split('/')
        dirs = map(unquote, dirs)
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]:
            # drop the empty component produced by a leading '/'
            dirs = dirs[1:]
        try:
            fw = self.connect_ftp(user, passwd, host, port, dirs)
            # default transfer type: 'I' when a file name is present,
            # 'D' otherwise; a ';type=' attribute overrides it
            type = file and 'I' or 'D'
            for attr in attrs:
                attr, value = splitattr(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()

            # -- range support modifications start here
            rest = None
            range_tup = range_header_to_tuple(req.headers.get('Range', None))
            assert range_tup != ()
            if range_tup:
                (fb, lb) = range_tup
                if fb > 0:
                    # restart the transfer at the first requested byte
                    rest = fb
            # -- range support modifications end here

            fp, retrlen = fw.retrfile(file, type, rest)

            # -- range support modifications start here
            # NOTE(review): ftplib.all_errors may include IOError, in which
            # case a RangeError raised below would be re-wrapped by the
            # except clause at the end of this try block -- confirm.
            if range_tup:
                (fb, lb) = range_tup
                if lb == '':
                    # open-ended range: length comes from the server
                    if retrlen is None or retrlen == 0:
                        raise RangeError('Requested Range Not Satisfiable due'
                                         ' to unobtainable file length.')
                    lb = retrlen
                    retrlen = lb - fb
                    if retrlen < 0:
                        # beginning of range is larger than file
                        raise RangeError('Requested Range Not Satisfiable')
                else:
                    retrlen = lb - fb
                    # NOTE(review): nesting of this wrap inside the else
                    # branch was reconstructed from the upstream layout --
                    # confirm. It caps the stream at retrlen bytes.
                    fp = RangeableFileObject(fp, (0, retrlen))
            # -- range support modifications end here

            headers = ""
            mtype = mimetypes.guess_type(req.get_full_url())[0]
            if mtype:
                headers += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-Length: %d\n" % retrlen
            headers = email.message_from_string(headers)
            return addinfourl(fp, headers, req.get_full_url())
        except ftplib.all_errors, msg:
            # re-raise as IOError while keeping the original traceback
            raise IOError('ftp error', msg), sys.exc_info()[2]

    def connect_ftp(self, user, passwd, host, port, dirs):
        """Return a range-aware ftpwrapper for the given connection data."""
        fw = ftpwrapper(user, passwd, host, port, dirs)
        return fw
334 330
class ftpwrapper(urllib.ftpwrapper):
    """urllib.ftpwrapper whose retrfile() accepts a restart offset."""
    # range support note:
    # this ftpwrapper code is copied directly from
    # urllib. The only enhancement is to add the rest
    # argument and pass it on to ftp.ntransfercmd
    def retrfile(self, file, type, rest=None):
        """Retrieve file (or a directory listing) over FTP.

        file -- name to retrieve; when empty a listing is produced.
        type -- transfer type; 'd'/'D' requests a directory listing.
        rest -- optional restart offset handed to ftp.ntransfercmd.

        Returns a (file object, retrieval length) pair. Raises
        IOError('ftp error', ...) on permission errors other than the
        501 and 550 replies handled below.
        """
        self.endtransfer()
        if type in ('d', 'D'):
            cmd = 'TYPE A'
            isdir = 1
        else:
            cmd = 'TYPE ' + type
            isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # re-initialise the wrapper and retry the command once
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Use nlst to see if the file exists at all
            try:
                self.ftp.nlst(file)
            except ftplib.error_perm, reason:
                raise IOError('ftp error', reason), sys.exc_info()[2]
            # Restore the transfer mode!
            self.ftp.voidcmd(cmd)
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn = self.ftp.ntransfercmd(cmd, rest)
            except ftplib.error_perm, reason:
                if str(reason).startswith('501'):
                    # workaround for REST not supported error
                    # (fetch from the start and skip `rest` bytes locally)
                    fp, retrlen = self.retrfile(file, type)
                    fp = RangeableFileObject(fp, (rest,''))
                    return (fp, retrlen)
                elif not str(reason).startswith('550'):
                    raise IOError('ftp error', reason), sys.exc_info()[2]
                # a 550 reply falls through to the directory listing below
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing
            if file:
                cmd = 'LIST ' + file
            else:
                cmd = 'LIST'
            conn = self.ftp.ntransfercmd(cmd)
        self.busy = 1
        # Pass back both a suitably decorated object and a retrieval length
        return (addclosehook(conn[0].makefile('rb'),
                            self.endtransfer), conn[1])
387 383
388 384
389 385 ####################################################################
390 386 # Range Tuple Functions
391 387 # XXX: These range tuple functions might go better in a class.
392 388
_rangere = None
def range_header_to_tuple(range_header):
    """Get a (firstbyte,lastbyte) tuple from a Range header value.

    Range headers have the form "bytes=<firstbyte>-<lastbyte>". This
    function pulls the firstbyte and lastbyte values and returns
    a (firstbyte,lastbyte) tuple. If lastbyte is not specified in
    the header value, it is returned as an empty string in the
    tuple. The header's lastbyte is inclusive; the returned lastbyte
    is exclusive (one past the last requested byte).

    Return None if range_header is None, or if the header covers the
    whole file ("bytes=0-").
    Return () if range_header does not conform to the range spec
    pattern.
    """
    global _rangere
    if range_header is None:
        return None
    if _rangere is None:
        # compile lazily, on first use
        import re
        _rangere = re.compile(r'^bytes=(\d{1,})-(\d*)')
    match = _rangere.match(range_header)
    if match is None:
        return ()
    tup = range_tuple_normalize(match.group(1, 2))
    # Convert the inclusive last byte to an exclusive bound. Compare
    # against '' rather than testing truthiness so that a last byte of
    # 0 ("bytes=0-0", i.e. just the first byte) is converted as well.
    if tup and tup[1] != '':
        tup = (tup[0], tup[1] + 1)
    return tup
421 417
def range_tuple_to_header(range_tup):
    """Build a Range header value from a range tuple.

    Return a string of the form "bytes=<firstbyte>-<lastbyte>", or
    None if no range is needed (range_tup is None or normalizes to
    the whole file).
    """
    if range_tup is None:
        return None
    normalized = range_tuple_normalize(range_tup)
    if not normalized:
        return None
    first, last = normalized
    if last:
        # tuple bound is exclusive, header bound is inclusive
        last = last - 1
    return 'bytes=%s-%s' % (first, last)
434 430
def range_tuple_normalize(range_tup):
    """Normalize a (first_byte,last_byte) range tuple.

    Return a tuple whose first element is guaranteed to be an int
    and whose second element will be '' (meaning: the last byte) or
    an int. Finally, return None if the normalized tuple == (0,'')
    as that is equivalent to retrieving the entire file.

    Return None if range_tup is None. Raise RangeError if the last
    byte is smaller than the first byte.
    """
    if range_tup is None:
        return None
    # handle first byte
    fb = range_tup[0]
    if fb in (None, ''):
        fb = 0
    else:
        fb = int(fb)
    # handle last byte
    try:
        lb = range_tup[1]
    except IndexError:
        # one-element tuple: open-ended range
        lb = ''
    else:
        if lb is None:
            lb = ''
        elif lb != '':
            lb = int(lb)
    # check if range is over the entire file
    if (fb, lb) == (0, ''):
        return None
    # check that the range is valid; an open-ended range ('') has no
    # upper bound to compare, so skip it explicitly instead of relying
    # on str/int ordering
    if lb != '' and lb < fb:
        raise RangeError('Invalid byte range: %s-%s' % (fb, lb))
    return (fb, lb)
General Comments 0
You need to be logged in to leave comments. Login now