##// END OF EJS Templates
byterange: remove superfluous pass statements
Augie Fackler -
r34370:635553ca default
parent child Browse files
Show More
@@ -1,472 +1,471 b''
1 1 # This library is free software; you can redistribute it and/or
2 2 # modify it under the terms of the GNU Lesser General Public
3 3 # License as published by the Free Software Foundation; either
4 4 # version 2.1 of the License, or (at your option) any later version.
5 5 #
6 6 # This library is distributed in the hope that it will be useful,
7 7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
8 8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9 9 # Lesser General Public License for more details.
10 10 #
11 11 # You should have received a copy of the GNU Lesser General Public
12 12 # License along with this library; if not, see
13 13 # <http://www.gnu.org/licenses/>.
14 14
15 15 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
16 16 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
17 17
18 18 # $Id: byterange.py,v 1.9 2005/02/14 21:55:07 mstenner Exp $
19 19
20 20 from __future__ import absolute_import
21 21
22 22 import email
23 23 import ftplib
24 24 import mimetypes
25 25 import os
26 26 import re
27 27 import socket
28 28 import stat
29 29
30 30 from . import (
31 31 util,
32 32 )
33 33
34 34 urlerr = util.urlerr
35 35 urlreq = util.urlreq
36 36
37 37 addclosehook = urlreq.addclosehook
38 38 addinfourl = urlreq.addinfourl
39 39 splitattr = urlreq.splitattr
40 40 splitpasswd = urlreq.splitpasswd
41 41 splitport = urlreq.splitport
42 42 splituser = urlreq.splituser
43 43 unquote = urlreq.unquote
44 44
class RangeError(IOError):
    """Signal that a requested byte range cannot be satisfied.

    Derives from IOError so callers that already handle I/O failures
    also catch range failures.
    """
48 47
class HTTPRangeHandler(urlreq.basehandler):
    """Opener handler that accepts responses to HTTP Range requests.

    Range is a native HTTP feature, so nothing has to be added to the
    request itself.  This handler only tells the opener that a
    "206 Partial Content" reply is a normal response, and turns a
    "416 Range Not Satisfiable" reply into a RangeError.

    Example:
        import byterange

        range_handler = byterange.HTTPRangeHandler()
        opener = urlreq.buildopener(range_handler)

        # install it
        urlreq.installopener(opener)

        # create Request and set Range header
        req = urlreq.request('http://www.python.org/')
        req.add_header('Range', 'bytes=30-50')
        f = urlreq.urlopen(req)
    """

    def http_error_206(self, req, fp, code, msg, hdrs):
        """Wrap a 206 Partial Content reply as a regular response object."""
        response = urlreq.addinfourl(fp, hdrs, req.get_full_url())
        response.code, response.msg = code, msg
        return response

    def http_error_416(self, req, fp, code, msg, hdrs):
        """Raise RangeError for HTTP's Range Not Satisfiable reply."""
        raise RangeError('Requested Range Not Satisfiable')
83 82
class RangeableFileObject(object):
    """File object wrapper to enable raw range handling.

    This was implemented primarily for handling range specifications
    for file:// urls.  The wrapper makes a file object look as if it
    consisted only of the bytes [firstbyte, lastbyte) of the stream;
    lastbyte is exclusive, and '' means "up to the end of the stream".

    Examples:
        # expose 10 bytes, starting at byte position 20, from
        # /etc/passwd.
        fo = RangeableFileObject(open('/etc/passwd', 'rb'), (20, 30))
        # seek seeks within the range (to position 23 in this case)
        fo.seek(3)
        # tell tells where you are _within the range_ (position 3 in
        # this case)
        fo.tell()
        # read EOFs if an attempt is made to read past the last
        # byte in the range. the following will return only 7 bytes.
        fo.read(30)
    """

    def __init__(self, fo, rangetup):
        """Create a RangeableFileObject.

        fo       -- a file like object. only the read() method need be
                    supported but supporting an optimized seek() is
                    preferable.
        rangetup -- a (firstbyte,lastbyte) tuple specifying the range
                    to work over.

        The file object provided is assumed to be at byte offset 0.
        Raises RangeError (from range_tuple_normalize or from the
        read-based seek) if the range is invalid or starts past EOF.
        """
        self.fo = fo
        normalized = range_tuple_normalize(rangetup)
        if normalized is None:
            # range_tuple_normalize() reports "the whole file" as None;
            # spell that out so the unpacking below cannot fail.
            normalized = (0, '')
        (self.firstbyte, self.lastbyte) = normalized
        self.realpos = 0
        self._do_seek(self.firstbyte)

    def __getattr__(self, name):
        """Delegate unknown attributes (including methods) to self.fo.

        This effectively allows us to wrap at the instance level.
        """
        if name == 'fo':
            # 'fo' itself is missing (instance not initialised yet);
            # looking it up through self.fo would recurse forever.
            raise AttributeError(name)
        return getattr(self.fo, name)

    def tell(self):
        """Return the position within the range.

        Position 0 is the first byte of the range.  For example, if
        this object was created with a range tuple of (500,899),
        tell() will return 0 when at byte position 500 of the file.
        """
        return (self.realpos - self.firstbyte)

    def seek(self, offset, whence=0):
        """Seek within the byte range.

        Positioning is identical to that described under tell().
        whence 0 is relative to the start of the range, 1 is relative
        to the current position; 2 (from the end) raises IOError.
        The target is clamped to the end of the range.
        """
        assert whence in (0, 1, 2)
        if whence == 0:   # absolute seek
            realoffset = self.firstbyte + offset
        elif whence == 1: # relative seek
            realoffset = self.realpos + offset
        elif whence == 2: # absolute from end of file
            # XXX: are we raising the right Error here?
            raise IOError('seek from end of file not supported.')

        # do not allow seek past lastbyte in range
        if self.lastbyte != '' and realoffset >= self.lastbyte:
            realoffset = self.lastbyte

        self._do_seek(realoffset - self.realpos)

    def read(self, size=-1):
        """Read within the range.

        The amount read is limited so that it never crosses the end
        of the range.
        """
        size = self._calc_read_size(size)
        rslt = self.fo.read(size)
        self.realpos += len(rslt)
        return rslt

    def readline(self, size=-1):
        """Read a line within the range.

        The amount read is limited so that it never crosses the end
        of the range.
        """
        size = self._calc_read_size(size)
        rslt = self.fo.readline(size)
        self.realpos += len(rslt)
        return rslt

    def _calc_read_size(self, size):
        """Return how many bytes may be read without leaving the range.

        size is the caller's request; a negative value means "as much
        as possible".  With an open-ended range the request is
        returned unchanged.
        """
        # Compare against '' rather than relying on truthiness so that
        # an end offset of 0 (an empty range) is still enforced.
        if self.lastbyte != '':
            remaining = self.lastbyte - self.realpos
            if size > -1:
                if (self.realpos + size) >= self.lastbyte:
                    size = remaining
            else:
                size = remaining
        return size

    def _do_seek(self, offset):
        """Advance the wrapped object by offset bytes.

        offset is relative to the current position (self.realpos) and
        must not be negative.  A native seek() is used when the
        wrapped object has one; otherwise the bytes are read and
        discarded.
        """
        assert offset >= 0
        if hasattr(self.fo, 'seek'):
            # native seek() takes an absolute position
            self.fo.seek(self.realpos + offset)
        else:
            # the read-based fallback takes the number of bytes to skip
            self._poor_mans_seek(offset)
        self.realpos += offset

    def _poor_mans_seek(self, offset):
        """Seek by calling the wrapped file object's read() method.

        This is used for file like objects that do not have native
        seek support.

        offset -- read (and discard) this number of bytes from the
                  wrapped file object.

        Raises RangeError if EOF is hit before offset bytes were read.
        """
        pos = 0
        bufsize = 1024
        while pos < offset:
            if (pos + bufsize) > offset:
                bufsize = offset - pos
            buf = self.fo.read(bufsize)
            if len(buf) != bufsize:
                raise RangeError('Requested Range Not Satisfiable')
            pos += bufsize
211 210
class FileRangeHandler(urlreq.filehandler):
    """FileHandler subclass that adds Range support.

    A Range header on a file:// request is applied to the local file:
    the returned object exposes only the requested bytes and the
    Content-Length header reflects the size of the range.
    """
    def open_local_file(self, req):
        """Open the local file named by req, honouring its Range header.

        Returns an addinfourl response carrying Content-Type,
        Content-Length and Last-Modified headers.

        Raises urlerror if the URL names a non-local host, and
        RangeError if the requested range lies outside the file.
        """
        host = req.get_host()
        file = req.get_selector()
        localfile = urlreq.url2pathname(file)
        stats = os.stat(localfile)
        size = stats[stat.ST_SIZE]
        # NOTE(review): email.Utils is the Python 2 spelling of this
        # module; Python 3 only provides email.utils.
        modified = email.Utils.formatdate(stats[stat.ST_MTIME])
        mtype = mimetypes.guess_type(file)[0]
        if host:
            host, port = urlreq.splitport(host)
            if port or socket.gethostbyname(host) not in self.get_names():
                raise urlerr.urlerror('file not on local host')
        brange = req.headers.get('Range', None)
        brange = range_header_to_tuple(brange)
        assert brange != ()
        if brange:
            (fb, lb) = brange
            if lb == '':
                lb = size
            if fb < 0 or fb > size or lb > size:
                raise RangeError('Requested Range Not Satisfiable')
            size = (lb - fb)
        # Open the file only after the range has been validated so that
        # an unsatisfiable range does not leave an open handle behind.
        fo = open(localfile, 'rb')
        if brange:
            try:
                fo = RangeableFileObject(fo, (fb, lb))
            except Exception:
                # fo is still the raw file here; do not leak it
                fo.close()
                raise
        headers = email.message_from_string(
            'Content-Type: %s\nContent-Length: %d\nLast-Modified: %s\n' %
            (mtype or 'text/plain', size, modified))
        return urlreq.addinfourl(fo, headers, 'file:'+file)
245 244
246 245
247 246 # FTP Range Support
248 247 # Unfortunately, a large amount of base FTP code had to be copied
249 248 # from urllib and urllib2 in order to insert the FTP REST command.
250 249 # Code modifications for range support have been commented as
251 250 # follows:
252 251 # -- range support modifications start/end here
253 252
class FTPRangeHandler(urlreq.ftphandler):
    """FTP handler that honours a Range header on the request.

    The start of the range is passed to the server as a restart
    offset; the end of the range is enforced locally by wrapping the
    data stream in a RangeableFileObject.
    """

    def ftp_open(self, req):
        """Open the ftp:// URL described by req.

        Returns an addinfourl response whose headers carry
        Content-Type (when it can be guessed) and Content-Length
        (when a length is known).

        Raises IOError for FTP level failures or a missing host,
        urlerror when the host name cannot be resolved, and RangeError
        when the requested range cannot be satisfied.
        """
        host = req.get_host()
        if not host:
            raise IOError('ftp error', 'no host given')
        host, port = splitport(host)
        if port is None:
            port = ftplib.FTP_PORT
        else:
            port = int(port)

        # username/password handling
        user, host = splituser(host)
        if user:
            user, passwd = splitpasswd(user)
        else:
            passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')

        try:
            host = socket.gethostbyname(host)
        except socket.error as msg:
            raise urlerr.urlerror(msg)
        path, attrs = splitattr(req.get_selector())
        # Build a real list: the result is sliced and indexed below,
        # which an iterator returned by map() would not support.
        dirs = [unquote(part) for part in path.split('/')]
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]:
            dirs = dirs[1:]
        try:
            fw = self.connect_ftp(user, passwd, host, port, dirs)
            if file:
                type = 'I'
            else:
                type = 'D'

            # NOTE(review): urllib's own FTP handler calls splitvalue()
            # here; splitattr() does not split on '=', so this "type"
            # override presumably never matches -- confirm.
            for attr in attrs:
                attr, value = splitattr(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()

            # -- range support modifications start here
            rest = None
            range_tup = range_header_to_tuple(req.headers.get('Range', None))
            assert range_tup != ()
            if range_tup:
                (fb, lb) = range_tup
                if fb > 0:
                    rest = fb
            # -- range support modifications end here

            fp, retrlen = fw.retrfile(file, type, rest)

            # -- range support modifications start here
            if range_tup:
                (fb, lb) = range_tup
                if lb == '':
                    # open-ended range: the length must come from the
                    # server's reply
                    if retrlen is None or retrlen == 0:
                        raise RangeError('Requested Range Not Satisfiable due'
                                         ' to unobtainable file length.')
                    lb = retrlen
                    retrlen = lb - fb
                    if retrlen < 0:
                        # beginning of range is larger than file
                        raise RangeError('Requested Range Not Satisfiable')
                else:
                    retrlen = lb - fb
                    # the stream already starts at fb, so only the
                    # length has to be limited
                    fp = RangeableFileObject(fp, (0, retrlen))
            # -- range support modifications end here

            headers = ""
            mtype = mimetypes.guess_type(req.get_full_url())[0]
            if mtype:
                headers += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-Length: %d\n" % retrlen
            headers = email.message_from_string(headers)
            return addinfourl(fp, headers, req.get_full_url())
        except ftplib.all_errors as msg:
            raise IOError('ftp error', msg)

    def connect_ftp(self, user, passwd, host, port, dirs):
        """Return a range-aware ftpwrapper for the given connection data."""
        fw = ftpwrapper(user, passwd, host, port, dirs)
        return fw
341 340
class ftpwrapper(urlreq.ftpwrapper):
    # range support note:
    # this ftpwrapper code is copied directly from
    # urllib. The only enhancement is to add the rest
    # argument and pass it on to ftp.ntransfercmd
    def retrfile(self, file, type, rest=None):
        """Start retrieving file and return (file object, length).

        type is the FTP transfer type; 'd'/'D' requests a directory
        listing.  rest, when given, is handed to ntransfercmd() as the
        restart offset.  If the server answers the retrieval with a
        501 error, the file is fetched from the start and the offset
        is applied locally through a RangeableFileObject.

        Raises IOError('ftp error', reason) for permanent FTP errors
        other than 550 on the retrieval.
        """
        self.endtransfer()
        isdir = type in ('d', 'D')
        typecmd = 'TYPE A' if isdir else 'TYPE ' + type
        try:
            self.ftp.voidcmd(typecmd)
        except ftplib.all_errors:
            # reconnect once and retry
            self.init()
            self.ftp.voidcmd(typecmd)

        conn = None
        if file and not isdir:
            # Use nlst to see if the file exists at all
            try:
                self.ftp.nlst(file)
            except ftplib.error_perm as reason:
                raise IOError('ftp error', reason)
            # Restore the transfer mode!
            self.ftp.voidcmd(typecmd)
            # Try to retrieve as a file
            try:
                conn = self.ftp.ntransfercmd('RETR ' + file, rest)
            except ftplib.error_perm as reason:
                errtext = str(reason)
                if errtext.startswith('501'):
                    # workaround for REST not supported error
                    fp, retrlen = self.retrfile(file, type)
                    return (RangeableFileObject(fp, (rest,'')), retrlen)
                if not errtext.startswith('550'):
                    raise IOError('ftp error', reason)

        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing
            listcmd = 'LIST ' + file if file else 'LIST'
            conn = self.ftp.ntransfercmd(listcmd)

        self.busy = 1
        # Pass back both a suitably decorated object and a retrieval length
        wrapped = addclosehook(conn[0].makefile('rb'), self.endtransfer)
        return (wrapped, conn[1])
394 393
395 394
396 395 ####################################################################
397 396 # Range Tuple Functions
398 397 # XXX: These range tuple functions might go better in a class.
399 398
_rangere = None
def range_header_to_tuple(range_header):
    """Get a (firstbyte,lastbyte) tuple from a Range header value.

    Range headers have the form "bytes=<firstbyte>-<lastbyte>" with an
    inclusive lastbyte.  The returned tuple uses an exclusive end, so
    "bytes=0-0" becomes (0, 1).  If lastbyte is not specified in the
    header value, it is returned as an empty string in the tuple.

    Return None if range_header is None, or if the header asks for
    the whole file ("bytes=0-").
    Return () if range_header does not conform to the range spec
    pattern.
    """
    global _rangere
    if range_header is None:
        return None
    if _rangere is None:
        # compiled on first use and cached at module level
        _rangere = re.compile(r'^bytes=(\d{1,})-(\d*)')
    match = _rangere.match(range_header)
    if not match:
        return ()
    tup = range_tuple_normalize(match.group(1, 2))
    # Test against '' instead of truthiness: a last byte of 0 is a
    # real position and must be converted to an exclusive end as well.
    if tup and tup[1] != '':
        tup = (tup[0], tup[1] + 1)
    return tup
427 426
def range_tuple_to_header(range_tup):
    """Build a Range header value from a range tuple.

    The result has the form "bytes=<firstbyte>-<lastbyte>".  None is
    returned when no header is needed, i.e. when range_tup is None or
    normalizes to the whole file.
    """
    if range_tup is None:
        return None
    normalized = range_tuple_normalize(range_tup)
    if not normalized:
        return None
    first, last = normalized
    if last:
        # the tuple end is exclusive, the header end is inclusive
        last = last - 1
    return 'bytes=%s-%s' % (first, last)
440 439
def range_tuple_normalize(range_tup):
    """Normalize a (first_byte,last_byte) range tuple.

    Return a tuple whose first element is guaranteed to be an int
    and whose second element will be '' (meaning: the last byte) or
    an int.  A missing, None or '' first byte counts as 0; a missing
    or None last byte counts as ''.

    Return None if range_tup is None or if the normalized tuple is
    (0, ''), as that is equivalent to retrieving the entire file.

    Raise RangeError if the last byte is smaller than the first byte.
    """
    if range_tup is None:
        return None
    # handle first byte
    fb = range_tup[0]
    if fb in (None, ''):
        fb = 0
    else:
        fb = int(fb)
    # handle last byte
    try:
        lb = range_tup[1]
    except IndexError:
        lb = ''
    else:
        if lb is None:
            lb = ''
        elif lb != '':
            lb = int(lb)
    # check if range is over the entire file
    if (fb, lb) == (0, ''):
        return None
    # check that the range is valid; an open-ended range ('') has no
    # upper bound to compare, and comparing '' with an int is an error
    # on Python 3
    if lb != '' and lb < fb:
        raise RangeError('Invalid byte range: %s-%s' % (fb, lb))
    return (fb, lb)
General Comments 0
You need to be logged in to leave comments. Login now