##// END OF EJS Templates
spelling: primarily
timeless@mozdev.org -
r17507:f2561368 default
parent child Browse files
Show More
@@ -1,460 +1,460 b''
1 1 # This library is free software; you can redistribute it and/or
2 2 # modify it under the terms of the GNU Lesser General Public
3 3 # License as published by the Free Software Foundation; either
4 4 # version 2.1 of the License, or (at your option) any later version.
5 5 #
6 6 # This library is distributed in the hope that it will be useful,
7 7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
8 8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9 9 # Lesser General Public License for more details.
10 10 #
11 11 # You should have received a copy of the GNU Lesser General Public
12 12 # License along with this library; if not, see
13 13 # <http://www.gnu.org/licenses/>.
14 14
15 15 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
16 16 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
17 17
18 18 # $Id: byterange.py,v 1.9 2005/02/14 21:55:07 mstenner Exp $
19 19
20 20 import os
21 21 import stat
22 22 import urllib
23 23 import urllib2
24 24 import email.Utils
25 25
class RangeError(IOError):
    """Signal that a requested byte range cannot be satisfied."""
29 29
class HTTPRangeHandler(urllib2.BaseHandler):
    """urllib2 handler that accepts replies to HTTP Range requests.

    Range is a native HTTP feature, so there is very little to do here:
    the handler tells urllib2 that a "206 Partial Content" response is
    the expected outcome, and turns a 416 response into a RangeError.

    Example:
        import urllib2
        import byterange

        range_handler = byterange.HTTPRangeHandler()
        opener = urllib2.build_opener(range_handler)

        # install it
        urllib2.install_opener(opener)

        # create Request and set Range header
        req = urllib2.Request('http://www.python.org/')
        req.headers['Range'] = 'bytes=30-50'
        f = urllib2.urlopen(req)
    """

    def http_error_206(self, req, fp, code, msg, hdrs):
        """Wrap a 206 Partial Content reply as a regular response.

        Returns a urllib.addinfourl built from fp, hdrs and the full
        request URL, with its code and msg attributes set.
        """
        response = urllib.addinfourl(fp, hdrs, req.get_full_url())
        response.code, response.msg = code, msg
        return response

    def http_error_416(self, req, fp, code, msg, hdrs):
        """Raise RangeError for HTTP's Range Not Satisfiable error."""
        raise RangeError('Requested Range Not Satisfiable')
64 64
class RangeableFileObject(object):
    """File object wrapper to enable raw range handling.

    This was implemented primarily for handling range specifications
    for file:// urls. This object effectively makes a file object look
    like it consists only of a range of bytes in the stream.

    Internally the range is half-open: firstbyte is the first byte that
    is exposed and lastbyte is the position just past the last exposed
    byte ('' means "until EOF").

    Examples:
        # expose 10 bytes, starting at byte position 20, from
        # /etc/passwd.
        >>> fo = RangeableFileObject(file('/etc/passwd', 'r'), (20,30))
        # seek seeks within the range (to position 23 in this case)
        >>> fo.seek(3)
        # tell tells where you are _within the range_ (position 3 in
        # this case)
        >>> fo.tell()
        # read EOFs if an attempt is made to read past the last
        # byte in the range. the following will return only 7 bytes.
        >>> fo.read(30)
    """

    def __init__(self, fo, rangetup):
        """Create a RangeableFileObject.

        fo       -- a file like object. only the read() method need be
                    supported but supporting an optimized seek() is
                    preferable.
        rangetup -- a (firstbyte,lastbyte) tuple specifying the range
                    to work over.

        The file object provided is assumed to be at byte offset 0.
        """
        self.fo = fo
        normalized = range_tuple_normalize(rangetup)
        if normalized is None:
            # range_tuple_normalize() reports "the whole file" as None;
            # treat that as an unbounded range instead of failing on
            # the tuple unpacking below.
            normalized = (0, '')
        (self.firstbyte, self.lastbyte) = normalized
        self.realpos = 0
        self._do_seek(self.firstbyte)

    def __getattr__(self, name):
        """Delegate unknown attributes (including methods) to self.fo.

        This effectively allows us to wrap at the instance level.
        """
        if name == 'fo':
            # 'fo' itself is missing (instance not initialised yet);
            # looking it up through self.fo would recurse forever.
            raise AttributeError(name)
        return getattr(self.fo, name)

    def tell(self):
        """Return the position within the range.

        This is different from fo.tell in that position 0 is the
        first byte position of the range tuple. For example, if
        this object was created with a range tuple of (500,899),
        tell() will return 0 when at byte position 500 of the file.
        """
        return (self.realpos - self.firstbyte)

    def seek(self, offset, whence=0):
        """Seek within the byte range.

        Positioning is identical to that described under tell().
        Only forward movement is supported (see _do_seek), seeking
        relative to the end of file (whence == 2) raises IOError, and
        the target is clamped to the end of the range.
        """
        assert whence in (0, 1, 2)
        if whence == 0:   # absolute seek
            realoffset = self.firstbyte + offset
        elif whence == 1:   # relative seek
            realoffset = self.realpos + offset
        elif whence == 2:   # absolute from end of file
            # XXX: are we raising the right Error here?
            raise IOError('seek from end of file not supported.')

        # do not allow seek past lastbyte in range
        if self.lastbyte and (realoffset >= self.lastbyte):
            realoffset = self.lastbyte

        self._do_seek(realoffset - self.realpos)

    def read(self, size=-1):
        """Read within the range.

        This method will limit the size read based on the range.
        """
        size = self._calc_read_size(size)
        rslt = self.fo.read(size)
        self.realpos += len(rslt)
        return rslt

    def readline(self, size=-1):
        """Read lines within the range.

        This method will limit the size read based on the range.
        """
        size = self._calc_read_size(size)
        rslt = self.fo.readline(size)
        self.realpos += len(rslt)
        return rslt

    def _calc_read_size(self, size):
        """Return the number of bytes that may be read from here.

        A negative size (read everything) or a size reaching past the
        end of the range is cut down to the bytes left in the range.
        When the range has no upper bound, size is returned unchanged.
        """
        if self.lastbyte:
            if size > -1:
                if ((self.realpos + size) >= self.lastbyte):
                    size = (self.lastbyte - self.realpos)
            else:
                size = (self.lastbyte - self.realpos)
        return size

    def _do_seek(self, offset):
        """Seek based on whether wrapped object supports seek().

        offset is relative to the current position (self.realpos) and
        must not be negative.
        """
        assert offset >= 0
        if hasattr(self.fo, 'seek'):
            # a native seek() takes an absolute position
            self.fo.seek(self.realpos + offset)
        else:
            # the fallback consumes bytes from the current position, so
            # it must be given the relative distance only
            self._poor_mans_seek(offset)
        self.realpos += offset

    def _poor_mans_seek(self, offset):
        """Seek by calling the wrapped file object's read() method.

        This is used for file like objects that do not have native
        seek support. The wrapped object's read() method is called
        to manually seek to the desired position.

        offset -- read this number of bytes from the wrapped
                  file object.

        Raise RangeError if we encounter EOF before reaching the
        specified offset.
        """
        pos = 0
        bufsize = 1024
        while pos < offset:
            if (pos + bufsize) > offset:
                bufsize = offset - pos
            buf = self.fo.read(bufsize)
            if len(buf) != bufsize:
                raise RangeError('Requested Range Not Satisfiable')
            pos += bufsize
192 192
class FileRangeHandler(urllib2.FileHandler):
    """FileHandler subclass that adds Range support.

    This class handles Range headers exactly like an HTTP
    server would.
    """
    def open_local_file(self, req):
        """Open the local file named by req, honouring a Range header.

        Returns a urllib.addinfourl whose headers carry Content-Type,
        Content-Length and Last-Modified. When a Range header is
        present the file object is wrapped in a RangeableFileObject and
        Content-Length is the length of the range.

        Raises urllib2.URLError if the URL names a non-local host and
        RangeError if the range does not fit inside the file.
        """
        import mimetypes
        import email
        host = req.get_host()
        selector = req.get_selector()
        localfile = urllib.url2pathname(selector)
        stats = os.stat(localfile)
        size = stats[stat.ST_SIZE]
        modified = email.Utils.formatdate(stats[stat.ST_MTIME])
        mtype = mimetypes.guess_type(selector)[0]
        if host:
            host, port = urllib.splitport(host)
            if port or socket.gethostbyname(host) not in self.get_names():
                raise urllib2.URLError('file not on local host')
        fo = open(localfile, 'rb')
        ready = False
        try:
            brange = range_header_to_tuple(req.headers.get('Range', None))
            assert brange != ()
            if brange:
                (fb, lb) = brange
                if lb == '':
                    lb = size
                if fb < 0 or fb > size or lb > size:
                    raise RangeError('Requested Range Not Satisfiable')
                size = (lb - fb)
                fo = RangeableFileObject(fo, (fb, lb))
            ready = True
        finally:
            # do not leak the open file when the range is rejected or
            # the wrapper cannot be set up
            if not ready:
                fo.close()
        headers = email.message_from_string(
            'Content-Type: %s\nContent-Length: %d\nLast-Modified: %s\n' %
            (mtype or 'text/plain', size, modified))
        return urllib.addinfourl(fo, headers, 'file:' + selector)
228 228
229 229
230 230 # FTP Range Support
231 231 # Unfortunately, a large amount of base FTP code had to be copied
232 232 # from urllib and urllib2 in order to insert the FTP REST command.
233 233 # Code modifications for range support have been commented as
234 234 # follows:
235 235 # -- range support modifications start/end here
236 236
237 237 from urllib import splitport, splituser, splitpasswd, splitattr, \
238 238 unquote, addclosehook, addinfourl
239 239 import ftplib
240 240 import socket
241 241 import sys
242 242 import mimetypes
243 243 import email
244 244
245 245 class FTPRangeHandler(urllib2.FTPHandler):
246 246 def ftp_open(self, req):
247 247 host = req.get_host()
248 248 if not host:
249 249 raise IOError('ftp error', 'no host given')
250 250 host, port = splitport(host)
251 251 if port is None:
252 252 port = ftplib.FTP_PORT
253 253 else:
254 254 port = int(port)
255 255
256 256 # username/password handling
257 257 user, host = splituser(host)
258 258 if user:
259 259 user, passwd = splitpasswd(user)
260 260 else:
261 261 passwd = None
262 262 host = unquote(host)
263 263 user = unquote(user or '')
264 264 passwd = unquote(passwd or '')
265 265
266 266 try:
267 267 host = socket.gethostbyname(host)
268 268 except socket.error, msg:
269 269 raise urllib2.URLError(msg)
270 270 path, attrs = splitattr(req.get_selector())
271 271 dirs = path.split('/')
272 272 dirs = map(unquote, dirs)
273 273 dirs, file = dirs[:-1], dirs[-1]
274 274 if dirs and not dirs[0]:
275 275 dirs = dirs[1:]
276 276 try:
277 277 fw = self.connect_ftp(user, passwd, host, port, dirs)
278 278 type = file and 'I' or 'D'
279 279 for attr in attrs:
280 280 attr, value = splitattr(attr)
281 281 if attr.lower() == 'type' and \
282 282 value in ('a', 'A', 'i', 'I', 'd', 'D'):
283 283 type = value.upper()
284 284
285 285 # -- range support modifications start here
286 286 rest = None
287 287 range_tup = range_header_to_tuple(req.headers.get('Range', None))
288 288 assert range_tup != ()
289 289 if range_tup:
290 290 (fb, lb) = range_tup
291 291 if fb > 0:
292 292 rest = fb
293 293 # -- range support modifications end here
294 294
295 295 fp, retrlen = fw.retrfile(file, type, rest)
296 296
297 297 # -- range support modifications start here
298 298 if range_tup:
299 299 (fb, lb) = range_tup
300 300 if lb == '':
301 301 if retrlen is None or retrlen == 0:
302 302 raise RangeError('Requested Range Not Satisfiable due'
303 303 ' to unobtainable file length.')
304 304 lb = retrlen
305 305 retrlen = lb - fb
306 306 if retrlen < 0:
307 307 # beginning of range is larger than file
308 308 raise RangeError('Requested Range Not Satisfiable')
309 309 else:
310 310 retrlen = lb - fb
311 311 fp = RangeableFileObject(fp, (0, retrlen))
312 312 # -- range support modifications end here
313 313
314 314 headers = ""
315 315 mtype = mimetypes.guess_type(req.get_full_url())[0]
316 316 if mtype:
317 317 headers += "Content-Type: %s\n" % mtype
318 318 if retrlen is not None and retrlen >= 0:
319 319 headers += "Content-Length: %d\n" % retrlen
320 320 headers = email.message_from_string(headers)
321 321 return addinfourl(fp, headers, req.get_full_url())
322 322 except ftplib.all_errors, msg:
323 323 raise IOError('ftp error', msg), sys.exc_info()[2]
324 324
325 325 def connect_ftp(self, user, passwd, host, port, dirs):
326 326 fw = ftpwrapper(user, passwd, host, port, dirs)
327 327 return fw
328 328
class ftpwrapper(urllib.ftpwrapper):
    """urllib.ftpwrapper variant whose retrfile() accepts a restart offset."""
    # range support note:
    # this ftpwrapper code is copied directly from
    # urllib. The only enhancement is to add the rest
    # argument and pass it on to ftp.ntransfercmd
    def retrfile(self, file, type, rest=None):
        """Start retrieving file (or a directory listing).

        file -- name of the file to fetch; when empty a listing is
                requested instead.
        type -- transfer type; 'd'/'D' requests a directory listing,
                anything else is sent as 'TYPE <type>'.
        rest -- optional restart offset handed to ftp.ntransfercmd().

        Returns a (file object, length) tuple, where length is the
        second element of the ntransfercmd() result.

        Raises IOError('ftp error', ...) when the server rejects the
        request with a permanent error other than 550.
        """
        self.endtransfer()
        if type in ('d', 'D'):
            cmd = 'TYPE A'
            isdir = 1
        else:
            cmd = 'TYPE ' + type
            isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # first attempt failed: re-run init() and try once more
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Use nlst to see if the file exists at all
            try:
                self.ftp.nlst(file)
            except ftplib.error_perm, reason:
                raise IOError('ftp error', reason), sys.exc_info()[2]
            # Restore the transfer mode!
            self.ftp.voidcmd(cmd)
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn = self.ftp.ntransfercmd(cmd, rest)
            except ftplib.error_perm, reason:
                if str(reason).startswith('501'):
                    # workaround for REST not supported error:
                    # fetch from the start and skip the first `rest`
                    # bytes locally
                    fp, retrlen = self.retrfile(file, type)
                    fp = RangeableFileObject(fp, (rest,''))
                    return (fp, retrlen)
                elif not str(reason).startswith('550'):
                    raise IOError('ftp error', reason), sys.exc_info()[2]
                # a 550 reply falls through to the listing attempt below
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing
            if file:
                cmd = 'LIST ' + file
            else:
                cmd = 'LIST'
            conn = self.ftp.ntransfercmd(cmd)
        self.busy = 1
        # Pass back both a suitably decorated object and a retrieval length
        return (addclosehook(conn[0].makefile('rb'),
                            self.endtransfer), conn[1])
381 381
382 382
383 383 ####################################################################
384 384 # Range Tuple Functions
385 385 # XXX: These range tuple functions might go better in a class.
386 386
_rangere = None
def range_header_to_tuple(range_header):
    """Get a (firstbyte,lastbyte) tuple from a Range header value.

    Range headers have the form "bytes=<firstbyte>-<lastbyte>". This
    function pulls the firstbyte and lastbyte values and returns
    a (firstbyte,lastbyte) tuple. The inclusive lastbyte of the header
    is converted to an exclusive end position (lastbyte + 1). If
    lastbyte is not specified in the header value, it is returned as
    an empty string in the tuple.

    Return None if range_header is None, or if the header covers the
    whole file ("bytes=0-").
    Return () if range_header does not conform to the range spec
    pattern.
    Raise RangeError if lastbyte is smaller than firstbyte.
    """
    global _rangere
    if range_header is None:
        return None
    if _rangere is None:
        # compiled lazily on first use and cached at module level
        import re
        _rangere = re.compile(r'^bytes=(\d{1,})-(\d*)')
    match = _rangere.match(range_header)
    if not match:
        return ()
    tup = range_tuple_normalize(match.group(1, 2))
    # compare against '' rather than testing truthiness, so that an
    # explicit last byte of 0 ("bytes=0-0") is still made exclusive
    if tup and tup[1] != '':
        tup = (tup[0], tup[1] + 1)
    return tup

def range_tuple_to_header(range_tup):
    """Convert a range tuple to a Range header value.

    Return a string of the form "bytes=<firstbyte>-<lastbyte>" or None
    if no range is needed (range_tup is None or covers the whole file).
    The exclusive end position of the tuple is converted back to the
    inclusive lastbyte used in the header.
    """
    if range_tup is None:
        return None
    range_tup = range_tuple_normalize(range_tup)
    if not range_tup:
        return None
    if range_tup[1]:
        range_tup = (range_tup[0], range_tup[1] - 1)
    return 'bytes=%s-%s' % range_tup

def range_tuple_normalize(range_tup):
    """Normalize a (first_byte,last_byte) range tuple.

    Return a tuple whose first element is guaranteed to be an int
    and whose second element will be '' (meaning: the last byte) or
    an int. Finally, return None if the normalized tuple == (0,'')
    as that is equivalent to retrieving the entire file.

    A missing, None or '' first byte is read as 0; a missing or None
    last byte is read as ''.

    Raise RangeError if the last byte is smaller than the first byte.
    """
    if range_tup is None:
        return None
    # handle first byte
    fb = range_tup[0]
    if fb in (None, ''):
        fb = 0
    else:
        fb = int(fb)
    # handle last byte
    try:
        lb = range_tup[1]
    except IndexError:
        lb = ''
    else:
        if lb is None:
            lb = ''
        elif lb != '':
            lb = int(lb)
    # check if range is over the entire file
    if (fb, lb) == (0, ''):
        return None
    # check that the range is valid; an open-ended range ('') has no
    # upper bound to validate and must not be compared with an int
    if lb != '' and lb < fb:
        raise RangeError('Invalid byte range: %s-%s' % (fb, lb))
    return (fb, lb)
General Comments 0
You need to be logged in to leave comments. Login now