##// END OF EJS Templates
byterange: backport fix from upstream
Benoit Boissinot -
r9695:e4211db4 default
parent child Browse files
Show More
@@ -1,468 +1,470 b''
1 # This library is free software; you can redistribute it and/or
1 # This library is free software; you can redistribute it and/or
2 # modify it under the terms of the GNU Lesser General Public
2 # modify it under the terms of the GNU Lesser General Public
3 # License as published by the Free Software Foundation; either
3 # License as published by the Free Software Foundation; either
4 # version 2.1 of the License, or (at your option) any later version.
4 # version 2.1 of the License, or (at your option) any later version.
5 #
5 #
6 # This library is distributed in the hope that it will be useful,
6 # This library is distributed in the hope that it will be useful,
7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9 # Lesser General Public License for more details.
9 # Lesser General Public License for more details.
10 #
10 #
11 # You should have received a copy of the GNU Lesser General Public
11 # You should have received a copy of the GNU Lesser General Public
12 # License along with this library; if not, write to the
12 # License along with this library; if not, write to the
13 # Free Software Foundation, Inc.,
13 # Free Software Foundation, Inc.,
14 # 59 Temple Place, Suite 330,
14 # 59 Temple Place, Suite 330,
15 # Boston, MA 02111-1307 USA
15 # Boston, MA 02111-1307 USA
16
16
17 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
17 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
18 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
18 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
19
19
20 # $Id: byterange.py,v 1.9 2005/02/14 21:55:07 mstenner Exp $
20 # $Id: byterange.py,v 1.9 2005/02/14 21:55:07 mstenner Exp $
21
21
22 import os
22 import os
23 import stat
23 import stat
24 import urllib
24 import urllib
25 import urllib2
25 import urllib2
26 import email.Utils
26 import email.Utils
27
27
28 try:
28 try:
29 from cStringIO import StringIO
29 from cStringIO import StringIO
30 except ImportError, msg:
30 except ImportError, msg:
31 from StringIO import StringIO
31 from StringIO import StringIO
32
32
class RangeError(IOError):
    """Signal that a requested byte range cannot be satisfied.

    Subclasses IOError so that callers handling generic I/O failures
    also catch range failures.
    """
36
36
class HTTPRangeHandler(urllib2.BaseHandler):
    """Handler that enables HTTP Range headers.

    The Range header is an HTTP feature to begin with, so all this
    class does is tell urllib2 that the "206 Partial Content" response
    from the HTTP server is what we expected, and turn a 416 response
    into a RangeError.

    Example:
        import urllib2
        import byterange

        range_handler = byterange.HTTPRangeHandler()
        opener = urllib2.build_opener(range_handler)

        # install it
        urllib2.install_opener(opener)

        # create Request and set Range header
        req = urllib2.Request('http://www.python.org/')
        req.add_header('Range', 'bytes=30-50')
        f = urllib2.urlopen(req)
    """

    def http_error_206(self, req, fp, code, msg, hdrs):
        """Treat "206 Partial Content" as a successful response.

        Wraps fp in an addinfourl carrying the response headers and the
        request URL, and records the status code and message on it.
        """
        r = urllib.addinfourl(fp, hdrs, req.get_full_url())
        r.code = code
        r.msg = msg
        return r

    def http_error_416(self, req, fp, code, msg, hdrs):
        """Raise RangeError for HTTP's "Range Not Satisfiable" error."""
        raise RangeError('Requested Range Not Satisfiable')
71
71
class RangeableFileObject:
    """File object wrapper to enable raw range handling.

    This was implemented primarily for handling range specifications
    for file:// urls. This object effectively makes a file object look
    like it consists only of a range of bytes in the stream.

    The range is half-open: firstbyte is the first position exposed and
    lastbyte is one past the last position exposed. A lastbyte of ''
    means "until the end of the stream".

    Examples:
        # expose 10 bytes, starting at byte position 20, from
        # /etc/passwd.
        >>> fo = RangeableFileObject(file('/etc/passwd', 'r'), (20,30))
        # seek seeks within the range (to position 23 in this case)
        >>> fo.seek(3)
        # tell tells where you are _within the range_ (position 3 in
        # this case)
        >>> fo.tell()
        # read EOFs if an attempt is made to read past the last
        # byte in the range. the following will return only 7 bytes.
        >>> fo.read(30)
    """

    def __init__(self, fo, rangetup):
        """Create a RangeableFileObject.

        fo       -- a file like object. only the read() method need be
                    supported but supporting an optimized seek() is
                    preferable.
        rangetup -- a (firstbyte,lastbyte) tuple specifying the range
                    to work over.

        The file object provided is assumed to be at byte offset 0.
        Raises RangeError if the range is invalid or, for objects
        without seek(), if the stream ends before firstbyte.
        """
        self.fo = fo
        normalized = range_tuple_normalize(rangetup)
        if normalized is None:
            # range_tuple_normalize() collapses "the whole file" to None;
            # expand it again so the unpacking below cannot fail.
            normalized = (0, '')
        (self.firstbyte, self.lastbyte) = normalized
        self.realpos = 0
        self._do_seek(self.firstbyte)

    def __getattr__(self, name):
        """Delegate unknown attributes (including methods) to self.fo.

        This effectively allows us to wrap at the instance level: any
        attribute not found in _this_ object is searched for in self.fo.
        """
        if name == 'fo':
            # 'fo' itself is missing (instance not initialised yet);
            # looking it up through self.fo would recurse forever.
            raise AttributeError(name)
        if hasattr(self.fo, name):
            return getattr(self.fo, name)
        raise AttributeError(name)

    def tell(self):
        """Return the position within the range.

        This is different from fo.tell in that position 0 is the
        first byte position of the range tuple. For example, if
        this object was created with a range tuple of (500,899),
        tell() will return 0 when at byte position 500 of the file.
        """
        return (self.realpos - self.firstbyte)

    def seek(self, offset, whence=0):
        """Seek within the byte range.

        Positioning is identical to that described under tell().
        Only whence 0 (absolute) and 1 (relative) are supported;
        whence 2 raises IOError. Seeks are clamped to the end of the
        range. Note that the target must not lie before the current
        position: _do_seek() asserts a non-negative displacement.
        """
        assert whence in (0, 1, 2)
        if whence == 0:   # absolute seek
            realoffset = self.firstbyte + offset
        elif whence == 1: # relative seek
            realoffset = self.realpos + offset
        elif whence == 2: # absolute from end of file
            # XXX: are we raising the right Error here?
            raise IOError('seek from end of file not supported.')

        # do not allow seek past lastbyte in range
        if self.lastbyte != '' and realoffset >= self.lastbyte:
            realoffset = self.lastbyte

        self._do_seek(realoffset - self.realpos)

    def read(self, size=-1):
        """Read within the range.

        This method will limit the size read based on the range.
        """
        size = self._calc_read_size(size)
        rslt = self.fo.read(size)
        self.realpos += len(rslt)
        return rslt

    def readline(self, size=-1):
        """Read lines within the range.

        This method will limit the size read based on the range.
        """
        size = self._calc_read_size(size)
        rslt = self.fo.readline(size)
        self.realpos += len(rslt)
        return rslt

    def _calc_read_size(self, size):
        """Return how many bytes may be read, given the range.

        A negative size means "as much as possible". When the range has
        an end, the result never extends past it; an unbounded range
        (lastbyte == '') returns size unchanged.
        """
        # Compare against '' rather than testing truthiness: an end
        # offset of 0 is a real (empty) range, not "unbounded".
        if self.lastbyte != '':
            remaining = self.lastbyte - self.realpos
            if size < 0 or size > remaining:
                size = remaining
        return size

    def _do_seek(self, offset):
        """Seek based on whether wrapped object supports seek().

        offset is relative to the current position (self.realpos)
        and must not be negative.
        """
        assert offset >= 0
        if not hasattr(self.fo, 'seek'):
            self._poor_mans_seek(offset)
        else:
            self.fo.seek(self.realpos + offset)
        self.realpos += offset

    def _poor_mans_seek(self, offset):
        """Seek by calling the wrapped file object's read() method.

        This is used for file like objects that do not have native
        seek support. The wrapped object's read() method is called
        to manually seek to the desired position.

        offset -- read this number of bytes from the wrapped
                  file object.

        Raises RangeError if we encounter EOF before reaching the
        specified offset.
        """
        pos = 0
        bufsize = 1024
        while pos < offset:
            if (pos + bufsize) > offset:
                bufsize = offset - pos
            buf = self.fo.read(bufsize)
            if len(buf) != bufsize:
                raise RangeError('Requested Range Not Satisfiable')
            pos += bufsize
203
203
class FileRangeHandler(urllib2.FileHandler):
    """FileHandler subclass that adds Range support.

    This class handles Range headers exactly like an HTTP
    server would.
    """
    def open_local_file(self, req):
        """Open the local file named by req, honouring a Range header.

        Returns an addinfourl whose headers carry Content-Type,
        Content-Length (the length of the selected range) and
        Last-Modified.

        Raises urllib2.URLError if the URL names a non-local host and
        RangeError if the requested range lies outside the file.
        """
        import mimetypes
        import email
        import socket
        host = req.get_host()
        file = req.get_selector()
        localfile = urllib.url2pathname(file)
        stats = os.stat(localfile)
        size = stats[stat.ST_SIZE]
        modified = email.Utils.formatdate(stats[stat.ST_MTIME])
        mtype = mimetypes.guess_type(file)[0]
        if host:
            host, port = urllib.splitport(host)
            if port or socket.gethostbyname(host) not in self.get_names():
                raise urllib2.URLError('file not on local host')
        fo = open(localfile,'rb')
        try:
            brange = req.headers.get('Range', None)
            brange = range_header_to_tuple(brange)
            assert brange != ()
            if brange:
                (fb, lb) = brange
                if lb == '':
                    lb = size
                if fb < 0 or fb > size or lb > size:
                    raise RangeError('Requested Range Not Satisfiable')
                size = (lb - fb)
                fo = RangeableFileObject(fo, (fb, lb))
        except:
            # Do not leak the open file when the range is rejected;
            # the exception is re-raised unchanged.
            fo.close()
            raise
        headers = email.message_from_string(
            'Content-Type: %s\nContent-Length: %d\nLast-Modified: %s\n' %
            (mtype or 'text/plain', size, modified))
        return urllib.addinfourl(fo, headers, 'file:'+file)
239
239
240
240
241 # FTP Range Support
241 # FTP Range Support
242 # Unfortunately, a large amount of base FTP code had to be copied
242 # Unfortunately, a large amount of base FTP code had to be copied
243 # from urllib and urllib2 in order to insert the FTP REST command.
243 # from urllib and urllib2 in order to insert the FTP REST command.
244 # Code modifications for range support have been commented as
244 # Code modifications for range support have been commented as
245 # follows:
245 # follows:
246 # -- range support modifications start/end here
246 # -- range support modifications start/end here
247
247
248 from urllib import splitport, splituser, splitpasswd, splitattr, \
248 from urllib import splitport, splituser, splitpasswd, splitattr, \
249 unquote, addclosehook, addinfourl
249 unquote, addclosehook, addinfourl
250 import ftplib
250 import ftplib
251 import socket
251 import socket
252 import sys
252 import sys
253 import mimetypes
253 import mimetypes
254 import email
254 import email
255
255
256 class FTPRangeHandler(urllib2.FTPHandler):
256 class FTPRangeHandler(urllib2.FTPHandler):
257 def ftp_open(self, req):
257 def ftp_open(self, req):
258 host = req.get_host()
258 host = req.get_host()
259 if not host:
259 if not host:
260 raise IOError('ftp error', 'no host given')
260 raise IOError('ftp error', 'no host given')
261 host, port = splitport(host)
261 host, port = splitport(host)
262 if port is None:
262 if port is None:
263 port = ftplib.FTP_PORT
263 port = ftplib.FTP_PORT
264 else:
265 port = int(port)
264
266
265 # username/password handling
267 # username/password handling
266 user, host = splituser(host)
268 user, host = splituser(host)
267 if user:
269 if user:
268 user, passwd = splitpasswd(user)
270 user, passwd = splitpasswd(user)
269 else:
271 else:
270 passwd = None
272 passwd = None
271 host = unquote(host)
273 host = unquote(host)
272 user = unquote(user or '')
274 user = unquote(user or '')
273 passwd = unquote(passwd or '')
275 passwd = unquote(passwd or '')
274
276
275 try:
277 try:
276 host = socket.gethostbyname(host)
278 host = socket.gethostbyname(host)
277 except socket.error, msg:
279 except socket.error, msg:
278 raise urllib2.URLError(msg)
280 raise urllib2.URLError(msg)
279 path, attrs = splitattr(req.get_selector())
281 path, attrs = splitattr(req.get_selector())
280 dirs = path.split('/')
282 dirs = path.split('/')
281 dirs = map(unquote, dirs)
283 dirs = map(unquote, dirs)
282 dirs, file = dirs[:-1], dirs[-1]
284 dirs, file = dirs[:-1], dirs[-1]
283 if dirs and not dirs[0]:
285 if dirs and not dirs[0]:
284 dirs = dirs[1:]
286 dirs = dirs[1:]
285 try:
287 try:
286 fw = self.connect_ftp(user, passwd, host, port, dirs)
288 fw = self.connect_ftp(user, passwd, host, port, dirs)
287 type = file and 'I' or 'D'
289 type = file and 'I' or 'D'
288 for attr in attrs:
290 for attr in attrs:
289 attr, value = splitattr(attr)
291 attr, value = splitattr(attr)
290 if attr.lower() == 'type' and \
292 if attr.lower() == 'type' and \
291 value in ('a', 'A', 'i', 'I', 'd', 'D'):
293 value in ('a', 'A', 'i', 'I', 'd', 'D'):
292 type = value.upper()
294 type = value.upper()
293
295
294 # -- range support modifications start here
296 # -- range support modifications start here
295 rest = None
297 rest = None
296 range_tup = range_header_to_tuple(req.headers.get('Range', None))
298 range_tup = range_header_to_tuple(req.headers.get('Range', None))
297 assert range_tup != ()
299 assert range_tup != ()
298 if range_tup:
300 if range_tup:
299 (fb, lb) = range_tup
301 (fb, lb) = range_tup
300 if fb > 0:
302 if fb > 0:
301 rest = fb
303 rest = fb
302 # -- range support modifications end here
304 # -- range support modifications end here
303
305
304 fp, retrlen = fw.retrfile(file, type, rest)
306 fp, retrlen = fw.retrfile(file, type, rest)
305
307
306 # -- range support modifications start here
308 # -- range support modifications start here
307 if range_tup:
309 if range_tup:
308 (fb, lb) = range_tup
310 (fb, lb) = range_tup
309 if lb == '':
311 if lb == '':
310 if retrlen is None or retrlen == 0:
312 if retrlen is None or retrlen == 0:
311 raise RangeError('Requested Range Not Satisfiable due to unobtainable file length.')
313 raise RangeError('Requested Range Not Satisfiable due to unobtainable file length.')
312 lb = retrlen
314 lb = retrlen
313 retrlen = lb - fb
315 retrlen = lb - fb
314 if retrlen < 0:
316 if retrlen < 0:
315 # beginning of range is larger than file
317 # beginning of range is larger than file
316 raise RangeError('Requested Range Not Satisfiable')
318 raise RangeError('Requested Range Not Satisfiable')
317 else:
319 else:
318 retrlen = lb - fb
320 retrlen = lb - fb
319 fp = RangeableFileObject(fp, (0, retrlen))
321 fp = RangeableFileObject(fp, (0, retrlen))
320 # -- range support modifications end here
322 # -- range support modifications end here
321
323
322 headers = ""
324 headers = ""
323 mtype = mimetypes.guess_type(req.get_full_url())[0]
325 mtype = mimetypes.guess_type(req.get_full_url())[0]
324 if mtype:
326 if mtype:
325 headers += "Content-Type: %s\n" % mtype
327 headers += "Content-Type: %s\n" % mtype
326 if retrlen is not None and retrlen >= 0:
328 if retrlen is not None and retrlen >= 0:
327 headers += "Content-Length: %d\n" % retrlen
329 headers += "Content-Length: %d\n" % retrlen
328 headers = email.message_from_string(headers)
330 headers = email.message_from_string(headers)
329 return addinfourl(fp, headers, req.get_full_url())
331 return addinfourl(fp, headers, req.get_full_url())
330 except ftplib.all_errors, msg:
332 except ftplib.all_errors, msg:
331 raise IOError('ftp error', msg), sys.exc_info()[2]
333 raise IOError('ftp error', msg), sys.exc_info()[2]
332
334
333 def connect_ftp(self, user, passwd, host, port, dirs):
335 def connect_ftp(self, user, passwd, host, port, dirs):
334 fw = ftpwrapper(user, passwd, host, port, dirs)
336 fw = ftpwrapper(user, passwd, host, port, dirs)
335 return fw
337 return fw
336
338
337 class ftpwrapper(urllib.ftpwrapper):
339 class ftpwrapper(urllib.ftpwrapper):
338 # range support note:
340 # range support note:
339 # this ftpwrapper code is copied directly from
341 # this ftpwrapper code is copied directly from
340 # urllib. The only enhancement is to add the rest
342 # urllib. The only enhancement is to add the rest
341 # argument and pass it on to ftp.ntransfercmd
343 # argument and pass it on to ftp.ntransfercmd
342 def retrfile(self, file, type, rest=None):
344 def retrfile(self, file, type, rest=None):
343 self.endtransfer()
345 self.endtransfer()
344 if type in ('d', 'D'):
346 if type in ('d', 'D'):
345 cmd = 'TYPE A'
347 cmd = 'TYPE A'
346 isdir = 1
348 isdir = 1
347 else:
349 else:
348 cmd = 'TYPE ' + type
350 cmd = 'TYPE ' + type
349 isdir = 0
351 isdir = 0
350 try:
352 try:
351 self.ftp.voidcmd(cmd)
353 self.ftp.voidcmd(cmd)
352 except ftplib.all_errors:
354 except ftplib.all_errors:
353 self.init()
355 self.init()
354 self.ftp.voidcmd(cmd)
356 self.ftp.voidcmd(cmd)
355 conn = None
357 conn = None
356 if file and not isdir:
358 if file and not isdir:
357 # Use nlst to see if the file exists at all
359 # Use nlst to see if the file exists at all
358 try:
360 try:
359 self.ftp.nlst(file)
361 self.ftp.nlst(file)
360 except ftplib.error_perm, reason:
362 except ftplib.error_perm, reason:
361 raise IOError('ftp error', reason), sys.exc_info()[2]
363 raise IOError('ftp error', reason), sys.exc_info()[2]
362 # Restore the transfer mode!
364 # Restore the transfer mode!
363 self.ftp.voidcmd(cmd)
365 self.ftp.voidcmd(cmd)
364 # Try to retrieve as a file
366 # Try to retrieve as a file
365 try:
367 try:
366 cmd = 'RETR ' + file
368 cmd = 'RETR ' + file
367 conn = self.ftp.ntransfercmd(cmd, rest)
369 conn = self.ftp.ntransfercmd(cmd, rest)
368 except ftplib.error_perm, reason:
370 except ftplib.error_perm, reason:
369 if str(reason).startswith('501'):
371 if str(reason).startswith('501'):
370 # workaround for REST not supported error
372 # workaround for REST not supported error
371 fp, retrlen = self.retrfile(file, type)
373 fp, retrlen = self.retrfile(file, type)
372 fp = RangeableFileObject(fp, (rest,''))
374 fp = RangeableFileObject(fp, (rest,''))
373 return (fp, retrlen)
375 return (fp, retrlen)
374 elif not str(reason).startswith('550'):
376 elif not str(reason).startswith('550'):
375 raise IOError('ftp error', reason), sys.exc_info()[2]
377 raise IOError('ftp error', reason), sys.exc_info()[2]
376 if not conn:
378 if not conn:
377 # Set transfer mode to ASCII!
379 # Set transfer mode to ASCII!
378 self.ftp.voidcmd('TYPE A')
380 self.ftp.voidcmd('TYPE A')
379 # Try a directory listing
381 # Try a directory listing
380 if file:
382 if file:
381 cmd = 'LIST ' + file
383 cmd = 'LIST ' + file
382 else:
384 else:
383 cmd = 'LIST'
385 cmd = 'LIST'
384 conn = self.ftp.ntransfercmd(cmd)
386 conn = self.ftp.ntransfercmd(cmd)
385 self.busy = 1
387 self.busy = 1
386 # Pass back both a suitably decorated object and a retrieval length
388 # Pass back both a suitably decorated object and a retrieval length
387 return (addclosehook(conn[0].makefile('rb'),
389 return (addclosehook(conn[0].makefile('rb'),
388 self.endtransfer), conn[1])
390 self.endtransfer), conn[1])
389
391
390
392
391 ####################################################################
393 ####################################################################
392 # Range Tuple Functions
394 # Range Tuple Functions
393 # XXX: These range tuple functions might go better in a class.
395 # XXX: These range tuple functions might go better in a class.
394
396
_rangere = None
def range_header_to_tuple(range_header):
    """Get a (firstbyte,lastbyte) tuple from a Range header value.

    Range headers have the form "bytes=<firstbyte>-<lastbyte>". This
    function pulls the firstbyte and lastbyte values and returns
    a (firstbyte,lastbyte) tuple. The header's last byte is inclusive;
    the returned lastbyte is exclusive (header value + 1). If lastbyte
    is not specified in the header value, it is returned as an empty
    string in the tuple.

    Return None if range_header is None, or if it selects the whole
    file ("bytes=0-").
    Return () if range_header does not conform to the range spec
    pattern.
    Raise RangeError if the last byte is smaller than the first byte.
    """
    global _rangere
    if range_header is None:
        return None
    if _rangere is None:
        # compiled lazily so importing this module stays cheap
        import re
        _rangere = re.compile(r'^bytes=(\d{1,})-(\d*)')
    match = _rangere.match(range_header)
    if not match:
        return ()
    tup = range_tuple_normalize(match.group(1, 2))
    # Compare against '' instead of testing truthiness: a last byte of
    # 0 ("bytes=0-0") is a real bound and must become exclusive too.
    if tup and tup[1] != '':
        tup = (tup[0], tup[1] + 1)
    return tup
423
425
def range_tuple_to_header(range_tup):
    """Build a Range header value from a range tuple.

    Return a string of the form "bytes=<firstbyte>-<lastbyte>", or None
    when no range is needed (range_tup is None or normalizes to the
    whole file). The tuple's end is exclusive, so a non-empty, non-zero
    end is decremented to give the header's inclusive last byte.
    """
    if range_tup is None:
        return None
    normalized = range_tuple_normalize(range_tup)
    if not normalized:
        return None
    first, last = normalized
    if last:
        last = last - 1
    return 'bytes=%s-%s' % (first, last)
436
438
def range_tuple_normalize(range_tup):
    """Normalize a (first_byte,last_byte) range tuple.

    Return a tuple whose first element is guaranteed to be an int
    and whose second element will be '' (meaning: the last byte) or
    an int. Return None if range_tup is None or if the normalized
    tuple == (0,''), as that is equivalent to retrieving the entire
    file.

    A missing first byte (None or '') is treated as 0; a missing last
    byte (None, '' or a one-element sequence) is treated as ''.

    Raise RangeError if the last byte is smaller than the first byte.
    """
    if range_tup is None:
        return None
    # handle first byte
    fb = range_tup[0]
    if fb in (None, ''):
        fb = 0
    else:
        fb = int(fb)
    # handle last byte
    try:
        lb = range_tup[1]
    except IndexError:
        lb = ''
    else:
        if lb is None:
            lb = ''
        elif lb != '':
            lb = int(lb)
    # check if range is over the entire file
    if (fb, lb) == (0, ''):
        return None
    # check that the range is valid; an open-ended range ('') is always
    # valid and must not be compared with an int
    if lb != '' and lb < fb:
        raise RangeError('Invalid byte range: %s-%s' % (fb, lb))
    return (fb, lb)
General Comments 0
You need to be logged in to leave comments. Login now