16
16
17
17
from ftplib import FTP
18
18
from mimetypes import MimeTypes
19
- import os
20
- import tempfile
21
19
from urllib .parse import urlparse
20
+ from kiss_headers import parse_it
21
+ from pathlib import Path
22
22
23
23
import requests
24
+ import tempfile
24
25
25
26
26
27
class Response :
@@ -41,14 +42,33 @@ def __init__(self, location, content_type, size, url):
41
42
def fetch_http(url, location):
    """
    Return a `Response` object built from fetching the content at a HTTP/HTTPS
    based `url` URL string, saving the content in a file at `location`.

    `location` may be a string or a `pathlib.Path`.

    If `location` is an existing directory, the file name is deduced, in order
    of preference, from the Content-Disposition `filename*` parameter, the
    Content-Disposition `filename` parameter, and the last segment of the URL
    path. If none of these yields a usable name, the content is saved in a new
    temporary file created inside `location`.

    The `Response` carries `location` unchanged when it designates a file, and
    the resolved file path (same kind of object as the argument: `Path` for a
    `Path`, string otherwise) when it designates a directory.
    """
    r = requests.get(url)

    # `Path.is_dir(location)` raises AttributeError on a plain string, so
    # normalise the argument before asking whether it is a directory.
    target = Path(location)
    saved_at = location

    if target.is_dir():
        content_disposition = parse_it(r.headers).get("content-disposition") or {}
        candidates = [
            content_disposition.get("filename*"),
            content_disposition.get("filename"),
            urlparse(url).path,
        ]

        filename = None
        for candidate in candidates:
            if not candidate or not isinstance(candidate, str):
                continue
            # Header values and URL paths are controlled by the remote side:
            # keep only the final path component so that names such as
            # "../../x" or "/etc/x" cannot escape `location`.
            name = Path(candidate).name
            if name and name != "..":
                filename = name
                break

        if filename:
            target = target / filename
        else:
            # Reserve a unique name inside the directory and release the
            # handle right away; the file is reopened for writing below.
            with tempfile.NamedTemporaryFile(dir=target, delete=False) as scratch:
                target = Path(scratch.name)

        saved_at = target if isinstance(location, Path) else str(target)

    with open(target, "wb") as f:
        f.write(r.content)

    content_type = r.headers.get("content-type")
    size = r.headers.get("content-length")
    size = int(size) if size else None

    resp = Response(location=saved_at, content_type=content_type, size=size, url=url)
    return resp
@@ -59,49 +79,57 @@ def fetch_http(url, location):
59
79
def fetch_ftp(url, location):
    """
    Return a `Response` object built from fetching the content at a FTP based
    `url` URL string, saving the content in a file at `location`.

    `location` may be a string or a `pathlib.Path`. If it is an existing
    directory, the file name is deduced from the last segment of the URL path
    and the content is saved under that name inside the directory.

    The `Response` carries `location` unchanged when it designates a file, and
    the resolved file path (same kind of object as the argument: `Path` for a
    `Path`, string otherwise) when it designates a directory.
    """
    # Remote FTP paths always use "/" separators, whatever the local OS is;
    # an OS-native `Path` would turn them into backslashes on Windows.
    from pathlib import PurePosixPath

    url_parts = urlparse(url)

    netloc = url_parts.netloc
    remote_path = PurePosixPath(url_parts.path)
    directory = remote_path.parent
    filename = remote_path.name

    # `Path.is_dir(location)` raises AttributeError on a plain string, so
    # normalise the argument before asking whether it is a directory.
    target = Path(location)
    saved_at = location
    if target.is_dir():
        target = target / filename
        saved_at = target if isinstance(location, Path) else str(target)

    # `guess_type` always returns a (type, encoding) pair; the type itself is
    # None when the extension is unknown.
    content_type = MimeTypes().guess_type(filename)[0]

    ftp = FTP(netloc)
    try:
        ftp.login()
        size = ftp.size(str(remote_path))
        ftp.cwd(str(directory))
        with open(target, "wb") as f:
            ftp.retrbinary("RETR {}".format(filename), f.write)
    finally:
        # Release the connection even when login or the transfer fails.
        ftp.close()

    resp = Response(location=saved_at, content_type=content_type, size=size, url=url)
    return resp
89
114
90
115
91
def fetch(url, location=None):
    """
    Return a `Response` object built from fetching the content at the `url`
    URL string and storing the content at the provided `location`.

    If `location` is None, the content is saved in a newly created temporary
    file. If `location` is an existing directory, the scheme-specific fetcher
    tries to deduce the file name.

    Supported schemes are ftp, http and https. Raise an Exception for any
    other scheme.
    """
    scheme = urlparse(url).scheme

    # Validate the scheme before touching the filesystem so that a rejected
    # URL does not leave a stray temporary file behind.
    if scheme not in ("ftp", "http", "https"):
        raise Exception("Not a supported/known scheme.")

    if location is None:
        # Only the name is needed: close the handle immediately so that the
        # fetcher can reopen the file for writing (required on Windows) and
        # no file descriptor is leaked. `delete=False` keeps the file on disk.
        with tempfile.NamedTemporaryFile(delete=False) as temp:
            location = temp.name

    fetcher = fetch_ftp if scheme == "ftp" else fetch_http
    return fetcher(url, location)
0 commit comments