16
16
17
17
from ftplib import FTP
18
18
from mimetypes import MimeTypes
19
- import os
20
- import tempfile
19
+ from pathlib import Path
20
+ from pathlib import PurePosixPath
21
21
from urllib .parse import urlparse
22
+ from kiss_headers import parse_it
22
23
23
24
import requests
25
+ import tempfile
24
26
25
27
26
28
class Response :
@@ -41,14 +43,35 @@ def __init__(self, location, content_type, size, url):
41
43
def _safe_filename(candidate):
    """
    Return the final path component of `candidate`, or None if it is unusable.

    `candidate` may come from a server-controlled header, so any directory
    part (POSIX or Windows style) is discarded and the special names
    "", "." and ".." are rejected. This keeps the result from escaping the
    directory it is later joined to.
    """
    if not isinstance(candidate, str):
        return None
    name = PurePosixPath(candidate.replace("\\", "/")).name
    if name in ("", ".", ".."):
        return None
    return name


def fetch_http(url, location):
    """
    Return a `Response` object built from fetching the content at a HTTP/HTTPS based `url` URL string
    Saving the content in a file at `location`
    If `location` is an existing directory - try to deduce the filename from the
    Content-Disposition header ("filename*" first, then "filename") and finally
    from the last segment of the URL path
    If deduction failed, save the content in a temporary file created at a `location`

    `location` may be a `pathlib.Path` or a plain string path.
    The HTTP status code is not checked: whatever body the server returns is saved.
    """
    # Accept both str and Path; Path(Path) is a cheap no-op copy.
    location = Path(location)
    r = requests.get(url)

    if location.is_dir():
        content_disposition = parse_it(r.headers).get("content-disposition") or {}
        # NOTE(review): the "filename*" value is used as returned by the header
        # parser; confirm whether RFC 5987 decoding is already applied upstream.
        candidates = (
            content_disposition.get("filename*"),
            content_disposition.get("filename"),
            urlparse(url).path,
        )
        filename = None
        for candidate in candidates:
            filename = _safe_filename(candidate)
            if filename is not None:
                break

        if filename is not None:
            location = location / filename
        else:
            # Reserve a unique name inside the directory and close the handle
            # right away; the file is reopened for writing below.
            with tempfile.NamedTemporaryFile(dir=location, delete=False) as temp:
                location = Path(temp.name)

    with open(location, "wb") as f:
        f.write(r.content)

    content_type = r.headers.get("content-type")
    size = r.headers.get("content-length")
    size = int(size) if size else None

    resp = Response(location=location, content_type=content_type, size=size, url=url)
    return resp
@@ -59,49 +82,57 @@ def fetch_http(url, location):
59
82
def fetch_ftp(url, location):
    """
    Return a `Response` object built from fetching the content at a FTP based `url` URL string
    Saving the content in a file at `location`
    If `location` is an existing directory - deduce the filename from the URL

    `location` may be a `pathlib.Path` or a plain string path.
    The login is anonymous (no credentials are passed to `login()`).
    The content type is guessed from the filename extension and is None when
    the extension is unknown.
    """
    url_parts = urlparse(url)

    netloc = url_parts.netloc
    path = PurePosixPath(url_parts.path)
    directory = path.parent
    filename = path.name

    # Accept both str and Path; Path(Path) is a cheap no-op copy.
    location = Path(location)
    if location.is_dir():
        location = location / filename

    # guess_type() always returns a (type, encoding) tuple; the type is None
    # when it cannot be guessed, so no extra truthiness check is needed.
    content_type = MimeTypes().guess_type(filename)[0]

    # The context manager closes the connection even when a command fails.
    with FTP(netloc) as ftp:
        ftp.login()
        size = ftp.size(str(path))
        ftp.cwd(str(directory))
        with open(location, "wb") as f:
            ftp.retrbinary("RETR {}".format(filename), f.write)

    resp = Response(location=location, content_type=content_type, size=size, url=url)
    return resp
89
117
90
118
91
def fetch(url, location=None):
    """
    Return a `Response` object built from fetching the content at the `url` URL string and store content at a provided `location`
    If `location` is None, save the content in a newly created temporary file
    If `location` is an existing directory - try to deduce the filename

    `location` may be a `pathlib.Path` or a plain string path.
    Raise an Exception if the URL scheme is not one of ftp, http or https.
    """
    fetchers = {"ftp": fetch_ftp, "http": fetch_http, "https": fetch_http}

    # Resolve the fetcher first so that an unsupported scheme does not leave
    # an orphan temporary file behind.
    fetcher = fetchers.get(urlparse(url).scheme)
    if fetcher is None:
        raise Exception(" Not a supported/known scheme.")

    if location is None:
        # Only the unique name is needed; close the handle immediately and let
        # the fetcher reopen the file for writing.
        with tempfile.NamedTemporaryFile(delete=False) as temp:
            location = Path(temp.name)
    else:
        location = Path(location)

    return fetcher(url, location)
0 commit comments