-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathgodownload.go
413 lines (353 loc) · 8.68 KB
/
godownload.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
package godownload
import (
"archive/zip"
"bytes"
"errors"
"fmt"
"io"
"io/ioutil"
"log"
"net/http"
"net/url"
"os"
"path/filepath"
"runtime"
"strings"
"sync"
"time"
"gopkg.in/yaml.v2"
)
// GoDownload holds the global configuration used when downloading items.
// When Download is called with nil options, the Overwrite, Alwaysnew,
// UserAgent, Retry, Auth and Archive fields are copied into a fresh Options.
type GoDownload struct {
// Overwrite allows a download to replace an existing file with the same name.
Overwrite bool
// Alwaysnew makes the download pick a new numbered name (for example
// "file_1") when a file with the same name already exists.
Alwaysnew bool
// UserAgent is sent as the User-Agent header of the HTTP request when it is
// not empty.
UserAgent string
// Retry is the number of additional attempts made after a failed request.
Retry int
// Auth holds the credentials used for HTTP basic authentication, in the
// format username:password.
Auth string
// Archive selects the archive format for the downloaded file. "zip" is the
// only value the code shown here acts on.
Archive string
// Configpath is the path to a YAML config file. When it is not empty,
// Download loads it and uses the loaded settings instead of this value.
Configpath string
// Outdir is the directory for downloaded files. It is created when missing.
Outdir string
}
// Options defines the settings applied to a single download.
type Options struct {
// URL is the address to download. It is read only by DownloadMany;
// Download takes the address as its own argument and ignores this field.
URL string
// Page downloads the web page body as text instead of downloading a file.
Page bool
// Outpath sets the path of the downloaded file.
Outpath string
// Overwrite allows a download to replace an existing file with the same name.
Overwrite bool
// Alwaysnew makes the download pick a new numbered name (for example
// "file_1") when a file with the same name already exists.
Alwaysnew bool
// UserAgent is sent as the User-Agent header of the HTTP request when it is
// not empty.
UserAgent string
// Retry is the number of additional attempts made after a failed request.
Retry int
// Authentication before downloading. Auth is in the format username:password.
Auth string
// Archive selects the archive format for the downloaded file. "zip" is the
// only value the code shown here acts on.
Archive string
// TimeLimit is not implemented yet (TODO): nothing in the code shown here
// reads it.
TimeLimit time.Time
}
// Download fetches the resource at path and stores it on disk.
//
// When gd.Configpath is set, the settings are first reloaded from that YAML
// file and used in place of gd for this call. When opt is nil, the options
// are seeded from the global settings (Overwrite, Alwaysnew, UserAgent,
// Retry, Auth, Archive).
//
// With opt.Page set, the page body is fetched as text and written to
// opt.Outpath; otherwise the regular file download path is taken.
//
// Any failure is reported through log.Fatal, which terminates the process.
func (gd *GoDownload) Download(path string, opt *Options) {
	if gd.Configpath != "" {
		loaded, err := loadConfig(gd.Configpath)
		if err != nil {
			log.Fatal(err)
		}
		// Only the local receiver is rebound; the caller's value is untouched.
		gd = loaded
	}
	if opt == nil {
		opt = &Options{
			Overwrite: gd.Overwrite,
			Alwaysnew: gd.Alwaysnew,
			UserAgent: gd.UserAgent,
			Retry:     gd.Retry,
			Auth:      gd.Auth,
			Archive:   gd.Archive,
		}
	}
	if !opt.Page {
		gd.fileDownload(path, opt)
		return
	}

	// Start the clock before the request so the reported duration covers the
	// transfer itself and not just the final write to disk.
	starttime := time.Now()
	result, err := gd.pageDownload(path, opt)
	if err != nil {
		log.Fatal(err)
	}
	createTargetFile(opt.Outpath)
	if err := ioutil.WriteFile(opt.Outpath, []byte(result), 0777); err != nil {
		log.Fatal(err)
	}
	log.Printf("Finish to download from %s in %s.", path,
		time.Since(starttime))
}
// DownloadMany downloads every item concurrently, one goroutine per item,
// and returns once all of them have finished. Each item is downloaded from
// its own URL field using the item itself as the options.
func (gd *GoDownload) DownloadMany(items []*Options) {
	runtime.GOMAXPROCS(runtime.NumCPU())

	var pending sync.WaitGroup
	pending.Add(len(items))
	for i := range items {
		go func(job *Options) {
			defer pending.Done()
			gd.Download(job.URL, job)
		}(items[i])
	}
	pending.Wait()
}
// DownloadManySimple behaves like DownloadMany but takes plain URLs. Each URL
// becomes an Options value whose Outpath is the file name taken from the URL.
func (gd *GoDownload) DownloadManySimple(items []string) {
	jobs := make([]*Options, len(items))
	for i, link := range items {
		jobs[i] = &Options{URL: link, Outpath: getFileNameFromURL(link)}
	}
	gd.DownloadMany(jobs)
}
// FromFile reads links from the file at path and downloads all of them via
// DownloadManySimple.
// NOTE(review): fromFile is defined outside this section; presumably it
// returns one URL per entry of the file - confirm.
func (gd *GoDownload) FromFile(path string) {
	gd.DownloadManySimple(fromFile(path))
}
// fileDownload downloads the resource at path into a file.
//
// The target path comes from outpathResolver; when gd.Outdir is set the
// directory is created first and opt.Outpath is rewritten to live inside it
// (note that this mutates the caller's opt). The request honours opt.Retry,
// opt.UserAgent and opt.Auth. When opt.Archive is "zip" the downloaded file
// is packed into "<outpath>.zip" and the unpacked file is removed.
//
// A nil opt is treated as empty options. Download failures, including a
// non-2xx HTTP status, are fatal (log.Fatal); archive failures are only
// logged and leave the downloaded file in place.
func (gd *GoDownload) fileDownload(path string, opt *Options) {
	if opt == nil {
		// Resolved exactly like an options value with no Outpath: the file
		// name is taken from the URL.
		opt = &Options{}
	}
	if gd.Outdir != "" {
		createDir(gd.Outdir)
		if opt.Outpath != "" {
			opt.Outpath = filepath.Join(gd.Outdir, opt.Outpath)
		} else {
			opt.Outpath = gd.Outdir
		}
	}

	outpath := outpathResolver(path, opt)
	fmt.Println(outpath)
	// Last chance to check that outpath is not empty.
	if outpath == "" {
		log.Fatal("Something went wrong and outpath is empty")
	}
	// Creating the file up front surfaces an unwritable target before any
	// network traffic happens.
	createTargetFile(outpath)

	log.Printf("Start to download from %s", path)
	starttime := time.Now()
	resp, err := downloadGeneral(opt.Retry, path, opt.UserAgent, opt.Auth)
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()
	// The HTTP client does not turn error statuses into errors; without this
	// check the server's error page would be saved as the downloaded file.
	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		log.Fatalf("Download from %s failed: unexpected HTTP status %s", path, resp.Status)
	}

	transferred := copyToFile(resp.Body, outpath)
	log.Printf("Finish to download from %s in %s. Transferred bytes: %d", path,
		time.Since(starttime), transferred)

	if opt.Archive == "zip" {
		if err := zipPack(outpath); err != nil {
			log.Printf("Error to create zip archive: %v", err)
			return
		}
		if err := os.Remove(outpath); err != nil {
			log.Printf("Error to remove %s after archiving: %v", outpath, err)
		}
	}
}
// pageDownload fetches the page at path and returns its body as a string.
//
// opt.Outpath must be set because the caller writes the page there; an error
// is returned when it is missing. The request goes through downloadGeneral so
// that opt.Retry, opt.UserAgent and opt.Auth are honoured the same way as for
// file downloads. A non-2xx HTTP status is reported as an error instead of
// returning the server's error page.
func (gd *GoDownload) pageDownload(path string, opt *Options) (string, error) {
	if opt == nil || opt.Outpath == "" {
		return "", errors.New("outpath not specified")
	}
	response, err := downloadGeneral(opt.Retry, path, opt.UserAgent, opt.Auth)
	if err != nil {
		return "", err
	}
	defer response.Body.Close()
	if response.StatusCode < 200 || response.StatusCode > 299 {
		return "", fmt.Errorf("download from %s: unexpected HTTP status %s", path, response.Status)
	}
	contents, err := ioutil.ReadAll(response.Body)
	if err != nil {
		return "", err
	}
	return string(contents), nil
}
// checkExist reports whether path exists. Only a "does not exist" result from
// os.Stat counts as missing; any other Stat outcome, including other errors,
// is reported as existing.
func checkExist(path string) bool {
	_, err := os.Stat(path)
	return !os.IsNotExist(err)
}
// createTargetFile creates (or truncates) the file at path and closes it
// again right away. It panics when the file cannot be created.
func createTargetFile(path string) {
	file, err := os.Create(path)
	if err != nil {
		panic(err)
	}
	file.Close()
}
// download performs a single GET request against url and returns the raw
// response; the caller is responsible for closing the response body.
//
// useragent, when not empty, is sent as the User-Agent header. auth, when not
// empty, must have the form username:password and is sent as HTTP basic
// authentication. Only the first colon separates the two parts, so passwords
// may themselves contain colons.
//
// The HTTP status code is not inspected here: a non-2xx response is returned
// without an error.
func download(url, useragent, auth string) (*http.Response, error) {
	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		return nil, err
	}
	if auth != "" {
		// Split on the first colon only; everything after it is the password.
		cred := strings.SplitN(auth, ":", 2)
		if len(cred) != 2 {
			return nil, errors.New("authentication must be in the format username:password")
		}
		req.SetBasicAuth(cred[0], cred[1])
	}
	if useragent != "" {
		req.Header.Set("User-Agent", useragent)
	}

	client := http.Client{}
	resp, err := client.Do(req)
	if err != nil {
		return nil, err
	}
	return resp, nil
}
// downloadGeneral calls download and, on failure, retries up to retry more
// times with a 100ms pause between attempts. It returns the first successful
// response, or the error of the last attempt once the retries are used up.
//
// A retry value of zero or less means a single attempt with no retries; a
// negative value previously never satisfied the stop condition and retried
// forever on a persistent error.
func downloadGeneral(retry int, url, useragent, auth string) (*http.Response, error) {
	for attempt := 0; ; attempt++ {
		res, err := download(url, useragent, auth)
		if err == nil {
			return res, nil
		}
		if retry <= 0 || attempt >= retry {
			return nil, err
		}
		fmt.Printf("Tried again to download from %s\n", url)
		time.Sleep(100 * time.Millisecond)
	}
}
// copyToFile reads resp completely into memory, writes the data to outpath
// and returns the number of bytes written. A read error panics; a write error
// terminates the process through log.Fatal.
func copyToFile(resp io.Reader, outpath string) int {
	var payload bytes.Buffer
	if _, err := io.Copy(&payload, resp); err != nil {
		panic(err)
	}
	if err := ioutil.WriteFile(outpath, payload.Bytes(), 0777); err != nil {
		log.Fatal(err)
	}
	return payload.Len()
}
// getFileNameFromURL returns the last slash-separated segment of the URL's
// path: everything after the final "/", or the whole path when it has no
// slash. The result is empty when the path is empty or ends in "/". It panics
// when urlitem cannot be parsed as a URL.
func getFileNameFromURL(urlitem string) string {
	parsed, err := url.Parse(urlitem)
	if err != nil {
		panic(err)
	}
	segment := parsed.Path
	return segment[strings.LastIndex(segment, "/")+1:]
}
// outpathResolver decides where the download of path is stored.
//
//   - item is nil or item.Outpath is empty: the file name is taken from the
//     URL; the process is terminated (log.Fatalf) if that file already exists.
//   - item.Outpath is set and does not exist yet: it is used as is.
//   - item.Outpath exists and item.Alwaysnew is set: a new numbered name is
//     derived from it.
//   - item.Outpath exists and item.Overwrite is set: it is returned unchanged.
//   - item.Outpath exists and neither flag is set: the process is terminated.
func outpathResolver(path string, item *Options) (outpath string) {
	if item == nil || item.Outpath == "" {
		outpath = getFileNameFromURL(path)
		if checkExist(outpath) {
			log.Fatalf("File %s already exist. You can set Options.Overwrite = true for overwrite this file", outpath)
		}
		return outpath
	}

	outpath = item.Outpath
	if !checkExist(outpath) {
		// The requested target is free: keep it instead of silently falling
		// back to the URL's file name.
		return outpath
	}

	if item.Alwaysnew {
		ext := filepath.Ext(outpath)
		if ext == "" {
			// No extension: the existing path is used as a directory and the
			// URL's file name is placed inside it.
			name := getFileNameFromURL(path)
			outpath = outpath + "/" + name
		}
		// NOTE(review): fileCount is defined outside this section; presumably
		// it counts the existing duplicates of outpath - confirm.
		dupcount := fileCount(outpath)
		// NOTE(review): ext was computed before the URL's file name was
		// appended, so in that case the "_N" suffix lands after the file's
		// own extension ("dir/file.zip_1") - confirm whether that is intended.
		newname := outpath[0:len(outpath)-len(ext)] +
			fmt.Sprintf("_%d", dupcount+1)
		if len(ext) > 0 {
			newname += ext
		}
		if filepath.Dir(outpath) == "." {
			outpath = filepath.Dir(outpath) + "/" + newname
		} else {
			outpath = newname
		}
	} else if !item.Overwrite {
		log.Fatalf("File %s already exist. You can set Options.Overwrite = true for overwrite this file", item.Outpath)
	}
	return outpath
}
// zipPack packs the file at path into a new deflate-compressed archive named
// path + ".zip" and prints the archive name to stdout on success.
//
// The source is opened and inspected before the archive is created, so a
// missing or unreadable source does not leave an empty archive behind. Errors
// from finishing the zip stream and from closing the archive file are
// returned, because an archive that failed to close is not usable.
func zipPack(path string) (err error) {
	src, err := os.Open(path)
	if err != nil {
		return err
	}
	defer src.Close()

	info, err := src.Stat()
	if err != nil {
		return err
	}
	header, err := zip.FileInfoHeader(info)
	if err != nil {
		return err
	}
	header.Method = zip.Deflate

	archive := path + ".zip"
	dst, err := os.Create(archive)
	if err != nil {
		return err
	}
	defer func() {
		// Report a failed close unless an earlier error is already returned.
		if cerr := dst.Close(); err == nil {
			err = cerr
		}
	}()

	zw := zip.NewWriter(dst)
	entry, err := zw.CreateHeader(header)
	if err != nil {
		return err
	}
	if _, err = io.Copy(entry, src); err != nil {
		return err
	}
	// Close writes the central directory; the archive is invalid without it.
	if err = zw.Close(); err != nil {
		return err
	}
	fmt.Printf("Output as %s\n", archive)
	return nil
}
// loadConfig reads the YAML file at path and decodes it into a new
// GoDownload value. It returns the read or decode error on failure.
func loadConfig(path string) (*GoDownload, error) {
	raw, err := ioutil.ReadFile(path)
	if err != nil {
		return nil, err
	}
	cfg := new(GoDownload)
	if err := yaml.Unmarshal(raw, cfg); err != nil {
		return nil, err
	}
	return cfg, nil
}
// createDir makes sure the download directory dirname exists, creating it
// together with any missing parent directories. An existing directory is left
// untouched, so concurrent downloads sharing one directory can all call this
// safely. Any failure terminates the process through log.Fatal.
func createDir(dirname string) {
	if err := os.MkdirAll(dirname, 0777); err != nil {
		log.Fatal(err)
	}
}