|
20 | 20 |
|
21 | 21 |
|
22 | 22 | FORMAT_CSV = 'csv' |
| 23 | +FORMAT_HTML = 'html' |
23 | 24 | FORMAT_XLSX_MULTIPLE = 'xlsx-multiple' |
24 | 25 | FORMAT_XLSX_SINGLE = 'xlsx-single' |
25 | 26 | FORMAT_XLSX = FORMAT_XLSX_MULTIPLE |
|
29 | 30 | _DEFAULT_TIMEOUT = (10, 300) # seconds (connect and read) |
30 | 31 | _FORMATS_EXT = { |
31 | 32 | FORMAT_CSV: '.csv', |
| 33 | + FORMAT_HTML: '.html', |
32 | 34 | FORMAT_XLSX: '.xlsx', |
33 | 35 | FORMAT_XLSX_MULTIPLE: '.xlsx', |
34 | 36 | FORMAT_XLSX_SINGLE: '.xlsx', |
35 | 37 | FORMAT_XML: '.xml', |
36 | 38 | } |
37 | 39 | _EXT_FORMATS = { |
38 | 40 | '.csv': FORMAT_CSV, |
| 41 | + '.html': FORMAT_HTML, |
39 | 42 | '.xlsx': FORMAT_XLSX, |
40 | 43 | '.xml': FORMAT_XML, |
41 | 44 | } |
42 | | -_STRING_FORMATS = {FORMAT_CSV, FORMAT_XML} |
| 45 | +_STRING_FORMATS = {FORMAT_CSV, FORMAT_HTML, FORMAT_XML} |
43 | 46 |
|
44 | 47 | class Client(object): |
45 | 48 | def __init__(self, api_key, api_url=_API_URL, timeout=_DEFAULT_TIMEOUT): |
@@ -87,6 +90,14 @@ def csv(self, pdf_path, csv_path=None): |
87 | 90 | """ |
88 | 91 | return self.convert(pdf_path, csv_path, out_format=FORMAT_CSV) |
89 | 92 |
|
| 93 | + def html(self, pdf_path, html_path=None): |
| 94 | + """ |
| 95 | + Convenience method to convert PDF to HTML. |
| 96 | +
|
| 97 | + If html_path is None, returns the output as a string. |
| 98 | + """ |
| 99 | + return self.convert(pdf_path, html_path, out_format=FORMAT_HTML) |
| 100 | + |
90 | 101 | def convert(self, pdf_path, out_path=None, out_format=None, query_params=None, **requests_params): |
91 | 102 | """ |
92 | 103 | Convert PDF given by `pdf_path` into `format` at `out_path`. |
|
0 commit comments