Skip to content

Commit b1e1d33

Browse files
committed
Add support for downloading HTML
We're adding HTML as an output format to the site.
1 parent da9bfb0 commit b1e1d33

File tree

1 file changed

+12
-1
lines changed

1 file changed

+12
-1
lines changed

pdftables_api/pdftables_api.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121

2222
FORMAT_CSV = 'csv'
23+
FORMAT_HTML = 'html'
2324
FORMAT_XLSX_MULTIPLE = 'xlsx-multiple'
2425
FORMAT_XLSX_SINGLE = 'xlsx-single'
2526
FORMAT_XLSX = FORMAT_XLSX_MULTIPLE
@@ -29,17 +30,19 @@
2930
_DEFAULT_TIMEOUT = (10, 300) # seconds (connect and read)
3031
_FORMATS_EXT = {
3132
FORMAT_CSV: '.csv',
33+
FORMAT_HTML: '.html',
3234
FORMAT_XLSX: '.xlsx',
3335
FORMAT_XLSX_MULTIPLE: '.xlsx',
3436
FORMAT_XLSX_SINGLE: '.xlsx',
3537
FORMAT_XML: '.xml',
3638
}
3739
_EXT_FORMATS = {
3840
'.csv': FORMAT_CSV,
41+
'.html': FORMAT_HTML,
3942
'.xlsx': FORMAT_XLSX,
4043
'.xml': FORMAT_XML,
4144
}
42-
_STRING_FORMATS = {FORMAT_CSV, FORMAT_XML}
45+
_STRING_FORMATS = {FORMAT_CSV, FORMAT_HTML, FORMAT_XML}
4346

4447
class Client(object):
4548
def __init__(self, api_key, api_url=_API_URL, timeout=_DEFAULT_TIMEOUT):
@@ -87,6 +90,14 @@ def csv(self, pdf_path, csv_path=None):
8790
"""
8891
return self.convert(pdf_path, csv_path, out_format=FORMAT_CSV)
8992

93+
def html(self, pdf_path, html_path=None):
94+
"""
95+
Convenience method to convert PDF to HTML.
96+
97+
If html_path is None, returns the output as a string.
98+
"""
99+
return self.convert(pdf_path, html_path, out_format=FORMAT_HTML)
100+
90101
def convert(self, pdf_path, out_path=None, out_format=None, query_params=None, **requests_params):
91102
"""
92103
Convert PDF given by `pdf_path` into `out_format` at `out_path`.

0 commit comments

Comments
 (0)