Skip to content

Commit 4b35ef0

Browse files
authored
Merge pull request #14 from pdftables/html-download
Add support for HTML download
2 parents 25ead8c + afa4a3f commit 4b35ef0

File tree

2 files changed

+14
-1
lines changed

2 files changed

+14
-1
lines changed

pdftables_api/pdftables_api.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121

2222
FORMAT_CSV = 'csv'
23+
FORMAT_HTML = 'html'
2324
FORMAT_XLSX_MULTIPLE = 'xlsx-multiple'
2425
FORMAT_XLSX_SINGLE = 'xlsx-single'
2526
FORMAT_XLSX = FORMAT_XLSX_MULTIPLE
@@ -29,17 +30,19 @@
2930
_DEFAULT_TIMEOUT = (10, 300) # seconds (connect and read)
3031
_FORMATS_EXT = {
3132
FORMAT_CSV: '.csv',
33+
FORMAT_HTML: '.html',
3234
FORMAT_XLSX: '.xlsx',
3335
FORMAT_XLSX_MULTIPLE: '.xlsx',
3436
FORMAT_XLSX_SINGLE: '.xlsx',
3537
FORMAT_XML: '.xml',
3638
}
3739
_EXT_FORMATS = {
3840
'.csv': FORMAT_CSV,
41+
'.html': FORMAT_HTML,
3942
'.xlsx': FORMAT_XLSX,
4043
'.xml': FORMAT_XML,
4144
}
42-
_STRING_FORMATS = {FORMAT_CSV, FORMAT_XML}
45+
_STRING_FORMATS = {FORMAT_CSV, FORMAT_HTML, FORMAT_XML}
4346

4447
class Client(object):
4548
def __init__(self, api_key, api_url=_API_URL, timeout=_DEFAULT_TIMEOUT):
@@ -87,6 +90,14 @@ def csv(self, pdf_path, csv_path=None):
8790
"""
8891
return self.convert(pdf_path, csv_path, out_format=FORMAT_CSV)
8992

93+
def html(self, pdf_path, html_path=None):
94+
"""
95+
Convenience method to convert PDF to HTML.
96+
97+
If html_path is None, returns the output as a string.
98+
"""
99+
return self.convert(pdf_path, html_path, out_format=FORMAT_HTML)
100+
90101
def convert(self, pdf_path, out_path=None, out_format=None, query_params=None, **requests_params):
91102
"""
92103
Convert PDF given by `pdf_path` into `out_format` at `out_path`.

test/test_pdftables_api.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ def test_unmodified(self):
3939
Client.ensure_format_ext('foo.xlsx', 'xlsx-multiple'))
4040
self.assertEqual(('foo.xml', 'xml'),
4141
Client.ensure_format_ext('foo.xml', 'xml'))
42+
self.assertEqual(('foo.html', 'html'),
43+
Client.ensure_format_ext('foo.html', 'html'))
4244

4345
def test_missing_format(self):
4446
self.assertEqual(('foo.xlsx', 'xlsx-multiple'),

0 commit comments

Comments
 (0)