@@ -93,8 +93,11 @@ def wrapper(*args, **kwargs):
93
93
@pyarrow_cpu_count_equal_numba_num_treads
94
94
def do_read_csv (filepath_or_buffer , sep , delimiter , names , usecols , dtype , skiprows , parse_dates ):
95
95
96
+ if delimiter is None :
97
+ delimiter = sep
98
+
96
99
pa_options = get_pyarrow_read_csv_options (
97
- sep , delimiter , names , usecols , dtype , skiprows , parse_dates )
100
+ delimiter , names , usecols , dtype , skiprows , parse_dates )
98
101
99
102
table = csv .read_csv (
100
103
filepath_or_buffer ,
@@ -107,11 +110,11 @@ def do_read_csv(filepath_or_buffer, sep, delimiter, names, usecols, dtype, skipr
107
110
108
111
109
112
def csv_reader_infer_nb_arrow_type (
110
- filepath_or_buffer , sep , delimiter , names , usecols , dtype , skiprows , parse_dates
113
+ filepath_or_buffer , delimiter = ',' , names = None , usecols = None , dtype = None , skiprows = None , parse_dates = False
111
114
):
112
115
113
116
read_opts , parse_opts , convert_opts = get_pyarrow_read_csv_options (
114
- sep , delimiter , names , usecols , dtype , skiprows , parse_dates )
117
+ delimiter , names , usecols , dtype , skiprows , parse_dates )
115
118
csv_reader = csv .open_csv (filepath_or_buffer ,
116
119
read_options = read_opts ,
117
120
parse_options = parse_opts ,
@@ -138,13 +141,13 @@ def csv_reader_infer_nb_arrow_type(
138
141
139
142
140
143
def csv_reader_infer_nb_pandas_type (
141
- filepath_or_buffer , sep , delimiter , names , usecols , dtype , skiprows , parse_dates
144
+ filepath_or_buffer , delimiter = ',' , names = None , usecols = None , dtype = None , skiprows = None , parse_dates = False
142
145
):
143
146
144
147
# infer column types from the first block (similarly as Arrow does this)
145
148
# TO-DO: tune the block size or allow user configure it via env var
146
149
rows_to_read = 1000
147
- df = pd .read_csv (filepath_or_buffer , sep = sep , delimiter = delimiter , names = names ,
150
+ df = pd .read_csv (filepath_or_buffer , delimiter = delimiter , names = names ,
148
151
usecols = usecols , dtype = dtype , skiprows = skiprows , nrows = rows_to_read ,
149
152
parse_dates = parse_dates )
150
153
@@ -185,10 +188,7 @@ def csv_reader_get_pyarrow_read_options(names, skiprows):
185
188
return read_options
186
189
187
190
188
- def csv_reader_get_pyarrow_parse_options (delimiter , sep ):
189
-
190
- if delimiter is None :
191
- delimiter = sep
191
+ def csv_reader_get_pyarrow_parse_options (delimiter ):
192
192
193
193
parse_options = csv .ParseOptions (
194
194
delimiter = delimiter ,
@@ -264,11 +264,11 @@ def csv_reader_get_pyarrow_convert_options(names, usecols, dtype, parse_dates):
264
264
return convert_options
265
265
266
266
267
- def get_pyarrow_read_csv_options (sep , delimiter , names , usecols , dtype , skiprows , parse_dates ):
267
+ def get_pyarrow_read_csv_options (delimiter , names , usecols , dtype , skiprows , parse_dates ):
268
268
""" This function attempts to map pandas read_csv parameters to pyarrow read_csv options to be used """
269
269
270
270
read_opts = csv_reader_get_pyarrow_read_options (names , skiprows )
271
- parse_opts = csv_reader_get_pyarrow_parse_options (delimiter , sep )
271
+ parse_opts = csv_reader_get_pyarrow_parse_options (delimiter )
272
272
convert_opts = csv_reader_get_pyarrow_convert_options (names , usecols , dtype , parse_dates )
273
273
274
274
return (read_opts , parse_opts , convert_opts )
0 commit comments