8
8
"""
9
9
10
10
from .cparser_util import parse_string
11
+ from .dialect import SimpleDialect
11
12
from .utils import pairwise
12
13
13
14
@@ -76,19 +77,28 @@ def reduce_pairwise(data, dialects):
76
77
visited = set ()
77
78
for A , B in equal_dialects :
78
79
ans = break_ties_two (data , A , B )
79
- if not ans is None :
80
+ if ans is not None :
80
81
new_dialects .add (ans )
81
82
visited .add (A )
82
83
visited .add (B )
83
84
84
85
# and add the dialects that we didn't visit
85
86
for d in dialects :
86
- if not d in visited :
87
+ if d not in visited :
87
88
new_dialects .add (d )
88
89
89
90
return list (new_dialects )
90
91
91
92
93
def _dialects_only_differ_in_field(
    A: SimpleDialect, B: SimpleDialect, field: str
) -> bool:
    """Check that two dialects agree on every attribute other than ``field``.

    The attributes considered are ``delimiter``, ``quotechar`` and
    ``escapechar``. The attribute named by ``field`` is left out of the
    comparison, so the result is also True when A and B have the same value
    for ``field``. If ``field`` is not one of the three attribute names,
    nothing is left out and all three attributes must match.

    Parameters
    ----------
    A : SimpleDialect
        First dialect to compare.
    B : SimpleDialect
        Second dialect to compare.
    field : str
        Name of the attribute that is allowed to differ.

    Returns
    -------
    bool
        True if all compared attributes are equal, False otherwise.
    """
    for name in ("delimiter", "quotechar", "escapechar"):
        if name == field:
            # The field under inspection is allowed to differ.
            continue
        same = getattr(A, name) == getattr(B, name)
        if not same:
            # One mismatch outside ``field`` settles the answer.
            return False
    return True
100
+
101
+
92
102
def break_ties_two (data , A , B ):
93
103
"""Break ties between two dialects.
94
104
@@ -127,11 +137,7 @@ def break_ties_two(data, A, B):
127
137
The chosen dialect if the tie can be broken, None otherwise.
128
138
129
139
"""
130
- keys = {"delimiter" , "quotechar" , "escapechar" }
131
- diff_only_in_key = lambda key : all (
132
- getattr (A , x ) == getattr (B , x ) for x in keys if x != key
133
- )
134
- if diff_only_in_key ("quotechar" ):
140
+ if _dialects_only_differ_in_field (A , B , "quotechar" ):
135
141
if A .quotechar == "" or B .quotechar == "" :
136
142
d_no = A if A .quotechar == "" else B
137
143
d_yes = B if d_no == A else A
@@ -145,7 +151,7 @@ def break_ties_two(data, A, B):
145
151
else :
146
152
# quotechar has an effect
147
153
return d_yes
148
- elif diff_only_in_key ( "delimiter" ):
154
+ elif _dialects_only_differ_in_field ( A , B , "delimiter" ):
149
155
if sorted ([A .delimiter , B .delimiter ]) == sorted (["," , " " ]):
150
156
# Artifact due to type detection (comma as radix point)
151
157
if A .delimiter == "," :
@@ -158,7 +164,7 @@ def break_ties_two(data, A, B):
158
164
return B
159
165
else :
160
166
return A
161
- elif diff_only_in_key ( "escapechar" ):
167
+ elif _dialects_only_differ_in_field ( A , B , "escapechar" ):
162
168
Dnone , Descape = (A , B ) if A .escapechar == "" else (B , A )
163
169
164
170
X = list (parse_string (data , Dnone ))
@@ -226,7 +232,7 @@ def break_ties_two(data, A, B):
226
232
for rX , rY in zip (X , Y ):
227
233
for x , y in zip (rX , rY ):
228
234
if x != y :
229
- if not eq in x :
235
+ if eq not in x :
230
236
return None
231
237
232
238
# Now we know that the only cells that have the
0 commit comments