Skip to content

Commit b8c9b49

Browse files
committed
Improve code quality using ruff
1 parent a43b8fe commit b8c9b49

17 files changed

+241
-67
lines changed

clevercsv/__init__.py

+26
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,29 @@
2424
from .wrappers import stream_table
2525
from .wrappers import write_table
2626
from .write import writer
27+
28+
__all__ = [
29+
"QUOTE_ALL",
30+
"QUOTE_MINIMAL",
31+
"QUOTE_NONE",
32+
"QUOTE_NONNUMERIC",
33+
"__version__",
34+
"field_size_limit",
35+
"Detector",
36+
"Sniffer",
37+
"excel",
38+
"excel_tab",
39+
"unix_dialect",
40+
"DictReader",
41+
"DictWriter",
42+
"Error",
43+
"reader",
44+
"detect_dialect",
45+
"read_dataframe",
46+
"read_dicts",
47+
"read_table",
48+
"stream_dicts",
49+
"stream_table",
50+
"write_table",
51+
"writer",
52+
]

clevercsv/break_ties.py

+16-10
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
"""
99

1010
from .cparser_util import parse_string
11+
from .dialect import SimpleDialect
1112
from .utils import pairwise
1213

1314

@@ -76,19 +77,28 @@ def reduce_pairwise(data, dialects):
7677
visited = set()
7778
for A, B in equal_dialects:
7879
ans = break_ties_two(data, A, B)
79-
if not ans is None:
80+
if ans is not None:
8081
new_dialects.add(ans)
8182
visited.add(A)
8283
visited.add(B)
8384

8485
# and add the dialects that we didn't visit
8586
for d in dialects:
86-
if not d in visited:
87+
if d not in visited:
8788
new_dialects.add(d)
8889

8990
return list(new_dialects)
9091

9192

93+
def _dialects_only_differ_in_field(
94+
A: SimpleDialect, B: SimpleDialect, field: str
95+
) -> bool:
96+
keys = ["delimiter", "quotechar", "escapechar"]
97+
return all(
98+
getattr(A, key) == getattr(B, key) for key in keys if key != field
99+
)
100+
101+
92102
def break_ties_two(data, A, B):
93103
"""Break ties between two dialects.
94104
@@ -127,11 +137,7 @@ def break_ties_two(data, A, B):
127137
The chosen dialect if the tie can be broken, None otherwise.
128138
129139
"""
130-
keys = {"delimiter", "quotechar", "escapechar"}
131-
diff_only_in_key = lambda key: all(
132-
getattr(A, x) == getattr(B, x) for x in keys if x != key
133-
)
134-
if diff_only_in_key("quotechar"):
140+
if _dialects_only_differ_in_field(A, B, "quotechar"):
135141
if A.quotechar == "" or B.quotechar == "":
136142
d_no = A if A.quotechar == "" else B
137143
d_yes = B if d_no == A else A
@@ -145,7 +151,7 @@ def break_ties_two(data, A, B):
145151
else:
146152
# quotechar has an effect
147153
return d_yes
148-
elif diff_only_in_key("delimiter"):
154+
elif _dialects_only_differ_in_field(A, B, "delimiter"):
149155
if sorted([A.delimiter, B.delimiter]) == sorted([",", " "]):
150156
# Artifact due to type detection (comma as radix point)
151157
if A.delimiter == ",":
@@ -158,7 +164,7 @@ def break_ties_two(data, A, B):
158164
return B
159165
else:
160166
return A
161-
elif diff_only_in_key("escapechar"):
167+
elif _dialects_only_differ_in_field(A, B, "escapechar"):
162168
Dnone, Descape = (A, B) if A.escapechar == "" else (B, A)
163169

164170
X = list(parse_string(data, Dnone))
@@ -226,7 +232,7 @@ def break_ties_two(data, A, B):
226232
for rX, rY in zip(X, Y):
227233
for x, y in zip(rX, rY):
228234
if x != y:
229-
if not eq in x:
235+
if eq not in x:
230236
return None
231237

232238
# Now we know that the only cells that have the

clevercsv/consistency.py

+10-10
Original file line numberDiff line numberDiff line change
@@ -58,41 +58,41 @@ def detect_consistency_dialects(data, dialects, skip=True, verbose=False):
5858
5959
This function takes a list of dialects to consider.
6060
"""
61-
log = lambda *a, **kw: print(*a, **kw) if verbose else None
62-
log("Considering %i dialects." % len(dialects))
63-
6461
old_limit = field_size_limit(len(data) + 1)
65-
scores = consistency_scores(data, dialects, skip=skip, logger=log)
62+
scores = consistency_scores(data, dialects, skip=skip, verbose=verbose)
6663
H = get_best_set(scores)
6764
result = break_ties(data, H)
6865
field_size_limit(old_limit)
6966

7067
return result
7168

7269

73-
def consistency_scores(data, dialects, skip=True, logger=print):
70+
def consistency_scores(data, dialects, skip=True, verbose: bool = False):
7471
scores = {}
7572

7673
Qmax = -float("inf")
7774
for dialect in sorted(dialects):
7875
P = pattern_score(data, dialect)
7976
if P < Qmax and skip:
8077
scores[dialect] = {"pattern": P, "type": None, "Q": None}
81-
logger("%15r:\tP = %15.6f\tskip." % (dialect, P))
78+
if verbose:
79+
print("%15r:\tP = %15.6f\tskip." % (dialect, P))
8280
continue
8381
T = type_score(data, dialect)
8482
Q = P * T
8583
Qmax = max(Q, Qmax)
8684
scores[dialect] = {"pattern": P, "type": T, "Q": Q}
87-
logger(
88-
"%15r:\tP = %15.6f\tT = %15.6f\tQ = %15.6f" % (dialect, P, T, Q)
89-
)
85+
if verbose:
86+
print(
87+
"%15r:\tP = %15.6f\tT = %15.6f\tQ = %15.6f"
88+
% (dialect, P, T, Q)
89+
)
9090
return scores
9191

9292

9393
def get_best_set(scores):
9494
Qscores = [score["Q"] for score in scores.values()]
95-
Qscores = filter(lambda q: not q is None, Qscores)
95+
Qscores = filter(lambda q: q is not None, Qscores)
9696
Qmax = max(Qscores)
9797
return set([d for d, score in scores.items() if score["Q"] == Qmax])
9898

clevercsv/console/commands/__init__.py

+8
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,11 @@
55
from .explore import ExploreCommand
66
from .standardize import StandardizeCommand
77
from .view import ViewCommand
8+
9+
__all__ = [
10+
"CodeCommand",
11+
"DetectCommand",
12+
"ExploreCommand",
13+
"StandardizeCommand",
14+
"ViewCommand",
15+
]

clevercsv/console/commands/_utils.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@ def generate_code(filename, dialect, encoding, use_pandas=False):
2828
if use_pandas:
2929
return base + [
3030
"",
31-
f'df = clevercsv.read_dataframe("{filename}", delimiter={d}, quotechar={q}, escapechar={e})',
31+
f'df = clevercsv.read_dataframe("{filename}", delimiter={d}, '
32+
f"quotechar={q}, escapechar={e})",
3233
"",
3334
]
3435

clevercsv/cparser_util.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -91,10 +91,10 @@ def parse_data(
9191
if dialect is None:
9292
dialect = SimpleDialect("", "", "")
9393

94-
delimiter_ = delimiter if not delimiter is None else dialect.delimiter
95-
quotechar_ = quotechar if not quotechar is None else dialect.quotechar
96-
escapechar_ = escapechar if not escapechar is None else dialect.escapechar
97-
strict_ = strict if not strict is None else dialect.strict
94+
delimiter_ = delimiter if delimiter is not None else dialect.delimiter
95+
quotechar_ = quotechar if quotechar is not None else dialect.quotechar
96+
escapechar_ = escapechar if escapechar is not None else dialect.escapechar
97+
strict_ = strict if strict is not None else dialect.strict
9898

9999
parser = Parser(
100100
data,

clevercsv/detect.py

+12-8
Original file line numberDiff line numberDiff line change
@@ -36,23 +36,27 @@ def sniff(self, sample, delimiters=None, verbose=False):
3636
return self.detect(sample, delimiters=delimiters, verbose=verbose)
3737

3838
def detect(
39-
self, sample, delimiters=None, verbose=False, method="auto", skip=True
39+
self,
40+
sample,
41+
delimiters=None,
42+
verbose=False,
43+
method="auto",
44+
skip=True,
4045
):
4146
# method in ['auto', 'normal', 'consistency']
42-
# wrapper for the print function
43-
log = lambda *a, **kw: print(*a, **kw) if verbose else None
44-
4547
if method == "normal" or method == "auto":
46-
log("Running normal form detection ...", flush=True)
48+
if verbose:
49+
print("Running normal form detection ...", flush=True)
4750
dialect = detect_dialect_normal(
4851
sample, delimiters=delimiters, verbose=verbose
4952
)
50-
if not dialect is None:
53+
if dialect is not None:
5154
self.method_ = "normal"
5255
return dialect
5356

5457
self.method_ = "consistency"
55-
log("Running data consistency measure ...", flush=True)
58+
if verbose:
59+
print("Running data consistency measure ...", flush=True)
5660
return detect_dialect_consistency(
5761
sample, delimiters=delimiters, skip=skip, verbose=verbose
5862
)
@@ -117,7 +121,7 @@ def has_header(self, sample):
117121
# on whether it's a header
118122
hasHeader = 0
119123
for col, colType in columnTypes.items():
120-
if type(colType) == type(0): # it's a length
124+
if isinstance(colType, int): # it's a length
121125
if len(header[col]) != colType:
122126
hasHeader += 1
123127
else:

0 commit comments

Comments
 (0)