Skip to content

Commit d897e9c

Browse files
authored
Other management for unicity error? (#27)
* Other management for unicity error? * fix
1 parent 1602de7 commit d897e9c

2 files changed

Lines changed: 44 additions & 27 deletions

File tree

checkcel/checkcel.py

Lines changed: 27 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ def _log_debug_failures(self):
5050

5151
def _log_validator_failures(self):
5252
for field_name, validator in self.validators.items():
53-
if validator.bad:
53+
if validator.bad['invalid_set'] or validator.bad['invalid_unique']:
5454
self.error(
5555
" {} failed {} time(s) ({:.1%}) on field: '{}'".format(
5656
validator.__class__.__name__,
@@ -59,17 +59,34 @@ def _log_validator_failures(self):
5959
field_name,
6060
)
6161
)
62-
try:
63-
# If self.bad is iterable, it contains the fields which
64-
# caused it to fail
65-
data = validator.bad
66-
wrong_terms = ", ".join(["'{}'".format(val) for val in data["invalid_set"]])
67-
wrong_rows = ", ".join([str(val) for val in data["invalid_rows"]])
62+
if validator.bad['invalid_set']:
63+
try:
64+
# If self.bad is iterable, it contains the fields which
65+
# caused it to fail
66+
data = validator.bad
67+
wrong_terms = ", ".join(["'{}'".format(val) for val in data["invalid_set"]])
68+
wrong_rows = ", ".join([str(val) for val in data["invalid_rows"]])
69+
self.error(
70+
" Invalid fields: [{}] in rows: [{}]".format(wrong_terms, wrong_rows)
71+
)
72+
except TypeError as e:
73+
raise e
74+
75+
if validator.bad['invalid_unique']:
6876
self.error(
69-
" Invalid fields: [{}] in rows: [{}]".format(wrong_terms, wrong_rows)
77+
" The following values failed unicity check: ".format(
78+
)
7079
)
71-
except TypeError as e:
72-
raise e
80+
try:
81+
# If self.bad is iterable, it contains the fields which
82+
# caused it to fail
83+
for key, values in validator.bad['invalid_unique'].items():
84+
wrong_rows = ", ".join([str(val) for val in values])
85+
self.error(
86+
" Value: '{}' in rows: [{}]".format(key, wrong_rows)
87+
)
88+
except TypeError as e:
89+
raise e
7390

7491
def _log_missing_validators(self):
7592
self.error(" Missing validators for:")

checkcel/validators.py

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,12 @@ class Validator(object):
2020

2121
def __init__(self, empty_ok=None, ignore_case=None, ignore_space=None, empty_ok_if=None, empty_ok_unless=None, readme=None, unique=None, na_ok=None, skip_generation=None, skip_validation=None):
2222
self.logger = logs.logger
23-
self.invalid_dict = defaultdict(set)
23+
self.invalid_dict = {
24+
"invalid_set": set(),
25+
"invalid_rows": set(),
26+
"invalid_unique": defaultdict(set)
27+
}
28+
2429
self.fail_count = 0
2530
self.empty_ok = empty_ok
2631
self.na_ok = na_ok
@@ -188,8 +193,7 @@ def validate(self, field, row_number, row):
188193

189194
if field and self.unique:
190195
if field in self.unique_values:
191-
self.invalid_dict["invalid_set"].add(field)
192-
self.invalid_dict["invalid_rows"].add(row_number)
196+
self.invalid_dict["invalid_unique"][field].add(row_number)
193197
raise ValidationException("'{}' is already in the column".format(field))
194198
self.unique_values.add(field)
195199

@@ -254,8 +258,7 @@ def validate(self, field, row_number, row):
254258

255259
if field and self.unique:
256260
if field in self.unique_values:
257-
self.invalid_dict["invalid_set"].add(field)
258-
self.invalid_dict["invalid_rows"].add(row_number)
261+
self.invalid_dict["invalid_unique"][field].add(row_number)
259262
raise ValidationException("'{}' is already in the column".format(field))
260263
self.unique_values.add(field)
261264

@@ -371,8 +374,7 @@ def validate(self, field, row_number, row):
371374
)
372375
if field and self.unique:
373376
if str(field) in self.unique_values:
374-
self.invalid_dict["invalid_set"].add(field)
375-
self.invalid_dict["invalid_rows"].add(row_number)
377+
self.invalid_dict["invalid_unique"][field].add(row_number)
376378
raise ValidationException("'{}' is already in the column".format(field))
377379
self.unique_values.add(str(field))
378380

@@ -473,8 +475,7 @@ def validate(self, field, row_number, row):
473475

474476
if field and self.unique:
475477
if field in self.unique_values:
476-
self.invalid_dict["invalid_set"].add(field)
477-
self.invalid_dict["invalid_rows"].add(row_number)
478+
self.invalid_dict["invalid_unique"][field].add(row_number)
478479
raise ValidationException("'{}' is already in the column".format(field))
479480
self.unique_values.add(field)
480481

@@ -586,8 +587,7 @@ def validate(self, field, row_number, row):
586587

587588
if field and self.unique:
588589
if field in self.unique_values:
589-
self.invalid_dict["invalid_set"].add(field)
590-
self.invalid_dict["invalid_rows"].add(row_number)
590+
self.invalid_dict["invalid_unique"][field].add(row_number)
591591
raise ValidationException("'{}' is already in the column".format(field))
592592
self.unique_values.add(field)
593593

@@ -696,8 +696,7 @@ def validate(self, field, row_number, row):
696696

697697
if field and self.unique:
698698
if field in self.unique_values:
699-
self.invalid_dict["invalid_set"].add(field)
700-
self.invalid_dict["invalid_rows"].add(row_number)
699+
self.invalid_dict["invalid_unique"][field].add(row_number)
701700
raise ValidationException("'{}' is already in the column".format(field))
702701
self.unique_values.add(field)
703702

@@ -776,8 +775,7 @@ def validate(self, field, row_number, row):
776775
raise ValidationException(e)
777776
if self.unique:
778777
if field in self.unique_values:
779-
self.invalid_dict["invalid_set"].add(field)
780-
self.invalid_dict["invalid_rows"].add(row_number)
778+
self.invalid_dict["invalid_unique"][field].add(row_number)
781779
raise ValidationException("'{}' is already in the column".format(field))
782780
self.unique_values.add(field)
783781

@@ -998,8 +996,7 @@ def validate(self, field, row_number, row):
998996
if key not in self.unique_values:
999997
self.unique_values.add(key)
1000998
else:
1001-
self.invalid_dict["invalid_set"].add(field)
1002-
self.invalid_dict["invalid_rows"].add(row_number)
999+
self.invalid_dict["invalid_unique"][field].add(row_number)
10031000
if self.unique_with:
10041001
raise ValidationException(
10051002
"'{}' is already in the column (unique with: {})".format(
@@ -1102,6 +1099,7 @@ def validate(self, field, row_number, row):
11021099

11031100
if field and self.unique:
11041101
if field in self.unique_values:
1102+
self.invalid_dict["invalid_unique"][field].add(row_number)
11051103
raise ValidationException("'{}' is already in the column".format(field))
11061104
self.unique_values.add(field)
11071105

@@ -1246,6 +1244,7 @@ def validate(self, field, row_number, row):
12461244

12471245
if field and self.unique:
12481246
if field in self.unique_values:
1247+
self.invalid_dict["invalid_unique"][field].add(row_number)
12491248
raise ValidationException("'{}' is already in the column".format(field))
12501249
self.unique_values.add(field)
12511250

@@ -1341,6 +1340,7 @@ def validate(self, field, row_number, row):
13411340
raise ValidationException("{} is not a valid GPS coordinate")
13421341
if field and self.unique:
13431342
if field in self.unique_values:
1343+
self.invalid_dict["invalid_unique"][field].add(row_number)
13441344
raise ValidationException("'{}' is already in the column".format(field))
13451345
self.unique_values.add(field)
13461346

0 commit comments

Comments
 (0)