From dfa01c093f79b258dc902610ae6b0fd9050935bc Mon Sep 17 00:00:00 2001
From: Anatoly Bubenkov <bubenkoff@gmail.com>
Date: Wed, 28 Mar 2018 22:16:19 +0200
Subject: [PATCH] add deviation tolerance

---
 detector/Fixtures/test3.csv |  5 +++++
 detector/detect.go          | 14 +++++++++++++-
 detector/detect_test.go     | 13 +++++++++++++
 3 files changed, 31 insertions(+), 1 deletion(-)
 create mode 100644 detector/Fixtures/test3.csv

diff --git a/detector/Fixtures/test3.csv b/detector/Fixtures/test3.csv
new file mode 100644
index 0000000..b5da06a
--- /dev/null
+++ b/detector/Fixtures/test3.csv
@@ -0,0 +1,5 @@
+Email Address,,first name,last name,Gender,Date of birth,country,city,STREET,POSTAL CODE,INTEREST 1 - Producer,INTEREST 2 - 2-Dutch Recs,INTEREST 3 - SKINK,INTEREST 4 - Musical Madness,INTEREST 5  - Dutch Master Works,INTEREST 6 - 4-Dots,INTEREST 7 - Blue Forest,INTEREST 8 - Brooks,INTEREST 9 - PROMOTER GENERAL,INTEREST 10 - PROMOTER CLEAN
+some@example.com,First,Last,,,,,,,,,,,,,,Brooks,,
+some+1@example.com,First,Last,,,,,,,,,,,,,,Brooks,,
+some+2@example.com,First,Last,,,,,,,,,,,,,,Brooks,,
+some+3@example.com,First,Last,,,,,,,,,,,,,,Brooks,,
diff --git a/detector/detect.go b/detector/detect.go
index 4f86e31..abf4355 100644
--- a/detector/detect.go
+++ b/detector/detect.go
@@ -3,6 +3,7 @@ package detector
 import (
 	"bufio"
 	"io"
+	// "log"
 	"math"
 	"regexp"
 )
@@ -145,11 +146,22 @@ func (d *detector) analyze(ft frequencyTable, sampleLine int) []byte {
 	}
 
 	var candidates []byte
+	var minDeviation float64
+	var minDelimiter byte
 	for delimiter, frequencyOfLine := range ft {
-		if float64(0.0) == deviation(frequencyOfLine, sampleLine) {
+		dev := deviation(frequencyOfLine, sampleLine)
+		if float64(0.0) == dev {
 			candidates = append(candidates, delimiter)
+		} else if minDeviation > dev || minDeviation == 0 {
+			// remember the smallest non-zero deviation seen so far and its delimiter
+			minDeviation = dev
+			minDelimiter = delimiter
 		}
 	}
+	// if no delimiter has zero deviation, fall back to the one with the smallest deviation
+	if len(candidates) == 0 && minDeviation > 0 {
+		candidates = append(candidates, minDelimiter)
+	}
 
 	return candidates
 }
diff --git a/detector/detect_test.go b/detector/detect_test.go
index 42be316..63dc233 100644
--- a/detector/detect_test.go
+++ b/detector/detect_test.go
@@ -66,6 +66,19 @@ func TestDetectDelimiterComma(t *testing.T) {
 	assert.Equal(t, []string{","}, delimiters)
 }
 
+func TestDetectDelimiterComma2(t *testing.T) {
+	detector := New()
+	sampleLines := 4 // handed to Configure by pointer; the second Configure argument is left nil
+	detector.Configure(&sampleLines, nil)
+	file, err := os.OpenFile("./Fixtures/test3.csv", os.O_RDONLY, os.ModePerm)
+	assert.NoError(t, err) // NOTE(review): if this is testify's assert, a failed open is recorded but the test keeps running — confirm
+	defer file.Close()
+	// The test3.csv fixture must yield "," as the only detected delimiter.
+	delimiters := detector.DetectDelimiter(file, '"') // '"' is presumably the quote/enclosure character — confirm against DetectDelimiter
+
+	assert.Equal(t, []string{","}, delimiters)
+}
+
 func TestDetectDelimiterSemicolon(t *testing.T) {
 	detector := New()
 	sampleLines := 4