Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes #16559 - Do not trim leading spaces for tab delimited #17442

Merged
merged 7 commits into from
Oct 26, 2021
6 changes: 5 additions & 1 deletion modules/csv/csv.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,11 @@ var quoteRegexp = regexp.MustCompile(`["'][\s\S]+?["']`)
func CreateReader(input io.Reader, delimiter rune) *stdcsv.Reader {
rd := stdcsv.NewReader(input)
rd.Comma = delimiter
rd.TrimLeadingSpace = true
if delimiter != '\t' && delimiter != ' ' {
// TrimLeadingSpace must not be true when the delimiter is a tab or a space, because the value of a column might be empty:
// trimming would change `\t\t` to just `\t`, or `  ` (two spaces) to just ` ` (a single space), and the empty column would be lost.
rd.TrimLeadingSpace = true
}
Comment on lines +25 to +29
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if delimiter != '\t' && delimiter != ' ' {
// TrimLeadingSpace must not be true when the delimiter is a tab or a space, because the value of a column might be empty:
// trimming would change `\t\t` to just `\t`, or `  ` (two spaces) to just ` ` (a single space), and the empty column would be lost.
rd.TrimLeadingSpace = true
}
// TrimLeadingSpace must not be true when the delimiter is a tab or a space, because the value of a column might be empty:
// trimming would change `\t\t` to just `\t`, or `  ` (two spaces) to just ` ` (a single space), and the empty column would be lost.
rd.TrimLeadingSpace = delimiter != '\t' && delimiter != ' '

return rd
}

Expand Down
55 changes: 48 additions & 7 deletions modules/csv/csv_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,57 @@ func TestCreateReader(t *testing.T) {
assert.Equal(t, ',', rd.Comma)
}

//nolint
func TestCreateReaderAndGuessDelimiter(t *testing.T) {
input := "a;b;c\n1;2;3\n4;5;6"
var csvToRowsMap = map[string][][]string{
`a;b;c
1;2;3
4;5;6`: {{"a", "b", "c"}, {"1", "2", "3"}, {"4", "5", "6"}},
`col1 col2 col3
a b c
e f
g h i
j l
m n
p q r
u
v w x
y
`: {{"col1", "col2", "col3"},
{"a", "b", "c"},
{"", "e", "f"},
{"g", "h", "i"},
{"j", "", "l"},
{"m", "n", ""},
{"p", "q", "r"},
{"", "", "u"},
{"v", "w", "x"},
{"y", "", ""},
{"", "", ""}},
` col1,col2,col3
a, b, c
d,e,f
,h, i
j, ,
, , `: {{"col1", "col2", "col3"},
{"a", "b", "c"},
{"d", "e", "f"},
{"", "h", "i"},
{"j", "", ""},
{"", "", ""}},
}

rd, err := CreateReaderAndGuessDelimiter(strings.NewReader(input))
assert.NoError(t, err)
assert.Equal(t, ';', rd.Comma)
for csv, expectedRows := range csvToRowsMap {
rd, err := CreateReaderAndGuessDelimiter(strings.NewReader(csv))
assert.NoError(t, err)
rows, err := rd.ReadAll()
assert.NoError(t, err)
assert.EqualValues(t, rows, expectedRows)
}
}

func TestGuessDelimiter(t *testing.T) {
var kases = map[string]rune{
var csvToDelimiterMap = map[string]rune{
"a": ',',
"1,2": ',',
"1;2": ';',
Expand All @@ -37,7 +78,7 @@ func TestGuessDelimiter(t *testing.T) {
"<br/>": ',',
}

for k, v := range kases {
assert.EqualValues(t, guessDelimiter([]byte(k)), v)
for csv, expectedDelimiter := range csvToDelimiterMap {
assert.EqualValues(t, guessDelimiter([]byte(csv)), expectedDelimiter)
}
}