Skip to content

Commit 2b7db84

Browse files
jadams41 and djaglowski authored
parser/keyvalue: handle escaped quotes when parsing (open-telemetry#36176)
<!--Ex. Fixing a bug - Describe the bug and how this fixes the issue. Ex. Adding a feature - Explain what this achieves.--> #### Description Currently, the keyvalue parser does not handle escaped quotes and instead parses them as independent '\', '"' characters. This results in unexpected breakages between fields for strings like: > <key>="...\\" ..." Here, the backslash will be appended to the result pair, while the (now un-)escaped quotation will result in the pair being terminated early. Add handling of escaped quotation marks (for both ", ') in the keyvalue parser along with a testcase to exercise this functionality. --------- Signed-off-by: Ethan Adams <[email protected]> Co-authored-by: Daniel Jaglowski <[email protected]>
1 parent 52f28f3 commit 2b7db84

File tree

4 files changed

+81
-7
lines changed

4 files changed

+81
-7
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Use this changelog template to create an entry for release notes.
2+
3+
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
4+
change_type: bug_fix
5+
6+
# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
7+
component: parseutils
8+
9+
# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
10+
note: Handle escaped quotes when parsing pairs using SplitString.
11+
12+
# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
13+
issues: [36176]
14+
15+
# (Optional) One or more lines of additional information to render under the primary note.
16+
# These lines will be padded with 2 spaces and then inserted directly into the document.
17+
# Use pipe (|) for multiline entries.
18+
subtext:
19+
20+
# If your change doesn't affect end users or the exported elements of any package,
21+
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
22+
# Optional: The change log or logs in which this entry should be included.
23+
# e.g. '[user]' or '[user, api]'
24+
# Include 'user' if the change is relevant to end users.
25+
# Include 'api' if there is a change to a library API.
26+
# Default: '[user]'
27+
change_logs: []

internal/coreinternal/parseutils/parser.go

+14-7
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ func SplitString(input, delimiter string) ([]string, error) {
1818
current := ""
1919
delimiterLength := len(delimiter)
2020
quoteChar := "" // "" means we are not in quotes
21+
escaped := false
2122

2223
for i := 0; i < len(input); i++ {
2324
if quoteChar == "" && i+delimiterLength <= len(input) && input[i:i+delimiterLength] == delimiter { // delimiter
@@ -31,13 +32,19 @@ func SplitString(input, delimiter string) ([]string, error) {
3132
continue
3233
}
3334

34-
if quoteChar == "" && (input[i] == '"' || input[i] == '\'') { // start of quote
35-
quoteChar = string(input[i])
36-
continue
37-
}
38-
if string(input[i]) == quoteChar { // end of quote
39-
quoteChar = ""
40-
continue
35+
if !escaped { // consider quote termination so long as previous character wasn't backslash
36+
if quoteChar == "" && (input[i] == '"' || input[i] == '\'') { // start of quote
37+
quoteChar = string(input[i])
38+
continue
39+
}
40+
if string(input[i]) == quoteChar { // end of quote
41+
quoteChar = ""
42+
continue
43+
}
44+
// Only if we weren't escaped could the next character result in escaped state
45+
escaped = input[i] == '\\' // potentially escaping next character
46+
} else {
47+
escaped = false
4148
}
4249

4350
current += string(input[i])

internal/coreinternal/parseutils/parser_test.go

+22
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,17 @@ func Test_SplitString(t *testing.T) {
8585
`c=this is a "co ol"`,
8686
},
8787
},
88+
{
89+
name: "embedded escaped quotes",
90+
input: `ab c="this \"is \"" d='a \'co ol\' value' e="\""`,
91+
delimiter: " ",
92+
expected: []string{
93+
"ab",
94+
`c=this \"is \"`,
95+
`d=a \'co ol\' value`,
96+
`e=\"`,
97+
},
98+
},
8899
{
89100
name: "quoted values include whitespace",
90101
input: `name=" ottl " func=" key_ value"`,
@@ -259,6 +270,17 @@ func Test_ParseKeyValuePairs(t *testing.T) {
259270
"c": "d",
260271
},
261272
},
273+
{
274+
name: "escaped quotes",
275+
pairs: []string{"key=foobar", `key2="foo bar"`, `key3="foo \"bar\""`, `key4='\'foo\' \'bar\''`},
276+
delimiter: "=",
277+
expected: map[string]any{
278+
"key": "foobar",
279+
"key2": `"foo bar"`,
280+
"key3": `"foo \"bar\""`,
281+
"key4": `'\'foo\' \'bar\''`,
282+
},
283+
},
262284
}
263285

264286
for _, tc := range testCases {

pkg/stanza/operator/parser/keyvalue/parser_test.go

+18
Original file line numberDiff line numberDiff line change
@@ -688,6 +688,24 @@ key=value`,
688688
true,
689689
false,
690690
},
691+
{
692+
"containerd output",
693+
func(_ *Config) {},
694+
&entry.Entry{
695+
Body: `time="2024-11-01T12:38:17.992190505Z" level=warning msg="cleanup warnings time='2024-11-01T12:38:17Z' level=debug msg=\"starting signal loop\" namespace=moby-10000.10000 pid=1608080 runtime=io.containerd.runc.v2" namespace=moby-10000.10000`,
696+
},
697+
&entry.Entry{
698+
Attributes: map[string]any{
699+
"time": "2024-11-01T12:38:17.992190505Z",
700+
"level": "warning",
701+
"msg": `cleanup warnings time='2024-11-01T12:38:17Z' level=debug msg=\"starting signal loop\" namespace=moby-10000.10000 pid=1608080 runtime=io.containerd.runc.v2`,
702+
"namespace": "moby-10000.10000",
703+
},
704+
Body: `time="2024-11-01T12:38:17.992190505Z" level=warning msg="cleanup warnings time='2024-11-01T12:38:17Z' level=debug msg=\"starting signal loop\" namespace=moby-10000.10000 pid=1608080 runtime=io.containerd.runc.v2" namespace=moby-10000.10000`,
705+
},
706+
false,
707+
false,
708+
},
691709
}
692710

693711
for _, tc := range cases {

0 commit comments

Comments
 (0)