Skip to content

Commit e197ac1

Browse files
committed
fix: unescape unicode properly
1 parent e494d09 commit e197ac1

File tree

3 files changed

+85
-31
lines changed

3 files changed

+85
-31
lines changed

fixtures/test-all.properties

+2
Original file line numberDiff line numberDiff line change
@@ -70,3 +70,5 @@ encodedHelloInJapanese = \u3053\u3093\u306b\u3061\u306f
7070
# Using \u without being followed by four hexadecimal digits will throw an exception.
7171
# But with more modern file encodings like UTF-8, you can directly use supported characters.
7272
helloInJapanese = こんにちは
73+
# Keys can also be encoded
74+
\u3053\u3093\u306b\u3061\u306f = hello

src/properties.spec.ts

+20-14
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,6 @@ describe('data access', () => {
116116
expect(result).toBeUndefined()
117117
})
118118

119-
120119
it('should return last value of duplicate key', () => {
121120
const config: properties.Properties = {
122121
lines: [
@@ -129,6 +128,25 @@ describe('data access', () => {
129128
const result = properties.get(config, 'key1')
130129
expect(result).toBe('foo3')
131130
})
131+
132+
it('should throw on invalid unicode sequence in key', () => {
133+
const config: properties.Properties = {
134+
lines: ['foo\\u23a=bar']
135+
}
136+
137+
expect(() => properties.get(config, 'foo')).toThrowError()
138+
})
139+
140+
it.each([
141+
['foo=bar\\u23a'],
142+
['foo=bar\\u23ax5']
143+
])('should throw on invalid unicode sequence in value %s', (line) => {
144+
const config: properties.Properties = {
145+
lines: [line]
146+
}
147+
148+
expect(() => properties.get(config, 'foo')).toThrowError()
149+
})
132150
})
133151

134152
describe('set value', () => {
@@ -364,6 +382,7 @@ describe('data access', () => {
364382
'evenLikeThis\\': '',
365383
hello: 'hello',
366384
helloInJapanese: 'こんにちは',
385+
'こんにちは': 'hello',
367386
keyWithBackslashes: 'This has random backslashes',
368387
'keyWithDelimiters:= ': 'This is the value for the key "keyWithDelimiters:= "',
369388
'keyWitheven\\': 'this colon is not escaped',
@@ -383,19 +402,6 @@ describe('data access', () => {
383402
})
384403
})
385404

386-
describe('unescape', () => {
387-
it.each([
388-
['foo', 'foo'],
389-
['\\:\\#\\!\\ ', ':#! '],
390-
['a\\r\\f\\n\\t\\\\\\ ', 'a\r\f\n\t\\ '],
391-
['\\u0000\\u0001', '\0\u0001'],
392-
['\\u3053\\u3093\\u306b\\u3061\\u306f', 'こんにちは']
393-
])('should unescape string "%s" to "%s"', (str: string, expected: string) => {
394-
const result = properties.unescape(str)
395-
expect(result).toEqual(expected)
396-
})
397-
})
398-
399405
describe('escapeKey', () => {
400406
it.each([
401407
['foo1', 'foo1'],

src/properties.ts

+63-17
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,8 @@ function* listPairs(lines: string[]): Generator<{
232232
sep: string,
233233
value: string,
234234
skipSpace: boolean,
235-
escapedNext: boolean
235+
escapedNext: boolean,
236+
unicode?: string
236237
} =>
237238
({state: State.START, start: -1, key: '', sep: '', value: '', skipSpace: true, escapedNext: false})
238239

@@ -245,6 +246,20 @@ function* listPairs(lines: string[]): Generator<{
245246
}
246247
state.skipSpace = false
247248

249+
// Parse unicode
250+
if (state.unicode) {
251+
// Handle incomplete sequence
252+
if (char === 'EOL') {
253+
throw new Error(`Invalid unicode sequence at line ${line}`)
254+
}
255+
256+
// Append and consume until it has correct length
257+
state.unicode += char
258+
if (state.unicode.length < 6) {
259+
continue
260+
}
261+
}
262+
248263
// First char on the line
249264
if (state.state === State.START) {
250265
switch (char) {
@@ -272,6 +287,13 @@ function* listPairs(lines: string[]): Generator<{
272287

273288
// Key
274289
if (state.state === State.KEY) {
290+
// A full unicode sequence has been collected: decode it and append it to the key
291+
if (state.unicode) {
292+
state.key += parseUnicode(state.unicode, line)
293+
state.unicode = undefined
294+
continue
295+
}
296+
275297
switch (char) {
276298
case 'EOL':
277299
if (state.escapedNext) {
@@ -307,10 +329,20 @@ function* listPairs(lines: string[]): Generator<{
307329
}
308330
break
309331
default:
310-
// Normal char
311-
// TODO handle unicode
312-
state.key += state.escapedNext ? unescapeChar(char) : char
313-
state.escapedNext = false
332+
// Escape sequence
333+
if (state.escapedNext) {
334+
state.escapedNext = false
335+
if (char === 'u') {
336+
// Unicode
337+
state.unicode = '0x'
338+
} else {
339+
// Special char
340+
state.key += unescapeChar(char)
341+
}
342+
} else {
343+
// Normal char
344+
state.key += char
345+
}
314346
break
315347
}
316348
}
@@ -346,6 +378,13 @@ function* listPairs(lines: string[]): Generator<{
346378

347379
// Value
348380
if (state.state === State.VALUE) {
381+
// A full unicode sequence has been collected: decode it and append it to the value
382+
if (state.unicode) {
383+
state.value += parseUnicode(state.unicode, line)
384+
state.unicode = undefined
385+
continue
386+
}
387+
349388
switch (char) {
350389
case 'EOL':
351390
if (state.escapedNext) {
@@ -369,10 +408,19 @@ function* listPairs(lines: string[]): Generator<{
369408
}
370409
break
371410
default:
372-
// Normal char
373-
// TODO handle unicode
374-
state.value += state.escapedNext ? unescapeChar(char) : char
375-
state.escapedNext = false
411+
if (state.escapedNext) {
412+
state.escapedNext = false
413+
if (char === 'u') {
414+
// Unicode
415+
state.unicode = '0x'
416+
} else {
417+
// Special char
418+
state.value += unescapeChar(char)
419+
}
420+
} else {
421+
// Normal char
422+
state.value += char
423+
}
376424
break
377425
}
378426
}
@@ -395,14 +443,12 @@ const unescapeChar = (c: string): string => {
395443
}
396444
}
397445

398-
/**
399-
* Unescape key or value.
400-
*
401-
* @param str Escaped string.
402-
* @return Actual string.
403-
*/
404-
export const unescape = (str: string): string =>
405-
str.replace(/\\(.)/g, s => unescapeChar(s[1]))
446+
const parseUnicode = (sequence: string, line: number): string => {
447+
if (!sequence.match(/^0x[\da-fA-F]{4}$/)) {
448+
throw new Error(`Invalid unicode sequence at line ${line}`)
449+
}
450+
return String.fromCharCode(parseInt(sequence, 16))
451+
}
406452

407453
/**
408454
* Escape property key.

0 commit comments

Comments
 (0)