Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions parser/lexer/lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,37 @@ func (l *Lexer) scanEscape(quote rune) rune {
case 'x':
ch = l.scanDigits(l.next(), 16, 2)
case 'u':
// Support variable-length form: \u{XXXXXX}
if l.peek() == '{' {
// consume '{'
l.next()
// read 1-6 hex digits
digits := 0
for {
p := l.peek()
if p == '}' {
break
}
if digitVal(p) >= 16 {
l.error("invalid char escape")
return eof
}
if digits >= 6 {
l.error("invalid char escape")
return eof
}
l.next()
digits++
}
if l.peek() != '}' || digits == 0 {
l.error("invalid char escape")
return eof
}
// consume '}' and continue
l.next()
ch = l.next()
break
}
ch = l.scanDigits(l.next(), 16, 4)
case 'U':
ch = l.scanDigits(l.next(), 16, 8)
Expand Down
8 changes: 8 additions & 0 deletions parser/lexer/lexer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,14 @@ func TestLex(t *testing.T) {
{Kind: EOF},
},
},
{
"\"\\u{61}\\u{1F600}\" '\\u{61}\\u{1F600}'",
[]Token{
{Kind: String, Value: "a😀"},
{Kind: String, Value: "a😀"},
{Kind: EOF},
},
},
}

for _, test := range tests {
Expand Down
35 changes: 35 additions & 0 deletions parser/lexer/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,41 @@ func unescapeChar(s string) (value rune, multibyte bool, tail string, err error)

// 4. Unicode escape sequences, reproduced from `strconv/quote.go`
case 'x', 'X', 'u', 'U':
// Support Rust-style variable-length form: \u{XXXXXX} (1-6 hex digits)
if c == 'u' && len(s) > 0 && s[0] == '{' {
// consume '{'
s = s[1:]
var v rune
digits := 0
for len(s) > 0 && s[0] != '}' {
x, ok := unhex(s[0])
if !ok {
err = fmt.Errorf("unable to unescape string")
return
}
if digits >= 6 { // at most 6 hex digits
err = fmt.Errorf("unable to unescape string")
return
}
v = v<<4 | x
s = s[1:]
digits++
}
// require closing '}' and at least 1 digit
if len(s) == 0 || s[0] != '}' || digits == 0 {
err = fmt.Errorf("unable to unescape string")
return
}
// consume '}'
s = s[1:]
if v > utf8.MaxRune {
err = fmt.Errorf("unable to unescape string")
return
}
value = v
multibyte = true
break
}
n := 0
switch c {
case 'x', 'X':
Expand Down
Loading