
Commit 89c652c

Merge pull request #19 from sjjian/support_line_comments
support comments
2 parents 15e060b + 0f12cd8 commit 89c652c

File tree

2 files changed: +84 -27 lines

lexer.go

Lines changed: 61 additions & 27 deletions
@@ -3,11 +3,12 @@ package parser
 import (
 	"bytes"
 	"fmt"
+	"strconv"
+	"strings"
+
 	"github.com/sjjian/oracle-sql-parser/ast"
 	"github.com/timtadh/lexmachine"
 	"github.com/timtadh/lexmachine/machines"
-	"strconv"
-	"strings"
 )
 
 var lexer *lexmachine.Lexer
@@ -22,36 +23,66 @@ func skip(*lexmachine.Scanner, *machines.Match) (interface{}, error) {
 	return nil, nil
 }
 
-func AddTokenBetween(tokenId int, start []byte, end byte) {
-	lexer.Add(start, func(scan *lexmachine.Scanner, match *machines.Match) (interface{}, error) {
-		var buf bytes.Buffer
+type byteScanner struct {
+	text []byte
+	pos  int
+}
+
+func (bs *byteScanner) Next() (byte, bool) {
+	if bs.pos >= len(bs.text) {
+		return 0, false
+	}
+	b := bs.text[bs.pos]
+	bs.pos += 1
+	return b, true
+}
+
+func (bs *byteScanner) LastN(count int) []byte {
+	if bs.pos <= count {
+		return bs.text[:bs.pos]
+	}
+	return bs.text[bs.pos-count : bs.pos]
+}
+
+func AddTokenBetween(left []byte, right []byte, matchEnd bool, action lexmachine.Action) {
+	lexer.Add(left, func(scan *lexmachine.Scanner, match *machines.Match) (interface{}, error) {
 		match.EndLine = match.StartLine
 		match.EndColumn = match.StartColumn
-		for tc := scan.TC; tc < len(scan.Text); tc++ {
-			curByte := scan.Text[tc]
-
-			// calculate location
+		matchRight := false
+		bs := byteScanner{text: scan.Text[scan.TC:]}
+		for {
+			b, has := bs.Next()
+			if !has {
+				break
+			}
 			match.EndColumn += 1
-			if curByte == '\n' {
+			if b == '\n' {
 				match.EndLine += 1
 			}
-			// match end
-			if curByte == end {
-				scan.TC = tc + 1
-				match.TC = scan.TC
-				match.Bytes = buf.Bytes()
-				return scan.Token(tokenId, buf.String(), match), nil
-			} else {
-				// between start and end
-				buf.WriteByte(curByte)
+			if bytes.Equal(right, bs.LastN(len(right))) {
+				matchRight = true
+				break
 			}
 		}
+		if matchRight {
+			match.Bytes = scan.Text[scan.TC : scan.TC+bs.pos-len(right)]
+			scan.TC = scan.TC + bs.pos
+			match.TC = scan.TC
+			return action(scan, match)
+		}
+		if matchEnd {
+			match.Bytes = scan.Text[scan.TC:]
+			scan.TC = scan.TC + bs.pos
+			match.TC = scan.TC
+			return action(scan, match)
+		}
+
 		return nil, fmt.Errorf("unclosed %s with %s, staring at %d, (%d, %d)",
-			string(start), string(end), match.TC, match.StartLine, match.StartColumn)
+			string(left), string(right), match.TC, match.StartLine, match.StartColumn)
	})
 }
 
-func AddIdentToken(tokenId int, rs string) {
+func AddIdentToken(rs string, action lexmachine.Action) {
 	l := strings.ToLower(rs)
 	u := strings.ToUpper(rs)
 	var regex bytes.Buffer
@@ -65,7 +96,7 @@ func AddIdentToken(tokenId int, rs string) {
 			regex.WriteString("]")
 		}
 	}
-	lexer.Add(regex.Bytes(), token(tokenId))
+	lexer.Add(regex.Bytes(), action)
 }
 
 var stdTokenMap = map[string]int{
@@ -316,18 +347,21 @@ func init() {
 	lexer = lexmachine.NewLexer()
 
 	for keyword, tokenId := range stdTokenMap {
-		AddIdentToken(tokenId, keyword)
+		AddIdentToken(keyword, token(tokenId))
 	}
 	for keyword, tokenId := range reservedKeyword {
-		AddIdentToken(tokenId, keyword)
+		AddIdentToken(keyword, token(tokenId))
 	}
 
 	for keyword, tokenId := range unReservedKeyword {
-		AddIdentToken(tokenId, keyword)
+		AddIdentToken(keyword, token(tokenId))
 	}
 
 	lexer.Add([]byte("( |\t|\n|\r)+"), skip)
 
+	AddTokenBetween([]byte("--"), []byte("\n"), true, skip)
+	AddTokenBetween([]byte(`/\*`), []byte("*/"), false, skip)
+
 	lexer.Add([]byte("[a-zA-Z]+\\w*"), token(_nonquotedIdentifier))
 
 	lexer.Add([]byte(`[0-9]+`), func(s *lexmachine.Scanner, m *machines.Match) (interface{}, error) {
@@ -338,8 +372,8 @@
 		return s.Token(_intNumber, v, m), nil
 	})
 
-	AddTokenBetween(_doubleQuoteStr, []byte(`"`), byte('"'))
-	AddTokenBetween(_singleQuoteStr, []byte(`'`), byte('\''))
+	AddTokenBetween([]byte("\""), []byte("\""), false, token(_doubleQuoteStr))
+	AddTokenBetween([]byte("'"), []byte("'"), false, token(_singleQuoteStr))
 	err := lexer.CompileNFA()
 	if err != nil {
 		panic(err)

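The heart of the change is the reworked AddTokenBetween: instead of a single terminator byte and a fixed token id, it now takes a multi-byte right delimiter, a matchEnd flag that lets end-of-input close the token (so a trailing "--" comment with no final newline still lexes), and an arbitrary lexmachine.Action, which is how comments get registered with skip and produce no token at all. Below is a minimal, self-contained sketch of the same delimiter scan; scanUntil is a hypothetical stand-in for illustration, not code from this repository.

package main

import (
	"bytes"
	"fmt"
)

// scanUntil mimics the loop inside the new AddTokenBetween: walk the text
// byte by byte and stop as soon as the last len(right) bytes seen equal the
// right delimiter. If matchEnd is true, running out of input also closes
// the token (the "--" line-comment case); otherwise the token is unclosed.
func scanUntil(text, right []byte, matchEnd bool) (body []byte, ok bool) {
	for pos := 1; pos <= len(text); pos++ {
		if pos >= len(right) && bytes.Equal(right, text[pos-len(right):pos]) {
			return text[:pos-len(right)], true // body excludes the delimiter
		}
	}
	if matchEnd {
		return text, true // end of input terminates, e.g. a final line comment
	}
	return nil, false // e.g. an unclosed "/*"
}

func main() {
	body, ok := scanUntil([]byte(" line comment\nid int,"), []byte("\n"), true)
	fmt.Printf("%q %v\n", body, ok) // " line comment" true
	body, ok = scanUntil([]byte(" block */ id int"), []byte("*/"), false)
	fmt.Printf("%q %v\n", body, ok) // " block " true
	body, ok = scanUntil([]byte(" never closed"), []byte("*/"), false)
	fmt.Printf("%q %v\n", body, ok) // "" false
}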
test/test_comments.sql

Lines changed: 23 additions & 0 deletions
@@ -0,0 +1,23 @@
+--comment
+CREATE TABLE db1.t1 (
+    id int, -- comment
+    name varchar2(255) -- comment comment
+); -- !@#$%^&*(
+
+/*
+comment comment comment
+*/
+CREATE TABLE db1.t1 (
+    /* comment */
+    id int,
+    name varchar2(255)
+);
+
+/*
+comment comment comment
+*/
+CREATE TABLE db1.t1 (
+    /* comment */
+    id int, -- comment
+    name varchar2(255) -- comment comment
+); -- !@#$%^&*(

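The test file covers line comments (including one terminated by end-of-file rather than a newline, which is exactly what passing matchEnd = true for "--" enables), block comments, and comments embedded inside DDL. A quick manual check could look like the sketch below; it assumes the package's top-level Parser entry point as shown in the project README, so verify the name against the actual API.

package main

import (
	"fmt"

	parser "github.com/sjjian/oracle-sql-parser"
)

func main() {
	// Comments are lexed with the skip action, so this should parse exactly
	// like the uncommented DDL. parser.Parser is assumed from the project
	// README; adjust if the entry point differs.
	stmts, err := parser.Parser(`
/* header comment */
CREATE TABLE db1.t1 (
    id int, -- line comment
    name varchar2(255)
); -- closed by end of input, allowed because "--" is registered with matchEnd = true`)
	if err != nil {
		panic(err)
	}
	fmt.Printf("parsed %d statement(s)\n", len(stmts))
}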