@@ -3,11 +3,12 @@ package parser
 import (
 	"bytes"
 	"fmt"
+	"strconv"
+	"strings"
+
 	"github.com/sjjian/oracle-sql-parser/ast"
 	"github.com/timtadh/lexmachine"
 	"github.com/timtadh/lexmachine/machines"
-	"strconv"
-	"strings"
 )
 
 var lexer *lexmachine.Lexer
@@ -22,36 +23,66 @@ func skip(*lexmachine.Scanner, *machines.Match) (interface{}, error) {
 	return nil, nil
 }
 
-func AddTokenBetween(tokenId int, start []byte, end byte) {
-	lexer.Add(start, func(scan *lexmachine.Scanner, match *machines.Match) (interface{}, error) {
-		var buf bytes.Buffer
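+// byteScanner walks the unscanned input one byte at a time,
+// remembering how far it has read.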
+type byteScanner struct {
+	text []byte
+	pos  int
+}
+
+func (bs *byteScanner) Next() (byte, bool) {
+	if bs.pos >= len(bs.text) {
+		return 0, false
+	}
+	b := bs.text[bs.pos]
+	bs.pos += 1
+	return b, true
+}
+
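+// LastN returns the count most recently read bytes, or everything
+// read so far when fewer than count bytes have been consumed.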
+func (bs *byteScanner) LastN(count int) []byte {
+	if bs.pos <= count {
+		return bs.text[:bs.pos]
+	}
+	return bs.text[bs.pos-count : bs.pos]
+}
+
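+// AddTokenBetween registers a token delimited by left and right. Unlike the
+// old single-byte end parameter, right may be multi-byte (e.g. "*/"). When
+// matchEnd is true, end of input also closes the token.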
+func AddTokenBetween(left []byte, right []byte, matchEnd bool, action lexmachine.Action) {
+	lexer.Add(left, func(scan *lexmachine.Scanner, match *machines.Match) (interface{}, error) {
 		match.EndLine = match.StartLine
 		match.EndColumn = match.StartColumn
-		for tc := scan.TC; tc < len(scan.Text); tc++ {
-			curByte := scan.Text[tc]
-
-			// calculate location
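+		// walk forward byte by byte, tracking the match's end position,
+		// until the right delimiter appears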
+		matchRight := false
+		bs := byteScanner{text: scan.Text[scan.TC:]}
+		for {
+			b, has := bs.Next()
+			if !has {
+				break
+			}
 			match.EndColumn += 1
-			if curByte == '\n' {
+			if b == '\n' {
 				match.EndLine += 1
 			}
-			// match end
-			if curByte == end {
-				scan.TC = tc + 1
-				match.TC = scan.TC
-				match.Bytes = buf.Bytes()
-				return scan.Token(tokenId, buf.String(), match), nil
-			} else {
-				// between start and end
-				buf.WriteByte(curByte)
+			if bytes.Equal(right, bs.LastN(len(right))) {
+				matchRight = true
+				break
 			}
 		}
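+		// right delimiter found: the token body is everything between the delimiters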
+		if matchRight {
+			match.Bytes = scan.Text[scan.TC : scan.TC+bs.pos-len(right)]
+			scan.TC = scan.TC + bs.pos
+			match.TC = scan.TC
+			return action(scan, match)
+		}
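+		// no right delimiter: optionally accept end of input as a terminator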
+		if matchEnd {
+			match.Bytes = scan.Text[scan.TC:]
+			scan.TC = scan.TC + bs.pos
+			match.TC = scan.TC
+			return action(scan, match)
+		}
+
 		return nil, fmt.Errorf("unclosed %s with %s, starting at %d, (%d, %d)",
-			string(start), string(end), match.TC, match.StartLine, match.StartColumn)
+			string(left), string(right), match.TC, match.StartLine, match.StartColumn)
 	})
 }
 
-func AddIdentToken(tokenId int, rs string) {
+func AddIdentToken(rs string, action lexmachine.Action) {
 	l := strings.ToLower(rs)
 	u := strings.ToUpper(rs)
 	var regex bytes.Buffer
@@ -65,7 +96,7 @@ func AddIdentToken(tokenId int, rs string) {
 			regex.WriteString("]")
 		}
 	}
-	lexer.Add(regex.Bytes(), token(tokenId))
+	lexer.Add(regex.Bytes(), action)
 }
 
 var stdTokenMap = map[string]int{
@@ -316,18 +347,21 @@ func init() {
 	lexer = lexmachine.NewLexer()
 
 	for keyword, tokenId := range stdTokenMap {
-		AddIdentToken(tokenId, keyword)
+		AddIdentToken(keyword, token(tokenId))
 	}
 	for keyword, tokenId := range reservedKeyword {
-		AddIdentToken(tokenId, keyword)
+		AddIdentToken(keyword, token(tokenId))
 	}
 
 	for keyword, tokenId := range unReservedKeyword {
-		AddIdentToken(tokenId, keyword)
+		AddIdentToken(keyword, token(tokenId))
 	}
 
 	lexer.Add([]byte("( |\t|\n|\r)+"), skip)
 
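+	// "--" comments end at a newline or at end of input; "/*" comments must be closed by "*/"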
+	AddTokenBetween([]byte("--"), []byte("\n"), true, skip)
+	AddTokenBetween([]byte(`/\*`), []byte("*/"), false, skip)
+
 	lexer.Add([]byte("[a-zA-Z]+\\w*"), token(_nonquotedIdentifier))
 
 	lexer.Add([]byte(`[0-9]+`), func(s *lexmachine.Scanner, m *machines.Match) (interface{}, error) {
@@ -338,8 +372,8 @@ func init() {
 		return s.Token(_intNumber, v, m), nil
 	})
 
-	AddTokenBetween(_doubleQuoteStr, []byte(`"`), byte('"'))
-	AddTokenBetween(_singleQuoteStr, []byte(`'`), byte('\''))
+	AddTokenBetween([]byte("\""), []byte("\""), false, token(_doubleQuoteStr))
+	AddTokenBetween([]byte("'"), []byte("'"), false, token(_singleQuoteStr))
 	err := lexer.CompileNFA()
 	if err != nil {
 		panic(err)