From 7103de97d48f4438d16f1f6a32a8c31afa4d3e55 Mon Sep 17 00:00:00 2001 From: Alexandre Daubois Date: Fri, 24 Apr 2026 10:23:31 +0200 Subject: [PATCH] fix(extgen): extract Go function bodies via go/ast instead of brace counting --- internal/extgen/astutil.go | 51 ++++++++++ internal/extgen/classparser.go | 156 ++++++++++++----------------- internal/extgen/funcparser.go | 122 ++++++++-------------- internal/extgen/funcparser_test.go | 23 +++++ 4 files changed, 182 insertions(+), 170 deletions(-) create mode 100644 internal/extgen/astutil.go diff --git a/internal/extgen/astutil.go b/internal/extgen/astutil.go new file mode 100644 index 0000000000..7be3e9f84b --- /dev/null +++ b/internal/extgen/astutil.go @@ -0,0 +1,51 @@ +package extgen + +import ( + "fmt" + "go/ast" + "go/token" + "regexp" + "strings" +) + +// findDirective searches a comment group for a line matching re and returns the +// first capture group (typically the directive payload) along with the comment's +// source line number. Returns "" when no comment matches. +func findDirective(group *ast.CommentGroup, fset *token.FileSet, re *regexp.Regexp) (string, int) { + if group == nil { + return "", 0 + } + for _, comment := range group.List { + if matches := re.FindStringSubmatch(comment.Text); matches != nil { + return strings.TrimSpace(matches[1]), fset.Position(comment.Pos()).Line + } + } + return "", 0 +} + +// extractNodeSource returns the verbatim source text covered by node in src. +func extractNodeSource(src []byte, fset *token.FileSet, node ast.Node) string { + start := fset.Position(node.Pos()).Offset + end := fset.Position(node.End()).Offset + if start < 0 || end > len(src) || start > end { + return "" + } + return string(src[start:end]) +} + +// checkOrphanDirectives returns an error for every comment that matches re but +// whose source line was not consumed by a declaration. +func checkOrphanDirectives(file *ast.File, fset *token.FileSet, re *regexp.Regexp, consumed map[int]bool, directiveLabel string) error { + for _, group := range file.Comments { + for _, comment := range group.List { + if !re.MatchString(comment.Text) { + continue + } + line := fset.Position(comment.Pos()).Line + if !consumed[line] { + return fmt.Errorf("%s directive at line %d is not followed by a function declaration", directiveLabel, line) + } + } + } + return nil +} diff --git a/internal/extgen/classparser.go b/internal/extgen/classparser.go index caef0ea239..e6079b8362 100644 --- a/internal/extgen/classparser.go +++ b/internal/extgen/classparser.go @@ -1,7 +1,6 @@ package extgen import ( - "bufio" "fmt" "go/ast" "go/parser" @@ -204,91 +203,88 @@ func (cp *classParser) goTypeToPHPType(goType string) phpType { return phpMixed } -func (cp *classParser) parseMethods(filename string) (methods []phpClassMethod, err error) { - file, err := os.Open(filename) +func (cp *classParser) parseMethods(filename string) ([]phpClassMethod, error) { + src, err := os.ReadFile(filename) if err != nil { return nil, err } - defer func() { - e := file.Close() - if err != nil { - err = e - } - }() - - scanner := bufio.NewScanner(file) - var currentMethod *phpClassMethod + fset := token.NewFileSet() + file, err := parser.ParseFile(fset, filename, src, parser.ParseComments) + if err != nil { + return nil, fmt.Errorf("parsing file: %w", err) + } - lineNumber := 0 - for scanner.Scan() { - lineNumber++ - line := strings.TrimSpace(scanner.Text()) + validator := Validator{} + var methods []phpClassMethod + consumed := make(map[int]bool) - if matches := phpMethodRegex.FindStringSubmatch(line); matches != nil { - className := strings.TrimSpace(matches[1]) - signature := strings.TrimSpace(matches[2]) + for _, decl := range file.Decls { + funcDecl, ok := decl.(*ast.FuncDecl) + if !ok { + continue + } - method, err := cp.parseMethodSignature(className, signature) - if err != nil { - fmt.Printf("Warning: Error parsing method signature %q: %v\n", signature, err) + directive, directiveLine := findDirective(funcDecl.Doc, fset, phpMethodRegex) + if directive == "" { + continue + } + rawMatch := phpMethodRegex.FindStringSubmatch(findMatchingComment(funcDecl.Doc, phpMethodRegex)) + if len(rawMatch) != 3 { + continue + } + className := strings.TrimSpace(rawMatch[1]) + signature := strings.TrimSpace(rawMatch[2]) + consumed[directiveLine] = true - continue - } + method, err := cp.parseMethodSignature(className, signature) + if err != nil { + fmt.Printf("Warning: Error parsing method signature %q: %v\n", signature, err) + continue + } - validator := Validator{} - phpFunc := phpFunction{ - Name: method.Name, - Signature: method.Signature, - Params: method.Params, - ReturnType: method.ReturnType, - IsReturnNullable: method.isReturnNullable, - } + phpFunc := phpFunction{ + Name: method.Name, + Signature: method.Signature, + Params: method.Params, + ReturnType: method.ReturnType, + IsReturnNullable: method.isReturnNullable, + } + if err := validator.validateTypes(phpFunc); err != nil { + fmt.Printf("Warning: Method \"%s::%s\" uses unsupported types: %v\n", className, method.Name, err) + continue + } - if err := validator.validateTypes(phpFunc); err != nil { - fmt.Printf("Warning: Method \"%s::%s\" uses unsupported types: %v\n", className, method.Name, err) + method.lineNumber = directiveLine + method.GoFunction = extractNodeSource(src, fset, funcDecl) - continue - } - - method.lineNumber = lineNumber - currentMethod = method + phpFunc.GoFunction = method.GoFunction + if err := validator.validateGoFunctionSignatureWithOptions(phpFunc, true); err != nil { + fmt.Printf("Warning: Go method signature mismatch for '%s::%s': %v\n", method.ClassName, method.Name, err) + continue } - if currentMethod != nil && strings.HasPrefix(line, "func ") { - goFunc, err := cp.extractGoMethodFunction(scanner, line) - if err != nil { - return nil, fmt.Errorf("extracting Go method function: %w", err) - } - - currentMethod.GoFunction = goFunc + methods = append(methods, *method) + } - validator := Validator{} - phpFunc := phpFunction{ - Name: currentMethod.Name, - Signature: currentMethod.Signature, - GoFunction: currentMethod.GoFunction, - Params: currentMethod.Params, - ReturnType: currentMethod.ReturnType, - IsReturnNullable: currentMethod.isReturnNullable, - } + if err := checkOrphanDirectives(file, fset, phpMethodRegex, consumed, "//export_php:method"); err != nil { + return nil, err + } - if err := validator.validateGoFunctionSignatureWithOptions(phpFunc, true); err != nil { - fmt.Printf("Warning: Go method signature mismatch for '%s::%s': %v\n", currentMethod.ClassName, currentMethod.Name, err) - currentMethod = nil - continue - } + return methods, nil +} - methods = append(methods, *currentMethod) - currentMethod = nil - } +// findMatchingComment returns the raw comment text whose line matches re. +func findMatchingComment(group *ast.CommentGroup, re *regexp.Regexp) string { + if group == nil { + return "" } - - if currentMethod != nil { - return nil, fmt.Errorf("//export_php:method directive at line %d is not followed by a function declaration", currentMethod.lineNumber) + for _, comment := range group.List { + if re.MatchString(comment.Text) { + return comment.Text + } } - - return methods, scanner.Err() + return "" } func (cp *classParser) parseMethodSignature(className, signature string) (*phpClassMethod, error) { @@ -365,27 +361,3 @@ func (cp *classParser) sanitizeDefaultValue(value string) string { return strings.Trim(value, `'"`) } -func (cp *classParser) extractGoMethodFunction(scanner *bufio.Scanner, firstLine string) (string, error) { - goFunc := firstLine + "\n" - braceCount := 1 - - for scanner.Scan() { - line := scanner.Text() - goFunc += line + "\n" - - for _, char := range line { - switch char { - case '{': - braceCount++ - case '}': - braceCount-- - } - } - - if braceCount == 0 { - break - } - } - - return goFunc, nil -} diff --git a/internal/extgen/funcparser.go b/internal/extgen/funcparser.go index 5e64de1b4c..2cea11f6a7 100644 --- a/internal/extgen/funcparser.go +++ b/internal/extgen/funcparser.go @@ -1,8 +1,10 @@ package extgen import ( - "bufio" "fmt" + "go/ast" + "go/parser" + "go/token" "os" "regexp" "strings" @@ -14,102 +16,66 @@ var typeNameRegex = regexp.MustCompile(`(\??[\w|]+)\s+\$?(\w+)`) type FuncParser struct{} -func (fp *FuncParser) parse(filename string) (functions []phpFunction, err error) { - file, err := os.Open(filename) +func (fp *FuncParser) parse(filename string) ([]phpFunction, error) { + src, err := os.ReadFile(filename) if err != nil { return nil, err } - defer func() { - e := file.Close() - if err == nil { - err = e - } - }() - - scanner := bufio.NewScanner(file) - var currentPHPFunc *phpFunction - validator := Validator{} - - lineNumber := 0 - for scanner.Scan() { - lineNumber++ - line := strings.TrimSpace(scanner.Text()) - - if matches := phpFuncRegex.FindStringSubmatch(line); matches != nil { - signature := strings.TrimSpace(matches[1]) - phpFunc, err := fp.parseSignature(signature) - if err != nil { - fmt.Printf("Warning: Error parsing signature '%s': %v\n", signature, err) - - continue - } - if err := validator.validateFunction(*phpFunc); err != nil { - fmt.Printf("Warning: Invalid function '%s': %v\n", phpFunc.Name, err) - - continue - } + fset := token.NewFileSet() + file, err := parser.ParseFile(fset, filename, src, parser.ParseComments) + if err != nil { + return nil, fmt.Errorf("parsing file: %w", err) + } - if err := validator.validateTypes(*phpFunc); err != nil { - fmt.Printf("Warning: Function '%s' uses unsupported types: %v\n", phpFunc.Name, err) + validator := Validator{} + var functions []phpFunction + consumed := make(map[int]bool) - continue - } + for _, decl := range file.Decls { + funcDecl, ok := decl.(*ast.FuncDecl) + if !ok || funcDecl.Recv != nil { + continue + } - phpFunc.lineNumber = lineNumber - currentPHPFunc = phpFunc + directive, directiveLine := findDirective(funcDecl.Doc, fset, phpFuncRegex) + if directive == "" { + continue } + consumed[directiveLine] = true - if currentPHPFunc != nil && strings.HasPrefix(line, "func ") { - goFunc, err := fp.extractGoFunction(scanner, line) - if err != nil { - return nil, fmt.Errorf("extracting Go function: %w", err) - } + phpFunc, err := fp.parseSignature(directive) + if err != nil { + fmt.Printf("Warning: Error parsing signature '%s': %v\n", directive, err) + continue + } - currentPHPFunc.GoFunction = goFunc + if err := validator.validateFunction(*phpFunc); err != nil { + fmt.Printf("Warning: Invalid function '%s': %v\n", phpFunc.Name, err) + continue + } - if err := validator.validateGoFunctionSignatureWithOptions(*currentPHPFunc, false); err != nil { - fmt.Printf("Warning: Go function signature mismatch for %q: %v\n", currentPHPFunc.Name, err) - currentPHPFunc = nil + if err := validator.validateTypes(*phpFunc); err != nil { + fmt.Printf("Warning: Function '%s' uses unsupported types: %v\n", phpFunc.Name, err) + continue + } - continue - } + phpFunc.lineNumber = directiveLine + phpFunc.GoFunction = extractNodeSource(src, fset, funcDecl) - functions = append(functions, *currentPHPFunc) - currentPHPFunc = nil + if err := validator.validateGoFunctionSignatureWithOptions(*phpFunc, false); err != nil { + fmt.Printf("Warning: Go function signature mismatch for %q: %v\n", phpFunc.Name, err) + continue } - } - if currentPHPFunc != nil { - return nil, fmt.Errorf("//export_php function directive at line %d is not followed by a function declaration", currentPHPFunc.lineNumber) + functions = append(functions, *phpFunc) } - return functions, scanner.Err() -} - -func (fp *FuncParser) extractGoFunction(scanner *bufio.Scanner, firstLine string) (string, error) { - goFunc := firstLine + "\n" - braceCount := 1 - - for scanner.Scan() { - line := scanner.Text() - goFunc += line + "\n" - - for _, char := range line { - switch char { - case '{': - braceCount++ - case '}': - braceCount-- - } - } - - if braceCount == 0 { - break - } + if err := checkOrphanDirectives(file, fset, phpFuncRegex, consumed, "//export_php function"); err != nil { + return nil, err } - return goFunc, nil + return functions, nil } func (fp *FuncParser) parseSignature(signature string) (*phpFunction, error) { diff --git a/internal/extgen/funcparser_test.go b/internal/extgen/funcparser_test.go index 282d23b4f4..026345ac6b 100644 --- a/internal/extgen/funcparser_test.go +++ b/internal/extgen/funcparser_test.go @@ -9,6 +9,29 @@ import ( "github.com/stretchr/testify/require" ) +func TestFunctionParserHandlesBracesInStringsAndComments(t *testing.T) { + input := "package main\n\n" + + "//export_php:function tricky(int $a): int\n" + + "func tricky(a int64) int64 {\n" + + "\ts := \"}\" + \"{\" // comment with braces } {\n" + + "\t_ = s\n" + + "\treturn a\n" + + "}\n" + + tmpDir := t.TempDir() + fileName := filepath.Join(tmpDir, "tricky.go") + require.NoError(t, os.WriteFile(fileName, []byte(input), 0644)) + + parser := &FuncParser{} + functions, err := parser.parse(fileName) + require.NoError(t, err) + require.Len(t, functions, 1) + + assert.Equal(t, "tricky", functions[0].Name) + assert.Contains(t, functions[0].GoFunction, `s := "}" + "{"`) + assert.Contains(t, functions[0].GoFunction, "return a") +} + func TestFunctionParser(t *testing.T) { tests := []struct { name string