Skip to content

Commit b0a8781

Browse files
committed
feat: add support for by-reference functions and anonymous class modifiers
- Introduced `by_ref` field in function statements to handle by-reference return types.
- Added `modifiers` field in anonymous class expressions to support new modifiers.
- Enhanced parser to accommodate optional leading semicolons in switch statements.
- Implemented support for trailing commas in attribute lists.
- Updated tests to cover new functionality and edge cases.
1 parent c8bb19a commit b0a8781

File tree

35 files changed

+221
-51
lines changed

35 files changed

+221
-51
lines changed

src/ast/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ pub enum Stmt<'ast> {
112112
Function {
113113
attributes: &'ast [AttributeGroup<'ast>],
114114
name: &'ast Token,
115+
by_ref: bool,
115116
params: &'ast [Param<'ast>],
116117
return_type: Option<&'ast Type<'ast>>,
117118
body: &'ast [StmtId<'ast>],
@@ -380,6 +381,7 @@ pub enum Expr<'ast> {
380381
},
381382
AnonymousClass {
382383
attributes: &'ast [AttributeGroup<'ast>],
384+
modifiers: &'ast [Token],
383385
args: &'ast [Arg<'ast>],
384386
extends: Option<Name<'ast>>,
385387
implements: &'ast [Name<'ast>],

src/parser/attributes.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,10 @@ impl<'src, 'ast> Parser<'src, 'ast> {
2828

2929
if self.current_token.kind == TokenKind::Comma {
3030
self.bump();
31+
// Support trailing comma (possible_comma in grammar)
32+
if self.current_token.kind == TokenKind::CloseBracket {
33+
break;
34+
}
3135
} else {
3236
break;
3337
}

src/parser/control_flow.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,11 @@ impl<'src, 'ast> Parser<'src, 'ast> {
322322
false
323323
};
324324

325+
// Optional leading semicolon: '{' ';' case_list or ':' ';' case_list
326+
if self.current_token.kind == TokenKind::SemiColon {
327+
self.bump();
328+
}
329+
325330
let mut cases = bumpalo::collections::Vec::new_in(self.arena);
326331
let end_token = if is_alt {
327332
TokenKind::EndSwitch

src/parser/definitions.rs

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -159,9 +159,12 @@ impl<'src, 'ast> Parser<'src, 'ast> {
159159
pub(super) fn parse_anonymous_class(
160160
&mut self,
161161
attributes: &'ast [AttributeGroup<'ast>],
162+
modifiers: &'ast [Token],
162163
) -> (ExprId<'ast>, &'ast [Arg<'ast>]) {
163164
let start = if let Some(attr) = attributes.first() {
164165
attr.span.start
166+
} else if let Some(m) = modifiers.first() {
167+
m.span.start
165168
} else {
166169
self.current_token.span.start
167170
};
@@ -215,6 +218,7 @@ impl<'src, 'ast> Parser<'src, 'ast> {
215218
return (
216219
self.arena.alloc(Expr::AnonymousClass {
217220
attributes,
221+
modifiers,
218222
args: ctor_args,
219223
extends,
220224
implements: self.arena.alloc_slice_copy(&implements),
@@ -250,6 +254,7 @@ impl<'src, 'ast> Parser<'src, 'ast> {
250254
(
251255
self.arena.alloc(Expr::AnonymousClass {
252256
attributes,
257+
modifiers,
253258
args: ctor_args,
254259
extends,
255260
implements: self.arena.alloc_slice_copy(&implements),
@@ -1590,8 +1595,23 @@ impl<'src, 'ast> Parser<'src, 'ast> {
15901595
};
15911596
self.bump(); // Eat function
15921597

1593-
// Name
1594-
let name = if self.current_token.kind == TokenKind::Identifier {
1598+
// By-reference return (returns_ref)
1599+
let by_ref = if matches!(
1600+
self.current_token.kind,
1601+
TokenKind::Ampersand
1602+
| TokenKind::AmpersandFollowedByVarOrVararg
1603+
| TokenKind::AmpersandNotFollowedByVarOrVararg
1604+
) {
1605+
self.bump();
1606+
true
1607+
} else {
1608+
false
1609+
};
1610+
1611+
// Name (function_name: T_STRING | T_READONLY)
1612+
let name = if self.current_token.kind == TokenKind::Identifier
1613+
|| self.current_token.kind == TokenKind::Readonly
1614+
{
15951615
let token = self.arena.alloc(self.current_token);
15961616
self.bump();
15971617
token
@@ -1626,6 +1646,7 @@ impl<'src, 'ast> Parser<'src, 'ast> {
16261646
self.arena.alloc(Stmt::Function {
16271647
attributes,
16281648
name,
1649+
by_ref,
16291650
params,
16301651
return_type,
16311652
body,

src/parser/expr.rs

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1225,18 +1225,36 @@ impl<'src, 'ast> Parser<'src, 'ast> {
12251225
&[]
12261226
};
12271227

1228+
// Parse optional modifiers for anonymous class
1229+
let mut modifiers = std::vec::Vec::new();
1230+
while matches!(
1231+
self.current_token.kind,
1232+
TokenKind::Abstract | TokenKind::Final | TokenKind::Readonly
1233+
) {
1234+
modifiers.push(self.current_token);
1235+
self.bump();
1236+
}
1237+
12281238
if self.current_token.kind == TokenKind::Class {
1229-
let (class, args) = self.parse_anonymous_class(attributes);
1239+
let (class, args) = self
1240+
.parse_anonymous_class(attributes, self.arena.alloc_slice_copy(&modifiers));
12301241
let span = Span::new(token.span.start, class.span().end);
12311242
self.arena.alloc(Expr::New { class, args, span })
12321243
} else {
1233-
if !attributes.is_empty() {
1244+
if !attributes.is_empty() || !modifiers.is_empty() {
1245+
let start = if let Some(attr) = attributes.first() {
1246+
attr.span.start
1247+
} else {
1248+
modifiers.first().unwrap().span.start
1249+
};
1250+
let end = if let Some(attr) = attributes.last() {
1251+
attr.span.end
1252+
} else {
1253+
modifiers.last().unwrap().span.end
1254+
};
12341255
self.errors.push(ParseError {
1235-
span: Span::new(
1236-
attributes.first().unwrap().span.start,
1237-
attributes.last().unwrap().span.end,
1238-
),
1239-
message: "Attributes are only allowed on anonymous classes in new expression",
1256+
span: Span::new(start, end),
1257+
message: "Attributes and modifiers are only allowed on anonymous classes in new expression",
12401258
});
12411259
}
12421260

src/parser/stmt.rs

Lines changed: 11 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -92,11 +92,22 @@ impl<'src, 'ast> Parser<'src, 'ast> {
9292
}
9393
let start = self.current_token.span.start;
9494
self.bump();
95+
// Parentheses are required by grammar: T_HALT_COMPILER '(' ')' ';'
9596
if self.current_token.kind == TokenKind::OpenParen {
9697
self.bump();
98+
} else {
99+
self.errors.push(ParseError {
100+
span: self.current_token.span,
101+
message: "Expected '(' after __halt_compiler",
102+
});
97103
}
98104
if self.current_token.kind == TokenKind::CloseParen {
99105
self.bump();
106+
} else {
107+
self.errors.push(ParseError {
108+
span: self.current_token.span,
109+
message: "Expected ')' after __halt_compiler(",
110+
});
100111
}
101112
self.expect_semicolon();
102113

@@ -338,25 +349,6 @@ impl<'src, 'ast> Parser<'src, 'ast> {
338349
});
339350
}
340351
Some(statements.into_bump_slice() as &'ast [StmtId<'ast>])
341-
} else if self.current_token.kind == TokenKind::Colon {
342-
self.bump();
343-
let mut statements = bumpalo::collections::Vec::new_in(self.arena);
344-
while !matches!(
345-
self.current_token.kind,
346-
TokenKind::EndDeclare | TokenKind::Eof
347-
) {
348-
statements.push(self.parse_stmt());
349-
}
350-
if self.current_token.kind == TokenKind::EndDeclare {
351-
self.bump();
352-
self.expect_semicolon();
353-
} else {
354-
self.errors.push(crate::ast::ParseError {
355-
span: self.current_token.span,
356-
message: "Missing enddeclare",
357-
});
358-
}
359-
Some(statements.into_bump_slice() as &'ast [StmtId<'ast>])
360352
} else {
361353
self.expect_semicolon();
362354
None

tests/grammar_review_fixes.rs

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
use bumpalo::Bump;
2+
use php_parser_rs::lexer::Lexer;
3+
use php_parser_rs::parser::Parser;
4+
5+
#[test]
6+
fn test_function_by_ref() {
7+
let code = b"<?php function &getRef() { return $x; }";
8+
let arena = Bump::new();
9+
let lexer = Lexer::new(code);
10+
let mut parser = Parser::new(lexer, &arena);
11+
let program = parser.parse_program();
12+
13+
assert!(program.errors.is_empty());
14+
assert!(
15+
program.statements.len() >= 2,
16+
"Program should have at least 2 statements (open tag + function)"
17+
);
18+
19+
// Skip the Nop statement from <?php tag and check the function
20+
let func_stmt = program
21+
.statements
22+
.iter()
23+
.find(|stmt| matches!(**stmt, php_parser_rs::ast::Stmt::Function { .. }))
24+
.expect("Should find a Function statement");
25+
26+
if let php_parser_rs::ast::Stmt::Function { by_ref, .. } = **func_stmt {
27+
assert!(by_ref, "Function should have by_ref=true");
28+
}
29+
}
30+
31+
#[test]
32+
fn test_function_readonly_name() {
33+
let code = b"<?php function readonly() { }";
34+
let arena = Bump::new();
35+
let lexer = Lexer::new(code);
36+
let mut parser = Parser::new(lexer, &arena);
37+
let program = parser.parse_program();
38+
assert!(program.errors.is_empty());
39+
}
40+
41+
#[test]
42+
fn test_anonymous_class_modifiers() {
43+
let code = b"<?php $x = new readonly class { };";
44+
let arena = Bump::new();
45+
let lexer = Lexer::new(code);
46+
let mut parser = Parser::new(lexer, &arena);
47+
let program = parser.parse_program();
48+
assert!(program.errors.is_empty());
49+
50+
// Find the expression statement (skip Nop from open tag)
51+
let expr_stmt = program
52+
.statements
53+
.iter()
54+
.find(|stmt| matches!(**stmt, php_parser_rs::ast::Stmt::Expression { .. }))
55+
.expect("Should find an Expression statement");
56+
57+
// Check modifiers are parsed
58+
if let php_parser_rs::ast::Stmt::Expression { expr, .. } = **expr_stmt {
59+
if let php_parser_rs::ast::Expr::Assign { expr: right, .. } = *expr {
60+
if let php_parser_rs::ast::Expr::New { class, .. } = *right {
61+
if let php_parser_rs::ast::Expr::AnonymousClass { modifiers, .. } = *class {
62+
assert_eq!(modifiers.len(), 1, "Should have one modifier");
63+
assert_eq!(
64+
modifiers[0].kind,
65+
php_parser_rs::lexer::token::TokenKind::Readonly
66+
);
67+
} else {
68+
panic!("Expected AnonymousClass");
69+
}
70+
} else {
71+
panic!("Expected New expression");
72+
}
73+
} else {
74+
panic!("Expected Assignment");
75+
}
76+
}
77+
}
78+
79+
#[test]
80+
fn test_switch_leading_semicolon() {
81+
let code = b"<?php switch ($x) {; case 1: break; }";
82+
let arena = Bump::new();
83+
let lexer = Lexer::new(code);
84+
let mut parser = Parser::new(lexer, &arena);
85+
let program = parser.parse_program();
86+
assert!(program.errors.is_empty());
87+
}
88+
89+
#[test]
90+
fn test_attribute_trailing_comma() {
91+
let code = b"<?php #[Attr1, Attr2,] class Foo {}";
92+
let arena = Bump::new();
93+
let lexer = Lexer::new(code);
94+
let mut parser = Parser::new(lexer, &arena);
95+
let program = parser.parse_program();
96+
assert!(program.errors.is_empty());
97+
}
98+
99+
#[test]
100+
fn test_halt_compiler_requires_parens() {
101+
let code = b"<?php __halt_compiler;";
102+
let arena = Bump::new();
103+
let lexer = Lexer::new(code);
104+
let mut parser = Parser::new(lexer, &arena);
105+
let program = parser.parse_program();
106+
// Should have error for missing parentheses
107+
assert!(!program.errors.is_empty());
108+
assert!(program.errors[0].message.contains("'('"));
109+
}
110+
111+
#[test]
112+
fn test_halt_compiler_with_parens() {
113+
let code = b"<?php __halt_compiler();";
114+
let arena = Bump::new();
115+
let lexer = Lexer::new(code);
116+
let mut parser = Parser::new(lexer, &arena);
117+
let program = parser.parse_program();
118+
assert!(program.errors.is_empty());
119+
}

tests/property_hooks.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@ fn test_property_hooks() {
2727

2828
let statements: Vec<&Stmt> = program
2929
.statements
30-
.iter().copied()
30+
.iter()
31+
.copied()
3132
.filter(|s| !matches!(s, Stmt::Nop { .. }))
3233
.collect();
3334

tests/snapshots/additional_edge_cases__complex_type_combinations.snap

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
---
22
source: tests/additional_edge_cases.rs
3-
assertion_line: 171
43
expression: program
54
---
65
Program {
@@ -20,6 +19,7 @@ Program {
2019
end: 22,
2120
},
2221
},
22+
by_ref: false,
2323
params: [
2424
Param {
2525
attributes: [],

tests/snapshots/additional_edge_cases__dnf_types.snap

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
---
22
source: tests/additional_edge_cases.rs
3-
assertion_line: 51
43
expression: program
54
---
65
Program {
@@ -20,6 +19,7 @@ Program {
2019
end: 19,
2120
},
2221
},
22+
by_ref: false,
2323
params: [
2424
Param {
2525
attributes: [],

0 commit comments

Comments
 (0)