Skip to content

Commit 521a006

Browse files
committed
perf(pest): simplify grammar, remove unnecessary lookaheads
1 parent f6deb58 commit 521a006

File tree

1 file changed

+32
-39
lines changed

1 file changed

+32
-39
lines changed

src/pipeline/template.pest

Lines changed: 32 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ shorthand_range = {
3939
| range_full
4040
}
4141

42-
// Order of operations matter
42+
// Main operations - using specific arg types where needed
4343
regex_extract = { "regex_extract" ~ ":" ~ regex_arg ~ (":" ~ number)? }
4444
filter_not = { "filter_not" ~ ":" ~ regex_arg }
4545
filter = { "filter" ~ ":" ~ regex_arg }
@@ -48,13 +48,13 @@ map = { "map" ~ ":" ~ map_operation }
4848
split = { "split" ~ ":" ~ split_arg ~ ":" ~ range_spec? }
4949
substring = { "substring" ~ ":" ~ range_spec }
5050
replace = { "replace" ~ ":" ~ sed_string }
51-
append = { "append" ~ ":" ~ arg }
52-
prepend = { "prepend" ~ ":" ~ arg }
51+
append = { "append" ~ ":" ~ simple_arg }
52+
prepend = { "prepend" ~ ":" ~ simple_arg }
5353
upper = { "upper" }
5454
lower = { "lower" }
5555
trim = { "trim" ~ (":" ~ direction)? }
56-
strip = { "strip" ~ ":" ~ arg }
57-
join = { "join" ~ ":" ~ arg }
56+
strip = { "strip" ~ ":" ~ simple_arg }
57+
join = { "join" ~ ":" ~ simple_arg }
5858
slice = { "slice" ~ ":" ~ range_spec }
5959
sort = { "sort" ~ (":" ~ sort_direction)? }
6060
reverse = { "reverse" }
@@ -64,7 +64,7 @@ pad = { "pad" ~ ":" ~ number ~ (":" ~ pad_char)? ~ (":" ~ direction)?
6464
// Direction specifiers
6565
direction = { "left" | "right" | "both" }
6666
sort_direction = { "asc" | "desc" }
67-
pad_char = @{ (escaped_char | normal_char)+ }
67+
pad_char = @{ simple_arg_content+ }
6868

6969
// Map operation
7070
map_operation = { "{" ~ map_operation_list ~ "}" }
@@ -84,45 +84,35 @@ map_inner_operation = {
8484
| map_regex_extract
8585
}
8686

87-
// Map-specific operations that use map_regex_arg
87+
// Map-specific regex extract
8888
map_regex_extract = { "regex_extract" ~ ":" ~ map_regex_arg ~ (":" ~ number)? }
8989

90-
// Regular arguments
91-
arg = { (escaped_char | normal_char | " " | "\t")* }
92-
normal_char = { !(":" | "|" | "}" | "{" | "\\") ~ ANY }
93-
escaped_char = { "\\" ~ (":" | "|" | "\\" | "n" | "t" | "r" | "/" | "{" | "}") }
94-
95-
// Split arguments - allow pipes unless followed by operation names
96-
split_arg = @{ (split_escaped_char | split_pipe_or_char)* }
97-
split_pipe_or_char = { split_pipe | split_normal_char }
98-
split_pipe = { "|" ~ !operation_name }
99-
split_normal_char = {
100-
!(":" ~ (operation_name | range_spec | number)) ~ !("|" ~ operation_name) ~ !("}" ~ (EOI | !(split_any_char*))) ~ ANY
101-
}
90+
// Simplified argument handling - three types to handle specific cases
91+
simple_arg = @{ simple_arg_content* }
92+
simple_arg_content = { escaped_char | simple_normal_char }
93+
simple_normal_char = { !(":" | "|" | "}" | "{" | "\\") ~ ANY }
94+
95+
// Split args - need to handle pipes that aren't operations
96+
split_arg = @{ (split_escaped_char | split_content)* }
97+
// One character of a split separator. Consumes any single character unless the
// input at this position is:
//   - ":" followed by `number` or `range_part` (start of the trailing range),
//   - "|" followed by an `operation_keyword` (start of the next operation), or
//   - "}" immediately followed by end of input (the closing template brace).
// A "|" that is not followed by an operation keyword is consumed as content.
split_content = { !(":" ~ (number | range_part)) ~ !("|" ~ operation_keyword) ~ !("}" ~ EOI) ~ ANY }
10298
split_escaped_char = { "\\" ~ ANY }
103-
split_any_char = { ANY }
104-
105-
// Regex arguments - allow pipes unless followed by operation names, and handle braces
106-
regex_arg = @{ (regex_escaped_char | regex_pipe_or_char)* }
107-
regex_pipe_or_char = { regex_pipe | regex_normal_char }
108-
regex_pipe = { "|" ~ !operation_name }
109-
regex_normal_char = {
110-
!(":" ~ (operation_name | range_spec | number)) ~ !("|" ~ operation_name) ~ !("}" ~ (EOI | "|")) ~ ANY
111-
}
99+
100+
// Regex args - need to handle pipes and braces in regex patterns
101+
regex_arg = @{ (regex_escaped_char | regex_content)* }
102+
regex_content = { !(":" ~ (number | range_part)) ~ !("|" ~ operation_keyword) ~ !("}" ~ EOI) ~ ANY }
112103
regex_escaped_char = { "\\" ~ ANY }
113104

114-
// Regex arguments for map operations (improved to handle braces in regex)
115-
map_regex_arg = @{ (map_regex_escaped_char | map_regex_balanced_brace | map_regex_pipe_or_char)* }
116-
map_regex_pipe_or_char = { map_regex_pipe | map_regex_normal_char }
117-
map_regex_pipe = { "|" ~ !operation_name }
118-
map_regex_balanced_brace = { "{" ~ (!("}" ~ &("|" | "}" | EOI | (":" ~ number))) ~ ANY)* ~ "}" }
119-
map_regex_normal_char = {
120-
!(":" ~ (operation_name | range_spec | number)) ~ !("|" ~ operation_name) ~ !("{" | ("}" ~ &("|" | "}" | EOI | (":" ~ number)))) ~ ANY
121-
}
105+
// Map regex args - handle braces in regex patterns
106+
map_regex_arg = @{ (map_regex_escaped_char | map_regex_brace | map_regex_content)* }
107+
// A brace group inside a map regex, e.g. a quantifier such as `{2,3}`.
// Matches "{", then every character up to the first "}", then that "}".
// Not nesting- or escape-aware: an inner "{" or a backslash is consumed as
// ordinary content, and the first "}" encountered always closes the group.
map_regex_brace = { "{" ~ (!"}" ~ ANY)* ~ "}" }
108+
// One character of a map regex argument. Consumes any single character unless
// the input at this position is:
//   - ":" followed by `number`,
//   - "|" followed by an `operation_keyword`,
//   - "{" (brace groups are matched by `map_regex_brace`, which is tried
//     before this rule in `map_regex_arg`), or
//   - "}" followed by "|", "}", or end of input.
// All of these checks are negative lookaheads, so none of them consume input.
map_regex_content = { !(":" ~ number) ~ !("|" ~ operation_keyword) ~ !("{" | ("}" ~ ("|" | "}" | EOI))) ~ ANY }
122109
map_regex_escaped_char = { "\\" ~ ANY }
123110

124-
// Operation names for lookahead
125-
operation_name = {
111+
// Common escaped character handling
112+
// A backslash followed by any single character. The grammar does not restrict
// which character may follow the backslash.
escaped_char = { "\\" ~ ANY }
113+
114+
// Operation keywords for lookahead (simplified list)
115+
operation_keyword = {
126116
"split"
127117
| "join"
128118
| "substring"
@@ -145,7 +135,10 @@ operation_name = {
145135
| "pad"
146136
}
147137

148-
// Sed strings (preserve sed/regex escape sequences)
138+
// Range parts for lookahead
139+
// Range operator token: inclusive "..=" or exclusive "..".
// The longer alternative must come first: pest's choice operator is ordered,
// so with ".." listed first the "..=" branch could never be reached. Where this
// rule is used only inside a negative lookahead the result is unchanged; the
// ordering matters if the rule is ever used to consume input.
range_part = { "..=" | ".." }
140+
141+
// Sed strings
149142
sed_string = { "s/" ~ sed_pattern ~ "/" ~ sed_replacement ~ "/" ~ sed_flags? }
150143
sed_pattern = @{ sed_content }
151144
sed_replacement = @{ sed_content }

0 commit comments

Comments
 (0)