@@ -39,7 +39,7 @@ shorthand_range = {
3939 | range_full
4040}
4141
42- // Order of operations matter
42+ // Main operations - using specific arg types where needed
4343regex_extract = { "regex_extract" ~ ":" ~ regex_arg ~ (":" ~ number)? }
4444filter_not = { "filter_not" ~ ":" ~ regex_arg }
4545filter = { "filter" ~ ":" ~ regex_arg }
@@ -48,13 +48,13 @@ map = { "map" ~ ":" ~ map_operation }
4848split = { "split" ~ ":" ~ split_arg ~ ":" ~ range_spec? }
4949substring = { "substring" ~ ":" ~ range_spec }
5050replace = { "replace" ~ ":" ~ sed_string }
51- append = { "append" ~ ":" ~ arg }
52- prepend = { "prepend" ~ ":" ~ arg }
51+ append = { "append" ~ ":" ~ simple_arg }
52+ prepend = { "prepend" ~ ":" ~ simple_arg }
5353upper = { "upper" }
5454lower = { "lower" }
5555trim = { "trim" ~ (":" ~ direction)? }
56- strip = { "strip" ~ ":" ~ arg }
57- join = { "join" ~ ":" ~ arg }
56+ strip = { "strip" ~ ":" ~ simple_arg }
57+ join = { "join" ~ ":" ~ simple_arg }
5858slice = { "slice" ~ ":" ~ range_spec }
5959sort = { "sort" ~ (":" ~ sort_direction)? }
6060reverse = { "reverse" }
@@ -64,7 +64,7 @@ pad = { "pad" ~ ":" ~ number ~ (":" ~ pad_char)? ~ (":" ~ direction)?
6464// Direction specifiers
6565direction = { "left" | "right" | "both" }
6666sort_direction = { "asc" | "desc" }
67- pad_char = @{ (escaped_char | normal_char) + }
67+ pad_char = @{ simple_arg_content + }
6868
6969// Map operation
7070map_operation = { "{" ~ map_operation_list ~ "}" }
@@ -84,45 +84,35 @@ map_inner_operation = {
8484 | map_regex_extract
8585}
8686
87- // Map-specific operations that use map_regex_arg
87+ // Map-specific regex extract
8888map_regex_extract = { "regex_extract" ~ ":" ~ map_regex_arg ~ (":" ~ number)? }
8989
90- // Regular arguments
91- arg = { (escaped_char | normal_char | " " | "\t")* }
92- normal_char = { !(":" | "|" | "}" | "{" | "\\") ~ ANY }
93- escaped_char = { "\\" ~ (":" | "|" | "\\" | "n" | "t" | "r" | "/" | "{" | "}") }
94-
95- // Split arguments - allow pipes unless followed by operation names
96- split_arg = @{ (split_escaped_char | split_pipe_or_char)* }
97- split_pipe_or_char = { split_pipe | split_normal_char }
98- split_pipe = { "|" ~ !operation_name }
99- split_normal_char = {
100- !(":" ~ (operation_name | range_spec | number)) ~ !("|" ~ operation_name) ~ !("}" ~ (EOI | !(split_any_char*))) ~ ANY
101- }
90+ // Simple arguments: an atomic, possibly empty run of escaped characters and of characters other than ":", "|", "}", "{" and "\"
91+ simple_arg = @{ simple_arg_content* }
92+ simple_arg_content = { escaped_char | simple_normal_char }
93+ simple_normal_char = { !(":" | "|" | "}" | "{" | "\\") ~ ANY }
94+
95+ // Split args - atomic; a backslash escapes any one character; the argument ends before ":" followed by a number or range operator, before "|" followed by an operation keyword, and before a "}" that is the last character of the input (other pipes and braces are kept)
96+ split_arg = @{ (split_escaped_char | split_content)* }
97+ split_content = { !(":" ~ (number | range_part)) ~ !("|" ~ operation_keyword) ~ !("}" ~ EOI) ~ ANY }
10298split_escaped_char = { "\\" ~ ANY }
103- split_any_char = { ANY }
104-
105- // Regex arguments - allow pipes unless followed by operation names, and handle braces
106- regex_arg = @{ (regex_escaped_char | regex_pipe_or_char)* }
107- regex_pipe_or_char = { regex_pipe | regex_normal_char }
108- regex_pipe = { "|" ~ !operation_name }
109- regex_normal_char = {
110- !(":" ~ (operation_name | range_spec | number)) ~ !("|" ~ operation_name) ~ !("}" ~ (EOI | "|")) ~ ANY
111- }
99+
100+ // Regex args - atomic; a backslash escapes any one character; the argument ends before ":" followed by a number or range operator, before "|" followed by an operation keyword, and before a "}" that is the last character of the input (other pipes and braces stay part of the pattern)
101+ regex_arg = @{ (regex_escaped_char | regex_content)* }
102+ regex_content = { !(":" ~ (number | range_part)) ~ !("|" ~ operation_keyword) ~ !("}" ~ EOI) ~ ANY }
112103regex_escaped_char = { "\\" ~ ANY }
113104
114- // Regex arguments for map operations (improved to handle braces in regex)
115- map_regex_arg = @{ (map_regex_escaped_char | map_regex_balanced_brace | map_regex_pipe_or_char)* }
116- map_regex_pipe_or_char = { map_regex_pipe | map_regex_normal_char }
117- map_regex_pipe = { "|" ~ !operation_name }
118- map_regex_balanced_brace = { "{" ~ (!("}" ~ &("|" | "}" | EOI | (":" ~ number))) ~ ANY)* ~ "}" }
119- map_regex_normal_char = {
120- !(":" ~ (operation_name | range_spec | number)) ~ !("|" ~ operation_name) ~ !("{" | ("}" ~ &("|" | "}" | EOI | (":" ~ number)))) ~ ANY
121- }
105+ // Map regex args - atomic; "{" opens a group that runs to the next "}" (no nesting, and backslash escapes are not honored inside the group); outside a group the argument ends before ":" followed by a number, before "|" followed by an operation keyword, and before a "}" followed by "|", "}" or end of input
106+ map_regex_arg = @{ (map_regex_escaped_char | map_regex_brace | map_regex_content)* }
107+ map_regex_brace = { "{" ~ (!"}" ~ ANY)* ~ "}" }
108+ map_regex_content = { !(":" ~ number) ~ !("|" ~ operation_keyword) ~ !("{" | ("}" ~ ("|" | "}" | EOI))) ~ ANY }
122109map_regex_escaped_char = { "\\" ~ ANY }
123110
124- // Operation names for lookahead
125- operation_name = {
111+ // Common escaped character: a backslash followed by any single character
112+ escaped_char = { "\\" ~ ANY }
113+
114+ // Operation keywords for lookahead (simplified list)
115+ operation_keyword = {
126116 "split"
127117 | "join"
128118 | "substring"
@@ -145,7 +135,10 @@ operation_name = {
145135 | "pad"
146136}
147137
148- // Sed strings (preserve sed/regex escape sequences)
138+ // Range operators for lookahead; "..=" comes first because PEG choice is ordered and ".." is a prefix of "..=", so the longer form would otherwise never match
139+ range_part = { "..=" | ".." }
140+
141+ // Sed strings
149142sed_string = { "s/" ~ sed_pattern ~ "/" ~ sed_replacement ~ "/" ~ sed_flags? }
150143sed_pattern = @{ sed_content }
151144sed_replacement = @{ sed_content }
0 commit comments