@@ -130,11 +130,39 @@ module RFC3629
130130 include RFC5234
131131 include RFC3629
132132
133+ # CHAR8 = %x01-ff
134+ # ; any OCTET except NUL, %x00
135+ CHAR8 = /[\x01 -\xff ]/n
136+
137+ # list-wildcards = "%" / "*"
138+ LIST_WILDCARDS = /[%*]/n
133139 # quoted-specials = DQUOTE / "\"
134140 QUOTED_SPECIALS = /["\\ ]/n
135141 # resp-specials = "]"
136142 RESP_SPECIALS = /[\] ]/n
137143
144+ # atomish = 1*<any ATOM-CHAR except "[">
145+ # ; We use "atomish" for msg-att and section, in order
146+ # ; to simplify "BODY[HEADER.FIELDS (foo bar)]".
147+ #
148+ # atom-specials = "(" / ")" / "{" / SP / CTL / list-wildcards /
149+ # quoted-specials / resp-specials
150+ # ATOM-CHAR = <any CHAR except atom-specials>
151+ # atom = 1*ATOM-CHAR
152+ # ASTRING-CHAR = ATOM-CHAR / resp-specials
153+ # tag = 1*<any ASTRING-CHAR except "+">
154+
155+ ATOM_SPECIALS = /[(){ \x00 -\x1f \x7f %*"\\ \] ]/n
156+ ASTRING_SPECIALS = /[(){ \x00 -\x1f \x7f %*"\\ ]/n
157+
158+ ASTRING_CHAR = CHAR - ASTRING_SPECIALS
159+ ATOM_CHAR = CHAR - ATOM_SPECIALS
160+
161+ ATOM = /#{ ATOM_CHAR } +/n
162+ ASTRING_CHARS = /#{ ASTRING_CHAR } +/n
163+ ATOMISH = /#{ ATOM_CHAR - /[\[ ]/ } +/
164+ TAG = /#{ ASTRING_CHAR - /[+]/ } +/
165+
138166 # TEXT-CHAR = <any CHAR except CR and LF>
139167 TEXT_CHAR = CHAR - /[\r \n ]/
140168
@@ -167,6 +195,19 @@ module RFC3629
167195 TEXT_rev1 = /#{ TEXT_CHAR } +/
168196 TEXT_rev2 = /#{ Regexp . union TEXT_CHAR , UTF8_2 , UTF8_3 , UTF8_4 } +/
169197
198+ # RFC3501:
199+ # literal = "{" number "}" CRLF *CHAR8
200+ # ; Number represents the number of CHAR8s
201+ # RFC9051:
202+ # literal = "{" number64 ["+"] "}" CRLF *CHAR8
203+ # ; <number64> represents the number of CHAR8s.
204+ # ; A non-synchronizing literal is distinguished
205+ # ; from a synchronizing literal by the presence of
206+ # ; "+" before the closing "}".
207+ # ; Non-synchronizing literals are not allowed when
208+ # ; sent from server to the client.
209+ LITERAL = /\{ (\d +)\} \r \n /n
210+
170211 module_function
171212
172213 def unescape_quoted! ( quoted )
@@ -185,30 +226,36 @@ def unescape_quoted(quoted)
185226
186227 # the default, used in most places
187228 BEG_REGEXP = /\G (?:\
188- (?# 1: SPACE )( +)|\
189- (?# 2: NIL )(NIL)(?=[\x80 -\xff (){ \x00 -\x1f \x7f %*"\\ \[ \] +])|\
190- (?# 3: NUMBER )(\d +)(?=[\x80 -\xff (){ \x00 -\x1f \x7f %*"\\ \[ \] +])|\
191- (?# 4: ATOM )([^\x80 -\xff (){ \x00 -\x1f \x7f %*"\\ \[ \] +]+)|\
192- (?# 5: QUOTED )#{ Patterns ::QUOTED_rev2 } |\
193- (?# 6: LPAR )(\( )|\
194- (?# 7: RPAR )(\) )|\
195- (?# 8: BSLASH )(\\ )|\
196- (?# 9: STAR )(\* )|\
197- (?# 10: LBRA )(\[ )|\
198- (?# 11: RBRA )(\] )|\
199- (?# 12: LITERAL )\{ (\d +)\} \r \n |\
200- (?# 13: PLUS )(\+ )|\
201- (?# 14: PERCENT )(%)|\
202- (?# 15: CRLF )(\r \n )|\
203- (?# 16: EOF )(\z ))/ni
229+ (?# 1: SPACE )( )|\
230+ (?# 2: ATOM prefixed with a compatible subtype)\
231+ ((?:\
232+ (?# 3: NIL )(NIL)|\
233+ (?# 4: NUMBER )(\d +)|\
234+ (?# 5: PLUS )(\+ ))\
235+ (?# 6: ATOM remaining after prefix )(#{ Patterns ::ATOMISH } )?\
236+ (?# This enables greedy alternation without lookahead, in linear time.)\
237+ )|\
238+ (?# Also need to check for ATOM without a subtype prefix.)\
239+ (?# 7: ATOM )(#{ Patterns ::ATOMISH } )|\
240+ (?# 8: QUOTED )#{ Patterns ::QUOTED_rev2 } |\
241+ (?# 9: LPAR )(\( )|\
242+ (?# 10: RPAR )(\) )|\
243+ (?# 11: BSLASH )(\\ )|\
244+ (?# 12: STAR )(\* )|\
245+ (?# 13: LBRA )(\[ )|\
246+ (?# 14: RBRA )(\] )|\
247+ (?# 15: LITERAL )#{ Patterns ::LITERAL } |\
248+ (?# 16: PERCENT )(%)|\
249+ (?# 17: CRLF )(\r \n )|\
250+ (?# 18: EOF )(\z ))/ni
204251
205252 # envelope, body(structure), namespaces
206253 DATA_REGEXP = /\G (?:\
207254 (?# 1: SPACE )( )|\
208255 (?# 2: NIL )(NIL)|\
209256 (?# 3: NUMBER )(\d +)|\
210257 (?# 4: QUOTED )#{ Patterns ::QUOTED_rev2 } |\
211- (?# 5: LITERAL )\{ ( \d +) \} \r \n |\
258+ (?# 5: LITERAL )#{ Patterns :: LITERAL } |\
212259 (?# 6: LPAR )(\( )|\
213260 (?# 7: RPAR )(\) ))/ni
214261
@@ -1501,38 +1548,42 @@ def next_token
15011548 @pos = $~. end ( 0 )
15021549 if $1
15031550 return Token . new ( T_SPACE , $+)
1504- elsif $2
1505- return Token . new ( T_NIL , $+)
1551+ elsif $2 && $6
1552+ # A NIL, NUMBER, or PLUS prefix followed by more atom chars ($6) is a single ATOM; $2 holds the whole match.
1553+ return Token . new ( T_ATOM , $2)
15061554 elsif $3
1507- return Token . new ( T_NUMBER , $+)
1555+ return Token . new ( T_NIL , $+)
15081556 elsif $4
1509- return Token . new ( T_ATOM , $+)
1557+ return Token . new ( T_NUMBER , $+)
15101558 elsif $5
1559+ return Token . new ( T_PLUS , $+)
1560+ elsif $7
1561+ # match ATOM, without a NUMBER, NIL, or PLUS prefix
1562+ return Token . new ( T_ATOM , $+)
1563+ elsif $8
15111564 return Token . new ( T_QUOTED , Patterns . unescape_quoted ( $+) )
1512- elsif $6
1565+ elsif $9
15131566 return Token . new ( T_LPAR , $+)
1514- elsif $7
1567+ elsif $10
15151568 return Token . new ( T_RPAR , $+)
1516- elsif $8
1569+ elsif $11
15171570 return Token . new ( T_BSLASH , $+)
1518- elsif $9
1571+ elsif $12
15191572 return Token . new ( T_STAR , $+)
1520- elsif $10
1573+ elsif $13
15211574 return Token . new ( T_LBRA , $+)
1522- elsif $11
1575+ elsif $14
15231576 return Token . new ( T_RBRA , $+)
1524- elsif $12
1577+ elsif $15
15251578 len = $+. to_i
15261579 val = @str [ @pos , len ]
15271580 @pos += len
15281581 return Token . new ( T_LITERAL , val )
1529- elsif $13
1530- return Token . new ( T_PLUS , $+)
1531- elsif $14
1582+ elsif $16
15321583 return Token . new ( T_PERCENT , $+)
1533- elsif $15
1584+ elsif $17
15341585 return Token . new ( T_CRLF , $+)
1535- elsif $16
1586+ elsif $18
15361587 return Token . new ( T_EOF , $+)
15371588 else
15381589 parse_error ( "[Net::IMAP BUG] BEG_REGEXP is invalid" )
0 commit comments