@@ -427,6 +427,17 @@ def case_insensitive__nstring
427427 alias nz_number number
428428 alias nz_number? number?
429429
# nz-number64     = digit-nz *DIGIT
#                     ; Unsigned 63-bit integer
#                     ; (0 < n <= 9,223,372,036,854,775,807)
#
# Same matcher as nz-number: the 63-bit upper bound is documented here but
# is not enforced by the parser.
alias nz_number64 nz_number

# uniqueid        = nz-number
#                     ; Strictly ascending
#
# Same matcher as nz-number: neither the valid range nor the ascending
# order is enforced by the parser.
alias uniqueid nz_number
440+
430441 # [RFC3501 & RFC9051:]
431442 # response = *(continue-req / response-data) response-done
432443 #
@@ -607,49 +618,93 @@ def response_data__simple_numeric
607618 alias mailbox_data__exists response_data__simple_numeric
608619 alias mailbox_data__recent response_data__simple_numeric
609620
621+ # RFC3501 & RFC9051:
622+ # msg-att = "(" (msg-att-dynamic / msg-att-static)
623+ # *(SP (msg-att-dynamic / msg-att-static)) ")"
624+ #
625+ # msg-att-dynamic = "FLAGS" SP "(" [flag-fetch *(SP flag-fetch)] ")"
626+ # RFC5257 (ANNOTATE extension):
627+ # msg-att-dynamic =/ "ANNOTATION" SP
628+ # ( "(" entry-att *(SP entry-att) ")" /
629+ # "(" entry *(SP entry) ")" )
630+ # RFC7162 (CONDSTORE extension):
631+ # msg-att-dynamic =/ fetch-mod-resp
632+ # fetch-mod-resp = "MODSEQ" SP "(" permsg-modsequence ")"
633+ # RFC8970 (PREVIEW extension):
634+ # msg-att-dynamic =/ "PREVIEW" SP nstring
635+ #
636+ # RFC3501:
637+ # msg-att-static = "ENVELOPE" SP envelope /
638+ # "INTERNALDATE" SP date-time /
639+ # "RFC822" [".HEADER" / ".TEXT"] SP nstring /
640+ # "RFC822.SIZE" SP number /
641+ # "BODY" ["STRUCTURE"] SP body /
642+ # "BODY" section ["<" number ">"] SP nstring /
643+ # "UID" SP uniqueid
644+ # RFC3516 (BINARY extension):
645+ # msg-att-static =/ "BINARY" section-binary SP (nstring / literal8)
646+ # / "BINARY.SIZE" section-binary SP number
647+ # RFC8514 (SAVEDATE extension):
648+ # msg-att-static =/ "SAVEDATE" SP (date-time / nil)
649+ # RFC8474 (OBJECTID extension):
650+ # msg-att-static =/ fetch-emailid-resp / fetch-threadid-resp
651+ # fetch-emailid-resp = "EMAILID" SP "(" objectid ")"
652+ # fetch-threadid-resp = "THREADID" SP ( "(" objectid ")" / nil )
653+ # RFC9051:
654+ # msg-att-static = "ENVELOPE" SP envelope /
655+ # "INTERNALDATE" SP date-time /
656+ # "RFC822.SIZE" SP number64 /
657+ # "BODY" ["STRUCTURE"] SP body /
658+ # "BODY" section ["<" number ">"] SP nstring /
659+ # "BINARY" section-binary SP (nstring / literal8) /
660+ # "BINARY.SIZE" section-binary SP number /
661+ # "UID" SP uniqueid
662+ #
663+ # Re https://www.rfc-editor.org/errata/eid7246, I'm adding "offset" to the
664+ # official "BINARY" ABNF, like so:
665+ #
666+ # msg-att-static =/ "BINARY" section-binary ["<" number ">"] SP
667+ # (nstring / literal8)
# Parses a parenthesized, space-separated list of message attributes and
# returns them as a Hash keyed by attribute label (as produced by
# msg_att__label).  When a label occurs more than once, the last value
# wins.  +n+ is only used to build the "unknown attribute" error message.
def msg_att(n)
  lpar
  attributes = {}
  more = true
  while more
    label = msg_att__label
    SP!
    attributes[label] =
      case label
      when "UID"                   then uniqueid
      when "FLAGS"                 then flag_list
      when "BODY", "BODYSTRUCTURE" then body
      when /\ABODY\[/ni            then nstring
      when "ENVELOPE"              then envelope
      when "INTERNALDATE"          then date_time
      when "RFC822.SIZE"           then number64
      when "RFC822", "RFC822.HEADER", "RFC822.TEXT"
        nstring # not in rev2
      when "MODSEQ"                then parens__modseq # CONDSTORE
      else
        parse_error("unknown attribute `%s' for {%d}", label, n)
      end
    # Another attribute follows only when an SP separator was matched and
    # the closing paren is not the next token.
    more = SP? && !lookahead_rpar?
  end
  rpar
  attributes
end
696+
697+ # appends "[section]" and "<partial>" to the base label
def msg_att__label
  label = tagged_ext_label
  case label
  when /\A(?:RFC822(?:\.HEADER|\.TEXT)?)\z/ni
    # An optional "[" must be closed by "]"; the brackets are consumed but
    # not added to the label.
    # ignoring "[]" fixes https://bugs.ruby-lang.org/issues/5620
    rbra if lbra?
  when "BODY"
    if peek_lbra?
      label << section
      label << atom if peek_str?("<") # partial
    end
  end
  label
end
654709
655710 def envelope
@@ -687,58 +742,10 @@ def envelope
687742 return result
688743 end
689744
690- def flags_data
691- token = match ( T_ATOM )
692- name = token . value . upcase
693- match ( T_SPACE )
694- return name , flag_list
695- end
696-
697- def internaldate_data
698- token = match ( T_ATOM )
699- name = token . value . upcase
700- match ( T_SPACE )
701- token = match ( T_QUOTED )
702- return name , token . value
703- end
704-
705- def rfc822_text
706- token = match ( T_ATOM )
707- name = token . value . upcase
708- token = lookahead
709- if token . symbol == T_LBRA
710- shift_token
711- match ( T_RBRA )
712- end
713- match ( T_SPACE )
714- return name , nstring
715- end
716-
717- def rfc822_size
718- token = match ( T_ATOM )
719- name = token . value . upcase
720- match ( T_SPACE )
721- return name , number
722- end
723-
724- def body_data
725- token = match ( T_ATOM )
726- name = token . value . upcase
727- token = lookahead
728- if token . symbol == T_SPACE
729- shift_token
730- return name , body
731- end
732- name . concat ( section )
733- token = lookahead
734- if token . symbol == T_ATOM
735- name . concat ( token . value )
736- shift_token
737- end
738- match ( T_SPACE )
739- data = nstring
740- return name , data
741- end
# date-time       = DQUOTE date-day-fixed "-" date-month "-" date-year
#                     SP time SP zone DQUOTE
#
# date_time is the same matcher as +quoted+ and ndatetime the same as
# +nquoted+; the date format itself is not checked by these aliases.
alias date_time quoted
alias ndatetime nquoted
742749
743750 # RFC-3501 & RFC-9051:
744751 # body = "(" (body-type-1part / body-type-mpart) ")"
@@ -996,48 +1003,78 @@ def body_extension
9961003 end
9971004 end
9981005
# section         = "[" [section-spec] "]"
#
# Returns the section as a single string, brackets included.
def section
  text = +lbra
  text.concat(section_spec) unless peek_rbra?
  text.concat(rbra)
end
1012+
1013+ # section-spec = section-msgtext / (section-part ["." section-text])
1014+ # section-msgtext = "HEADER" /
1015+ # "HEADER.FIELDS" [".NOT"] SP header-list /
1016+ # "TEXT"
1017+ # ; top-level or MESSAGE/RFC822 or
1018+ # ; MESSAGE/GLOBAL part
1019+ # section-part = nz-number *("." nz-number)
1020+ # ; body part reference.
1021+ # ; Allows for accessing nested body parts.
1022+ # section-text = section-msgtext / "MIME"
1023+ # ; text other than actual body part (headers,
1024+ # ; etc.)
1025+ #
1026+ # n.b: we could "cheat" here and just grab all text inside the brackets,
1027+ # but literals would need special treatment.
# Returns the section-spec as a string: one atom, followed by a single
# space and the header-list when an SP comes after the atom.
def section_spec
  spec = "".b
  spec << atom # grabs everything up to "SP header-list" or "]"
  if SP?
    spec << " "
    spec << header_list
  end
  spec
end
10331034
1034- def format_string ( str )
1035- case str
# header-list     = "(" header-fld-name *(SP header-fld-name) ")"
#
# Returns the list as one string: the parens included and the field names
# joined by single spaces.
def header_list
  list = +""
  list << lpar
  list << header_fld_name
  while SP?
    list << " "
    list << header_fld_name
  end
  list << rpar
end
1042+
1043+ # RFC3501 & RFC9051:
1044+ # header-fld-name = astring
1045+ #
1046+ # Although RFC3501 allows any astring, RFC5322-valid header names are one
1047+ # or more of the printable US-ASCII characters, except SP and colon. So
1048+ # empty string isn't valid, and literals aren't needed and should not be
1049+ # used. This syntax is unchanged by [I18N-HDRS] (RFC6532).
1050+ #
1051+ # RFC5322:
1052+ # optional-field = field-name ":" unstructured CRLF
1053+ # field-name = 1*ftext
1054+ # ftext = %d33-57 / ; Printable US-ASCII
1055+ # %d59-126 ; characters not including
1056+ # ; ":".
1057+ #
1058+ # Atom and quoted should be sufficient.
1059+ #
1060+ # TODO: Use original source string, rather than decode and re-encode.
1061+ # TODO: or at least, DRY up this code with the send_command formatting.
1062+ def header_fld_name
1063+ case ( str = astring )
10361064 when ""
1065+ warn '%s header-fld-name is an invalid RFC5322 field-name: ""' %
1066+ [ self . class ]
10371067 return '""'
10381068 when /[\x80 -\xff \r \n ]/n
1069+ warn "%s header-fld-name %p has invalid RFC5322 field-name char: %p" %
1070+ [ self . class , str , $&]
10391071 # literal
10401072 return "{" + str . bytesize . to_s + "}" + CRLF + str
1073+ when /[^\x21 -\x39 \x3b -\xfe ]/n
1074+ warn "%s header-fld-name %p has invalid RFC5322 field-name char: %p" %
1075+ [ self . class , str , $&]
1076+ # invalid quoted string
1077+ return '"' + str . gsub ( /["\\ ]/n , "\\ \\ \\ &" ) + '"'
10411078 when /[(){ \x00 -\x1f \x7f %*"\\ ]/n
10421079 # quoted string
10431080 return '"' + str . gsub ( /["\\ ]/n , "\\ \\ \\ &" ) + '"'
@@ -1047,23 +1084,6 @@ def format_string(str)
10471084 end
10481085 end
10491086
1050- def uid_data
1051- token = match ( T_ATOM )
1052- name = token . value . upcase
1053- match ( T_SPACE )
1054- return name , number
1055- end
1056-
1057- def modseq_data
1058- token = match ( T_ATOM )
1059- name = token . value . upcase
1060- match ( T_SPACE )
1061- match ( T_LPAR )
1062- modseq = number
1063- match ( T_RPAR )
1064- return name , modseq
1065- end
1066-
10671087 def mailbox_data__flags
10681088 token = match ( T_ATOM )
10691089 name = token . value . upcase
@@ -1631,6 +1651,20 @@ def charset
16311651 end
16321652 end
16331653
# RFC7162:
# mod-sequence-value  = 1*DIGIT
#                        ;; Positive unsigned 63-bit integer
#                        ;; (mod-sequence)
#                        ;; (1 <= n <= 9,223,372,036,854,775,807).
#
# Same matcher as nz-number64.
alias mod_sequence_value nz_number64

# RFC7162:
# permsg-modsequence  = mod-sequence-value
#                        ;; Per-message mod-sequence.
alias permsg_modsequence mod_sequence_value

# Matches "(" permsg-modsequence ")" and returns the value that
# permsg_modsequence produced.
def parens__modseq
  lpar
  modseq = permsg_modsequence
  rpar
  modseq
end
1667+
16341668 # RFC-4315 (UIDPLUS) or RFC9051 (IMAP4rev2):
16351669 # uid-set = (uniqueid / uid-range) *("," uid-set)
16361670 # uid-range = (uniqueid ":" uniqueid)
0 commit comments