@@ -1034,7 +1034,12 @@ def body_extension
10341034 end
10351035
10361036 # section = "[" [section-spec] "]"
1037- #
# Parses a +section+ and returns its text as one String.
#
#   section = "[" [section-spec] "]"
#
# The result is assembled, in order, from the return values of +lbra+,
# +section_spec+ (only when +peek_rbra?+ is false), and +rbra+.
# NOTE(review): +lbra+/+rbra+ presumably consume and return the bracket
# characters; they are defined outside this view -- confirm.
def section
  text = +lbra # unary plus: make sure we append to an unfrozen string
  unless peek_rbra?
    # section-spec is optional; skip it when peek_rbra? reports true.
    text << section_spec
  end
  text << rbra
  text
end
1042+
10381043 # section-spec = section-msgtext / (section-part ["." section-text])
10391044 # section-msgtext = "HEADER" /
10401045 # "HEADER.FIELDS" [".NOT"] SP header-list /
@@ -1048,58 +1053,58 @@ def body_extension
10481053 # ; text other than actual body part (headers,
10491054 # ; etc.)
10501055 #
1056+ # n.b: we could "cheat" here and just grab all text inside the brackets,
1057+ # but literals would need special treatment.
# Parses a +section-spec+ and returns its text as a binary-encoded String.
#
# The result is the value of +atom+, followed -- when +SP?+ is true -- by a
# single space and the value of +header_list+.
def section_spec
  spec = "".b
  # Per the original author's note: atom grabs everything up to
  # "SP header-list" or "]".
  spec << atom
  if SP?
    spec << " "
    spec << header_list
  end
  spec
end
1064+
10511065 # header-list = "(" header-fld-name *(SP header-fld-name) ")"
1052- #
1053- def section
1054- str = String . new
1055- token = match ( T_LBRA )
1056- str . concat ( token . value )
1057- token = match ( T_ATOM , T_NUMBER , T_RBRA )
1058- if token . symbol == T_RBRA
1059- str . concat ( token . value )
1060- return str
1061- end
1062- str . concat ( token . value )
1063- token = lookahead
1064- if token . symbol == T_SPACE
1065- shift_token
1066- str . concat ( token . value )
1067- token = match ( T_LPAR )
1068- str . concat ( token . value )
1069- while true
1070- token = lookahead
1071- case token . symbol
1072- when T_RPAR
1073- str . concat ( token . value )
1074- shift_token
1075- break
1076- when T_SPACE
1077- shift_token
1078- str . concat ( token . value )
1079- end
1080- str . concat ( format_string ( astring ) )
1081- end
1082- end
1083- token = match ( T_RBRA )
1084- str . concat ( token . value )
1085- return str
# Parses a +header-list+ and returns its text as one String.
#
#   header-list = "(" header-fld-name *(SP header-fld-name) ")"
#
# The result is assembled from the return values of +lpar+, one
# +header_fld_name+, then a space plus another +header_fld_name+ for as
# long as +SP?+ keeps returning true, and finally +rpar+.
def header_list
  list = +""
  list << lpar
  list << header_fld_name
  while SP?
    # Each further field name is separated by exactly one space.
    list << " "
    list << header_fld_name
  end
  list << rpar
  list
end
10871072
1073+ # RFC3501 & RFC9051:
10881074 # header-fld-name = astring
10891075 #
1076+ # Although RFC3501 allows any astring, RFC5322-valid header names are one
1077+ # or more of the printable US-ASCII characters, except SP and colon. So
1078+ # empty string isn't valid, and literals aren't needed and should not be
1079+ # used. This syntax is unchanged by [I18N-HDRS] (RFC6532).
1080+ #
10901081 # RFC5322:
10911082 # optional-field = field-name ":" unstructured CRLF
10921083 # field-name = 1*ftext
10931084 # ftext = %d33-57 / ; Printable US-ASCII
10941085 # %d59-126 ; characters not including
10951086 # ; ":".
1096- def format_string ( str )
1097- case str
1087+ #
1088+ # Atom and quoted should be sufficient.
1089+ #
1090+ # TODO: Use original source string, rather than decode and re-encode.
1091+ # TODO: or at least, DRY up this code with the send_command formatting.
1092+ def header_fld_name
1093+ case ( str = astring )
10981094 when ""
1095+ warn '%s header-fld-name is an invalid RFC5322 field-name: ""' %
1096+ [ self . class ]
10991097 return '""'
11001098 when /[\x80 -\xff \r \n ]/n
1099+ warn "%s header-fld-name %p has invalid RFC5322 field-name char: %p" %
1100+ [ self . class , str , $&]
11011101 # literal
11021102 return "{" + str . bytesize . to_s + "}" + CRLF + str
1103+ when /[^\x21 -\x39 \x3b -\xfe ]/n
1104+ warn "%s header-fld-name %p has invalid RFC5322 field-name char: %p" %
1105+ [ self . class , str , $&]
1106+ # invalid quoted string
1107+ return '"' + str . gsub ( /["\\ ]/n , "\\ \\ \\ &" ) + '"'
11031108 when /[(){ \x00 -\x1f \x7f %*"\\ ]/n
11041109 # quoted string
11051110 return '"' + str . gsub ( /["\\ ]/n , "\\ \\ \\ &" ) + '"'
0 commit comments