From 8a45dca07f198958e5d78a0285059e435d6a42ca Mon Sep 17 00:00:00 2001 From: Gareth Rees Date: Thu, 6 Oct 2016 16:34:16 +0100 Subject: [PATCH 1/4] Ruby 1.9+ compatible path in Rakefile Rake was failing because PKG_NAME was not defined. --- Rakefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Rakefile b/Rakefile index f09cb7d..f2e659f 100644 --- a/Rakefile +++ b/Rakefile @@ -4,7 +4,8 @@ require 'rake/testtask' require 'rbconfig' require 'fileutils' -spec = eval File.read('ruby-msg.gemspec') +gemspec_file = File.expand_path(File.dirname(__FILE__) + '/ruby-msg.gemspec') +spec = eval File.read(gemspec_file) task :default => [:test] From 43c87974dd32824eefd3801f398a7ec3d5fdbf75 Mon Sep 17 00:00:00 2001 From: Gareth Rees Date: Thu, 6 Oct 2016 16:40:52 +0100 Subject: [PATCH 2/4] Treat 0x001f as 0x001e MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Looking at the MS reference [1], 001E or 001F should be PT_STRING8. Mapi::Types::DATA seems to ignore this. It sets 001f as PT_UNICODE, and this sort of maps to the original filter converting to UTF8. I say sort of, because I'm not sure how Mapi::Types::DATA and Mapi::Msg::PropertyStore::ENCODINGS interact. This change at least makes both the existing and new spec pass ¯\_(ツ)_/¯ [1] https://msdn.microsoft.com/en-us/library/bb147591.aspx --- lib/mapi/msg.rb | 2 +- test/small-business-rates-relief.msg | Bin 0 -> 30720 bytes test/test_msg.rb | 9 +++++++++ 3 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 test/small-business-rates-relief.msg diff --git a/lib/mapi/msg.rb b/lib/mapi/msg.rb index c805ea4..94193d2 100644 --- a/lib/mapi/msg.rb +++ b/lib/mapi/msg.rb @@ -101,7 +101,7 @@ class PropertyStore # change these to use mapi symbolic const names ENCODINGS = { 0x000d => proc { |obj| obj }, # seems to be used when its going to be a directory instead of a file. eg nested ole. 3701 usually. in which case we shouldn't get here right? 
- 0x001f => proc { |obj| Ole::Types::FROM_UTF16.iconv obj.read }, # unicode + 0x001f => proc { |obj| obj.read.chomp 0.chr }, # ascii # FIXME hack did a[0..-2] before, seems right sometimes, but for some others it chopped the text. chomp 0x001e => proc { |obj| obj.read.chomp 0.chr }, diff --git a/test/small-business-rates-relief.msg b/test/small-business-rates-relief.msg new file mode 100644 index 0000000000000000000000000000000000000000..1de113005dca6ff7fcd05b5e824aad9acf11b891 GIT binary patch literal 30720 zcmeHQ33OZ4nSPSIBz6+pApBo?F1gO#lOp6kb%~Tex|;Mu%DB>p&oN~% zEN3@lbpdt*Y$$U_KN53)Ipin(VwCb4QyC}2Ih+@+qZ(f0gW;5lJaw54SOZuK$N*#l z)&bT7HUP2!?2mlFM!-dYO#qI8O8{>JydAI^um$iAz&io&0&E4C0qi#mU>hJ8a4Fz2 zz;?h60D0UAxEw$mQxLuu;%Wty062^&yB)9_z;>m$?g5km$^jJsmcNzG)xi0l|C#Hh zhnR=Xkzxj};~%r$n*XqWQZ@4PLeGC~L}+&4t@*EM=pc>LLQ`5)SN+HGnqdThtL z*WyLiC#GMlhyG&x@p|b8;ro5?{((5&zZ>3(emMG1ldU4k|I_Czm*&4K{r}0A*2JG5M)g#g+} zed<2B{x#Y4(*H})y;D$6Mqk(bH!w!!nnExAzd&2Yrc83|M%z|e_WF? ze$^}gakS+cl_|0O`t-k2`MJYFewh-}FV)rml!f+8uly5e`xyXJV)}24{~rfkee##- zlAn7G++WZue-Lf;$zQ9B{!05_1^qSQ&9eCVcVhIHgMM=R%l#GZtLT+~6m66HU((kX zf38&i5ztTWe@UPEPb$CIS2C|a{-bD{JpLnn?!hIse?sq?4uCpoqVG>`|4E(l{L zBKmfn^tHa%OFtiU`vvv*^ySK1-};C9r!r`O$?T6E)ZL0c{gbYiC{wb;*gKj1rRL=rNIw6GFAqQ- zlF?>8$4avKFX>AwkZk^Ivg@VK@tTZ&$)fE~E>Dr$t_R{155!Y(_7A#w#Boaiqlb-x&RKzmh(H zDIR^wZ5LxO>G)f5|LfWRlrv9P{I_EIj1_(v4#4Bd&$XvZ=lYxWoabYjN!aiMk{;T2D&VRK$<8|I!!dJ#`%<~+9 zSK4&?G_AdMrJdnD<4s;U?ur3+z-|D~gqJ)2y#nRbQzhUkKo!6Ns0P#koPb(@3*ZLS z0qOw_fJVSxKog)D&;n=$cmQpHb^!O2ynuaxPQZS^0YDevAm9+--GHkB*8sW!J%C<7 zAHWCb2lxR4fI+}vzz~3P9tmGHJ@6Vp*|BhW2-mTwHj}vL7>jQIo^bxXQRQyDbK%Ed zKi;x9s;KxJz-I&Ai5S3r5M{mMoeaK%p^CQ0@U{Zq(BL~G1%M9GW*9q~Dl|Ecnjy4h zix5iM#q-SZ-j7y%zk?FW6pvCX+6-_S@;knKq6m3@w zn<_3E-E{ZUAG`E7H=lN%c-DK{N7cUHE6Uw~bCce7=RPS*{tL$JB}|m2gYNOH><52f z!fXlp0)s`xCEJQk&N2T~YE9qJ+5Qy8WHXoyXEJ7t_nBT;YgBxPGv-77G22-+{UYN` zc1ZE0bgtQ#{@6 zJwC;gxpuRm-(H$?GV}9lt^eYTC-$d1#&QBjRrj9Mx%3mMj+)fDH9doeU%aSh#AnLR z&-iS{O&K#yp5d)(&a4_R^-MgSWwZ43s;RR_Gd($T&6b|w6eZO$I$(LZ|GoWYL(Xi9 
znrql*NuLS+vzndmuml4sC&t#CNKxNCd3gOP^IT3~!t#9ZpL#dU8D=f2I+5cl@IALCu-Rd>ZO_e^o4)9tthTb2R`2Di zX?C;2=ii^YE!UuWL(2TH2^}}CoEXVHwmtXqC$`NQvIh#)Zp-Q6(2kzW8Q*Og4uev$ zYrH`9<~{$ywjw1kt&VJ+v-IQ|Ci9-!`Elz*{rS)5J-73PzHgE@5-1d;E-TJBa z?e4Rk+n+8fx%7*+dFw-ainlv{J!DkXR)f0!6d24-PkFWcu|1#3^A`>lKAff;zVu{X zu4l93Xo_-m`iY$dciWGd?wDTFqh7!E<}hpx@Zl`s-9e^v}FcNYHmr%OzNZg#?6lBGCh}V+ku+Rj$;`fvwBNK98D!KUxK*Z!z2uyy|WTtkCnXUj7!_N}Al>#EM|Kbf0dcq-o#xN6*J zQBE$TI`Tbxf0Q}X>8X0sQQYb>9yU4xp$e;Y(tpj#!nUfT)|}o>hrhAAxy?HLETC#!zq1%tBtS?X%5K z+2*_MNOe?gROWwnWZx?ov=&S6xUnZiS;(8o^OQefZ_k|BFqbl$GxzY7J*{g3mSaQP z=Y6*MP+oEBY+yPy+dP}<2;7kN^7>PkS+0KRnuQ)?-pug-1+3oG99!e}x=#Chx?cHV z@yj`LO?Mo=XK*~rrUcGZ6qP^WpB*T$+2*sVt+unHSDPH056x|MVgJI5~ZoJgH3=pFCEWx+XjnWgTY18a)-K`DVUh(E0`m)8bZ|;SvoYeVAf8Su6?Nraqnbi3m zZQJHX_C30z=iN_~-1YSvOsR8&N+3lkziUT|x^VsbGUj?_3T|^-Z+hHTRv z_B)(&(+lm{Z$D}Eyk~pC&7)@=TMj+*oMldd&6wz$_+se9Wp~{5t@gLye#6JjpRj(i z>VpHDvSP#XlcO7LHl;H-Hm@61=j*U>jl!wZfI@3&m5_+|9I z5sz8`eoF*@-1{nk{)({&?8E(T)c6sxOd=|vN2b3X!@VDrj|d(nA*~OkN8=M)rsggB z7gqo5k{frK@B8Mx*_%H-qWrrd%WK4L#!Mr6(vQ*Ej6QXu$N6ULVexKm2gaCzl)qkt5|^_$YF#qK=rj zu2JzBz~>k!s>;34ei5XT=fHe=1e{6S&{D%HjKGd`?PAeTLAVJMvSWqRrZA3i+Eo>sx@5bm|U8h|F{ zT6HO{_AsqT+fI~d{VMgU@gBS9LMfMG)1Hl9|F&X&X{%CI%$*SU42Jtoep6womd>S= z2dOwW3qiF5I_GyEB4f(~e11sIvBI&xGft0EKO8^PczzsLjyrynATlcDXk&jbx0aW+ zz2IMtTsc}Jd_>O!trw{yLYJT!1U(hFd%-L9Isob%t)*yOo z`O#1a-XkMBxB{;dnb(t$o{{ii0^O`;w~1p-TIJPvsw!Y4`LtMcYUF z&3Qp<%rPK8sX59P=|OE)$-X;`nlbcv2sYJQ)r{LeI2DwATIkr!IY?(pc7-nv_~V{b@r-3{!~-dYW&p8w5bLdm4_sD3Xw&|mT zLiRCXD_ijtXBaK&knq_)(28(HDFY~BEu$3j5Dhwtlpsj~NCg>Hj$fm!HM?s9}%L zGDS4W-lZmGgvC0>W0GRTm$LWBLjxka*7K|-QX7SpUn()zx71GoI2;F$^n~m??K^KG9qizy3 z`f%l_W<#1mWwe53tc)1~8R8&&nE-rUsKi=ioUtjI5 zZS~f>y7soT?k{w<^7Jq^#fy()TP4oDnfH&Zu*a1+S*^kTgBu>T75luEm~jW-hw8C6 z(t?v%7iK^;R$I-m+%13(%=C7YcyZq;YU_~alOD9p1D-ClZUD|!;NVV)9nW}B)`Vwv zf{GJ(D)Ed9cMYg%z+DUKIu`4s9Zsxw){@iJqzf&7yRc{6ec=8`zt|6mcrDsN+PzZP zIoi*outvSmW+CpPBSXd*{06&bci0QuD=Wk&_p1s;9~5E_Yd7>SEiP>Vcb!&peh?pT 
zvB1kXlXjf@ri?lm12fJa5LQ&1>s)oqctLvbJoJ1M%E%cbGRc{Cen1)Ii9sn&9f+Bc zDkBD-6-ZoMzsv8cl#22q=&=~GRsdMC3%ZbR^bj|7J|<$>5qMJ%bVr^Ud0z>RqgRFJ z#~xlKBud=Vhae;SiRTVw;eOkLD|bMz24~VoYW|ISD?nS0gvhucx5-CFM8m{w1(qV< zvVl?&pcwZqaV^7Vt=RR{t99ElT9v&dxzlEdlibJS)#|nsGmXlD$GA zCq|nWm^$%%IV{9+@O~{u{5a_C0xv^&nroFd;2lFvGoGu59Hon#^+6gpxN;*7Y(~HE z>|h)m9D`gPpn3rLa`4fHrzX*})hHPUuRc&5z&$Nl6Zma_&h~<97o=~29z4S5x`E{Y zu(;9M2~LlKQVH<1fa4)N8v=GG_?$xTG(&$}y=gkD1;%>#V>fEs1XpE{r3<`N1A~0B z4s|V{*@C|!v>yciI&j%2I35Fs#9EH>5|m8?*FI=xpRm;ZxQ0;5vL-y$2whX2{or9= zn5Rx$+krEH`r$>+cv{RpW8Aq8B?3J@U+VFEAA0vk4fFL(9xFQW`~{4}j(_a6Jhcdm$Bf z#3|=7jA@SkI`DlIZL6Ux52y@b+}5Bqd+-RZTyfAJ*MipJMb7MyirCm+$I*sVJ0M9B z$~d#yksC%W=Rhg?+XD^M;mY{xAgI=(b`n}@!t;ZWnrE5;fsuX8v3so;7fw9s68%?$ z*2Cgiu7bIvQx3X}$@-D+T;!|u3KTe_%26_b z_S5(q2mj=f5#<43<=8%qybW~+p}}G3Y#M!N!xMh=jUAkFjqiikD#J5gU?|1Y<=~3* zmv&L>O)H*kLzxv%1;ODUWE_ReP3Q|+*+%hvDfCneD*d2*5SU7Vu^cppkfXhsL`^^L z>+meccO7(2TQdc`#dvN6xJ$tEUOYbx{p>;8Y3Ozc*y*u@DD{Km5j?AL#uzLJZiYe0 z20WZYMW7uNI7fib1N{B?JP3RN=!Np}yHG`tgLK>RbTgg{LJ}wV>I3xwXrocI^*}!@ zXlDln*?t0T_X5ip@J*vVEgi@CI7WsKb4BCq7_g0lwi|NN-q)dTM@61JID#G=#AxNI z>>lWr&$fYY=>;m`fr*EH#D%sKxH8JqycPZE8xdP+G-(e9L6cF|fbdeQ_x%=Vd^cpZ z3yB$*F-EfhcqYwnXVY_(qNHe1_EtzqAIexk#y8wkE`Xjqf~!No$Y_44*S~rZLkIet zd&-oAkpQDzYOe)%jC#4}#u$lj?eI6bB6=MX8tz98BOTUg`;Kg_X-d9t6GZRGT^Ok| zc>=1fLE^XV1XnzFl4ot?i_vNsY zaJ1zij7aW_%IHI`j$%vr#Bz~!9_G0g!*Hd*=)W9X%DvkO(B#fIcb~MFiRY|b7fhlq zh}xY(qWEi!=xDbDco;)RqTS_qicsPXD_3$O&>i2);%-qt?&aR3EMxpykGa$YtIM8D z{sh-gd2z-U>!R0vtDhb6R6(xxY6YhpNgc2U1ppWBsJk-c_+DBysM8v7O-Ku=MYP(Q zkfVp~*`Ld==~km}j<}(){Q7hmEgkp%Iah{(ArF#G0T?rZ;MBN$OnFQGb2#46U#wa^B8 zkn?|~>+y53(UWl^M@|aqZ)$}^e~kC5C|L*W7)Kv%^;lS!)FN%Hd|O1Wn;RhOxza~` znDh#(`DO#T;Z9{UW@-TR{QP-;bqJf9sA^GBW& z(*x31RN}W2_u)Cturk~)kCp3Zz8k_*MeaK=K9UxnQNp0m7DpRb7t~h)XfleB=iu_a z8)9Z`)eKD&!xS(?POJ+s>m#RH$D_s-ZLvJt>i~Y9o6ys9tgme6Z}s%6p#jpZhFl!+ zkvDdU`_DC9=>2bl;i+}cJ^z)v?FS$F*YCQ%^wSjIf&Weq|2Br6KB4Wt5VKt4E!I+> zpZ!`5x1OsOzvyYZT$>wF$nS`~KYZo=Em8Nk;{F3s`42|jpNP7@3-=!Zd=&68z+VAA 
z4)_G%lYmbF{u=OUz-It=17-pD06q)27jPdS`ul3p_kXjb?vZf*(eU*PxIVt5%~!(t zui^T2z~2MD5ia`{uJeE=0RI4prUe!J*(BG23Tta+Q_X(2wRK;m>wvSqx~0ygR@XK+ zv@~>h+pE0|oi4Smy>*{Q&2x9vHq^A%yNm4|p4ukwPOGz}vfbru@HDtuydBKp%Pmmm z3ett9A1@wgg}2n+^i1^qLyG#-`yJn%ugTu>_*d5`yR4_*X9&yx$53p%cy$F`@TUI$ z!JEDRR{Q?Ih4z2kS8B%UfxAPD9$#x=3avWdrk<158Tggjq3(Jj+a?-WZcN;g`(QP_FT1#;=(( zzcJk2gxxgewPeQL7xTvf#OJ`eSJa=)IQ`tZrwm5J6(VmF^}pL>G*pTFu*m;nlNnF2 zeV)kU|5q?ni~OA;|8$xeJD{xJvRLn2@_g-*=i5Y{-%mFwKS(ng+GFy3D~IQv=l=dY zfB#0`@1OEJ^_s)PZPN5*w4l69@#xFnzL96}N06MsI8>SW`uL`bje_b0Kc zmA`*M9uiZQwSS8X?HyA*`AMJWN%FmwV%d0^PmI6Cxv!W0L%@@4{@Vn7p3Bn)oL~A6 zf_}33CwrbFASwM7&;M_OKJ7nKJpD`hx_FM} z($$j0n)vhcsMbSY{sLXn@xS8v|G6moi8<9*tI5u*z9-Iat2Z-!GxmGapbqYU%J|nW z^85`WnGcA(7S9?D$3*@TQU4RfqvIk^9J2j{$j>h3CzsSuEvdg=6L|)UcSt(kQH(X8pk$U5jyfVZ3O#rDsgUEkuF>e%k z*}weVF_XxnTBJ-RB_?mi83^$)o*}+W#T?ULM#Ea6fBxQ`Ng2sC8lLw@=>G)ze?jE) z#q%6b?-J`jZM>s6Vfy*~pZhs@OxPw-<+|VW?@3>b#uBw$zHa*XOAoGHT%9i8CQ<2& zz4z}-M+39_J7=NWL`|2k%PnE}y{Z&<|7=ulOpcCMx~6=Gdq)QSYIXn7p^>4`Sg?0& zXu_}71xN8?Q|E3>`bH|-{l1~Vkbh(>WJNCC#-zF`&iVUr%bzFe-9KFZ`9hqhMejd4 z@Ky-V|M|AV>-m+3_(ERj{nrcq{f7&E{>9ZtR|Rm(e=H!EjFzFpKYuU4?VMlVxwh`h$XS^3Xy*3%-AzR_?-AAXmBa7$veD5~$zD5D=<+(cF(UbdM z@_hon1H|_OB5gUw_-^VH-W1T@r5{4spg6~uzi+uZ9{84^cD65Ln+9;u0iOAd1HMzh z_=IKc$niVKyvld}_%7D!68%>_fPU~MT*`nEzp46jH~s3_gGVwi$ofLE_QrhvKi0*a+W-In literal 0 HcmV?d00001 diff --git a/test/test_msg.rb b/test/test_msg.rb index ed52f0e..c1741b6 100644 --- a/test/test_msg.rb +++ b/test/test_msg.rb @@ -35,6 +35,15 @@ def test_rendered_string_is_valid_encoding end end + def test_multipart_rendered_string_is_valid_encoding + msg = Mapi::Msg.open "#{TEST_DIR}/small-business-rates-relief.msg" do |msg| + string_version = msg.to_mime.to_s + if string_version.respond_to?(:valid_encoding?) + assert_equal true, string_version.valid_encoding? 
+ end + end + end + def test_embedded_msg_renders_as_string msg = Mapi::Msg.open "#{TEST_DIR}/embedded.msg" do |msg| assert_match "message/rfc822", msg.to_mime.to_s From 62d7decda069ffc7866c0a20cdf2d2a5708f7a22 Mon Sep 17 00:00:00 2001 From: Gareth Rees Date: Thu, 6 Oct 2016 16:46:54 +0100 Subject: [PATCH 3/4] Force the encoding of 001f to ASCII-8BIT Alternative spec-passing version of the previous commit. --- lib/mapi/msg.rb | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/lib/mapi/msg.rb b/lib/mapi/msg.rb index 94193d2..2882c07 100644 --- a/lib/mapi/msg.rb +++ b/lib/mapi/msg.rb @@ -101,7 +101,14 @@ class PropertyStore # change these to use mapi symbolic const names ENCODINGS = { 0x000d => proc { |obj| obj }, # seems to be used when its going to be a directory instead of a file. eg nested ole. 3701 usually. in which case we shouldn't get here right? - 0x001f => proc { |obj| obj.read.chomp 0.chr }, + 0x001f => proc do |obj| + str = Ole::Types::FROM_UTF16.iconv(obj.read) + str = if str.respond_to?(:force_encoding) + str.force_encoding('ASCII-8BIT') + else + str + end + end, # unicode # ascii # FIXME hack did a[0..-2] before, seems right sometimes, but for some others it chopped the text. chomp 0x001e => proc { |obj| obj.read.chomp 0.chr }, From 146be73e7bf36cd51a214178a79d09bfaab1d352 Mon Sep 17 00:00:00 2001 From: Gareth Rees Date: Thu, 6 Oct 2016 16:49:46 +0100 Subject: [PATCH 4/4] Revert to original encoder and rescue Another alternative spec-passing change --- lib/mapi/mime.rb | 13 +++++++++++-- lib/mapi/msg.rb | 9 +-------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/lib/mapi/mime.rb b/lib/mapi/mime.rb index 7240489..c8b007a 100644 --- a/lib/mapi/mime.rb +++ b/lib/mapi/mime.rb @@ -106,8 +106,17 @@ def to_s opts={} opts = {:boundary_counter => 0}.merge opts if multipart? 
boundary = Mime.make_boundary opts[:boundary_counter] += 1, self - @body = [preamble, parts.map { |part| "\r\n" + part.to_s(opts) + "\r\n" }, "--\r\n" + epilogue]. - flatten.join("\r\n--" + boundary) + begin + @body = [preamble, parts.map { |part| "\r\n" + part.to_s(opts) + "\r\n" }, "--\r\n" + epilogue]. + flatten.join("\r\n--" + boundary) + rescue Encoding::CompatibilityError => e + if ''.respond_to?(:force_encoding) + @body = [preamble, parts.map { |part| "\r\n" + part.to_s(opts).force_encoding('ASCII-8BIT') + "\r\n" }, "--\r\n" + epilogue]. + flatten.join("\r\n--" + boundary) + else + raise e + end + end content_type, attrs = Mime.split_header @headers['Content-Type'][0] attrs['boundary'] = boundary @headers['Content-Type'] = [([content_type] + attrs.map { |key, val| %{#{key}="#{val}"} }).join('; ')] diff --git a/lib/mapi/msg.rb b/lib/mapi/msg.rb index 2882c07..c805ea4 100644 --- a/lib/mapi/msg.rb +++ b/lib/mapi/msg.rb @@ -101,14 +101,7 @@ class PropertyStore # change these to use mapi symbolic const names ENCODINGS = { 0x000d => proc { |obj| obj }, # seems to be used when its going to be a directory instead of a file. eg nested ole. 3701 usually. in which case we shouldn't get here right? - 0x001f => proc do |obj| - str = Ole::Types::FROM_UTF16.iconv(obj.read) - str = if str.respond_to?(:force_encoding) - str.force_encoding('ASCII-8BIT') - else - str - end - end, # unicode + 0x001f => proc { |obj| Ole::Types::FROM_UTF16.iconv obj.read }, # unicode # ascii # FIXME hack did a[0..-2] before, seems right sometimes, but for some others it chopped the text. chomp 0x001e => proc { |obj| obj.read.chomp 0.chr },