@@ -539,38 +539,46 @@ class CWebWindow
539539 return hr;
540540 }
541541
542- HRESULT SaveText (FILE *fp, const WValue& value)
542+ HRESULT SaveText (FILE *fp, const WValue& value, size_t & textLength )
543543 {
544544 const int nodeType = value[L" nodeType" ].GetInt ();
545- const auto * nodeName = value[L" nodeName" ].GetString ();
546- const bool fInline = IsInlineElement (nodeName);
547545
548- if (value[ L" nodeType" ]. GetInt () == 3 /* #text */ )
546+ if (nodeType == 3 /* TEXT_NODE */ )
549547 {
550- if (fwprintf (fp, L" %s" , value[L" nodeValue" ].GetString ()) < 0 )
548+ std::wstring text = value[L" nodeValue" ].GetString ();
549+ text =
550+ ((text.length () > 0 && iswspace (text.front ())) ? L" " : L" " ) +
551+ trim_ws (text) +
552+ ((text.length () > 0 && iswspace (text.back ())) ? L" " : L" " );
553+ if (fwprintf (fp, L" %s" , text.c_str ()) < 0 )
551554 return HRESULT_FROM_WIN32 (GetLastError ());
555+ textLength += text.length ();
552556 }
553557 if (value.HasMember (L" children" ) && value[L" children" ].IsArray ())
554558 {
559+ const auto * nodeName = value[L" nodeName" ].GetString ();
560+ const bool fInline = IsInlineElement (nodeName);
555561 if (wcscmp (nodeName, L" SCRIPT" ) != 0 && wcscmp (nodeName, L" STYLE" ) != 0 )
556562 {
557- int textCount = 0 ;
563+ if (nodeType == 1 )
564+ {
565+ if ((!fInline && textLength > 0 ) || wcscmp (nodeName, L" BR" ) == 0 || wcscmp (nodeName, L" HR" ) == 0 )
566+ {
567+ fwprintf (fp, L" \n " );
568+ textLength = 0 ;
569+ }
570+ }
558571 for (const auto & child : value[L" children" ].GetArray ())
559572 {
560- int childNodeType = child[L" nodeType" ].GetInt ();
561- if (childNodeType == 3 )
562- textCount++;
563- HRESULT hr = SaveText (fp, child);
573+ HRESULT hr = SaveText (fp, child, textLength);
564574 if (FAILED (hr))
565575 return hr;
566576 }
567- if ((!fInline && textCount > 0 ) || wcscmp (nodeName, L" BR" ) == 0 || wcscmp (nodeName, L" HR" ) == 0 )
568- fwprintf (fp, L" \n " );
569577 }
570578 }
571579 if (value.HasMember (L" contentDocument" ))
572580 {
573- HRESULT hr = SaveText (fp, value[L" contentDocument" ]);
581+ HRESULT hr = SaveText (fp, value[L" contentDocument" ], textLength );
574582 if (FAILED (hr))
575583 return hr;
576584 }
@@ -593,7 +601,8 @@ class CWebWindow
593601 document.Parse (returnObjectAsJson);
594602 wil::unique_file fp;
595603 _wfopen_s (&fp, filename.c_str (), L" at,ccs=UTF-8" );
596- hr = SaveText (fp.get (), document[L" root" ]);
604+ size_t textLength = 0 ;
605+ hr = SaveText (fp.get (), document[L" root" ], textLength);
597606 }
598607 if (callback2)
599608 callback2->Invoke ({ hr, nullptr });
@@ -1111,40 +1120,89 @@ class CWebWindow
11111120 L" A" ,
11121121 L" ABBR" ,
11131122 L" ACRONYM" ,
1123+ L" AUDIO" ,
11141124 L" B" ,
1125+ L" BDI" ,
11151126 L" BDO" ,
11161127 L" BIG" ,
11171128 L" BR" ,
11181129 L" BUTTON" ,
1130+ L" CANVAS" ,
11191131 L" CITE" ,
11201132 L" CODE" ,
1133+ L" DATA" ,
1134+ L" DATALIST" ,
1135+ L" DEL" ,
11211136 L" DFN" ,
11221137 L" EM" ,
1138+ L" EMBED" ,
11231139 L" I" ,
1140+ L" IFRAME" ,
11241141 L" IMG" ,
11251142 L" INPUT" ,
1143+ L" INS" ,
11261144 L" KBD" ,
11271145 L" LABEL" ,
11281146 L" MAP" ,
1147+ L" MARK" ,
1148+ L" METER" ,
1149+ L" NOSCRIPT" ,
11291150 L" OBJECT" ,
1151+ L" OUTPUT" ,
1152+ L" PICTURE" ,
1153+ L" PROGRESS" ,
11301154 L" Q" ,
1155+ L" RUBY" ,
1156+ L" S" ,
11311157 L" SAMP" ,
11321158 L" SCRIPT" ,
11331159 L" SELECT" ,
1160+ L" SLOT" ,
11341161 L" SMALL" ,
11351162 L" SPAN" ,
11361163 L" STRONG" ,
11371164 L" SUB" ,
11381165 L" SUP" ,
1166+ L" SVG" ,
1167+ L" TEMPLATE" ,
11391168 L" TEXTAREA" ,
1169+ L" TIME" ,
11401170 L" TT" ,
1171+ L" U" ,
11411172 L" VAR" ,
1173+ L" VIDEO" ,
1174+ L" WBR" ,
11421175 };
11431176 return bsearch (&name, inlineElements,
11441177 sizeof (inlineElements) / sizeof (inlineElements[0 ]),
11451178 sizeof (inlineElements[0 ]), cmp);
11461179 }
11471180
/// Returns a copy of `str` with leading and trailing whitespace removed.
///
/// A character is stripped only when its code is below 0x100 and iswspace()
/// classifies it as whitespace; whitespace at or above 0x100 (for example
/// U+3000 IDEOGRAPHIC SPACE) is deliberately left in place. Interior
/// whitespace is never touched. An empty or all-whitespace input yields an
/// empty string.
///
/// The same predicate is applied to both ends. Previously the leading edge
/// was tested with the narrow isspace() and the trailing edge with
/// iswspace(), so the two ends could disagree on characters such as U+00A0
/// depending on the C runtime.
static std::wstring trim_ws(const std::wstring& str)
{
	const auto is_trimmable = [](wchar_t ch) {
		// Compare as unsigned so that a negative value (wchar_t is signed on
		// some platforms) is rejected instead of being handed to the
		// classification function.
		return static_cast<unsigned long>(ch) < 0x100UL && iswspace(ch) != 0;
	};

	// Index of the first character to keep.
	size_t first = 0;
	while (first < str.size() && is_trimmable(str[first]))
		++first;

	// One past the last character to keep; never moves below `first`.
	size_t last = str.size();
	while (last > first && is_trimmable(str[last - 1]))
		--last;

	return str.substr(first, last - first);
}
1205+
11481206 static std::wstring Escape (const std::wstring& text)
11491207 {
11501208 std::wstring result;
0 commit comments