@@ -6,7 +6,6 @@ import dotenv from "dotenv";
66import sax from "sax" ;
77import { Readable } from "stream" ;
88import { fileURLToPath } from "url" ;
9- import { strict } from "assert" ;
109
1110dotenv . config ( ) ;
1211
@@ -20,9 +19,10 @@ const ai = new OpenAI({
2019 baseURL : process . env . AI_BASEURL
2120} ) ;
2221
// Length limit used elsewhere in this file (usage not visible in this view).
// NOTE(review): presumably the maximum size of a chunk sent for translation — confirm.
const MAXLEN = 3000;

/**
 * Builds a fresh sax stream parser.
 *
 * - First argument `true` selects sax's strict (well-formed XML) mode.
 * - `trim: false` keeps whitespace in text nodes untouched.
 * - `strictEntities: true` limits entity decoding to the predefined XML
 *   entities.
 *
 * `sax.createStream` accepts only `(strict, options)`. `strictEntities` was
 * previously passed as a separate third argument, which sax ignores, so it
 * now lives in the single options object.
 *
 * The `as any` cast is kept on purpose so the returned stream stays untyped.
 * NOTE(review): other code in this file appears to reach into the stream's
 * private `_parser`; confirm before tightening the type.
 */
const createParser = () =>
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  (sax as any).createStream(true, { trim: false, strictEntities: true });
2626
2727async function translate ( language : string , filePath : string ) : Promise < void > {
2828 try {
@@ -92,18 +92,20 @@ async function recursivelyTranslate(
9292 } else {
9393 if (
9494 subSegments . length > 0 &&
95- subSegments [ subSegments . length - 1 ] [ 1 ] != undefined
95+ subSegments [ subSegments . length - 1 ] [ 0 ]
9696 ) {
9797 subSegments [ subSegments . length - 1 ] [ 1 ] += text ;
9898 subSegments [ subSegments . length - 1 ] [ 0 ] = true ;
99-
100- // if (text == "\n " || text == "\r\n " || text == ", \n" || text == ", \r\n") {
101- // subSegments.push([false, text]);
102- // } else {
103- // subSegments.push([true, text]);
104- // }
10599 } else {
106- subSegments . push ( [ true , text ] ) ;
100+ if (
101+ text . trim ( ) !== "" ||
102+ text . trim ( ) === "," ||
103+ text . trim ( ) === "."
104+ ) {
105+ subSegments . push ( [ false , text ] ) ;
106+ } else {
107+ subSegments . push ( [ true , text ] ) ;
108+ }
107109 }
108110 }
109111 } ) ;
@@ -121,7 +123,11 @@ async function recursivelyTranslate(
121123
122124 if ( subCurrentDepth === 2 ) {
123125 // We are closing a segment element.
124- subSegments . push ( [ true , subCurrentSegment ] ) ;
126+ if ( tagName === "LATEXINLINE" ) {
127+ subSegments . push ( [ false , subCurrentSegment ] ) ;
128+ } else {
129+ subSegments . push ( [ true , subCurrentSegment ] ) ;
130+ }
125131 subCurrentSegment = "" ;
126132 subIsRecording = false ;
127133 }
@@ -336,18 +342,17 @@ async function recursivelyTranslate(
336342 clean . on ( "error" , error => {
337343 console . log (
338344 "error encountered when validating XML: " +
339- error +
340- "\nvalidating section: " +
341- chunk . substring ( 0 , 100 ) +
342- "..."
345+ error + "\nfile: " + path +
346+ "\n section: " +
347+ ( safeText . length > 50 ? safeText . substring ( 0 , 100 ) + "..." : safeText )
343348 ) ;
344349
345350 // Attempt to recover using the internal parser
346351 try {
347352 clean . _parser . resume ( ) ;
348353 } catch ( e ) {
349354 console . log ( "Failed to resume parser:" , e ) ;
350- reject ( ) ;
355+ reject ( e ) ;
351356 }
352357 } ) ;
353358
@@ -375,7 +380,10 @@ function formatAttributes(attrs) {
375380}
376381
/**
 * Escapes stray XML-special characters in `str` while leaving text that
 * already looks like markup or an escaped entity alone.
 *
 * Applied in order:
 *  1. `&` becomes `&amp;` unless it already starts one of the five predefined
 *     entities (`&amp;`, `&lt;`, `&gt;`, `&apos;`, `&quot;`).
 *  2. `<` becomes `&lt;` when the next character is neither an ASCII letter
 *     nor `/`, i.e. it cannot be the start of an opening or closing tag.
 *  3. `>` becomes `&gt;` when the previous character is not an ASCII letter,
 *     digit, quote, whitespace or `/`, i.e. it cannot be the end of a tag.
 *
 * These are heuristics, not a full XML tokenizer: a `<` at the very end of
 * the string, or a `>` preceded by a letter, digit or whitespace, is left
 * untouched.
 *
 * @param str Raw text that may contain unescaped `&`, `<` or `>`.
 * @returns The text with lone special characters replaced by entities.
 */
function escapeXML(str: string): string {
  return str
    // Must run first so the entities inserted below are not re-escaped.
    .replace(/&(?!(?:amp;|lt;|gt;|apos;|quot;))/g, "&amp;")
    // Lone '<': keep the following character via $1.
    .replace(/<([^a-zA-Z\/])/g, "&lt;$1")
    // Lone '>': keep the preceding character via $1.
    .replace(/([^a-zA-Z0-9"'\s\/])>/g, "$1&gt;");
}
380388
381389function strongEscapeXML ( str : string ) : string {
0 commit comments