From b38b890c182808ac5cc9b147bda830852fccaee9 Mon Sep 17 00:00:00 2001 From: try2love <1929706911@qq.com> Date: Tue, 21 Apr 2026 02:19:41 +0000 Subject: [PATCH 1/3] feat: Chinese support enhanced. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. 新增了共享文本处理层 TextProcessing.swift (line 1),统一了简体中文脚本相关规则:CJK 分词、中文括注识别、全角转半角规范化,以及常见语气词的前导跳过策略。 2. 在 SpeechRecognizer.swift (line 102) 里接入了这套规则。现在中文跟读会更偏向中文 token 匹配,并对 嗯 / 呃 / 额 / 那个 / 就是 / 然后 / 这个 / 啊 / 吧 / 嘛 这类口头填充词做保守容错,同时收紧了 CJK 的模糊匹配,避免单字误判推进。 3. 在 MarqueeTextView.swift (line 12) 里把 cue/token 语义切到共享规则上,中文括注如 【停顿】、〔看镜头〕、(微笑) 会被当成单个提示 token,而不是拆散成普通正文。 4. 在 HighlightingTextEditor.swift (line 96) 里补了中文括注高亮规则,编辑态和 teleprompter 展示态的 cue 语义现在一致。 5. 在 BrowserServer.swift (line 421) 里同步了中文括注与中文标点分类逻辑,远程浏览器镜像不会再把这类中文 cue 当成普通正文字符。 --- Textream/Textream/BrowserServer.swift | 22 ++- .../Textream/HighlightingTextEditor.swift | 2 +- Textream/Textream/MarqueeTextView.swift | 58 +----- Textream/Textream/SpeechRecognizer.swift | 93 +++++++--- Textream/Textream/TextProcessing.swift | 175 ++++++++++++++++++ 5 files changed, 267 insertions(+), 83 deletions(-) create mode 100644 Textream/Textream/TextProcessing.swift diff --git a/Textream/Textream/BrowserServer.swift b/Textream/Textream/BrowserServer.swift index c58f481..981ffc5 100644 --- a/Textream/Textream/BrowserServer.swift +++ b/Textream/Textream/BrowserServer.swift @@ -418,15 +418,29 @@ class BrowserServer { } function rgba(rgb,a){return 'rgba('+rgb[0]+','+rgb[1]+','+rgb[2]+','+a+')';} - // Detect annotation words: [bracket] or emoji-only (no letters/digits) + // Detect annotation words: bracketed cues or emoji/punctuation-only tokens function isAnnotation(w){ - if(w.startsWith('[')&&w.endsWith(']'))return true; - return!/[a-zA-Z0-9\\u00C0-\\u024F\\u0400-\\u04FF\\u3000-\\u9FFF\\uAC00-\\uD7AF]/.test(w); + const readable=/[a-zA-Z0-9\\u00C0-\\u024F\\u0400-\\u04FF\\u3040-\\u30FF\\u3400-\\u4DBF\\u4E00-\\u9FFF\\uAC00-\\uD7AF]/; + const cuePairs=[ + ['[',']'], + ['【','】'], + ['〔','〕'], + ['(',')'], + ['[',']'] + ]; + for(const [open,close] of cuePairs){ + if(w.startsWith(open)&&w.endsWith(close)){ + const inner=w.slice(open.length,w.length-close.length).trim(); + if(inner&&inner.length<=24&&!/[。!?!?;;\\n\\r]/.test(inner))return true; + } + } + return!readable.test(w); } // Count letters+digits in a word function letterCount(w){ - let n=0;for(const ch of w)if(/[a-zA-Z0-9\\u00C0-\\u024F\\u0400-\\u04FF\\u3000-\\u9FFF\\uAC00-\\uD7AF]/.test(ch))n++; + const readable=/[a-zA-Z0-9\\u00C0-\\u024F\\u0400-\\u04FF\\u3040-\\u30FF\\u3400-\\u4DBF\\u4E00-\\u9FFF\\uAC00-\\uD7AF]/; + let n=0;for(const ch of w)if(readable.test(ch))n++; return Math.max(1,n); } diff --git a/Textream/Textream/HighlightingTextEditor.swift b/Textream/Textream/HighlightingTextEditor.swift index 1ec67ee..b936158 100644 --- a/Textream/Textream/HighlightingTextEditor.swift +++ b/Textream/Textream/HighlightingTextEditor.swift @@ -96,7 +96,7 @@ struct HighlightingTextEditor: NSViewRepresentable { weak var textView: NSTextView? private static let annotationPattern = try! NSRegularExpression( - pattern: "\\[[^\\]]+\\]", + pattern: "\\[[^\\]]+\\]|【[^】]+】|〔[^〕]+〕|([^)]+)|[[^]]+]", options: [] ) diff --git a/Textream/Textream/MarqueeTextView.swift b/Textream/Textream/MarqueeTextView.swift index 74e5afb..6d9f97f 100644 --- a/Textream/Textream/MarqueeTextView.swift +++ b/Textream/Textream/MarqueeTextView.swift @@ -7,62 +7,13 @@ import SwiftUI -// MARK: - CJK-aware word splitting - -extension Unicode.Scalar { - var isCJK: Bool { - let v = value - return (v >= 0x4E00 && v <= 0x9FFF) // CJK Unified Ideographs - || (v >= 0x3400 && v <= 0x4DBF) // CJK Extension A - || (v >= 0x20000 && v <= 0x2A6DF) // CJK Extension B - || (v >= 0xF900 && v <= 0xFAFF) // CJK Compatibility Ideographs - || (v >= 0x3040 && v <= 0x309F) // Hiragana - || (v >= 0x30A0 && v <= 0x30FF) // Katakana - || (v >= 0xAC00 && v <= 0xD7AF) // Hangul Syllables - } -} - -/// Splits text into display-ready words. CJK characters (Chinese, Japanese, Korean) -/// are split into individual characters so the flow layout can wrap them properly. -func splitTextIntoWords(_ text: String) -> [String] { - let tokens = text.replacingOccurrences(of: "\n", with: " ") - .split(omittingEmptySubsequences: true, whereSeparator: { $0.isWhitespace }) - .map { String($0) } - - var result: [String] = [] - for token in tokens { - guard token.unicodeScalars.contains(where: { $0.isCJK }) else { - result.append(token) - continue - } - // Token contains CJK characters — split each CJK char individually; - // consecutive non-CJK chars (e.g. Latin letters, digits) stay grouped. - var buffer = "" - for char in token { - if char.unicodeScalars.first.map({ $0.isCJK }) == true { - if !buffer.isEmpty { - result.append(buffer) - buffer = "" - } - result.append(String(char)) - } else { - buffer.append(char) - } - } - if !buffer.isEmpty { - result.append(buffer) - } - } - return result -} - // MARK: - Data struct WordItem: Identifiable { let id: Int let word: String let charOffset: Int // char offset of this word in the full text (counting spaces) - let isAnnotation: Bool // true for [bracket] words and emoji-only words + let isAnnotation: Bool // true for cue tokens such as [pause], 【停顿】, and punctuation-only words } // MARK: - Preference key to report word Y positions @@ -512,12 +463,7 @@ struct WordFlowLayout: View { } static func isAnnotationWord(_ word: String) -> Bool { - // Words inside square brackets like [smile] - if word.hasPrefix("[") && word.hasSuffix("]") { return true } - // Emoji-only words (no letters or numbers) - let stripped = word.filter { $0.isLetter || $0.isNumber } - if stripped.isEmpty { return true } - return false + isAnnotationToken(word) } private func buildLines(items: [WordItem]) -> [[WordItem]] { diff --git a/Textream/Textream/SpeechRecognizer.swift b/Textream/Textream/SpeechRecognizer.swift index 0730b1c..69e241e 100644 --- a/Textream/Textream/SpeechRecognizer.swift +++ b/Textream/Textream/SpeechRecognizer.swift @@ -102,6 +102,14 @@ class SpeechRecognizer { private var sessionGeneration: Int = 0 private var suppressConfigChange: Bool = false + private var speechLocaleIdentifier: String { + NotchSettings.shared.speechLocale + } + + private var isChineseSpeechLocale: Bool { + speechLocaleIdentifier.hasPrefix("zh") + } + /// Update the source text while preserving the current recognized char count. /// Used by Director Mode to live-edit unread text without resetting read progress. func updateText(_ text: String, preservingCharCount: Int) { @@ -391,8 +399,9 @@ class SpeechRecognizer { // MARK: - Fuzzy character-level matching private func matchCharacters(spoken: String) { - // Strategy 1: character-level fuzzy match from the start offset - let charResult = charLevelMatch(spoken: spoken) + let charResult = charMatchCandidates(for: spoken) + .map(charLevelMatch(normalizedSpoken:)) + .max() ?? 0 // Strategy 2: word-level match (handles STT word substitutions) let wordResult = wordLevelMatch(spoken: spoken) @@ -406,10 +415,29 @@ class SpeechRecognizer { } } - private func charLevelMatch(spoken: String) -> Int { + private func charMatchCandidates(for spoken: String) -> [String] { + let normalizedSpoken = Self.normalize(spoken) + guard !normalizedSpoken.isEmpty else { return [normalizedSpoken] } + + let normalizedSourcePrefix = Self.normalize(String(sourceText.dropFirst(matchStartOffset))) + let fillerLength = leadingSpeechFillerLength( + in: normalizedSpoken, + sourcePrefix: normalizedSourcePrefix, + localeIdentifier: speechLocaleIdentifier + ) + + var candidates = [normalizedSpoken] + if fillerLength > 0 && fillerLength < normalizedSpoken.count { + candidates.append(String(normalizedSpoken.dropFirst(fillerLength))) + } + var seen = Set() + return candidates.filter { seen.insert($0).inserted } + } + + private func charLevelMatch(normalizedSpoken: String) -> Int { let remainingSource = String(sourceText.dropFirst(matchStartOffset)) - let src = Array(remainingSource.lowercased().unicodeScalars).map { Character($0) } - let spk = Array(Self.normalize(spoken).unicodeScalars).map { Character($0) } + let src = Array(Self.foldForCharMatch(remainingSource)) + let spk = Array(normalizedSpoken) var si = 0 var ri = 0 @@ -435,10 +463,24 @@ class SpeechRecognizer { ri += 1 lastGoodOrigIndex = si } else { - // Try to re-sync: look ahead in both strings var found = false - // Skip up to 3 chars in spoken (STT inserted extra chars) + let spokenSuffix = String(spk[ri...]) + let sourceSuffix = String(src[si...]) + let fillerSkip = leadingSpeechFillerLength( + in: spokenSuffix, + sourcePrefix: sourceSuffix, + localeIdentifier: speechLocaleIdentifier + ) + if fillerSkip > 0 { + let nextRI = ri + fillerSkip + if nextRI < spk.count && spk[nextRI] == sc { + ri = nextRI + found = true + } + } + if found { continue } + let maxSkipR = min(3, spk.count - ri - 1) if maxSkipR >= 1 { for skipR in 1...maxSkipR { @@ -477,15 +519,13 @@ class SpeechRecognizer { } private static func isAnnotationWord(_ word: String) -> Bool { - if word.hasPrefix("[") && word.hasSuffix("]") { return true } - let stripped = word.filter { $0.isLetter || $0.isNumber } - return stripped.isEmpty + isAnnotationToken(word) } private func wordLevelMatch(spoken: String) -> Int { let remainingSource = String(sourceText.dropFirst(matchStartOffset)) - let sourceWords = remainingSource.split(separator: " ").map { String($0) } - let spokenWords = spoken.lowercased().split(separator: " ").map { String($0) } + let sourceWords = splitTextIntoWords(remainingSource) + let spokenWords = splitTextIntoWords(spoken) var si = 0 // source word index var ri = 0 // spoken word index @@ -500,10 +540,8 @@ class SpeechRecognizer { continue } - let srcWord = sourceWords[si].lowercased() - .filter { $0.isLetter || $0.isNumber } - let spkWord = spokenWords[ri] - .filter { $0.isLetter || $0.isNumber } + let srcWord = Self.normalize(sourceWords[si]).replacingOccurrences(of: " ", with: "") + let spkWord = Self.normalize(spokenWords[ri]).replacingOccurrences(of: " ", with: "") if srcWord == spkWord || isFuzzyMatch(srcWord, spkWord) { // Count original chars including trailing punctuation, plus space @@ -516,9 +554,9 @@ class SpeechRecognizer { } else { // Try skipping up to 3 spoken words (STT hallucinated words) var foundSpk = false - let maxSpkSkip = min(3, spokenWords.count - ri - 1) + let maxSpkSkip = min(isChineseSpeechLocale ? 6 : 3, spokenWords.count - ri - 1) for skip in 1...max(1, maxSpkSkip) where skip <= maxSpkSkip { - let nextSpk = spokenWords[ri + skip].filter { $0.isLetter || $0.isNumber } + let nextSpk = Self.normalize(spokenWords[ri + skip]).replacingOccurrences(of: " ", with: "") if srcWord == nextSpk || isFuzzyMatch(srcWord, nextSpk) { ri += skip foundSpk = true @@ -529,9 +567,9 @@ class SpeechRecognizer { // Try skipping up to 3 source words (user read fast, STT missed words) var foundSrc = false - let maxSrcSkip = min(3, sourceWords.count - si - 1) + let maxSrcSkip = min(isChineseSpeechLocale ? 6 : 3, sourceWords.count - si - 1) for skip in 1...max(1, maxSrcSkip) where skip <= maxSrcSkip { - let nextSrc = sourceWords[si + skip].lowercased().filter { $0.isLetter || $0.isNumber } + let nextSrc = Self.normalize(sourceWords[si + skip]).replacingOccurrences(of: " ", with: "") if nextSrc == spkWord || isFuzzyMatch(nextSrc, spkWord) { // Add all skipped source words' char counts for s in 0..= 3 && editDistance(a, b) <= 1 + } // One starts with the other (phonetic prefix: "not" ~ "notch") if a.hasPrefix(b) || b.hasPrefix(a) { return true } // One contains the other @@ -601,7 +646,11 @@ class SpeechRecognizer { } private static func normalize(_ text: String) -> String { - text.lowercased() - .filter { $0.isLetter || $0.isNumber || $0.isWhitespace } + normalizedSpeechText(text) + } + + private static func foldForCharMatch(_ text: String) -> String { + let folded = text.applyingTransform(.fullwidthToHalfwidth, reverse: false) ?? text + return folded.lowercased() } } diff --git a/Textream/Textream/TextProcessing.swift b/Textream/Textream/TextProcessing.swift new file mode 100644 index 0000000..49ce996 --- /dev/null +++ b/Textream/Textream/TextProcessing.swift @@ -0,0 +1,175 @@ +// +// TextProcessing.swift +// Textream +// +// Created by OpenAI Codex on 20.04.2026. +// + +import Foundation + +private let annotationBracketPairs: [(open: Character, close: Character)] = [ + ("[", "]"), + ("【", "】"), + ("〔", "〕"), + ("(", ")"), + ("[", "]"), +] + +private let zhHansSpeechFillers = [ + "那个", + "就是", + "然后", + "这个", + "嗯", + "呃", + "额", + "啊", + "吧", + "嘛", +] + +extension Unicode.Scalar { + var isCJK: Bool { + let v = value + return (v >= 0x4E00 && v <= 0x9FFF) + || (v >= 0x3400 && v <= 0x4DBF) + || (v >= 0x20000 && v <= 0x2A6DF) + || (v >= 0xF900 && v <= 0xFAFF) + || (v >= 0x3040 && v <= 0x309F) + || (v >= 0x30A0 && v <= 0x30FF) + || (v >= 0xAC00 && v <= 0xD7AF) + } +} + +extension Character { + var isCJKCharacter: Bool { + unicodeScalars.allSatisfy(\.isCJK) + } +} + +/// Splits text into display-ready words. CJK characters are split into single +/// display tokens, but bracketed cue blocks such as `【停顿】` stay intact so +/// they can be dimmed and skipped as a single annotation token. +func splitTextIntoWords(_ text: String) -> [String] { + var result: [String] = [] + var buffer = "" + var index = text.startIndex + + func flushBuffer() { + guard !buffer.isEmpty else { return } + result.append(buffer) + buffer = "" + } + + while index < text.endIndex { + let char = text[index] + + if char.isWhitespace { + flushBuffer() + index = text.index(after: index) + continue + } + + if let annotation = annotationToken(in: text, startingAt: index) { + flushBuffer() + result.append(annotation.token) + index = annotation.endIndex + continue + } + + if char.isCJKCharacter { + flushBuffer() + result.append(String(char)) + index = text.index(after: index) + continue + } + + buffer.append(char) + index = text.index(after: index) + } + + flushBuffer() + return result +} + +func isAnnotationToken(_ word: String) -> Bool { + let trimmed = word.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmed.isEmpty else { return false } + + if isBracketedCue(trimmed) { + return true + } + + let stripped = trimmed.filter { $0.isLetter || $0.isNumber } + return stripped.isEmpty +} + +func normalizedSpeechText(_ text: String) -> String { + let folded = text.applyingTransform(.fullwidthToHalfwidth, reverse: false) ?? text + return folded.lowercased().filter { $0.isLetter || $0.isNumber || $0.isWhitespace } +} + +func leadingSpeechFillerLength(in spoken: String, sourcePrefix: String, localeIdentifier: String) -> Int { + guard localeIdentifier.hasPrefix("zh") else { return 0 } + + var remaining = spoken + var remainingSource = sourcePrefix + var totalLength = 0 + var iterations = 0 + + while iterations < 3 { + iterations += 1 + let trimmed = remaining.drop(while: { $0.isWhitespace }) + totalLength += remaining.distance(from: remaining.startIndex, to: trimmed.startIndex) + remaining = String(trimmed) + + remainingSource = String(remainingSource.drop(while: { $0.isWhitespace })) + + guard let filler = zhHansSpeechFillers.first(where: { + remaining.hasPrefix($0) && !remainingSource.hasPrefix($0) + }) else { break } + + totalLength += filler.count + remaining.removeFirst(filler.count) + } + + return totalLength +} + +private func annotationToken(in text: String, startingAt start: String.Index) -> (token: String, endIndex: String.Index)? { + guard let pair = annotationBracketPairs.first(where: { text[start] == $0.open }) else { + return nil + } + + var search = text.index(after: start) + while search < text.endIndex { + let char = text[search] + if char == pair.close { + let token = String(text[start...search]) + return isBracketedCue(token) ? (token, text.index(after: search)) : nil + } + if char.isWhitespace { + return nil + } + search = text.index(after: search) + } + + return nil +} + +private func isBracketedCue(_ word: String) -> Bool { + guard let pair = annotationBracketPairs.first(where: { + word.first == $0.open && word.last == $0.close + }) else { + return false + } + + let inner = word.dropFirst().dropLast().trimmingCharacters(in: .whitespacesAndNewlines) + guard !inner.isEmpty else { return false } + + let scalarCount = inner.unicodeScalars.count + guard scalarCount <= 24 else { return false } + + let invalidCharacters = CharacterSet(charactersIn: "\n\r。!?!?;;") + return inner.rangeOfCharacter(from: invalidCharacters) == nil && word.first == pair.open && word.last == pair.close +} From 9192346adb6c82aa1e6d0a8c0a56754f90af9f7b Mon Sep 17 00:00:00 2001 From: try2love <1929706911@qq.com> Date: Tue, 21 Apr 2026 02:55:57 +0000 Subject: [PATCH 2/3] feat: Simplified Chinese interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1.增加应用界面语言切换,支持 System Default、English、简体中文 三个选项。 2.原生 SwiftUI 界面在切换后立即刷新;AppKit 菜单/系统弹窗在下一次显示时使用新语言; 3.Remote Browser 与 Director Web UI 在页面刷新/重新打开后使用当前 App 语言。 4.不改变语音识别语言设置,Speech Language 继续独立控制。 --- Textream/Textream.xcodeproj/project.pbxproj | 1 + Textream/Textream/BrowserServer.swift | 16 +- Textream/Textream/ContentView.swift | 74 ++++--- Textream/Textream/DictationManager.swift | 12 +- Textream/Textream/DirectorServer.swift | 24 ++- .../Textream/ExternalDisplayController.swift | 4 +- Textream/Textream/Localization.swift | 42 ++++ .../Textream/NotchOverlayController.swift | 18 +- Textream/Textream/NotchSettings.swift | 135 ++++++++----- .../Textream/PresentationNotesExtractor.swift | 8 +- Textream/Textream/SettingsView.swift | 170 +++++++++------- Textream/Textream/SpeechRecognizer.swift | 12 +- Textream/Textream/TextreamApp.swift | 18 +- Textream/Textream/TextreamService.swift | 26 +-- Textream/Textream/UpdateChecker.swift | 22 +-- Textream/Textream/en.lproj/InfoPlist.strings | 5 + .../Textream/en.lproj/Localizable.strings | 181 ++++++++++++++++++ .../Textream/zh-Hans.lproj/InfoPlist.strings | 5 + .../zh-Hans.lproj/Localizable.strings | 181 ++++++++++++++++++ 19 files changed, 727 insertions(+), 227 deletions(-) create mode 100644 Textream/Textream/Localization.swift create mode 100644 Textream/Textream/en.lproj/InfoPlist.strings create mode 100644 Textream/Textream/en.lproj/Localizable.strings create mode 100644 Textream/Textream/zh-Hans.lproj/InfoPlist.strings create mode 100644 Textream/Textream/zh-Hans.lproj/Localizable.strings diff --git a/Textream/Textream.xcodeproj/project.pbxproj b/Textream/Textream.xcodeproj/project.pbxproj index df30b53..88fd57d 100644 --- a/Textream/Textream.xcodeproj/project.pbxproj +++ b/Textream/Textream.xcodeproj/project.pbxproj @@ -90,6 +90,7 @@ hasScannedForEncodings = 0; knownRegions = ( en, + "zh-Hans", Base, ); mainGroup = 446966AB2F37E47300AF141F; diff --git a/Textream/Textream/BrowserServer.swift b/Textream/Textream/BrowserServer.swift index 981ffc5..629add2 100644 --- a/Textream/Textream/BrowserServer.swift +++ b/Textream/Textream/BrowserServer.swift @@ -291,7 +291,7 @@ class BrowserServer { static func generateHTML(wsPort: UInt16) -> String { """ - + @@ -378,9 +378,9 @@ class BrowserServer {
📡
-
Waiting for Textream…
-
Start reading in the app to see your teleprompter here
-
Connecting…
+
\(L10n.html("Waiting for Textream…"))
+
\(L10n.html("Start reading in the app to see your teleprompter here"))
+
\(L10n.html("Connecting…"))
@@ -396,11 +396,13 @@ class BrowserServer {
-
Done!
+
\(L10n.html("Done!"))