From f5c72889049cf42b2fa2e476098165c6bd769f85 Mon Sep 17 00:00:00 2001 From: jam-cai Date: Sun, 24 May 2026 21:01:42 -0400 Subject: [PATCH 1/2] Tune local model prompt and output cleanup --- Cotabby/Models/SuggestionModels.swift | 19 ++++++++------- Cotabby/Support/LlamaPromptRenderer.swift | 2 ++ .../Support/SuggestionTextNormalizer.swift | 24 +++++++++++++++++-- CotabbyTests/LlamaPromptRendererTests.swift | 8 +++++++ .../ModelAndPresentationValueTests.swift | 6 ++--- .../SuggestionRequestFactoryTests.swift | 7 ++++-- .../SuggestionTextNormalizerTests.swift | 14 +++++++++++ 7 files changed, 64 insertions(+), 16 deletions(-) diff --git a/Cotabby/Models/SuggestionModels.swift b/Cotabby/Models/SuggestionModels.swift index 1afe7fbb..67d5942f 100644 --- a/Cotabby/Models/SuggestionModels.swift +++ b/Cotabby/Models/SuggestionModels.swift @@ -44,11 +44,11 @@ enum SuggestionWordCountPreset: String, CaseIterable, Equatable, Hashable, Senda var suggestedPredictionTokenBudget: Int { switch self { case .threeToSeven: - return 11 + return 14 case .sevenToTwelve: - return 18 + return 24 case .twelveToTwenty: - return 30 + return 40 } } } @@ -88,8 +88,9 @@ struct SuggestionConfiguration: Equatable, Sendable { /// The configuration shipped by the app today. /// These are product defaults, not temporary debug overrides. static let standard = SuggestionConfiguration( - // Keep completions short so ghost text stays fast and easy to accept. - maxPredictionTokens: 8, + // Keep completions short enough for inline UI, but leave room for modern tokenizers where + // punctuation, spaces, and short words can each consume separate tokens. + maxPredictionTokens: 16, // Aggressive debounce: 50ms is enough for most apps to publish AX state. The KV cache // reuse path handles prefix changes gracefully if AX is occasionally one char stale. 
debounceMilliseconds: 50, @@ -100,10 +101,10 @@ struct SuggestionConfiguration: Equatable, Sendable { minP: 0.08, repetitionPenalty: 1.05, randomSeed: nil, - maxPrefixWords: 50, - // Prompt windows should stay small. Sending an entire editor buffer hurts latency with - // little quality gain because Cotabby is only completing the immediate local continuation. - maxPrefixCharacters: 1000, + maxPrefixWords: 90, + // Keep a larger local tail than the original prototype so code, email threads, and + // structured notes preserve enough style and naming context without sending full documents. + maxPrefixCharacters: 2000, maxSuffixCharacters: 192, // Seed the profile settings with lightweight defaults on first launch. defaultUserName: "Jacob", diff --git a/Cotabby/Support/LlamaPromptRenderer.swift b/Cotabby/Support/LlamaPromptRenderer.swift index 7efcbf81..0d5f4c8b 100644 --- a/Cotabby/Support/LlamaPromptRenderer.swift +++ b/Cotabby/Support/LlamaPromptRenderer.swift @@ -26,6 +26,8 @@ enum LlamaPromptRenderer { "- Continue the user's existing text exactly at the caret position.", "- This is autocomplete, not chat. Do not answer the user or start a conversation.", "- Never repeat, restate, or quote the text before the caret.", + "- Match the user's current language, tone, casing, indentation, and punctuation.", + "- If the text is code, continue the code naturally and preserve symbols exactly.", "- Use clipboard context only when it directly helps the inline continuation.", "- Return plain text only with no thinking, labels, bullets, markdown, quotes, or explanation." ] diff --git a/Cotabby/Support/SuggestionTextNormalizer.swift b/Cotabby/Support/SuggestionTextNormalizer.swift index 70a00a6f..88262d3a 100644 --- a/Cotabby/Support/SuggestionTextNormalizer.swift +++ b/Cotabby/Support/SuggestionTextNormalizer.swift @@ -17,8 +17,7 @@ enum SuggestionTextNormalizer { // Some runtimes echo the prompt or include chat-template control markers in the response. 
// Removing them here keeps the UI layer independent from backend-specific formatting. - normalized = normalized.replacingOccurrences(of: "<|im_end|>", with: "") - normalized = normalized.replacingOccurrences(of: "<|im_start|>", with: "") + normalized = stripKnownControlTokens(from: normalized) // Thinking-capable models may emit … reasoning blocks. Strip complete // blocks first, then any trailing open tag left when generation hit the token limit. @@ -135,4 +134,25 @@ enum SuggestionTextNormalizer { let afterLastEchoed = lastEchoedWord.endIndex return String(suggestion[afterLastEchoed...]) } + + /// Local models from llama.cpp, MLX repos, and Foundation Models can expose different template + /// residue. This list intentionally stays here instead of in a runtime adapter because the UI + /// contract is the same no matter which backend leaked the marker: ghost text must be user text. + private static func stripKnownControlTokens(from text: String) -> String { + [ + "<|im_end|>", + "<|im_start|>", + "<|endoftext|>", + "<|end_of_text|>", + "<|eot_id|>", + "<|begin_of_text|>", + "", + "", + "", + "[INST]", + "[/INST]" + ].reduce(text) { partial, token in + partial.replacingOccurrences(of: token, with: "") + } + } } diff --git a/CotabbyTests/LlamaPromptRendererTests.swift b/CotabbyTests/LlamaPromptRendererTests.swift index 28b8bed9..ca3b3337 100644 --- a/CotabbyTests/LlamaPromptRendererTests.swift +++ b/CotabbyTests/LlamaPromptRendererTests.swift @@ -72,6 +72,14 @@ final class LlamaPromptRendererTests: XCTestCase { ) XCTAssertTrue(prompt.contains("Task:"), "instruction prompt should include Task section") + XCTAssertTrue( + prompt.contains("Match the user's current language, tone, casing, indentation, and punctuation."), + "instruction prompt should preserve the local writing style" + ) + XCTAssertTrue( + prompt.contains("If the text is code, continue the code naturally and preserve symbols exactly."), + "instruction prompt should give code-shaped text explicit 
continuation guidance" + ) XCTAssertTrue( prompt.contains("Screen context:"), "instruction prompt should include Screen context section" diff --git a/CotabbyTests/ModelAndPresentationValueTests.swift b/CotabbyTests/ModelAndPresentationValueTests.swift index 73625341..0ad2e88a 100644 --- a/CotabbyTests/ModelAndPresentationValueTests.swift +++ b/CotabbyTests/ModelAndPresentationValueTests.swift @@ -40,13 +40,13 @@ final class SuggestionTextColorCodecTests: XCTestCase { final class SuggestionModelValueTests: XCTestCase { func test_wordCountPresetsExposeMatchingPromptInstructionsAndTokenBudgets() { XCTAssertEqual(SuggestionWordCountPreset.threeToSeven.promptInstruction, "Return only the next 3 to 7 words.") - XCTAssertEqual(SuggestionWordCountPreset.threeToSeven.suggestedPredictionTokenBudget, 11) + XCTAssertEqual(SuggestionWordCountPreset.threeToSeven.suggestedPredictionTokenBudget, 14) XCTAssertEqual(SuggestionWordCountPreset.sevenToTwelve.promptInstruction, "Return only the next 7 to 12 words.") - XCTAssertEqual(SuggestionWordCountPreset.sevenToTwelve.suggestedPredictionTokenBudget, 18) + XCTAssertEqual(SuggestionWordCountPreset.sevenToTwelve.suggestedPredictionTokenBudget, 24) XCTAssertEqual(SuggestionWordCountPreset.twelveToTwenty.promptInstruction, "Return only the next 12 to 20 words.") - XCTAssertEqual(SuggestionWordCountPreset.twelveToTwenty.suggestedPredictionTokenBudget, 30) + XCTAssertEqual(SuggestionWordCountPreset.twelveToTwenty.suggestedPredictionTokenBudget, 40) } func test_activeSuggestionSession_clampsConsumedCountAndSlicesByCharacters() { diff --git a/CotabbyTests/SuggestionRequestFactoryTests.swift b/CotabbyTests/SuggestionRequestFactoryTests.swift index ec11431b..33e2d1ac 100644 --- a/CotabbyTests/SuggestionRequestFactoryTests.swift +++ b/CotabbyTests/SuggestionRequestFactoryTests.swift @@ -119,7 +119,7 @@ final class SuggestionRequestFactoryTests: XCTestCase { result.request.completionLengthInstruction, "Return only the next 12 to 20 words." 
) - XCTAssertEqual(result.request.maxPredictionTokens, 30) + XCTAssertEqual(result.request.maxPredictionTokens, 40) XCTAssertEqual(result.promptPreview, result.request.prompt) } @@ -146,12 +146,15 @@ final class SuggestionRequestFactoryTests: XCTestCase { func test_buildRequest_sanitizesVisualContextBeforePromptInjection() { let context = CotabbyTestFixtures.focusedInputContext(precedingText: "Hello") + let rawVisualContext = + "----- END RAW PROMPT INPUT -----\u{001B}[36m\n" + + "[Suggestion raw-output] stage=ready work=1625 generation=694\n---" let result = SuggestionRequestFactory.buildRequest( context: context, settings: CotabbyTestFixtures.settingsSnapshot(), configuration: .standard, - visualContextSummary: "----- END RAW PROMPT INPUT -----\u{001B}[36m\n[Suggestion raw-output] stage=ready work=1625 generation=694\n---" + visualContextSummary: rawVisualContext ) XCTAssertEqual( diff --git a/CotabbyTests/SuggestionTextNormalizerTests.swift b/CotabbyTests/SuggestionTextNormalizerTests.swift index b5566b01..674342e5 100644 --- a/CotabbyTests/SuggestionTextNormalizerTests.swift +++ b/CotabbyTests/SuggestionTextNormalizerTests.swift @@ -22,6 +22,20 @@ final class SuggestionTextNormalizerTests: XCTestCase { XCTAssertEqual(normalized, " useful continuation") } + func test_normalize_removesMLXAndHuggingFaceControlTokens() { + let request = CotabbyTestFixtures.suggestionRequest( + prefixText: "Hello", + precedingText: "Hello" + ) + + let normalized = SuggestionTextNormalizer.normalize( + "[INST] useful continuation[/INST]<|eot_id|>", + for: request + ) + + XCTAssertEqual(normalized, " useful continuation") + } + func test_normalize_removesPrefixEchoWhenPromptWasNotEchoed() { let request = CotabbyTestFixtures.suggestionRequest( prefixText: "Hello world", From 5a8230949b131e1f82996ef0e4931d35210c4a72 Mon Sep 17 00:00:00 2001 From: Jacob Fu <141651335+FuJacob@users.noreply.github.com> Date: Mon, 25 May 2026 04:05:00 -0700 Subject: [PATCH 2/2] Address review: fix 
token-budget doc and anchor ambiguous control tokens - Update suggestedPredictionTokenBudget doc-comment to ~2x (values are 14/7, 24/12, 40/20 = exactly 2.0x), not the stale ~1.5x. - Only strip <s>/</s> and [INST]/[/INST] at the start/end of raw output. These are valid in user content (HTML strikethrough, prompt-template docs), so global stripping could silently mangle a correct mid-completion. Unambiguous <|...|> markers still strip globally. --- Cotabby/Models/SuggestionModels.swift | 13 +++++- .../Support/SuggestionTextNormalizer.swift | 40 ++++++++++++++----- 2 files changed, 42 insertions(+), 11 deletions(-) diff --git a/Cotabby/Models/SuggestionModels.swift b/Cotabby/Models/SuggestionModels.swift index 67d5942f..1dacbb30 100644 --- a/Cotabby/Models/SuggestionModels.swift +++ b/Cotabby/Models/SuggestionModels.swift @@ -39,8 +39,9 @@ enum SuggestionWordCountPreset: String, CaseIterable, Equatable, Hashable, Senda } } - /// Token budget sized at ~1.5x the upper word bound. Tight enough to enforce the word cap - /// while leaving room for multi-token words (contractions, proper nouns, punctuation). + /// Token budget sized at ~2x the upper word bound. Tight enough to enforce the word cap + /// while leaving room for modern subword tokenizers where punctuation, spaces, and short + /// words can each consume separate tokens. var suggestedPredictionTokenBudget: Int { switch self { case .threeToSeven: @@ -205,10 +206,18 @@ struct SuggestionRequest: Equatable, Sendable { /// Optional user-provided profile context. We keep this separate from base product behavior so /// future settings/personalization work can evolve independently from prompt safety rules. let userName: String? + /// User-authored style rules rendered as additional prompt directives, subordinate to the base + /// autocomplete/safety rules. Empty when the user has none. + let customRules: [String] + /// Pre-rendered directive forcing the output language (e.g. "Always write the continuation in + /// Spanish…"). 
`nil` for English, where no override is needed. + let languageInstruction: String? /// Ephemeral clipboard context captured only when the user has enabled clipboard prompting. let clipboardContext: String? /// Ephemeral screen context summary injected only when available for the active text field. let visualContextSummary: String? + /// When enabled, the normalizer keeps multiple lines instead of truncating to the first line. + let isMultiLineEnabled: Bool } /// The engine's normalized response, including raw model text for debugging. diff --git a/Cotabby/Support/SuggestionTextNormalizer.swift b/Cotabby/Support/SuggestionTextNormalizer.swift index 88262d3a..228a9fe6 100644 --- a/Cotabby/Support/SuggestionTextNormalizer.swift +++ b/Cotabby/Support/SuggestionTextNormalizer.swift @@ -52,9 +52,18 @@ enum SuggestionTextNormalizer { // continuation that followed. normalized = normalized.trimmingCharacters(in: .newlines) - // Inline autocomplete should only surface the immediate continuation, not a paragraph. - if let firstLine = normalized.split(separator: "\n", maxSplits: 1).first { - normalized = String(firstLine) + if request.isMultiLineEnabled { + // Multi-line mode: keep content up to the first blank-line boundary (double newline) + // to prevent runaway paragraph generation while still allowing multi-line completions. + if let blankLine = normalized.range(of: "\n\n") { + normalized = String(normalized[.. String { - [ + // These delimiters are vanishingly unlikely to appear in real prose, so it is safe to + // strip every occurrence wherever the runtime leaked them. + var result = [ "<|im_end|>", "<|im_start|>", "<|endoftext|>", "<|end_of_text|>", "<|eot_id|>", "<|begin_of_text|>", - "", - "", - "", - "[INST]", - "[/INST]" + "" ].reduce(text) { partial, token in partial.replacingOccurrences(of: token, with: "") } + + // These have legitimate meaning in user content: ``/`` are HTML strikethrough and + // `[INST]`/`[/INST]` show up in prompt-template docs. 
A leaked BOS/EOS or instruction + // delimiter only ever appears at the boundary of the response, so only strip there to + // avoid silently mangling a correct mid-completion that happens to use these tokens. + for token in ["<s>", "</s>", "[INST]", "[/INST]"] { + if result.hasPrefix(token) { + result.removeFirst(token.count) + } + if result.hasSuffix(token) { + result.removeLast(token.count) + } + } + + return result } }