Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 13 additions & 11 deletions Cotabby/Models/SuggestionModels.swift
Original file line number Diff line number Diff line change
Expand Up @@ -39,16 +39,17 @@ enum SuggestionWordCountPreset: String, CaseIterable, Equatable, Hashable, Senda
}
}

/// Token budget sized at ~1.5x the upper word bound. Tight enough to enforce the word cap
/// while leaving room for multi-token words (contractions, proper nouns, punctuation).
/// Token budget sized at ~2x the upper word bound. Tight enough to enforce the word cap
/// while leaving room for modern subword tokenizers where punctuation, spaces, and short
/// words can each consume separate tokens.
var suggestedPredictionTokenBudget: Int {
switch self {
case .threeToSeven:
return 11
return 14
case .sevenToTwelve:
return 18
return 24
case .twelveToTwenty:
return 30
return 40
}
}
}
Expand Down Expand Up @@ -88,8 +89,9 @@ struct SuggestionConfiguration: Equatable, Sendable {
/// The configuration shipped by the app today.
/// These are product defaults, not temporary debug overrides.
static let standard = SuggestionConfiguration(
// Keep completions short so ghost text stays fast and easy to accept.
maxPredictionTokens: 8,
// Keep completions short enough for inline UI, but leave room for modern tokenizers where
// punctuation, spaces, and short words can each consume separate tokens.
maxPredictionTokens: 16,
// Aggressive debounce: 50ms is enough for most apps to publish AX state. The KV cache
// reuse path handles prefix changes gracefully if AX is occasionally one char stale.
debounceMilliseconds: 50,
Expand All @@ -100,10 +102,10 @@ struct SuggestionConfiguration: Equatable, Sendable {
minP: 0.08,
repetitionPenalty: 1.05,
randomSeed: nil,
maxPrefixWords: 50,
// Prompt windows should stay small. Sending an entire editor buffer hurts latency with
// little quality gain because Cotabby is only completing the immediate local continuation.
maxPrefixCharacters: 1000,
maxPrefixWords: 90,
// Keep a larger local tail than the original prototype so code, email threads, and
// structured notes preserve enough style and naming context without sending full documents.
maxPrefixCharacters: 2000,
maxSuffixCharacters: 192,
// Seed the profile settings with lightweight defaults on first launch.
defaultUserName: "Jacob",
Expand Down
2 changes: 2 additions & 0 deletions Cotabby/Support/LlamaPromptRenderer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ enum LlamaPromptRenderer {
"- Continue the user's existing text exactly at the caret position.",
"- This is autocomplete, not chat. Do not answer the user or start a conversation.",
"- Never repeat, restate, or quote the text before the caret.",
"- Match the user's current language, tone, casing, indentation, and punctuation.",
"- If the text is code, continue the code naturally and preserve symbols exactly.",
"- Use clipboard context only when it directly helps the inline continuation.",
"- Return plain text only with no thinking, labels, bullets, markdown, quotes, or explanation."
]
Expand Down
37 changes: 35 additions & 2 deletions Cotabby/Support/SuggestionTextNormalizer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@ enum SuggestionTextNormalizer {

// Some runtimes echo the prompt or include chat-template control markers in the response.
// Removing them here keeps the UI layer independent from backend-specific formatting.
normalized = normalized.replacingOccurrences(of: "<|im_end|>", with: "")
normalized = normalized.replacingOccurrences(of: "<|im_start|>", with: "")
normalized = stripKnownControlTokens(from: normalized)

// Thinking-capable models may emit <think>…</think> reasoning blocks. Strip complete
// blocks first, then any trailing open tag left when generation hit the token limit.
Expand Down Expand Up @@ -144,4 +143,38 @@ enum SuggestionTextNormalizer {
let afterLastEchoed = lastEchoedWord.endIndex
return String(suggestion[afterLastEchoed...])
}

/// Removes chat-template control markers that a runtime leaked into generated text.
///
/// Local models from llama.cpp, MLX repos, and Foundation Models can expose different template
/// residue. This list intentionally stays here instead of in a runtime adapter because the UI
/// contract is the same no matter which backend leaked the marker: ghost text must be user text.
///
/// - Parameter text: Raw model output that may contain template markers.
/// - Returns: `text` with every unambiguous marker removed everywhere, and with the ambiguous
///   markers (`<s>`, `</s>`, `[INST]`, `[/INST]`) removed only from the start and end.
private static func stripKnownControlTokens(from text: String) -> String {
    // These delimiters are vanishingly unlikely to appear in real prose, so it is safe to
    // strip every occurrence wherever the runtime leaked them.
    let unambiguousTokens = [
        "<|im_end|>",
        "<|im_start|>",
        "<|endoftext|>",
        "<|end_of_text|>",
        "<|eot_id|>",
        "<|begin_of_text|>",
        "<end_of_turn>"
    ]
    var result = unambiguousTokens.reduce(text) { partial, token in
        partial.replacingOccurrences(of: token, with: "")
    }

    // These have legitimate meaning in user content: `<s>`/`</s>` are HTML strikethrough and
    // `[INST]`/`[/INST]` show up in prompt-template docs. A leaked BOS/EOS or instruction
    // delimiter only ever appears at the boundary of the response, so only strip there to
    // avoid silently mangling a correct mid-completion that happens to use these tokens.
    let boundaryOnlyTokens = ["<s>", "</s>", "[INST]", "[/INST]"]

    // Leaked markers can stack in any order ("[INST]<s>…", "…</s>[/INST]"). A single
    // fixed-order pass would leave the inner marker behind, so keep peeling until a full
    // pass removes nothing. Every successful strip shortens `result`, so this terminates.
    var strippedAny = true
    while strippedAny {
        strippedAny = false
        for token in boundaryOnlyTokens {
            if result.hasPrefix(token) {
                result.removeFirst(token.count)
                strippedAny = true
            }
            if result.hasSuffix(token) {
                result.removeLast(token.count)
                strippedAny = true
            }
        }
    }

    return result
}
}
8 changes: 8 additions & 0 deletions CotabbyTests/LlamaPromptRendererTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,14 @@ final class LlamaPromptRendererTests: XCTestCase {
)

XCTAssertTrue(prompt.contains("Task:"), "instruction prompt should include Task section")
XCTAssertTrue(
prompt.contains("Match the user's current language, tone, casing, indentation, and punctuation."),
"instruction prompt should preserve the local writing style"
)
XCTAssertTrue(
prompt.contains("If the text is code, continue the code naturally and preserve symbols exactly."),
"instruction prompt should give code-shaped text explicit continuation guidance"
)
XCTAssertTrue(
prompt.contains("Screen context:"),
"instruction prompt should include Screen context section"
Expand Down
6 changes: 3 additions & 3 deletions CotabbyTests/ModelAndPresentationValueTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,13 @@ final class SuggestionTextColorCodecTests: XCTestCase {
final class SuggestionModelValueTests: XCTestCase {
func test_wordCountPresetsExposeMatchingPromptInstructionsAndTokenBudgets() {
XCTAssertEqual(SuggestionWordCountPreset.threeToSeven.promptInstruction, "Return only the next 3 to 7 words.")
XCTAssertEqual(SuggestionWordCountPreset.threeToSeven.suggestedPredictionTokenBudget, 11)
XCTAssertEqual(SuggestionWordCountPreset.threeToSeven.suggestedPredictionTokenBudget, 14)

XCTAssertEqual(SuggestionWordCountPreset.sevenToTwelve.promptInstruction, "Return only the next 7 to 12 words.")
XCTAssertEqual(SuggestionWordCountPreset.sevenToTwelve.suggestedPredictionTokenBudget, 18)
XCTAssertEqual(SuggestionWordCountPreset.sevenToTwelve.suggestedPredictionTokenBudget, 24)

XCTAssertEqual(SuggestionWordCountPreset.twelveToTwenty.promptInstruction, "Return only the next 12 to 20 words.")
XCTAssertEqual(SuggestionWordCountPreset.twelveToTwenty.suggestedPredictionTokenBudget, 30)
XCTAssertEqual(SuggestionWordCountPreset.twelveToTwenty.suggestedPredictionTokenBudget, 40)
}

func test_activeSuggestionSession_clampsConsumedCountAndSlicesByCharacters() {
Expand Down
7 changes: 5 additions & 2 deletions CotabbyTests/SuggestionRequestFactoryTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ final class SuggestionRequestFactoryTests: XCTestCase {
result.request.completionLengthInstruction,
"Return only the next 12 to 20 words."
)
XCTAssertEqual(result.request.maxPredictionTokens, 30)
XCTAssertEqual(result.request.maxPredictionTokens, 40)
XCTAssertEqual(result.promptPreview, result.request.prompt)
}

Expand All @@ -146,12 +146,15 @@ final class SuggestionRequestFactoryTests: XCTestCase {

func test_buildRequest_sanitizesVisualContextBeforePromptInjection() {
let context = CotabbyTestFixtures.focusedInputContext(precedingText: "Hello")
let rawVisualContext =
"----- END RAW PROMPT INPUT -----\u{001B}[36m\n" +
"[Suggestion raw-output] stage=ready work=1625 generation=694\n---"

let result = SuggestionRequestFactory.buildRequest(
context: context,
settings: CotabbyTestFixtures.settingsSnapshot(),
configuration: .standard,
visualContextSummary: "----- END RAW PROMPT INPUT -----\u{001B}[36m\n[Suggestion raw-output] stage=ready work=1625 generation=694\n---"
visualContextSummary: rawVisualContext
)

XCTAssertEqual(
Expand Down
14 changes: 14 additions & 0 deletions CotabbyTests/SuggestionTextNormalizerTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,20 @@ final class SuggestionTextNormalizerTests: XCTestCase {
XCTAssertEqual(normalized, " useful continuation")
}

/// Output wrapped in `<s>`, `[INST]`, `[/INST]`, `<|eot_id|>`, and `</s>` markers should
/// normalize down to just the continuation text.
func test_normalize_removesMLXAndHuggingFaceControlTokens() {
    // Arrange: a request whose prefix is "Hello" and a marker-wrapped model response.
    let rawModelOutput = "<s>[INST] useful continuation[/INST]<|eot_id|></s>"
    let helloRequest = CotabbyTestFixtures.suggestionRequest(
        prefixText: "Hello",
        precedingText: "Hello"
    )

    // Act
    let cleaned = SuggestionTextNormalizer.normalize(rawModelOutput, for: helloRequest)

    // Assert: only the continuation (with its leading space) survives.
    XCTAssertEqual(cleaned, " useful continuation")
}

func test_normalize_removesPrefixEchoWhenPromptWasNotEchoed() {
let request = CotabbyTestFixtures.suggestionRequest(
prefixText: "Hello world",
Expand Down
Loading