diff --git a/tabby/Models/LlamaRuntimeModels.swift b/tabby/Models/LlamaRuntimeModels.swift index 3553d6e..0272f00 100644 --- a/tabby/Models/LlamaRuntimeModels.swift +++ b/tabby/Models/LlamaRuntimeModels.swift @@ -188,6 +188,12 @@ enum LlamaRuntimeError: LocalizedError { case unavailable(String) case cancelled case generationFailed(String) + /// Signals that the first-token confidence gate aborted generation because the model's top-1 + /// raw-logit softmax probability at position 0 was below the configured threshold. + /// The engine layer treats this as a normal "no suggestion" outcome rather than a user-facing + /// failure. Carrying probability/threshold/token in the case lets diagnostics distinguish a + /// suppressed-by-confidence empty from any other empty result. + case lowConfidenceSuppression(probability: Float, threshold: Double, token: String) var errorDescription: String? { switch self { @@ -195,6 +201,8 @@ enum LlamaRuntimeError: LocalizedError { return message case .cancelled: return "Runtime work was cancelled." + case let .lowConfidenceSuppression(probability, threshold, _): + return "Suggestion suppressed: first-token confidence \(probability) is below threshold \(threshold)." } } } diff --git a/tabby/Models/SuggestionEngineModels.swift b/tabby/Models/SuggestionEngineModels.swift index f4c84e2..df5f322 100644 --- a/tabby/Models/SuggestionEngineModels.swift +++ b/tabby/Models/SuggestionEngineModels.swift @@ -83,4 +83,13 @@ struct SuggestionSettingsSnapshot: Equatable, Sendable { /// on the first generated token, preventing conversational openers from appearing in /// inline autocomplete suggestions. let isFirstTokenGatingEnabled: Bool + /// When true, the llama runtime measures top-1 probability of the raw-logit softmax at + /// position 0 and silently suppresses the suggestion if it falls below + /// `firstTokenConfidenceThreshold`. Distinct from gating: gating *masks* specific tokens, + /// confidence suppression *aborts* the whole suggestion when the model is uncertain. + let isFirstTokenConfidenceGatingEnabled: Bool + /// Probability threshold in [0, 1]. The suggestion is suppressed when the model's top-1 + /// raw-logit softmax probability at position 0 is below this value. 0 disables in practice + /// (any probability >= 0 passes). + let firstTokenConfidenceThreshold: Double } diff --git a/tabby/Models/SuggestionModels.swift b/tabby/Models/SuggestionModels.swift index 5e5b508..74083e7 100644 --- a/tabby/Models/SuggestionModels.swift +++ b/tabby/Models/SuggestionModels.swift @@ -257,6 +257,12 @@ struct SuggestionRequest: Equatable, Sendable { /// on the very first sampled token. This is a llama-only feature — Apple Intelligence does /// not expose logit-level control. let isFirstTokenGatingEnabled: Bool + /// When true, the llama runtime measures the top-1 raw-logit softmax probability at the + /// first token position and aborts (returns no suggestion) when it falls below + /// `firstTokenConfidenceThreshold`. llama-only. + let isFirstTokenConfidenceGatingEnabled: Bool + /// Probability threshold in [0, 1] used by `isFirstTokenConfidenceGatingEnabled`. + let firstTokenConfidenceThreshold: Double } /// The engine's normalized response, including raw model text for debugging. diff --git a/tabby/Models/SuggestionSettingsModel.swift b/tabby/Models/SuggestionSettingsModel.swift index dfde988..e83a708 100644 --- a/tabby/Models/SuggestionSettingsModel.swift +++ b/tabby/Models/SuggestionSettingsModel.swift @@ -24,6 +24,15 @@ final class SuggestionSettingsModel: ObservableObject { /// This prevents instruction-tuned models from starting suggestions with conversational /// openers that belong in a chat reply, not in inline autocomplete. @Published private(set) var isFirstTokenGatingEnabled: Bool + /// When enabled, the llama runtime measures the top-1 raw-logit softmax probability of the + /// first sampled token and silently suppresses the whole suggestion if it falls below + /// `firstTokenConfidenceThreshold`. This is a *separate* axis from chat-opener gating: + /// gating masks specific tokens; confidence suppression aborts generation entirely when + /// the model's own distribution is too flat to produce a trustworthy continuation. + @Published private(set) var isFirstTokenConfidenceGatingEnabled: Bool + /// Probability threshold in [0, 1]. Higher values are stricter (more suggestions are + /// suppressed). 0 effectively disables the gate even when the toggle is on. + @Published private(set) var firstTokenConfidenceThreshold: Double private let userDefaults: UserDefaults @@ -38,6 +47,14 @@ final class SuggestionSettingsModel: ObservableObject { private static let selectedLocalPromptModeDefaultsKey = "selectedLocalSuggestionPromptMode" private static let customAIInstructionsDefaultsKey = "tabbyCustomAIInstructions" private static let isFirstTokenGatingEnabledDefaultsKey = "tabbyFirstTokenGatingEnabled" + private static let confidenceGatingEnabledDefaultsKey = "tabbyFirstTokenConfidenceGatingEnabled" + private static let confidenceThresholdDefaultsKey = "tabbyFirstTokenConfidenceThreshold" + + /// 0.10 is a deliberately gentle starting point: our local models often peak at ~0.30-0.60 + /// for unambiguous continuations, so this threshold catches only the genuinely-confused + /// cases (e.g. the model sees the prompt as ambiguous and spreads probability widely). + /// We expect to tune this once telemetry from the `first-token-confidence` log accumulates. + private static let defaultFirstTokenConfidenceThreshold: Double = 0.10 init( configuration: SuggestionConfiguration, @@ -74,6 +91,18 @@ final class SuggestionSettingsModel: ObservableObject { } // Default to enabled — first-token gating is a net positive for all known instruct models. let resolvedFirstTokenGatingEnabled = userDefaults.object(forKey: Self.isFirstTokenGatingEnabledDefaultsKey) as? Bool ?? true + // Default off until we've seen field telemetry. The deny-list gate ships on by default + // because it's evidence-backed and surgical; confidence suppression is heuristic and can + // hide useful suggestions when the threshold is mistuned, so users opt in explicitly. + let resolvedConfidenceGatingEnabled = userDefaults + .object(forKey: Self.confidenceGatingEnabledDefaultsKey) as? Bool ?? false + let resolvedConfidenceThreshold: Double = { + guard userDefaults.object(forKey: Self.confidenceThresholdDefaultsKey) != nil else { + return Self.defaultFirstTokenConfidenceThreshold + } + let raw = userDefaults.double(forKey: Self.confidenceThresholdDefaultsKey) + return min(max(raw, 0.0), 1.0) + }() isGloballyEnabled = resolvedGloballyEnabled disabledAppRules = resolvedDisabledAppRules @@ -84,6 +113,8 @@ final class SuggestionSettingsModel: ObservableObject { selectedLocalPromptMode = resolvedLocalPromptMode customAIInstructions = resolvedCustomAIInstructions isFirstTokenGatingEnabled = resolvedFirstTokenGatingEnabled + isFirstTokenConfidenceGatingEnabled = resolvedConfidenceGatingEnabled + firstTokenConfidenceThreshold = resolvedConfidenceThreshold userDefaults.set(resolvedGloballyEnabled, forKey: Self.isGloballyEnabledDefaultsKey) persistDisabledAppRules(resolvedDisabledAppRules) @@ -94,6 +125,8 @@ final class SuggestionSettingsModel: ObservableObject { persistSelectedLocalPromptMode(resolvedLocalPromptMode) persistCustomAIInstructions(resolvedCustomAIInstructions) userDefaults.set(resolvedFirstTokenGatingEnabled, forKey: Self.isFirstTokenGatingEnabledDefaultsKey) + userDefaults.set(resolvedConfidenceGatingEnabled, forKey: Self.confidenceGatingEnabledDefaultsKey) + userDefaults.set(resolvedConfidenceThreshold, forKey: Self.confidenceThresholdDefaultsKey) } /// Compatibility shim for legacy call sites while the UI migrates from the old toggle to the @@ -121,7 +154,9 @@ final class SuggestionSettingsModel: ObservableObject { selectedWordCountPreset: selectedWordCountPreset, effectivePromptMode: effectivePromptMode, customAIInstructions: CustomAIInstructionFormatter.normalized(customAIInstructions), - isFirstTokenGatingEnabled: isFirstTokenGatingEnabled + isFirstTokenGatingEnabled: isFirstTokenGatingEnabled, + isFirstTokenConfidenceGatingEnabled: isFirstTokenConfidenceGatingEnabled, + firstTokenConfidenceThreshold: firstTokenConfidenceThreshold ) } @@ -282,6 +317,27 @@ final class SuggestionSettingsModel: ObservableObject { userDefaults.set(enabled, forKey: Self.isFirstTokenGatingEnabledDefaultsKey) } + func setFirstTokenConfidenceGatingEnabled(_ enabled: Bool) { + guard isFirstTokenConfidenceGatingEnabled != enabled else { + return + } + + isFirstTokenConfidenceGatingEnabled = enabled + userDefaults.set(enabled, forKey: Self.confidenceGatingEnabledDefaultsKey) + } + + func setFirstTokenConfidenceThreshold(_ threshold: Double) { + // Clamp at the setter boundary so any UI bug (slider out of range, manual defaults edit) + // cannot corrupt persisted state. The runtime layer trusts this value as already-valid. + let clamped = min(max(threshold, 0.0), 1.0) + guard firstTokenConfidenceThreshold != clamped else { + return + } + + firstTokenConfidenceThreshold = clamped + userDefaults.set(clamped, forKey: Self.confidenceThresholdDefaultsKey) + } + private static func effectivePromptMode( engine: SuggestionEngineKind, localPromptMode: SuggestionPromptMode @@ -423,7 +479,12 @@ final class SuggestionSettingsModel: ObservableObject { extension SuggestionSettingsModel: SuggestionSettingsProviding { var snapshotPublisher: AnyPublisher { - Publishers.CombineLatest4( + // Combine maxes out at four upstreams per operator, but the snapshot now depends on nine + // published values. We split them into two logical bundles — a "core" group of always-on + // selections and a "first-token" group of llama-only gating settings — then CombineLatest + // those two intermediate publishers. Equality on the bundles via removeDuplicates makes + // the downstream snapshot still emit only on real change. + let coreSelections = Publishers.CombineLatest4( Publishers.CombineLatest4( $isGloballyEnabled, $disabledAppRules, @@ -434,22 +495,33 @@ extension SuggestionSettingsModel: SuggestionSettingsProviding { $customAIInstructions, $isFirstTokenGatingEnabled ) - .map { combinedSettings, localPromptMode, customAIInstructions, firstTokenGatingEnabled in - let (globallyEnabled, disabledAppRules, engine, wordCountPreset) = combinedSettings - return SuggestionSettingsSnapshot( - isGloballyEnabled: globallyEnabled, - disabledAppBundleIdentifiers: Set(disabledAppRules.map(\.bundleIdentifier)), - selectedEngine: engine, - selectedWordCountPreset: wordCountPreset, - effectivePromptMode: Self.effectivePromptMode( - engine: engine, - localPromptMode: localPromptMode - ), - customAIInstructions: CustomAIInstructionFormatter.normalized(customAIInstructions), - isFirstTokenGatingEnabled: firstTokenGatingEnabled - ) - } - .removeDuplicates() - .eraseToAnyPublisher() + + let confidenceSelections = Publishers.CombineLatest( + $isFirstTokenConfidenceGatingEnabled, + $firstTokenConfidenceThreshold + ) + + return Publishers.CombineLatest(coreSelections, confidenceSelections) + .map { coreTuple, confidenceTuple in + let (combinedSettings, localPromptMode, customAIInstructions, firstTokenGatingEnabled) = coreTuple + let (globallyEnabled, disabledAppRules, engine, wordCountPreset) = combinedSettings + let (confidenceGatingEnabled, confidenceThreshold) = confidenceTuple + return SuggestionSettingsSnapshot( + isGloballyEnabled: globallyEnabled, + disabledAppBundleIdentifiers: Set(disabledAppRules.map(\.bundleIdentifier)), + selectedEngine: engine, + selectedWordCountPreset: wordCountPreset, + effectivePromptMode: Self.effectivePromptMode( + engine: engine, + localPromptMode: localPromptMode + ), + customAIInstructions: CustomAIInstructionFormatter.normalized(customAIInstructions), + isFirstTokenGatingEnabled: firstTokenGatingEnabled, + isFirstTokenConfidenceGatingEnabled: confidenceGatingEnabled, + firstTokenConfidenceThreshold: confidenceThreshold + ) + } + .removeDuplicates() + .eraseToAnyPublisher() } } diff --git a/tabby/Services/Runtime/LlamaRuntimeCore.swift b/tabby/Services/Runtime/LlamaRuntimeCore.swift index ed0107f..8783a1f 100644 --- a/tabby/Services/Runtime/LlamaRuntimeCore.swift +++ b/tabby/Services/Runtime/LlamaRuntimeCore.swift @@ -41,6 +41,16 @@ actor LlamaRuntimeCore { category: "first-token-gate" ) + /// Filterable signal for first-token confidence-based suppression. Distinct category from the + /// gate logger because these are *separate signals*: gating masks specific tokens; confidence + /// suppression aborts the whole suggestion when the model's distribution at position 0 is too + /// flat. A single generation can fire neither, one, or both. + /// log stream --predicate 'subsystem == "app.tabby" AND category == "first-token-confidence"' + private static let firstTokenConfidenceLogger = Logger( + subsystem: "app.tabby", + category: "first-token-confidence" + ) + private var backendInitialized = false private var model: OpaquePointer? private var preparedRuntime: PreparedLlamaRuntime? @@ -162,7 +172,9 @@ actor LlamaRuntimeCore { minP: Double, repetitionPenalty: Double, seed: UInt32? = nil, - firstTokenGatingEnabled: Bool = true + firstTokenGatingEnabled: Bool = true, + firstTokenConfidenceGatingEnabled: Bool = false, + firstTokenConfidenceThreshold: Double = 0.0 ) throws -> String { guard let preparedRuntime else { throw LlamaRuntimeError.unavailable("The llama model is not loaded.") @@ -257,6 +269,21 @@ actor LlamaRuntimeCore { logFirstTokenGateFireIfNeeded(context: context, vocab: vocab) } + // Confidence gating runs *before* sampling so we can abort the whole generation + // (and avoid burning a sampled token + decode) when the model's distribution at + // position 0 is too flat. The signal is the top-1 probability of the softmax over + // the raw logits — not the post-sampler distribution — because temperature/top-p + // shape the sampler's output, not the model's actual confidence. + if tokenIndex == 0 && firstTokenConfidenceGatingEnabled { + if let suppression = lowConfidenceSuppressionIfNeeded( + context: context, + vocab: vocab, + threshold: firstTokenConfidenceThreshold + ) { + throw suppression + } + } + let nextToken = llama_sampler_sample(activeSampler, context, -1) if nextToken == llama_vocab_eos(vocab) || llama_vocab_is_eog(vocab, nextToken) { break @@ -282,6 +309,14 @@ actor LlamaRuntimeCore { try decodeToken(nextToken, position: position, in: context) position += 1 } + } catch let error as LlamaRuntimeError { + // Confidence suppression is a clean abort — the prompt KV is still valid and the next + // request can reuse it. Reserve the cache reset for genuine generation failures. + if case .lowConfidenceSuppression = error { + throw error + } + shouldResetPromptCache = true + throw error } catch { shouldResetPromptCache = true throw error @@ -805,6 +840,73 @@ actor LlamaRuntimeCore { ) } + /// Checks the model's confidence at position 0 and returns a suppression error if it is too + /// low. Confidence is defined as the **top-1 probability of the softmax over the raw logits** + /// at the last context position — i.e. how peaked the model's actual distribution is, before + /// any sampler-chain transforms. + /// + /// We deliberately don't use the *sampled* token's post-transform probability: temperature, + /// top-p, and min-p reshape the distribution, so a sampled-token probability of 0.9 after + /// top-p can correspond to a raw distribution where the true top-1 was 0.05 (the model was + /// confused, but the sampler concentrated mass on a survivor). For inline autocomplete we + /// want to suppress when *the model itself* was uncertain, not when the sampler happened to + /// be confident about a leftover. + /// + /// Implementation note: we compute softmax in a numerically-stable way (subtract max logit + /// before exp) over the full vocabulary. This is one O(nVocab) pass — same cost as the gate + /// argmax — and it only runs once per generation when confidence gating is enabled. + private func lowConfidenceSuppressionIfNeeded( + context: OpaquePointer, + vocab: OpaquePointer, + threshold: Double + ) -> LlamaRuntimeError? { + guard let logits = llama_get_logits_ith(context, -1) else { + return nil + } + + let nVocab = Int(llama_vocab_n_tokens(vocab)) + guard nVocab > 0 else { return nil } + + var maxLogit: Float = -.infinity + var argmaxTokenID: llama_token = 0 + for tokenID in 0 ..< nVocab { + let value = logits[tokenID] + if value > maxLogit { + maxLogit = value + argmaxTokenID = llama_token(tokenID) + } + } + + // Numerically-stable softmax: subtract the max before exponentiating so we don't overflow + // float on large logits. The probability of the argmax token is then + // exp(0) / sum(exp(logit_i - max)) = 1 / sum(exp(logit_i - max)) + var expSum: Double = 0 + for tokenID in 0 ..< nVocab { + expSum += Double(exp(logits[tokenID] - maxLogit)) + } + guard expSum > 0 else { return nil } + + let topProbability = Float(1.0 / expSum) + + guard Double(topProbability) < threshold else { + return nil + } + + let piece = pieceString(for: argmaxTokenID, vocab: vocab) + let escaped = piece + .replacingOccurrences(of: "\n", with: "\\n") + .replacingOccurrences(of: "\t", with: "\\t") + Self.firstTokenConfidenceLogger.debug( + "suppressed: top-1 token \(argmaxTokenID, privacy: .public) (\"\(escaped, privacy: .public)\") prob=\(topProbability, privacy: .public) threshold=\(threshold, privacy: .public)" + ) + + return .lowConfidenceSuppression( + probability: topProbability, + threshold: threshold, + token: piece + ) + } + /// Tokenizes a short string and returns just the first token, without BOS. /// Returns nil if tokenization fails or produces no tokens. private func tokenizeFirstToken(_ text: String, vocab: OpaquePointer) -> llama_token? { diff --git a/tabby/Services/Runtime/LlamaRuntimeManager.swift b/tabby/Services/Runtime/LlamaRuntimeManager.swift index 266dcfa..2394377 100644 --- a/tabby/Services/Runtime/LlamaRuntimeManager.swift +++ b/tabby/Services/Runtime/LlamaRuntimeManager.swift @@ -94,7 +94,9 @@ final class LlamaRuntimeManager: ObservableObject { minP: Double, repetitionPenalty: Double, seed: UInt32? = nil, - firstTokenGatingEnabled: Bool = true + firstTokenGatingEnabled: Bool = true, + firstTokenConfidenceGatingEnabled: Bool = false, + firstTokenConfidenceThreshold: Double = 0.0 ) async throws -> String { _ = try await preparedRuntime() @@ -109,7 +111,9 @@ final class LlamaRuntimeManager: ObservableObject { minP: minP, repetitionPenalty: repetitionPenalty, seed: seed, - firstTokenGatingEnabled: firstTokenGatingEnabled + firstTokenGatingEnabled: firstTokenGatingEnabled, + firstTokenConfidenceGatingEnabled: firstTokenConfidenceGatingEnabled, + firstTokenConfidenceThreshold: firstTokenConfidenceThreshold ) } catch is CancellationError { throw LlamaRuntimeError.cancelled diff --git a/tabby/Services/Runtime/LlamaSuggestionEngine.swift b/tabby/Services/Runtime/LlamaSuggestionEngine.swift index 1c71a4e..d07b426 100644 --- a/tabby/Services/Runtime/LlamaSuggestionEngine.swift +++ b/tabby/Services/Runtime/LlamaSuggestionEngine.swift @@ -21,18 +21,38 @@ final class LlamaSuggestionEngine { do { let startTime = Date() let cachedPrefixBytes = promptCacheHintTracker.cachedPrefixBytes(for: request) - let rawSuggestion = try await runtimeManager.generate( - prompt: request.prompt, - cachedPrefixBytes: cachedPrefixBytes, - maxPredictionTokens: request.maxPredictionTokens, - temperature: request.temperature, - topK: request.topK, - topP: request.topP, - minP: request.minP, - repetitionPenalty: request.repetitionPenalty, - seed: request.randomSeed, - firstTokenGatingEnabled: request.isFirstTokenGatingEnabled - ) + let rawSuggestion: String + do { + rawSuggestion = try await runtimeManager.generate( + prompt: request.prompt, + cachedPrefixBytes: cachedPrefixBytes, + maxPredictionTokens: request.maxPredictionTokens, + temperature: request.temperature, + topK: request.topK, + topP: request.topP, + minP: request.minP, + repetitionPenalty: request.repetitionPenalty, + seed: request.randomSeed, + firstTokenGatingEnabled: request.isFirstTokenGatingEnabled, + firstTokenConfidenceGatingEnabled: request.isFirstTokenConfidenceGatingEnabled, + firstTokenConfidenceThreshold: request.firstTokenConfidenceThreshold + ) + } catch let error as LlamaRuntimeError { + // Confidence suppression is a normal "no suggestion" outcome, not a failure. + // The runtime's own KV stays valid (it threw before any sampled-token decode), so + // we deliberately keep the prompt-cache hint tracker intact: the next request can + // still benefit from prefix reuse against this exact prompt. + if case .lowConfidenceSuppression = error { + promptCacheHintTracker.recordSuccessfulRequest(request) + return SuggestionResult( + generation: request.generation, + rawText: "", + text: "", + latency: Date().timeIntervalSince(startTime) + ) + } + throw error + } try Task.checkCancellation() promptCacheHintTracker.recordSuccessfulRequest(request) diff --git a/tabby/Support/SuggestionRequestFactory.swift b/tabby/Support/SuggestionRequestFactory.swift index 989f55a..791eca4 100644 --- a/tabby/Support/SuggestionRequestFactory.swift +++ b/tabby/Support/SuggestionRequestFactory.swift @@ -63,7 +63,9 @@ enum SuggestionRequestFactory { maxSuffixCharacters: configuration.maxSuffixCharacters, completionLengthInstruction: completionLengthInstruction, customAIInstructions: customAIInstructions, - isFirstTokenGatingEnabled: settings.isFirstTokenGatingEnabled + isFirstTokenGatingEnabled: settings.isFirstTokenGatingEnabled, + isFirstTokenConfidenceGatingEnabled: settings.isFirstTokenConfidenceGatingEnabled, + firstTokenConfidenceThreshold: settings.firstTokenConfidenceThreshold ) return SuggestionRequestBuildResult( diff --git a/tabby/UI/SettingsView.swift b/tabby/UI/SettingsView.swift index c6180c9..bbfac18 100644 --- a/tabby/UI/SettingsView.swift +++ b/tabby/UI/SettingsView.swift @@ -182,6 +182,37 @@ struct SettingsView: View { ) .font(.caption) .foregroundStyle(.secondary) + + Toggle("Suppress Low-Confidence Suggestions", isOn: firstTokenConfidenceGatingBinding) + + if suggestionSettings.isFirstTokenConfidenceGatingEnabled { + // Slider only renders when the gate is on so an inactive control doesn't + // imply behavior. The 0...0.5 range covers the useful tuning band — local + // models rarely peak above ~0.6 even on confident tokens, so going past + // 0.5 would suppress almost everything. + LabeledContent("Confidence Threshold") { + HStack(spacing: 8) { + Slider( + value: firstTokenConfidenceThresholdBinding, + in: 0.0 ... 0.5, + step: 0.01 + ) + .frame(maxWidth: 220) + + Text(String(format: "%.2f", suggestionSettings.firstTokenConfidenceThreshold)) + .font(.callout.monospacedDigit()) + .frame(width: 40, alignment: .trailing) + } + } + } + + Text( + "Aborts the suggestion when the model's top choice for the first word " + + "is below the threshold probability. Useful when prompts are ambiguous " + + "and the model's distribution is flat. Open Source engine only." + ) + .font(.caption) + .foregroundStyle(.secondary) } else { Text("Completion Style and custom instructions apply to the Open Source engine.") .font(.caption) @@ -495,6 +526,24 @@ struct SettingsView: View { ) } + private var firstTokenConfidenceGatingBinding: Binding { + Binding( + get: { suggestionSettings.isFirstTokenConfidenceGatingEnabled }, + set: { enabled in + suggestionSettings.setFirstTokenConfidenceGatingEnabled(enabled) + } + ) + } + + private var firstTokenConfidenceThresholdBinding: Binding { + Binding( + get: { suggestionSettings.firstTokenConfidenceThreshold }, + set: { value in + suggestionSettings.setFirstTokenConfidenceThreshold(value) + } + ) + } + private var customAIInstructionsBinding: Binding { Binding( get: { suggestionSettings.customAIInstructions }, diff --git a/tabbyTests/LlamaPromptRendererTests.swift b/tabbyTests/LlamaPromptRendererTests.swift index 17f19ff..f40d15b 100644 --- a/tabbyTests/LlamaPromptRendererTests.swift +++ b/tabbyTests/LlamaPromptRendererTests.swift @@ -213,7 +213,9 @@ final class LlamaPromptRendererTests: XCTestCase { maxSuffixCharacters: 192, completionLengthInstruction: "Return only the next few words.", customAIInstructions: nil, - isFirstTokenGatingEnabled: true + isFirstTokenGatingEnabled: true, + isFirstTokenConfidenceGatingEnabled: false, + firstTokenConfidenceThreshold: 0.0 ) } }