Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
5612bc3
Add Termux command parsing and execution fallback feedback
Android-PowerUser May 3, 2026
c1182fc
Avoid duplicate command detection after streaming completion
Android-PowerUser May 4, 2026
313ccbb
Upgrade Kotlin toolchain to 2.1.20 and adopt Compose compiler plugin
Android-PowerUser May 4, 2026
af1a8bf
Switch workflow script from Android SDK emulator to Waydroid
Android-PowerUser May 4, 2026
2c745dd
Fix Termux RUN_COMMAND dispatch configuration
Android-PowerUser May 5, 2026
258ad25
Fix wait cancellation and Groq vision payloads
Android-PowerUser May 7, 2026
f43520e
Adjust CI compile-check toggle and make Stop cancel Wait delays
Android-PowerUser May 7, 2026
f2b9add
Add Mistral Medium 3.5 model option and reasoning hint
Android-PowerUser May 11, 2026
a1b1a20
Update AGENTS.md
Android-PowerUser May 11, 2026
5bc82ed
Update MenuScreen.kt
Android-PowerUser May 11, 2026
55a22bf
Update AGENTS.md
Android-PowerUser May 11, 2026
68404e8
Revise instructions for clarity and consistency
Android-PowerUser May 11, 2026
7b6873e
Apply generation settings consistently and default Top K to 1
Android-PowerUser May 11, 2026
935e323
Fix Termux RUN_COMMAND dispatch and add detailed diagnostics
Android-PowerUser May 12, 2026
a35ead0
Add Termux result callback handling and output status streaming
Android-PowerUser May 12, 2026
e3c9298
Fix Termux result parsing and carry output into next screenshot prompt
Android-PowerUser May 12, 2026
e4aca44
Fix Termux callback key parsing and receiver lifecycle cleanup
Android-PowerUser May 12, 2026
80e86c1
Make Termux commands async and resume queue on callback
Android-PowerUser May 12, 2026
ec55df9
Inject Termux output into enriched screen info and cap output buffer
Android-PowerUser May 12, 2026
df016a3
Remove Termux output buffer cap per requirement
Android-PowerUser May 12, 2026
a17f2e2
Fix debouncer consuming Termux output before screenshot processing
Android-PowerUser May 12, 2026
b12b5d9
Fix race when persisting Termux output for next bubble
Android-PowerUser May 12, 2026
7efcfc0
Harden Termux callback parsing for AI handoff
Android-PowerUser May 12, 2026
b360c25
Show Termux output instead of screen elements on next screenshot
Android-PowerUser May 12, 2026
ab1b413
Force Termux output into next screenshot screen-info payload
Android-PowerUser May 13, 2026
8be6c38
Use foreground current Termux session for commands
Android-PowerUser May 13, 2026
c632912
Harden Termux permission gating and screenshot handoff
Android-PowerUser May 13, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 16 additions & 2 deletions .github/workflows/manual.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,18 @@ on:
workflow_dispatch: # Ermöglicht manuelle Ausführung des Workflows

jobs:
# Schalter für optionalen Compile-Check in CI.
# "false" => Compile-Schritte werden übersprungen, Code bleibt aber im Workflow erhalten.
# Auf "true" setzen, falls der dedizierte Compile-Check wieder aktiviert werden soll.
compile-check-config:
runs-on: ubuntu-latest
outputs:
enabled: ${{ steps.flags.outputs.enabled }}
steps:
- name: Set compile-check flag
id: flags
run: echo "enabled=false" >> $GITHUB_OUTPUT

detect-changes:
runs-on: ubuntu-latest
outputs:
Expand Down Expand Up @@ -61,7 +73,8 @@ jobs:
echo "Results: app=$APP_CHANGED, humanoperator=$HUMANOPERATOR_CHANGED, shared=$SHARED_CHANGED"

compile-check:
needs: detect-changes
needs: [detect-changes, compile-check-config]
if: needs.compile-check-config.outputs.enabled == 'true'
runs-on: ubuntu-latest
steps:
- name: Checkout code
Expand Down Expand Up @@ -105,7 +118,8 @@ jobs:
run: ./gradlew :humanoperator:compileDebugKotlin

build:
needs: [detect-changes, compile-check]
needs: [detect-changes, compile-check, compile-check-config]
if: always() && !cancelled() && (needs.compile-check.result == 'success' || needs.compile-check.result == 'skipped')
runs-on: ubuntu-latest
env:
BUILD_APP: ${{ needs.detect-changes.outputs.app_changed == 'true' || needs.detect-changes.outputs.shared_changed == 'true' }}
Expand Down
14 changes: 9 additions & 5 deletions AGENTS.md
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
MOST IMPORTANT: 1. Follow commands exactly and do NOTHING BUT what is precisely instructed!!! Nothing more and nothing less.
MOST IMPORTANT:

2. Write to me in the language in which I give the task.
1. FOLLOW COMMANDS EXACTLY and do NOTHING MORE AND NOTHING LESS!!!

2. ASK QUESTIONS about things I haven't specified and DON'T ASSUME anything IMPLICITLY.

3. Before each build, enter critic mode and evaluate the changes as if you were someone else. Check whether they meet the requirements and whether anything else is affected, and fix any problems. Repeat this until the review finds no more errors.

4. Do not run a build for minor changes.

5. Ask questions about things I haven't specified and don't assume anything implicitly.

5. Write to me in the language in which I give the task.
6. Ensure that other functions and properties are not affected or broken.

7. Do not make compilation errors. Pay attention to imports.
Expand All @@ -16,4 +18,6 @@ MOST IMPORTANT: 1. Follow commands exactly and do NOTHING BUT what is precisely

9. If debug compilation fails in your environment, resolve the issue before reporting it as complete.

10. This app is production software and not a toy.
10. For code changes only, compile only the code and do not perform a full build.

11. This app is production software and not a toy.
18 changes: 12 additions & 6 deletions app/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@ import java.io.ByteArrayOutputStream
plugins {
id("com.android.application")
id("org.jetbrains.kotlin.android")
id("org.jetbrains.kotlin.plugin.serialization") version "1.9.20"
id("org.jetbrains.kotlin.plugin.serialization") version "2.1.20"
id("org.jetbrains.kotlin.plugin.compose") version "2.1.20"
id("com.google.android.libraries.mapsplatform.secrets-gradle-plugin")
id("kotlin-parcelize")
id("com.google.gms.google-services")
Expand Down Expand Up @@ -93,14 +94,19 @@ android {
buildFeatures {
compose = true
}
composeOptions {
kotlinCompilerExtensionVersion = "1.5.4"
lint {
disable += setOf("CoroutineCreationDuringComposition", "StateFlowValueCalledInComposition")
}

packaging {
jniLibs {
useLegacyPackaging = false
}
}

testOptions {
unitTests.isReturnDefaultValues = true
}
}

fun parseLoadAlignments(readelfOutput: String): List<Long> {
Expand Down Expand Up @@ -193,9 +199,9 @@ if (isReleaseTaskRequested && missingReleaseSigningEnv.isNotEmpty()) {

dependencies {
constraints {
implementation("org.jetbrains.kotlin:kotlin-stdlib:1.9.20")
implementation("org.jetbrains.kotlin:kotlin-stdlib-jdk8:1.9.20")
implementation("org.jetbrains.kotlin:kotlin-reflect:1.9.20")
implementation("org.jetbrains.kotlin:kotlin-stdlib:2.1.20")
implementation("org.jetbrains.kotlin:kotlin-stdlib-jdk8:2.1.20")
implementation("org.jetbrains.kotlin:kotlin-reflect:2.1.20")
}

implementation("androidx.core:core-ktx:1.9.0")
Expand Down
1 change: 1 addition & 0 deletions app/src/main/AndroidManifest.xml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
<!-- Notification permission for Android 13+ (API 33+) -->
<uses-permission android:name="android.permission.POST_NOTIFICATIONS"/>
<uses-permission android:name="android.permission.QUERY_ALL_PACKAGES" />
<uses-permission android:name="com.termux.permission.RUN_COMMAND" />

<application
android:name=".PhotoReasoningApplication"
Expand Down
10 changes: 9 additions & 1 deletion app/src/main/kotlin/com/google/ai/sample/ApiKeyDialog.kt
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ fun ApiKeyDialog(
loadKeysForProvider(ApiProvider.GOOGLE)
loadKeysForProvider(ApiProvider.CEREBRAS)
loadKeysForProvider(ApiProvider.MISTRAL)
loadKeysForProvider(ApiProvider.GROQ)
loadKeysForProvider(ApiProvider.CLOUDFLARE)
loadKeysForProvider(ApiProvider.PUTER)
}

Dialog(onDismissRequest = {
Expand Down Expand Up @@ -79,7 +82,7 @@ fun ApiKeyDialog(
.horizontalScroll(rememberScrollState()),
horizontalArrangement = Arrangement.spacedBy(8.dp)
) {
listOf(ApiProvider.VERCEL, ApiProvider.CEREBRAS, ApiProvider.GOOGLE, ApiProvider.MISTRAL, ApiProvider.PUTER).forEach { provider ->
listOf(ApiProvider.VERCEL, ApiProvider.CEREBRAS, ApiProvider.GOOGLE, ApiProvider.MISTRAL, ApiProvider.GROQ, ApiProvider.CLOUDFLARE, ApiProvider.PUTER).forEach { provider ->
FilterChip(
selected = selectedProvider == provider,
onClick = {
Expand All @@ -101,6 +104,8 @@ fun ApiKeyDialog(
ApiProvider.CEREBRAS -> "https://cloud.cerebras.ai/"
ApiProvider.VERCEL -> "https://vercel.com/ai-gateway"
ApiProvider.MISTRAL -> "https://console.mistral.ai/home?profile_dialog=api-keys"
ApiProvider.GROQ -> "https://console.groq.com/keys"
ApiProvider.CLOUDFLARE -> "https://dash.cloudflare.com/"
ApiProvider.PUTER -> "https://puter.com/dashboard#account"
ApiProvider.HUMAN_EXPERT -> return@Button
}
Expand All @@ -112,6 +117,9 @@ fun ApiKeyDialog(
Toast.makeText(context, "Link is in the clipboard.", Toast.LENGTH_SHORT).show()
Toast.makeText(context, "After the sign up paste the link in the Browser", Toast.LENGTH_LONG).show()
}
if (selectedProvider == ApiProvider.CLOUDFLARE) {
Toast.makeText(context, "After sign up use the search bar to find \"API token\"", Toast.LENGTH_LONG).show()
}

val intent = Intent(Intent.ACTION_VIEW, Uri.parse(url))
context.startActivity(intent)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ enum class ApiProvider {
GOOGLE,
CEREBRAS,
MISTRAL,
GROQ,
CLOUDFLARE,
PUTER,
HUMAN_EXPERT
}
Expand All @@ -38,8 +40,11 @@ enum class ModelOption(
PUTER_GPT_5_4_NANO("GPT-5.4 Nano (Puter)", "openai/gpt-5.4-nano", ApiProvider.PUTER, supportsScreenshot = true),
PUTER_GLM5("GLM-5V Turbo (Puter)", "openrouter:z-ai/glm-5v-turbo", ApiProvider.PUTER, supportsScreenshot = true),
PUTER_QWEN3_5_FLASH("Qwen3.5-Flash (Puter)", "qwen/qwen3.5-flash-02-23", ApiProvider.PUTER, supportsScreenshot = true),
GROQ_LLAMA_4_SCOUT_17B("Llama 4 Scout 109B (Groq)", "meta-llama/llama-4-scout-17b-16e-instruct", ApiProvider.GROQ, supportsScreenshot = true),
CLOUDFLARE_KIMI_K2_6("Kimi K2.6 (Cloudflare)", "@cf/moonshotai/kimi-k2.6", ApiProvider.CLOUDFLARE, supportsScreenshot = true),
MISTRAL_LARGE_3("Mistral Large 3", "mistral-large-latest", ApiProvider.MISTRAL),
MISTRAL_MEDIUM_3_1("Mistral Medium 3.1", "mistral-medium-latest", ApiProvider.MISTRAL),
MISTRAL_MEDIUM_3_5("Mistral Medium 3.5", "mistral-medium-3-5", ApiProvider.MISTRAL),
GPT_5_1_CODEX_MAX("GPT-5.1 Codex Max (Vercel)", "openai/gpt-5.1-codex-max", ApiProvider.VERCEL),
GPT_5_1_CODEX_MINI("GPT-5.1 Codex Mini (Vercel)", "openai/gpt-5.1-codex-mini", ApiProvider.VERCEL),
GPT_5_NANO("GPT-5 Nano (Vercel)", "openai/gpt-5-nano", ApiProvider.VERCEL),
Expand Down Expand Up @@ -105,9 +110,16 @@ enum class ModelOption(
),
HUMAN_EXPERT("Human Expert", "human-expert", ApiProvider.HUMAN_EXPERT);

/** Whether this model supports TopK/TopP/Temperature settings */
/** Whether this model supports Temperature/TopP settings in UI */
val supportsGenerationSettings: Boolean
get() = this != HUMAN_EXPERT

/**
 * Whether the TopK setting applies to this model in the UI and in request payloads.
 *
 * Evaluates to `false` for every model whose provider is [ApiProvider.MISTRAL] or
 * [ApiProvider.PUTER], and for [HUMAN_EXPERT]; all other models report `true`.
 */
val supportsTopK: Boolean
    get() {
        // Provider-level opt-out: models from these providers never report TopK support.
        val providerOptsOut = apiProvider == ApiProvider.MISTRAL || apiProvider == ApiProvider.PUTER
        return !providerOptsOut && this != HUMAN_EXPERT
    }
}

val GenerativeViewModelFactory = object : ViewModelProvider.Factory {
Expand All @@ -124,7 +136,9 @@ val GenerativeViewModelFactory = object : ViewModelProvider.Factory {
val config = generationConfig {
temperature = genSettings.temperature
topP = genSettings.topP
topK = genSettings.topK
if (currentModel.supportsTopK) {
topK = genSettings.topK.coerceAtLeast(1)
}
}

// Get the API key from MainActivity
Expand All @@ -144,7 +158,13 @@ val GenerativeViewModelFactory = object : ViewModelProvider.Factory {
isAssignableFrom(PhotoReasoningViewModel::class.java) -> {
if (currentModel.modelName.contains("live")) {
// Live API models
val liveApiManager = LiveApiManager(apiKey, currentModel.modelName)
val liveApiManager = LiveApiManager(
apiKey = apiKey,
modelName = currentModel.modelName,
temperature = genSettings.temperature.toDouble(),
topP = genSettings.topP.toDouble(),
topK = genSettings.topK.coerceAtLeast(1)
)

// For Live API, we might not need a GenerativeModel at all
// or we use a fallback model for non-live operations
Expand Down
53 changes: 30 additions & 23 deletions app/src/main/kotlin/com/google/ai/sample/MenuScreen.kt
Original file line number Diff line number Diff line change
Expand Up @@ -215,13 +215,15 @@ fun MenuScreen(
}
val normalModels = allModels.filter {
it != ModelOption.MISTRAL_MEDIUM_3_1 &&
it != ModelOption.MISTRAL_MEDIUM_3_5 &&
it != ModelOption.PUTER_GPT_5_4_NANO &&
it.apiProvider != ApiProvider.VERCEL &&
!STRIKETHROUGH_MODELS.contains(it)
}
val orderedModels = listOf(
ModelOption.PUTER_GPT_5_4_NANO,
ModelOption.MISTRAL_MEDIUM_3_1
ModelOption.MISTRAL_MEDIUM_3_1,
ModelOption.MISTRAL_MEDIUM_3_5
) +
normalModels +
vercelModels +
Expand Down Expand Up @@ -290,10 +292,13 @@ fun MenuScreen(
val modelHint = when (selectedModel) {
ModelOption.GEMMA_3_27B_IT -> "Google doesn't support screenshots in the API for this model."
ModelOption.GPT_OSS_120B -> "This is a pure text model\nCerebras sometimes discontinues free access in the Free Tier, displaying an \"Error 404: gpt-oss-120b does not exist or you do not have access to it\" message, or changes the rate limits."
ModelOption.MISTRAL_MEDIUM_3_5 -> "This is a reasoning model"
ModelOption.MISTRAL_LARGE_3 -> "Mistral AI rejects requests containing non-black images with a 429 Error: Rate limit exceeded response"
ModelOption.GEMINI_3_FLASH -> "Google often rejects requests to this model with a 503 Model is exhausted error"
ModelOption.PUTER_GLM5 -> "This model is expensive and uses up the free quota quickly. Consider GPT-5.4 Nano."
ModelOption.PUTER_QWEN3_5_FLASH -> "$0.07/M input | $0.26/M output"
ModelOption.GROQ_LLAMA_4_SCOUT_17B -> "30 requests per Min"
ModelOption.CLOUDFLARE_KIMI_K2_6 -> "Approx. 15 responses per day are free"
ModelOption.GPT_5_1_CODEX_MAX,
ModelOption.GPT_5_1_CODEX_MINI,
ModelOption.GPT_5_NANO -> "Vercel requires a credit card"
Expand Down Expand Up @@ -413,7 +418,7 @@ fun MenuScreen(
}
var tempSlider by remember(selectedModel) { mutableStateOf(genSettings.value.temperature) }
var topPSlider by remember(selectedModel) { mutableStateOf(genSettings.value.topP) }
var topKSlider by remember(selectedModel) { mutableStateOf(genSettings.value.topK.toFloat()) }
var topKSlider by remember(selectedModel) { mutableStateOf(genSettings.value.topK.coerceAtLeast(1).toFloat()) }

Card(
modifier = Modifier
Expand Down Expand Up @@ -476,28 +481,30 @@ fun MenuScreen(
modifier = Modifier.fillMaxWidth().sliderFriendly()
)

Spacer(modifier = Modifier.height(8.dp))
if (selectedModel.supportsTopK) {
Spacer(modifier = Modifier.height(8.dp))

// TopK Slider (0 - 100)
Text(
text = "Top K: ${Math.round(topKSlider)}",
style = MaterialTheme.typography.bodyMedium
)
androidx.compose.material3.Slider(
value = topKSlider,
onValueChange = { newVal ->
topKSlider = newVal
},
onValueChangeFinished = {
genSettings.value = genSettings.value.copy(topK = Math.round(topKSlider))
com.google.ai.sample.util.GenerationSettingsPreferences.saveSettings(
context, selectedModel.modelName, genSettings.value
)
},
valueRange = 0f..100f,
steps = 0,
modifier = Modifier.fillMaxWidth().sliderFriendly()
)
// TopK Slider (1 - 100)
Text(
text = "Top K: ${Math.round(topKSlider)}",
style = MaterialTheme.typography.bodyMedium
)
androidx.compose.material3.Slider(
value = topKSlider,
onValueChange = { newVal ->
topKSlider = newVal
},
onValueChangeFinished = {
genSettings.value = genSettings.value.copy(topK = Math.round(topKSlider))
com.google.ai.sample.util.GenerationSettingsPreferences.saveSettings(
context, selectedModel.modelName, genSettings.value
)
},
valueRange = 1f..100f,
steps = 98,
modifier = Modifier.fillMaxWidth().sliderFriendly()
)
}

if (selectedModel.isOfflineModel) {
Spacer(modifier = Modifier.height(4.dp))
Expand Down
Loading
Loading