Skip to content

Commit b7fa471

Browse files
authored
refactor: simplify workspace start status management (#222)
Current approach with a secondary poll loop that handles the start action of a workspace is overengineered. Basically the problem is the CLI takes too long before moving the workspace into the queued/starting state, during which the user doesn't have any feedback. To address the issue we: - stopped the main poll loop from updating the environment - moved the environment in the queued state immediately after the start button was pushed. - started a poll loop that moved the workspace from queued state to starting space only after that state became available in the backend. The intermediary stopped state is skipped by the secondary poll loop. @asher pointed out that a better approach can be implemented. We already store the status, and workspace and the agent in the environment. When the start comes in: 1. We directly update the env. status to "queued" 2. We only change the environment status if there is difference in the existing workspace&agent status vs the status from the main poll loop 3. no secondary poll loop is needed.
1 parent e24f564 commit b7fa471

File tree

3 files changed

+72
-66
lines changed

3 files changed

+72
-66
lines changed

src/main/kotlin/com/coder/toolbox/CoderRemoteEnvironment.kt

Lines changed: 52 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@ import kotlinx.coroutines.launch
3737
import kotlinx.coroutines.withTimeout
3838
import java.io.File
3939
import java.nio.file.Path
40-
import java.util.concurrent.atomic.AtomicBoolean
4140
import kotlin.time.Duration.Companion.minutes
4241
import kotlin.time.Duration.Companion.seconds
4342

@@ -55,37 +54,39 @@ class CoderRemoteEnvironment(
5554
private var workspace: Workspace,
5655
private var agent: WorkspaceAgent,
5756
) : RemoteProviderEnvironment("${workspace.name}.${agent.name}"), BeforeConnectionHook, AfterDisconnectHook {
58-
private var wsRawStatus = WorkspaceAndAgentStatus.from(workspace, agent)
57+
private var environmentStatus = WorkspaceAndAgentStatus.from(workspace, agent)
5958

6059
override var name: String = "${workspace.name}.${agent.name}"
6160
private var isConnected: MutableStateFlow<Boolean> = MutableStateFlow(false)
6261
override val connectionRequest: MutableStateFlow<Boolean> = MutableStateFlow(false)
6362

6463
override val state: MutableStateFlow<RemoteEnvironmentState> =
65-
MutableStateFlow(wsRawStatus.toRemoteEnvironmentState(context))
64+
MutableStateFlow(environmentStatus.toRemoteEnvironmentState(context))
6665
override val description: MutableStateFlow<EnvironmentDescription> =
6766
MutableStateFlow(EnvironmentDescription.General(context.i18n.pnotr(workspace.templateDisplayName)))
6867
override val additionalEnvironmentInformation: MutableMap<LocalizableString, String> = mutableMapOf()
69-
override val actionsList: MutableStateFlow<List<ActionDescription>> = MutableStateFlow(getAvailableActions())
68+
override val actionsList: MutableStateFlow<List<ActionDescription>> = MutableStateFlow(emptyList())
7069

7170
private val networkMetricsMarshaller = Moshi.Builder().build().adapter(NetworkMetrics::class.java)
7271
private val proxyCommandHandle = SshCommandProcessHandle(context)
7372
private var pollJob: Job? = null
74-
private val startIsInProgress = AtomicBoolean(false)
7573

7674
init {
7775
if (context.settingsStore.shouldAutoConnect(id)) {
7876
context.logger.info("resuming SSH connection to $id — last session was still active.")
7977
startSshConnection()
8078
}
79+
refreshAvailableActions()
8180
}
8281

8382
fun asPairOfWorkspaceAndAgent(): Pair<Workspace, WorkspaceAgent> = Pair(workspace, agent)
8483

85-
private fun getAvailableActions(): List<ActionDescription> {
84+
private fun refreshAvailableActions() {
8685
val actions = mutableListOf<ActionDescription>()
87-
if (wsRawStatus.canStop()) {
86+
context.logger.debug("Refreshing available actions for workspace $id with status: $environmentStatus")
87+
if (environmentStatus.canStop()) {
8888
actions.add(Action(context, "Open web terminal") {
89+
context.logger.debug("Launching web terminal for $id...")
8990
context.desktop.browse(client.url.withPath("/${workspace.ownerName}/$name/terminal").toString()) {
9091
context.ui.showErrorInfoPopup(it)
9192
}
@@ -97,8 +98,9 @@ class CoderRemoteEnvironment(
9798
val urlTemplate = context.settingsStore.workspaceViewUrl
9899
?: client.url.withPath("/@${workspace.ownerName}/${workspace.name}").toString()
99100
val url = urlTemplate
100-
.replace("\$workspaceOwner", "${workspace.ownerName}")
101+
.replace("\$workspaceOwner", workspace.ownerName)
101102
.replace("\$workspaceName", workspace.name)
103+
context.logger.debug("Opening the dashboard for $id...")
102104
context.desktop.browse(
103105
url
104106
) {
@@ -108,59 +110,47 @@ class CoderRemoteEnvironment(
108110
)
109111

110112
actions.add(Action(context, "View template") {
113+
context.logger.debug("Opening the template for $id...")
111114
context.desktop.browse(client.url.withPath("/templates/${workspace.templateName}").toString()) {
112115
context.ui.showErrorInfoPopup(it)
113116
}
114-
}
115-
)
117+
})
116118

117-
if (wsRawStatus.canStart()) {
119+
if (environmentStatus.canStart()) {
118120
if (workspace.outdated) {
119121
actions.add(Action(context, "Update and start") {
122+
context.logger.debug("Updating and starting $id...")
120123
val build = client.updateWorkspace(workspace)
121124
update(workspace.copy(latestBuild = build), agent)
122-
}
123-
)
125+
})
124126
} else {
125127
actions.add(Action(context, "Start") {
126-
try {
127-
// needed in order to make sure Queuing is not overridden by the
128-
// general polling loop with the `Stopped` state
129-
startIsInProgress.set(true)
130-
val startJob = context.cs
131-
.launch(CoroutineName("Start Workspace Action CLI Runner") + Dispatchers.IO) {
132-
cli.startWorkspace(workspace.ownerName, workspace.name)
133-
}
134-
// cli takes 15 seconds to move the workspace in queueing/starting state
135-
// while the user won't see anything happening in TBX after start is clicked
136-
// During those 15 seconds we work around by forcing a `Queuing` state
137-
while (startJob.isActive && client.workspace(workspace.id).latestBuild.status.isNotStarted()) {
138-
state.update {
139-
WorkspaceAndAgentStatus.QUEUED.toRemoteEnvironmentState(context)
140-
}
141-
delay(1.seconds)
128+
context.logger.debug("Starting $id... ")
129+
context.cs
130+
.launch(CoroutineName("Start Workspace Action CLI Runner") + Dispatchers.IO) {
131+
cli.startWorkspace(workspace.ownerName, workspace.name)
142132
}
143-
startIsInProgress.set(false)
144-
// retrieve the status again and update the status
145-
update(client.workspace(workspace.id), agent)
146-
} finally {
147-
startIsInProgress.set(false)
148-
}
149-
}
150-
)
133+
// cli takes 15 seconds to move the workspace in queueing/starting state
134+
// while the user won't see anything happening in TBX after start is clicked
135+
// During those 15 seconds we work around by forcing a `Queuing` state
136+
updateStatus(WorkspaceAndAgentStatus.QUEUED)
137+
// force refresh of the actions list (Start should no longer be available)
138+
refreshAvailableActions()
139+
})
151140
}
152141
}
153-
if (wsRawStatus.canStop()) {
142+
if (environmentStatus.canStop()) {
154143
if (workspace.outdated) {
155144
actions.add(Action(context, "Update and restart") {
145+
context.logger.debug("Updating and re-starting $id...")
156146
val build = client.updateWorkspace(workspace)
157147
update(workspace.copy(latestBuild = build), agent)
158148
}
159149
)
160150
}
161151
actions.add(Action(context, "Stop") {
162152
tryStopSshConnection()
163-
153+
context.logger.debug("Stoping $id...")
164154
val build = client.stopWorkspace(workspace)
165155
update(workspace.copy(latestBuild = build), agent)
166156
}
@@ -170,12 +160,14 @@ class CoderRemoteEnvironment(
170160
actions.add(Action(context, "Delete workspace", highlightInRed = true) {
171161
context.cs.launch(CoroutineName("Delete Workspace Action")) {
172162
var dialogText =
173-
if (wsRawStatus.canStop()) "This will close the workspace and remove all its information, including files, unsaved changes, history, and usage data."
163+
if (environmentStatus.canStop()) "This will close the workspace and remove all its information, including files, unsaved changes, history, and usage data."
174164
else "This will remove all information from the workspace, including files, unsaved changes, history, and usage data."
175165
dialogText += "\n\nType \"${workspace.name}\" below to confirm:"
176166

177167
val confirmation = context.ui.showTextInputPopup(
178-
if (wsRawStatus.canStop()) context.i18n.ptrl("Delete running workspace?") else context.i18n.ptrl("Delete workspace?"),
168+
if (environmentStatus.canStop()) context.i18n.ptrl("Delete running workspace?") else context.i18n.ptrl(
169+
"Delete workspace?"
170+
),
179171
context.i18n.pnotr(dialogText),
180172
context.i18n.ptrl("Workspace name"),
181173
TextType.General,
@@ -185,10 +177,14 @@ class CoderRemoteEnvironment(
185177
if (confirmation != workspace.name) {
186178
return@launch
187179
}
180+
context.logger.debug("Deleting $id...")
188181
deleteWorkspace()
189182
}
190183
})
191-
return actions
184+
185+
actionsList.update {
186+
actions
187+
}
192188
}
193189

194190
private suspend fun tryStopSshConnection() {
@@ -264,23 +260,28 @@ class CoderRemoteEnvironment(
264260
* Update the workspace/agent status to the listeners, if it has changed.
265261
*/
266262
fun update(workspace: Workspace, agent: WorkspaceAgent) {
267-
if (startIsInProgress.get()) {
268-
context.logger.info("Skipping update for $id - workspace start is in progress")
263+
if (this.workspace.latestBuild == workspace.latestBuild) {
269264
return
270265
}
271266
this.workspace = workspace
272267
this.agent = agent
273-
wsRawStatus = WorkspaceAndAgentStatus.from(workspace, agent)
268+
// workspace&agent status can be different from "environment status"
269+
// which is forced to queued state when a workspace is scheduled to start
270+
updateStatus(WorkspaceAndAgentStatus.from(workspace, agent))
271+
274272
// we have to regenerate the action list in order to force a redraw
275273
// because the actions don't have a state flow on the enabled property
276-
actionsList.update {
277-
getAvailableActions()
278-
}
274+
refreshAvailableActions()
275+
}
276+
277+
private fun updateStatus(status: WorkspaceAndAgentStatus) {
278+
environmentStatus = status
279279
context.cs.launch(CoroutineName("Workspace Status Updater")) {
280280
state.update {
281-
wsRawStatus.toRemoteEnvironmentState(context)
281+
environmentStatus.toRemoteEnvironmentState(context)
282282
}
283283
}
284+
context.logger.debug("Overall status for workspace $id is $environmentStatus. Workspace status: ${workspace.latestBuild.status}, agent status: ${agent.status}, agent lifecycle state: ${agent.lifecycleState}, login before ready: ${agent.loginBeforeReady}")
284285
}
285286

286287
/**
@@ -310,7 +311,7 @@ class CoderRemoteEnvironment(
310311
* Returns true if the SSH connection was scheduled to start, false otherwise.
311312
*/
312313
fun startSshConnection(): Boolean {
313-
if (wsRawStatus.ready() && !isConnected.value) {
314+
if (environmentStatus.ready() && !isConnected.value) {
314315
context.cs.launch(CoroutineName("SSH Connection Trigger")) {
315316
connectionRequest.update {
316317
true
@@ -336,7 +337,7 @@ class CoderRemoteEnvironment(
336337
withTimeout(5.minutes) {
337338
var workspaceStillExists = true
338339
while (context.cs.isActive && workspaceStillExists) {
339-
if (wsRawStatus == WorkspaceAndAgentStatus.DELETING || wsRawStatus == WorkspaceAndAgentStatus.DELETED) {
340+
if (environmentStatus == WorkspaceAndAgentStatus.DELETING || environmentStatus == WorkspaceAndAgentStatus.DELETED) {
340341
workspaceStillExists = false
341342
context.envPageManager.showPluginEnvironmentsPage()
342343
} else {

src/main/kotlin/com/coder/toolbox/sdk/v2/models/WorkspaceBuild.kt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ import java.util.UUID
1010
*/
1111
@JsonClass(generateAdapter = true)
1212
data class WorkspaceBuild(
13+
@property:Json(name = "id") val id: UUID,
14+
@property:Json(name = "build_number") val buildNumber: Int,
1315
@property:Json(name = "template_version_id") val templateVersionID: UUID,
1416
@property:Json(name = "resources") val resources: List<WorkspaceResource>,
1517
@property:Json(name = "status") val status: WorkspaceStatus,

src/test/kotlin/com/coder/toolbox/sdk/DataGen.kt

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import com.coder.toolbox.sdk.v2.models.WorkspaceStatus
1212
import com.coder.toolbox.util.Arch
1313
import com.coder.toolbox.util.OS
1414
import java.util.UUID
15+
import kotlin.random.Random
1516

1617
class DataGen {
1718
companion object {
@@ -20,19 +21,19 @@ class DataGen {
2021
agentId: String,
2122
): WorkspaceResource = WorkspaceResource(
2223
agents =
23-
listOf(
24-
WorkspaceAgent(
25-
id = UUID.fromString(agentId),
26-
status = WorkspaceAgentStatus.CONNECTED,
27-
name = agentName,
28-
architecture = Arch.from("amd64"),
29-
operatingSystem = OS.from("linux"),
30-
directory = null,
31-
expandedDirectory = null,
32-
lifecycleState = WorkspaceAgentLifecycleState.READY,
33-
loginBeforeReady = false,
24+
listOf(
25+
WorkspaceAgent(
26+
id = UUID.fromString(agentId),
27+
status = WorkspaceAgentStatus.CONNECTED,
28+
name = agentName,
29+
architecture = Arch.from("amd64"),
30+
operatingSystem = OS.from("linux"),
31+
directory = null,
32+
expandedDirectory = null,
33+
lifecycleState = WorkspaceAgentLifecycleState.READY,
34+
loginBeforeReady = false,
35+
),
3436
),
35-
),
3637
)
3738

3839
fun workspace(
@@ -48,9 +49,9 @@ class DataGen {
4849
templateDisplayName = "template-display-name",
4950
templateIcon = "template-icon",
5051
latestBuild =
51-
build(
52-
resources = agents.map { resource(it.key, it.value) },
53-
),
52+
build(
53+
resources = agents.map { resource(it.key, it.value) },
54+
),
5455
outdated = false,
5556
name = name,
5657
ownerName = "owner",
@@ -61,6 +62,8 @@ class DataGen {
6162
templateVersionID: UUID = UUID.randomUUID(),
6263
resources: List<WorkspaceResource> = emptyList(),
6364
): WorkspaceBuild = WorkspaceBuild(
65+
id = UUID.randomUUID(),
66+
buildNumber = Random.nextInt(),
6467
templateVersionID = templateVersionID,
6568
resources = resources,
6669
status = WorkspaceStatus.RUNNING,

0 commit comments

Comments
 (0)