Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import {
Lock,
StarFilled,
TriangleRight,
Warning as WarningIcon,
} from '@cardstack/boxel-ui/icons';
import type { Icon } from '@cardstack/boxel-ui/icons';

Expand Down Expand Up @@ -58,6 +59,26 @@ export default class WorkspaceChooser extends Component<Signature> {
return this.realmServer.archivedRealms;
}

// Trusted realm servers that couldn't be reached during boot. When present,
// an unobtrusive notice names them so the user understands some workspaces
// may be missing; the notice clears once the background retry recovers them.
private get unreachableRealmServers() {
  // Thin pass-through to the realm-server service so the template and the
  // message getter read the list from a single place on this component.
  let { unreachableRealmServers } = this.realmServer;
  return unreachableRealmServers;
}

private get unreachableRealmServersMessage() {
  // Reduce each server URL to its host for display; a value that does not
  // parse as a URL is shown verbatim rather than dropped.
  let hosts: string[] = [];
  for (let serverURL of this.unreachableRealmServers) {
    let label: string;
    try {
      label = new URL(serverURL).host;
    } catch {
      label = serverURL;
    }
    hosts.push(label);
  }
  // A single server is named explicitly; several are summarized by count.
  let servers =
    hosts.length !== 1 ? `${hosts.length} realm servers` : hosts[0];
  return `Couldn’t reach ${servers}. Some workspaces may be missing — retrying…`;
}
Comment on lines +69 to +80

// Show the archived count once we've loaded the list (or already have entries
// from an archive action this session). Before the first load we don't know
// the count, so the row shows just "Archived".
Expand Down Expand Up @@ -376,6 +397,20 @@ export default class WorkspaceChooser extends Component<Signature> {
{{/in-element}}
{{/if}}
<div class='workspace-chooser__content boxel-dark-scrollbar'>
{{! Notice rendered only while at least one realm server is listed as unreachable; its text comes from unreachableRealmServersMessage. }}
{{#if this.unreachableRealmServers.length}}
<div
class='unreachable-notice'
role='status'
data-test-unreachable-realm-servers-notice
>
<WarningIcon
width='16'
height='16'
class='unreachable-notice-icon'
/>
<span>{{this.unreachableRealmServersMessage}}</span>
</div>
{{/if}}
<div class='sections-wrapper'>
<div class='workspace-section' data-test-favorites-section>
<div class='section-header'>
Expand Down Expand Up @@ -601,6 +636,22 @@ export default class WorkspaceChooser extends Component<Signature> {
color: var(--boxel-400);
font: 400 var(--boxel-font-sm);
}
/* Notice row for unreachable realm servers: icon and message laid out
   horizontally, capped in width, on a translucent light background. */
.unreachable-notice {
display: flex;
align-items: center;
gap: var(--boxel-sp-xs);
max-width: 40rem;
padding: var(--boxel-sp-xs) var(--boxel-sp-sm);
border-radius: var(--boxel-border-radius);
background-color: rgba(255 255 255 / 10%);
color: var(--boxel-light);
font: 400 var(--boxel-font-sm);
letter-spacing: var(--boxel-lsp-xs);
}
/* Warning icon inside the notice. NOTE(review): --icon-color is presumably
   read by the icon component to tint the glyph; confirm against boxel-ui. */
.unreachable-notice-icon {
--icon-color: var(--boxel-warning-100);
/* Do not let the flex row squeeze the icon when the message is long. */
flex-shrink: 0;
}
</style>
</template>
}
138 changes: 125 additions & 13 deletions packages/host/app/services/matrix-service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,13 @@ import Service, { service } from '@ember/service';
import { isTesting } from '@embroider/macros';
import { cached, tracked } from '@glimmer/tracking';

import { dropTask, task, timeout } from 'ember-concurrency';
import {
dropTask,
rawTimeout,
restartableTask,
task,
timeout,
} from 'ember-concurrency';
import window from 'ember-window-mock';
import { cloneDeep } from 'lodash-es';

Expand Down Expand Up @@ -156,6 +162,10 @@ import type * as MatrixSDK from 'matrix-js-sdk';

const { matrixURL } = ENV;
const STATE_EVENTS_OF_INTEREST = ['m.room.create', 'm.room.name'];
// Fixed delay between retries of trusted servers recorded as unreachable (the
// interval does not grow between attempts), plus a cap on the number of
// attempts so a persistently-down server doesn't spin forever.
const UNREACHABLE_RETRY_INTERVAL_MS = 10_000;
const MAX_UNREACHABLE_RETRY_ATTEMPTS = 6;

const realmEventsLogger = logger('realm:events');

Expand Down Expand Up @@ -451,17 +461,7 @@ export default class MatrixService extends Service {
// start(); here we log and leave the available-realms list as
// it was.
try {
let realmURLs =
await this.realmServer.fetchUserRealmsFromTrustedServers(
realmServers,
);
await this.realmServer.setAvailableRealmIdentifiers(
realmURLs.map(ri),
);
if (this.postLoginCompleted) {
await this.loginToRealms();
await this.loadMoreAuthRooms(realmURLs);
}
await this.applyTrustedRealmServersAccountData(realmServers);
} catch (err) {
console.error(
'Failed to assemble realms from trusted servers in app.boxel.realm-servers account data',
Expand Down Expand Up @@ -1041,7 +1041,24 @@ export default class MatrixService extends Service {

if (isTesting())
console.warn('[start-phase] authenticateToAllAccessibleRealms');
await this.realmServer.authenticateToAllAccessibleRealms();
try {
await this.realmServer.authenticateToAllAccessibleRealms();
} catch (e) {
// A trusted server being unreachable must not fail boot: assembly
// recorded it in `unreachableRealmServers`, a retry is scheduled
// below, and realms from reachable servers still authenticate
// individually via `loginToRealms`. But only swallow when there's
// actually an unreachable server to blame — otherwise this is an
// unrelated auth failure and boot must fail loudly (logout) rather
// than proceed to `postLoginCompleted` while unauthenticated.
if (this.realmServer.unreachableRealmServers.length === 0) {
throw e;
}
console.error(
'Failed to authenticate to all accessible realms because a trusted server is unreachable',
e,
);
}
}
// Login here triggers other setup code that needs to happen after
// otherwise we don't have the realm info.
Expand All @@ -1051,6 +1068,11 @@ export default class MatrixService extends Service {

this.postLoginCompleted = true;
if (isTesting()) console.warn('[start-phase] postLoginCompleted=true');

// If any trusted server was unreachable during boot assembly, keep
// the reachable realms and retry the unreachable ones in the
// background so they load (and the notice clears) once they recover.
this.scheduleUnreachableRealmServerRetry();
} catch (e) {
console.log('Error starting Matrix client', e);
await this.logout();
Expand Down Expand Up @@ -1177,6 +1199,96 @@ export default class MatrixService extends Service {
);
}

// Re-assemble the available-realms list from a runtime
// `app.boxel.realm-servers` account-data event. Unlike the fail-loud boot
// assembly, an event-time refresh must be conservative: because
// `fetchUserRealmsFromTrustedServers` now returns a partial list when a
// trusted server is unreachable (rather than throwing), replacing the list
// with that partial result would erase the realms served by a server that's
// only transiently down. So when any server was unreachable this round we
// merge (add newly-discovered realms, never remove) and let the retry
// reconcile; only a fully reachable assembly is authoritative enough to
// remove realms. Called by the AccountData listener and directly by tests.
async applyTrustedRealmServersAccountData(realmServers: string[]) {
  let realmURLs =
    await this.realmServer.fetchUserRealmsFromTrustedServers(realmServers);

  // With any server still listed as unreachable, only add to the current
  // identifiers; a fully reachable round replaces the list outright.
  let anyServerUnreachable =
    this.realmServer.unreachableRealmServers.length > 0;
  let nextIdentifiers = anyServerUnreachable
    ? [
        ...new Set([
          ...this.realmServer.userRealmIdentifiers,
          ...realmURLs.map(ri),
        ]),
      ]
    : realmURLs.map(ri);
  await this.realmServer.setAvailableRealmIdentifiers(nextIdentifiers);

  // Realm login and auth-room loading only run once post-login setup is done.
  if (this.postLoginCompleted) {
    await this.loginToRealms();
    await this.loadMoreAuthRooms(realmURLs);
  }

  // No-op when nothing is unreachable (or under test); otherwise (re)starts
  // the background retry.
  this.scheduleUnreachableRealmServerRetry();
}

// Re-attempt the trusted servers that were unreachable during boot
// assembly. On success their realms are merged into the available list and
// authenticated so they appear without a reload; the "couldn't reach
// <server>" notice clears as `unreachableRealmServers` empties. Returns true
// once every previously-unreachable server has recovered. Public so tests
// can drive recovery deterministically rather than waiting on the background
// timer.
async retryUnreachableRealmServers(): Promise<boolean> {
  // Snapshot the list: the fetch below mutates it as servers succeed or fail.
  let pendingServers = [...this.realmServer.unreachableRealmServers];
  if (pendingServers.length > 0) {
    let recoveredRealmURLs =
      await this.realmServer.fetchUserRealmsFromTrustedServers(pendingServers);
    if (recoveredRealmURLs.length > 0) {
      // Union of what is already available and what just came back.
      let combined = new Set([
        ...this.realmServer.userRealmIdentifiers,
        ...recoveredRealmURLs.map(ri),
      ]);
      await this.realmServer.setAvailableRealmIdentifiers([...combined]);
      await this.loginToRealms();
      await this.loadMoreAuthRooms(recoveredRealmURLs);
    }
    // Re-read the live list: it is empty only if every retried server
    // responded successfully this round.
    return this.realmServer.unreachableRealmServers.length === 0;
  }
  // Nothing was unreachable to begin with.
  return true;
}

private scheduleUnreachableRealmServerRetry() {
  // Under test the background loop is never started: tests call
  // `retryUnreachableRealmServers()` themselves so assertions stay
  // deterministic and no timer keeps firing against a stubbed-down server.
  // Outside of tests, there is nothing to schedule when no server is
  // currently listed as unreachable.
  let shouldSchedule =
    !isTesting() && this.realmServer.unreachableRealmServers.length > 0;
  if (shouldSchedule) {
    this.retryUnreachableRealmServersTask.perform();
  }
}

private retryUnreachableRealmServersTask = restartableTask(async () => {
  // Keep retrying until either the attempt cap is hit or the unreachable list
  // drains; each round waits the fixed interval before trying.
  let attempt = 0;
  while (
    attempt < MAX_UNREACHABLE_RETRY_ATTEMPTS &&
    this.realmServer.unreachableRealmServers.length > 0
  ) {
    await rawTimeout(UNREACHABLE_RETRY_INTERVAL_MS);
    // The service may have been torn down while we were waiting.
    if (this.isDestroying || this.isDestroyed) {
      return;
    }
    try {
      await this.retryUnreachableRealmServers();
    } catch (err) {
      // A failed round is logged and counts as an attempt; the loop goes on.
      console.error('Failed to retry unreachable realm servers', err);
    }
    attempt++;
  }
});

async createRealmSession(realmURL: URL) {
await this.#clientReadyDeferred.promise;
return this.client.createRealmSession(realmURL);
Expand Down
102 changes: 79 additions & 23 deletions packages/host/app/services/realm-server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,11 @@ export default class RealmServerService extends Service {
]);
private archivedRealmsList = new TrackedArray<ArchivedRealmInfo>([]);
private archivedRealmsFetched = false;
// Trusted servers whose `_realm-auth` call failed (network error, timeout, or
// non-2xx) the last time they were queried, whether at boot assembly, on an
// account-data refresh, or on a retry. Tracked so the UI can surface an
// unobtrusive "couldn't reach <server>" notice; entries clear as a later
// successful fetch recovers each server.
private unreachableRealmServersList = new TrackedArray<string>([]);
private _ready = new Deferred<void>();
private eventSubscribers: Map<string, RealmServerEventSubscriber[]> =
new Map();
Expand Down Expand Up @@ -142,6 +147,10 @@ export default class RealmServerService extends Service {
// reference) is what makes the getter recompute to the empty list.
this.archivedRealmsList.splice(0, this.archivedRealmsList.length);
this.archivedRealmsFetched = false;
this.unreachableRealmServersList.splice(
0,
this.unreachableRealmServersList.length,
);
this.eventSubscribers = new Map();
this._ready = new Deferred<void>();
this._ready.fulfill();
Expand Down Expand Up @@ -326,6 +335,10 @@ export default class RealmServerService extends Service {
type: 'base',
url: baseRealm.url,
});
this.unreachableRealmServersList.splice(
0,
this.unreachableRealmServersList.length,
);
window.localStorage.removeItem(sessionLocalStorageKey);
}

Expand Down Expand Up @@ -375,31 +388,74 @@ export default class RealmServerService extends Service {
// TODO: remove once multi-realm-server federation lands.
this.assertOwnRealmServer(trustedServerURLs);
await this.login();
let perServerRealmURLs = await Promise.all(
trustedServerURLs.map(async (serverURL) => {
let normalizedServerURL = ensureTrailingSlash(serverURL);
let response = await this.network.fetch(
`${normalizedServerURL}_realm-auth`,
{
method: 'POST',
headers: {
Accept: SupportedMimeType.JSONAPI,
'Content-Type': 'application/json',
Authorization: `Bearer ${this.token}`,
},
},
// A trusted server that's unreachable (network error, timeout, or a
// non-2xx `_realm-auth`) must never block boot or hide the realms served
// by the servers that *are* reachable. `allSettled` lets us assemble from
// the reachable servers, record the unreachable ones so a notice can name
// them, and (via matrix-service) schedule a retry.
let results = await Promise.allSettled(
trustedServerURLs.map((serverURL) =>
this.fetchUserRealmsFromServer(serverURL),
),
);
let realmURLs: string[] = [];
results.forEach((result, index) => {
let normalizedServerURL = ensureTrailingSlash(trustedServerURLs[index]);
if (result.status === 'fulfilled') {
realmURLs.push(...result.value);
this.markRealmServerReachable(normalizedServerURL);
} else {
this.markRealmServerUnreachable(normalizedServerURL);
console.error(
`Failed to fetch user realms from trusted server ${normalizedServerURL}`,
result.reason,
);
if (!response.ok) {
let responseText = await response.text();
throw new Error(
`Failed to fetch user realms from trusted server ${normalizedServerURL}: ${response.status} - ${responseText}`,
);
}
let tokens = (await response.json()) as Record<string, string>;
return Object.keys(tokens);
}),
}
});
return [...new Set(realmURLs)];
}

// Asks a single trusted realm server which realms the current user can
// access by POSTing to its `_realm-auth` endpoint with the realm-server token.
//
// Resolves to the keys of the JSON response body, which the caller treats as
// realm URLs. Rejects when the fetch itself fails or when the response is
// non-2xx, so the caller's `Promise.allSettled` records this server as
// unreachable.
private async fetchUserRealmsFromServer(
  serverURL: string,
): Promise<string[]> {
  let normalizedServerURL = ensureTrailingSlash(serverURL);
  let response = await this.network.fetch(
    `${normalizedServerURL}_realm-auth`,
    {
      method: 'POST',
      headers: {
        Accept: SupportedMimeType.JSONAPI,
        'Content-Type': 'application/json',
        Authorization: `Bearer ${this.token}`,
      },
    },
  );
  // A non-2xx answer is surfaced as an error that carries the status and the
  // response text for the log.
  if (!response.ok) {
    let responseText = await response.text();
    throw new Error(
      `Failed to fetch user realms from trusted server ${normalizedServerURL}: ${response.status} - ${responseText}`,
    );
  }
  // NOTE(review): the body is assumed to be an object keyed by realm URL; only
  // the keys are used here.
  let tokens = (await response.json()) as Record<string, string>;
  return Object.keys(tokens);
}

@cached
get unreachableRealmServers(): string[] {
  // Hand out a plain-array copy so callers cannot mutate the tracked list.
  return Array.from(this.unreachableRealmServersList);
}

private markRealmServerUnreachable(serverURL: string) {
  // Each server appears at most once in the list.
  let alreadyListed = this.unreachableRealmServersList.includes(serverURL);
  if (alreadyListed) {
    return;
  }
  this.unreachableRealmServersList.push(serverURL);
}

private markRealmServerReachable(serverURL: string) {
  // Nothing to do for a server that was never listed as unreachable.
  let position = this.unreachableRealmServersList.indexOf(serverURL);
  if (position === -1) {
    return;
  }
  // Remove in place so the tracked array notifies its consumers.
  this.unreachableRealmServersList.splice(position, 1);
}

@cached
Expand Down
1 change: 1 addition & 0 deletions packages/host/tests/helpers/index.gts
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ export { createJWT, testRealmSecretSeed } from './test-auth';
export {
registerRealmAuthSessionRoomEnsurer,
resetCatalogRealmURL,
setRealmAuthFailure,
setupAuthEndpoints,
setCatalogRealmURL,
} from './realm-server-mock';
Expand Down
Loading
Loading