Skip to content

Commit 3bc2fe0

Browse files
committed
feat: show usage of messages
1 parent 45d53b7 commit 3bc2fe0

File tree

8 files changed

+100
-59
lines changed

8 files changed

+100
-59
lines changed

app/client/api.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import { CompletionUsage } from "@neet-nestor/web-llm";
12
import { CacheType, ModelType } from "../store";
23
export const ROLES = ["system", "user", "assistant"] as const;
34
export type MessageRole = (typeof ROLES)[number];
@@ -33,7 +34,7 @@ export interface ChatOptions {
3334
config: LLMConfig;
3435

3536
onUpdate?: (message: string, chunk: string) => void;
36-
onFinish: (message: string) => void;
37+
onFinish: (message: string, usage?: CompletionUsage) => void;
3738
onError?: (err: Error) => void;
3839
onController?: (controller: AbortController) => void;
3940
}

app/client/webllm.ts

Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import {
1010
ChatCompletionChunk,
1111
ChatCompletion,
1212
WebWorkerMLCEngine,
13+
CompletionUsage,
1314
} from "@neet-nestor/web-llm";
1415

1516
import { ChatOptions, LLMApi, LLMConfig, RequestMessage } from "./api";
@@ -107,12 +108,15 @@ export class WebLLMApi implements LLMApi {
107108
}
108109

109110
let reply: string | null = "";
111+
let usage: CompletionUsage | undefined = undefined;
110112
try {
111-
reply = await this.chatCompletion(
113+
const completion = await this.chatCompletion(
112114
!!options.config.stream,
113115
options.messages,
114116
options.onUpdate,
115117
);
118+
reply = completion.content;
119+
usage = completion.usage;
116120
} catch (err: any) {
117121
let errorMessage = err.message || err.toString() || "";
118122
if (errorMessage === "[object Object]") {
@@ -147,11 +151,13 @@ export class WebLLMApi implements LLMApi {
147151
return;
148152
}
149153
try {
150-
reply = await this.chatCompletion(
154+
const completion = await this.chatCompletion(
151155
!!options.config.stream,
152156
options.messages,
153157
options.onUpdate,
154158
);
159+
reply = completion.content;
160+
usage = completion.usage;
155161
} catch (err: any) {
156162
let errorMessage = err.message || err.toString() || "";
157163
if (errorMessage === "[object Object]") {
@@ -164,7 +170,7 @@ export class WebLLMApi implements LLMApi {
164170
}
165171

166172
if (reply) {
167-
options.onFinish(reply);
173+
options.onFinish(reply, usage);
168174
} else {
169175
options.onError?.(new Error("Empty response generated by LLM"));
170176
}
@@ -216,26 +222,38 @@ export class WebLLMApi implements LLMApi {
216222
async chatCompletion(
217223
stream: boolean,
218224
messages: RequestMessage[],
219-
onUpdate?: (message: string, chunk: string) => void,
225+
onUpdate?: (
226+
message: string,
227+
chunk: string,
228+
usage?: CompletionUsage,
229+
) => void,
220230
) {
221-
let reply: string | null = "";
222-
223231
const completion = await this.webllm.engine.chatCompletion({
224232
stream: stream,
225233
messages: messages as ChatCompletionMessageParam[],
234+
stream_options: { include_usage: true },
226235
});
227236

228237
if (stream) {
238+
let content: string | null = "";
239+
let usage: CompletionUsage | undefined = undefined;
229240
const asyncGenerator = completion as AsyncIterable<ChatCompletionChunk>;
230241
for await (const chunk of asyncGenerator) {
231-
if (chunk.choices[0].delta.content) {
232-
reply += chunk.choices[0].delta.content;
233-
onUpdate?.(reply, chunk.choices[0].delta.content);
242+
if (chunk.choices[0]?.delta.content) {
243+
content += chunk.choices[0].delta.content;
244+
onUpdate?.(content, chunk.choices[0].delta.content);
245+
}
246+
if (chunk.usage) {
247+
usage = chunk.usage;
234248
}
235249
}
236-
return reply;
250+
return { content, usage };
237251
}
238-
return (completion as ChatCompletion).choices[0].message.content;
252+
253+
return {
254+
content: (completion as ChatCompletion).choices[0].message.content,
255+
usage: (completion as ChatCompletion).usage,
256+
};
239257
}
240258
}
241259

app/components/chat.module.scss

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -518,8 +518,16 @@
518518
width: 100%;
519519
box-sizing: border-box;
520520
padding-right: 10px;
521-
pointer-events: none;
522521
z-index: 1;
522+
display: flex;
523+
justify-content: end;
524+
align-items: center;
525+
gap: 4px;
526+
527+
.tooltip {
528+
width: 14px;
529+
height: 14px;
530+
}
523531
}
524532

525533
.chat-message-user>.chat-message-container>.chat-message-item {

app/components/chat.tsx

Lines changed: 34 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -488,7 +488,6 @@ export function ChatActions(props: {
488488
showPromptHints: () => void;
489489
hitBottom: boolean;
490490
uploading: boolean;
491-
tootip?: ReactElement;
492491
}) {
493492
const config = useAppConfig();
494493
const chatStore = useChatStore();
@@ -560,19 +559,6 @@ export function ChatActions(props: {
560559
icon={<RobotIcon />}
561560
fullWidth
562561
/>
563-
{props.tootip && (
564-
<div className={styles.tooltip}>
565-
<Tooltip
566-
direction="left"
567-
content={
568-
<div style={{ fontSize: config.fontSize }}>{props.tootip}</div>
569-
}
570-
>
571-
{<InfoIcon />}
572-
</Tooltip>
573-
</div>
574-
)}
575-
576562
{showModelSelector && (
577563
<Selector
578564
defaultSelectedValue={currentModel}
@@ -702,23 +688,6 @@ function _Chat() {
702688
}
703689
};
704690

705-
const getEngineStats = () => {
706-
webllm.webllm.engine.runtimeStatsText().then((stats) => {
707-
const lines = stats.split(", ");
708-
setEngineStats(
709-
<>
710-
<b>WebLLM Engine Statistics</b>
711-
{lines.map((line) => (
712-
<>
713-
<br />
714-
<span>{line}</span>
715-
</>
716-
))}
717-
</>,
718-
);
719-
});
720-
};
721-
722691
const onSubmit = (userInput: string) => {
723692
if (userInput.trim() === "") return;
724693

@@ -732,7 +701,7 @@ function _Chat() {
732701

733702
if (isStreaming) return;
734703

735-
chatStore.onUserInput(userInput, webllm, attachImages, getEngineStats);
704+
chatStore.onUserInput(userInput, webllm, attachImages);
736705
setAttachImages([]);
737706
localStorage.setItem(LAST_INPUT_KEY, userInput);
738707
setUserInput("");
@@ -883,7 +852,7 @@ function _Chat() {
883852
// resend the message
884853
const textContent = getMessageTextContent(userMessage);
885854
const images = getMessageImages(userMessage);
886-
chatStore.onUserInput(textContent, webllm, images, getEngineStats);
855+
chatStore.onUserInput(textContent, webllm, images);
887856
inputRef.current?.focus();
888857
};
889858

@@ -1366,6 +1335,38 @@ function _Chat() {
13661335
</div>
13671336

13681337
<div className={styles["chat-message-action-date"]}>
1338+
{message.usage && (
1339+
<div className={styles.tooltip}>
1340+
<Tooltip
1341+
direction="top"
1342+
content={
1343+
<div style={{ fontSize: config.fontSize }}>
1344+
<span>
1345+
{`Prompt Tokens: ${message.usage.prompt_tokens}`}
1346+
</span>
1347+
<br />
1348+
<span>
1349+
{`Completion Tokens: ${message.usage.completion_tokens}`}
1350+
</span>
1351+
<br />
1352+
<span>
1353+
{`Prefill: ${message.usage.extra.prefill_tokens_per_s.toFixed(
1354+
4,
1355+
)} tokens/sec`}
1356+
</span>
1357+
<br />
1358+
<span>
1359+
{`Decoding: ${message.usage.extra.decode_tokens_per_s.toFixed(
1360+
4,
1361+
)} tokens/sec`}
1362+
</span>
1363+
</div>
1364+
}
1365+
>
1366+
{<InfoIcon />}
1367+
</Tooltip>
1368+
</div>
1369+
)}
13691370
{isContext
13701371
? Locale.Chat.IsContext
13711372
: message.date.toLocaleString()}
@@ -1388,7 +1389,6 @@ function _Chat() {
13881389
scrollToBottom={scrollToBottom}
13891390
hitBottom={hitBottom}
13901391
uploading={uploading}
1391-
tootip={engineStat}
13921392
showPromptSetting={() => setShowEditPromptModal(true)}
13931393
showPromptHints={() => {
13941394
// Click again to close

app/components/ui-lib.module.scss

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,20 @@
3838

3939
.tooltip {
4040
position: relative;
41-
display: inline-block;
41+
display: flex;
42+
align-items: center;
43+
height: 100%;
44+
width: 100%;
45+
}
46+
47+
.tooltip-icon {
48+
display: flex;
49+
align-items: center;
50+
51+
svg {
52+
height: 100%;
53+
width: 100%;
54+
}
4255
}
4356

4457
.tooltip-content {
@@ -50,6 +63,7 @@
5063
z-index: 3;
5164
white-space: nowrap;
5265
font-size: 12px;
66+
text-align: center;
5367

5468
&.tooltip-content-left {
5569
right: 100%;

app/components/ui-lib.tsx

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,11 @@ export function Tooltip(props: {
5353
return (
5454
<div className={styles.tooltip}>
5555
{/* The anchor component */}
56-
<div onMouseOver={showTooltip} onMouseOut={hideTooltip}>
56+
<div
57+
className={styles["tooltip-icon"]}
58+
onMouseOver={showTooltip}
59+
onMouseOut={hideTooltip}
60+
>
5761
{props.children}
5862
</div>
5963

app/icons/info.svg

Lines changed: 1 addition & 1 deletion
Loading

app/store/chat.ts

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,18 +12,19 @@ import {
1212
StoreKey,
1313
} from "../constant";
1414
import { RequestMessage, MultimodalContent } from "../client/api";
15-
import { prettyObject } from "../utils/format";
1615
import { estimateTokenLength } from "../utils/token";
1716
import { nanoid } from "nanoid";
1817
import { createPersistStore } from "../utils/store";
1918
import { WebLLMApi } from "../client/webllm";
19+
import { CompletionUsage } from "@neet-nestor/web-llm";
2020

2121
export type ChatMessage = RequestMessage & {
2222
date: string;
2323
streaming?: boolean;
2424
isError?: boolean;
2525
id: string;
2626
model?: ModelType;
27+
usage?: CompletionUsage;
2728
};
2829

2930
export function createMessage(override: Partial<ChatMessage>): ChatMessage {
@@ -275,12 +276,7 @@ export const useChatStore = createPersistStore(
275276
get().summarizeSession(webllm);
276277
},
277278

278-
onUserInput(
279-
content: string,
280-
webllm: WebLLMApi,
281-
attachImages?: string[],
282-
onFinish?: () => void,
283-
) {
279+
onUserInput(content: string, webllm: WebLLMApi, attachImages?: string[]) {
284280
const modelConfig = useAppConfig.getState().modelConfig;
285281

286282
const userContent = fillTemplateWith(content, modelConfig);
@@ -353,16 +349,16 @@ export const useChatStore = createPersistStore(
353349
session.messages = session.messages.concat();
354350
});
355351
},
356-
onFinish(message) {
352+
onFinish(message, usage) {
357353
botMessage.streaming = false;
354+
botMessage.usage = usage;
358355
if (message) {
359356
botMessage.content = message;
360357
get().onNewMessage(botMessage, webllm);
361358
}
362359
get().updateCurrentSession((session) => {
363360
session.isGenerating = false;
364361
});
365-
onFinish?.();
366362
},
367363
onError(error) {
368364
const errorMessage =

0 commit comments

Comments
 (0)