
Commit bfe7aad

feat: show stop reason of messages
1 parent 3bc2fe0 commit bfe7aad

File tree (4 files changed: +76 / -28 lines):
  app/client/api.ts
  app/client/webllm.ts
  app/components/chat.tsx
  app/store/chat.ts


app/client/api.ts

Lines changed: 9 additions & 2 deletions
@@ -1,4 +1,7 @@
-import { CompletionUsage } from "@neet-nestor/web-llm";
+import {
+  ChatCompletionFinishReason,
+  CompletionUsage,
+} from "@neet-nestor/web-llm";
 import { CacheType, ModelType } from "../store";
 export const ROLES = ["system", "user", "assistant"] as const;
 export type MessageRole = (typeof ROLES)[number];
@@ -34,7 +37,11 @@ export interface ChatOptions {
   config: LLMConfig;

   onUpdate?: (message: string, chunk: string) => void;
-  onFinish: (message: string, usage?: CompletionUsage) => void;
+  onFinish: (
+    message: string,
+    stopReason: ChatCompletionFinishReason,
+    usage?: CompletionUsage,
+  ) => void;
   onError?: (err: Error) => void;
   onController?: (controller: AbortController) => void;
 }
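
Note on the ChatOptions change above: onFinish now receives the finish reason as a required second parameter, so every implementation must be updated alongside this interface (the webllm.ts and chat.ts diffs below do exactly that). A minimal sketch of a consumer, assuming a log-only handler and using Partial to elide the required messages/config fields:

import {
  ChatCompletionFinishReason,
  CompletionUsage,
} from "@neet-nestor/web-llm";
import { ChatOptions } from "./api";

// Sketch only: the interesting part is the new onFinish signature.
export const exampleChatOptions: Partial<ChatOptions> = {
  onUpdate(message, chunk) {
    // Streaming callback is unchanged.
    console.log("partial length:", message.length, "chunk:", chunk);
  },
  onFinish(
    message: string,
    stopReason: ChatCompletionFinishReason,
    usage?: CompletionUsage,
  ) {
    // "stop" = the model ended naturally; "length" = the token limit was hit.
    console.log(`finished (${stopReason}):`, message);
    if (usage) {
      console.log("completion tokens:", usage.completion_tokens);
    }
  },
  onError(err) {
    console.error("generation failed:", err);
  },
};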

app/client/webllm.ts

Lines changed: 14 additions & 4 deletions
@@ -11,6 +11,7 @@ import {
   ChatCompletion,
   WebWorkerMLCEngine,
   CompletionUsage,
+  ChatCompletionFinishReason,
 } from "@neet-nestor/web-llm";

 import { ChatOptions, LLMApi, LLMConfig, RequestMessage } from "./api";
@@ -108,6 +109,7 @@ export class WebLLMApi implements LLMApi {
     }

     let reply: string | null = "";
+    let stopReason: ChatCompletionFinishReason = "stop";
     let usage: CompletionUsage | undefined = undefined;
     try {
       const completion = await this.chatCompletion(
@@ -116,6 +118,7 @@ export class WebLLMApi implements LLMApi {
         options.onUpdate,
       );
       reply = completion.content;
+      stopReason = completion.stopReason;
       usage = completion.usage;
     } catch (err: any) {
       let errorMessage = err.message || err.toString() || "";
@@ -157,6 +160,7 @@ export class WebLLMApi implements LLMApi {
         options.onUpdate,
       );
       reply = completion.content;
+      stopReason = completion.stopReason;
       usage = completion.usage;
     } catch (err: any) {
       let errorMessage = err.message || err.toString() || "";
@@ -170,7 +174,7 @@ export class WebLLMApi implements LLMApi {
     }

     if (reply) {
-      options.onFinish(reply, usage);
+      options.onFinish(reply, stopReason, usage);
     } else {
       options.onError?.(new Error("Empty response generated by LLM"));
     }
@@ -236,6 +240,7 @@ export class WebLLMApi implements LLMApi {

     if (stream) {
       let content: string | null = "";
+      let stopReason: ChatCompletionFinishReason = "stop";
       let usage: CompletionUsage | undefined = undefined;
       const asyncGenerator = completion as AsyncIterable<ChatCompletionChunk>;
       for await (const chunk of asyncGenerator) {
@@ -246,13 +251,18 @@ export class WebLLMApi implements LLMApi {
         if (chunk.usage) {
           usage = chunk.usage;
         }
+        if (chunk.choices[0]?.finish_reason) {
+          stopReason = chunk.choices[0].finish_reason;
+        }
       }
-      return { content, usage };
+      return { content, stopReason, usage };
     }

+    const chatCompletion = completion as ChatCompletion;
     return {
-      content: (completion as ChatCompletion).choices[0].message.content,
-      usage: (completion as ChatCompletion).usage,
+      content: chatCompletion.choices[0].message.content,
+      stopReason: chatCompletion.choices[0].finish_reason,
+      usage: chatCompletion.usage,
     };
   }
 }
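
In the streaming branch above, finish_reason follows the OpenAI-style convention that web-llm mirrors: intermediate chunks carry a null finish_reason, and only the final chunk of a choice reports the actual reason, which is why the loop simply overwrites stopReason whenever the field is set. A standalone sketch of that accumulation pattern (the collectStream helper is illustrative, not part of the commit):

import {
  ChatCompletionChunk,
  ChatCompletionFinishReason,
} from "@neet-nestor/web-llm";

// Fold a chunk stream into the full text plus the finish reason reported
// on the last chunk of the first choice.
export async function collectStream(
  chunks: AsyncIterable<ChatCompletionChunk>,
): Promise<{ content: string; stopReason: ChatCompletionFinishReason }> {
  let content = "";
  // Default mirrors the commit: assume "stop" unless the stream says otherwise.
  let stopReason: ChatCompletionFinishReason = "stop";
  for await (const chunk of chunks) {
    content += chunk.choices[0]?.delta?.content ?? "";
    if (chunk.choices[0]?.finish_reason) {
      stopReason = chunk.choices[0].finish_reason;
    }
  }
  return { content, stopReason };
}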

app/components/chat.tsx

Lines changed: 32 additions & 20 deletions
@@ -1335,31 +1335,43 @@ function _Chat() {
           </div>

           <div className={styles["chat-message-action-date"]}>
-            {message.usage && (
+            {(message.stopReason || message.usage) && (
               <div className={styles.tooltip}>
                 <Tooltip
                   direction="top"
                   content={
                     <div style={{ fontSize: config.fontSize }}>
-                      <span>
-                        {`Prompt Tokens: ${message.usage.prompt_tokens}`}
-                      </span>
-                      <br />
-                      <span>
-                        {`Completion Tokens: ${message.usage.completion_tokens}`}
-                      </span>
-                      <br />
-                      <span>
-                        {`Prefill: ${message.usage.extra.prefill_tokens_per_s.toFixed(
-                          4,
-                        )} tokens/sec`}
-                      </span>
-                      <br />
-                      <span>
-                        {`Decoding: ${message.usage.extra.decode_tokens_per_s.toFixed(
-                          4,
-                        )} tokens/sec`}
-                      </span>
+                      {message.stopReason && (
+                        <>
+                          <span>
+                            {`Stop Reason: ${message.stopReason}`}
+                          </span>
+                          <br />
+                        </>
+                      )}
+                      {message.usage && (
+                        <>
+                          <span>
+                            {`Prompt Tokens: ${message.usage.prompt_tokens}`}
+                          </span>
+                          <br />
+                          <span>
+                            {`Completion Tokens: ${message.usage.completion_tokens}`}
+                          </span>
+                          <br />
+                          <span>
+                            {`Prefill: ${message.usage.extra.prefill_tokens_per_s.toFixed(
+                              4,
+                            )} tokens/sec`}
+                          </span>
+                          <br />
+                          <span>
+                            {`Decoding: ${message.usage.extra.decode_tokens_per_s.toFixed(
+                              4,
+                            )} tokens/sec`}
+                          </span>
+                        </>
+                      )}
                     </div>
                   }
                 >
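
The tooltip above prints the raw stopReason string (e.g. "Stop Reason: stop"). If friendlier wording were wanted later, a small lookup could translate the common values; the helper below is hypothetical and not part of this commit:

import { ChatCompletionFinishReason } from "@neet-nestor/web-llm";

// Hypothetical helper: map well-known finish reasons to readable labels,
// falling back to the raw value for anything unrecognized.
const STOP_REASON_LABELS: Record<string, string> = {
  stop: "Stopped naturally",
  length: "Hit the generation token limit",
};

export function describeStopReason(reason: ChatCompletionFinishReason): string {
  return STOP_REASON_LABELS[reason] ?? reason;
}

The tooltip line would then render `Stop Reason: ${describeStopReason(message.stopReason)}` instead of the raw value.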

app/store/chat.ts

Lines changed: 21 additions & 2 deletions
@@ -16,13 +16,17 @@ import { estimateTokenLength } from "../utils/token";
 import { nanoid } from "nanoid";
 import { createPersistStore } from "../utils/store";
 import { WebLLMApi } from "../client/webllm";
-import { CompletionUsage } from "@neet-nestor/web-llm";
+import {
+  ChatCompletionFinishReason,
+  CompletionUsage,
+} from "@neet-nestor/web-llm";

 export type ChatMessage = RequestMessage & {
   date: string;
   streaming?: boolean;
   isError?: boolean;
   id: string;
+  stopReason: ChatCompletionFinishReason;
   model?: ModelType;
   usage?: CompletionUsage;
 };
@@ -33,6 +37,7 @@ export function createMessage(override: Partial<ChatMessage>): ChatMessage {
     date: new Date().toLocaleString(),
     role: "user",
     content: "",
+    stopReason: "stop",
     ...override,
   };
 }
@@ -349,9 +354,10 @@ export const useChatStore = createPersistStore(
           session.messages = session.messages.concat();
         });
       },
-      onFinish(message, usage) {
+      onFinish(message, stopReason, usage) {
         botMessage.streaming = false;
         botMessage.usage = usage;
+        botMessage.stopReason = stopReason;
         if (message) {
           botMessage.content = message;
           get().onNewMessage(botMessage, webllm);
@@ -668,5 +674,18 @@ export const useChatStore = createPersistStore(
   },
   {
     name: StoreKey.Chat,
+    version: 0.1,
+    migrate(persistedState, version): any {
+      if (version < 0.1) {
+        const store = persistedState as typeof DEFAULT_CHAT_STATE;
+        store.sessions.forEach((s) => {
+          s.messages.forEach((m) => {
+            m.stopReason = "stop";
+          });
+        });
+        return store;
+      }
+      return persistedState;
+    },
   },
 );
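
About the migration above: bumping the store version to 0.1 makes createPersistStore (which, judging by the name/version/migrate shape, forwards these options to a zustand-persist style middleware) rerun migrate for state saved before this commit, backfilling stopReason so the now-required ChatMessage field is present when old sessions load. A standalone sketch of the same backfill, with the persisted shape narrowed to just the fields the migration touches:

import { ChatCompletionFinishReason } from "@neet-nestor/web-llm";

// Only the fields the migration reads or writes, for illustration.
interface PersistedMessage {
  stopReason?: ChatCompletionFinishReason;
}
interface PersistedSession {
  messages: PersistedMessage[];
}
interface PersistedChatState {
  sessions: PersistedSession[];
}

// Mirrors the migrate() added above: anything saved before version 0.1
// has its messages stamped with a stopReason of "stop".
export function migrateChatState(
  state: PersistedChatState,
  version: number,
): PersistedChatState {
  if (version < 0.1) {
    for (const session of state.sessions) {
      for (const message of session.messages) {
        message.stopReason = "stop";
      }
    }
  }
  return state;
}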
