From 60d96dad9ab6dc9453209574f456706471851b8b Mon Sep 17 00:00:00 2001
From: arvinxx
Date: Fri, 7 Mar 2025 01:52:21 +0800
Subject: [PATCH] refactor token calc

---
 .../Extras/Usage/UsageDetail/index.tsx        |  16 +-
 .../Extras/Usage/UsageDetail/tokens.test.ts   | 253 ++++++++++++++++++
 .../Extras/Usage/UsageDetail/tokens.ts        |  90 ++++---
 .../agent-runtime/perplexity/index.test.ts    |   2 +-
 .../utils/streams/anthropic.test.ts           |  94 ++++++-
 .../agent-runtime/utils/streams/anthropic.ts  |  30 ++-
 .../utils/streams/openai.test.ts              |  14 +-
 .../utils/usageConverter.test.ts              | 249 +++++++++++++++++
 .../agent-runtime/utils/usageConverter.ts     |  29 +-
 src/types/message/base.ts                     |   8 +-
 10 files changed, 705 insertions(+), 80 deletions(-)
 create mode 100644 src/features/Conversation/Extras/Usage/UsageDetail/tokens.test.ts
 create mode 100644 src/libs/agent-runtime/utils/usageConverter.test.ts

diff --git a/src/features/Conversation/Extras/Usage/UsageDetail/index.tsx b/src/features/Conversation/Extras/Usage/UsageDetail/index.tsx
index 0f1aa438190b5..5837546490d49 100644
--- a/src/features/Conversation/Extras/Usage/UsageDetail/index.tsx
+++ b/src/features/Conversation/Extras/Usage/UsageDetail/index.tsx
@@ -52,11 +52,13 @@ const TokenDetail = memo(({ usage, model, provider }) => {
   ].filter(Boolean) as TokenProgressItem[];
 
   const outputDetails = [
-    !!detailTokens.reasoning && {
+    !!detailTokens.outputReasoning && {
       color: theme.pink,
       id: 'reasoning',
       title: t('messages.tokenDetails.reasoning'),
-      value: isShowCredit ? detailTokens.reasoning.credit : detailTokens.reasoning.token,
+      value: isShowCredit
+        ? detailTokens.outputReasoning.credit
+        : detailTokens.outputReasoning.token,
     },
     !!detailTokens.outputAudio && {
       color: theme.cyan9,
@@ -73,18 +75,18 @@ const TokenDetail = memo(({ usage, model, provider }) => {
   ].filter(Boolean) as TokenProgressItem[];
 
   const totalDetail = [
-    !!detailTokens.uncachedInput && {
+    !!detailTokens.inputCacheMiss && {
       color: theme.colorFill,
       id: 'uncachedInput',
       title: t('messages.tokenDetails.inputUncached'),
-      value: isShowCredit ? detailTokens.uncachedInput.credit : detailTokens.uncachedInput.token,
+      value: isShowCredit ? detailTokens.inputCacheMiss.credit : detailTokens.inputCacheMiss.token,
     },
-    !!detailTokens.cachedInput && {
+    !!detailTokens.inputCached && {
       color: theme.orange,
-      id: 'cachedInput',
+      id: 'inputCached',
       title: t('messages.tokenDetails.inputCached'),
-      value: isShowCredit ? detailTokens.cachedInput.credit : detailTokens.cachedInput.token,
+      value: isShowCredit ? detailTokens.inputCached.credit : detailTokens.inputCached.token,
     },
     !!detailTokens.totalOutput && {
       color: theme.colorSuccess,
diff --git a/src/features/Conversation/Extras/Usage/UsageDetail/tokens.test.ts b/src/features/Conversation/Extras/Usage/UsageDetail/tokens.test.ts
new file mode 100644
index 0000000000000..68dffe4fe8874
--- /dev/null
+++ b/src/features/Conversation/Extras/Usage/UsageDetail/tokens.test.ts
@@ -0,0 +1,253 @@
+import { describe, expect, it } from 'vitest';
+
+import { LobeDefaultAiModelListItem } from '@/types/aiModel';
+import { ModelTokensUsage } from '@/types/message';
+
+import { getDetailsToken } from './tokens';
+
+describe('getDetailsToken', () => {
+  // Basic test fixture
+  const mockModelCard: LobeDefaultAiModelListItem = {
+    pricing: {
+      input: 0.01,
+      output: 0.02,
+      cachedInput: 0.005,
+      audioInput: 0.03,
+      audioOutput: 0.04,
+    },
+  } as LobeDefaultAiModelListItem;
+
+  it('should return empty object when usage is empty', () => {
+    const usage: ModelTokensUsage = {};
+    const result = getDetailsToken(usage);
+
+    expect(result).toEqual({
+      inputAudio: undefined,
+      inputCacheMiss: undefined,
+      inputCached: undefined,
+      inputCitation: undefined,
+      inputText: undefined,
+      outputAudio: undefined,
+      outputReasoning: undefined,
+      outputText: undefined,
+      totalInput: undefined,
+      totalOutput: undefined,
+      totalTokens: undefined,
+    });
+  });
+
+  it('should handle inputTextTokens correctly', () => {
+    const usage: ModelTokensUsage = {
+      inputTextTokens: 100,
+    };
+
+    const result = getDetailsToken(usage, mockModelCard);
+
+    expect(result.inputText).toEqual({
+      credit: 1, // 100 * 0.01 = 1
+      token: 100,
+    });
+  });
+
+  it('should handle legacy inputTokens property', () => {
+    const usage = {
+      inputTokens: 100,
+    } as any;
+
+    const result = getDetailsToken(usage, mockModelCard);
+
+    expect(result.inputText).toEqual({
+      credit: 1, // 100 * 0.01 = 1
+      token: 100,
+    });
+  });
+
+  it('should handle cachedTokens correctly', () => {
+    const usage: ModelTokensUsage = {
+      totalInputTokens: 200,
+      inputCachedTokens: 50,
+    };
+
+    const result = getDetailsToken(usage, mockModelCard);
+
+    expect(result.inputCached).toEqual({
+      credit: 0, // 50 * 0.005 = 0.25, rounded to 0
+      token: 50,
+    });
+
+    expect(result.inputCacheMiss).toEqual({
+      credit: 2, // (200 - 50) * 0.01 = 1.5, rounded to 2
+      token: 150,
+    });
+  });
+
+  it('should handle outputTokens correctly', () => {
+    const usage = { outputTokens: 150 } as ModelTokensUsage;
+
+    const result = getDetailsToken(usage, mockModelCard);
+
+    expect(result.outputText).toEqual({
+      credit: 3, // 150 * 0.02 = 3
+      token: 150,
+    });
+
+    expect(result.totalOutput).toEqual({
+      credit: 3,
+      token: 150,
+    });
+  });
+
+  it('should handle reasoningTokens correctly', () => {
+    const usage = {
+      outputTokens: 200,
+      reasoningTokens: 50,
+    } as ModelTokensUsage;
+
+    const result = getDetailsToken(usage, mockModelCard);
+
+    expect(result.outputReasoning).toEqual({
+      credit: 1, // 50 * 0.02 = 1
+      token: 50,
+    });
+
+    expect(result.outputText).toEqual({
+      credit: 3, // (200 - 50) * 0.02 = 3
+      token: 150,
+    });
+  });
+
+  it('should handle audio tokens correctly', () => {
+    const usage = {
+      inputAudioTokens: 100,
+      outputAudioTokens: 50,
+      outputTokens: 150,
+    } as ModelTokensUsage;
+
+    const result = getDetailsToken(usage, mockModelCard);
+
+    expect(result.inputAudio).toEqual({
+      credit: 3, // 100 * 0.03 = 3
+      token: 100,
+    });
+
+    expect(result.outputAudio).toEqual({
+      credit: 2, // 50 * 0.04 = 2
+      id: 'outputAudio',
+      token: 50,
+    });
+
+    expect(result.outputText).toEqual({
+      credit: 2, // (150 - 50) * 0.02 = 2
+      token: 100,
+    });
+  });
+
+  it('should handle inputCitationTokens correctly', () => {
+    const usage: ModelTokensUsage = {
+      inputCitationTokens: 75,
+    };
+
+    const result = getDetailsToken(usage, mockModelCard);
+
+    expect(result.inputCitation).toEqual({
+      credit: 1, // 75 * 0.01 = 0.75, rounded to 1
+      token: 75,
+    });
+  });
+
+  it('should handle totalTokens correctly', () => {
+    const usage = {
+      totalTokens: 500,
+      totalInputTokens: 200,
+      inputCachedTokens: 50,
+      outputTokens: 300,
+    } as ModelTokensUsage;
+
+    const result = getDetailsToken(usage, mockModelCard);
+
+    // inputCacheMiss: (200 - 50) * 0.01 = 1.5 -> 2
+    // inputCached: 50 * 0.005 = 0.25 -> 0
+    // totalOutput: 300 * 0.02 = 6
+    // totalCredit = 2 + 0 + 6 = 8
+
+    expect(result.totalTokens).toEqual({
+      credit: 8,
+      token: 500,
+    });
+  });
+
+  it('should handle missing pricing information', () => {
+    const usage = { inputTextTokens: 100, outputTokens: 200 } as ModelTokensUsage;
+
+    const result = getDetailsToken(usage);
+
+    expect(result.inputText).toEqual({
+      credit: '-',
+      token: 100,
+    });
+
+    expect(result.outputText).toEqual({
+      credit: '-',
+      token: 200,
+    });
+  });
+
+  it('should handle complex scenario with all token types', () => {
+    const usage: ModelTokensUsage = {
+      totalTokens: 1000,
+      totalInputTokens: 400,
+      inputTextTokens: 300,
+      inputAudioTokens: 50,
+      inputCitationTokens: 50,
+      inputCachedTokens: 100,
+      totalOutputTokens: 600,
+      outputAudioTokens: 100,
+      outputReasoningTokens: 200,
+    };
+
+    const result = getDetailsToken(usage, mockModelCard);
+
+    expect(result).toMatchObject({
+      inputCached: {
+        credit: 1, // 100 * 0.005 = 0.5, rounded to 1
+        token: 100,
+      },
+      inputCacheMiss: {
+        credit: 3, // (400 - 100) * 0.01 = 3
+        token: 300,
+      },
+      inputText: {
+        credit: 3, // 300 * 0.01 = 3
+        token: 300,
+      },
+      inputAudio: {
+        credit: 2, // 50 * 0.03 = 1.5, rounded to 2
+        token: 50,
+      },
+      inputCitation: {
+        credit: 1, // 50 * 0.01 = 0.5, rounded to 1
+        token: 50,
+      },
+      outputAudio: {
+        credit: 4, // 100 * 0.04 = 4
+        id: 'outputAudio',
+        token: 100,
+      },
+      outputReasoning: {
+        credit: 4, // 200 * 0.02 = 4
+        token: 200,
+      },
+      outputText: {
+        credit: 6, // (600 - 200 - 100) * 0.02 = 6
+        token: 300,
+      },
+      totalOutput: {
+        credit: 12, // 600 * 0.02 = 12
+        token: 600,
+      },
+      totalTokens: {
+        credit: 16, // 3 + 1 + 12 = 16
+        token: 1000,
+      },
+    });
+  });
+});
diff --git a/src/features/Conversation/Extras/Usage/UsageDetail/tokens.ts b/src/features/Conversation/Extras/Usage/UsageDetail/tokens.ts
index cc92de6124182..89ad976139772 100644
--- a/src/features/Conversation/Extras/Usage/UsageDetail/tokens.ts
+++ b/src/features/Conversation/Extras/Usage/UsageDetail/tokens.ts
@@ -11,51 +11,69 @@ export const getDetailsToken = (
   usage: ModelTokensUsage,
   modelCard?: LobeDefaultAiModelListItem,
 ) => {
-  const inputTextToken = usage.inputTextTokens || (usage as any).inputTokens || 0;
+  const inputTextTokens = usage.inputTextTokens || (usage as any).inputTokens || 0;
+  const totalInputTokens = usage.totalInputTokens || (usage as any).inputTokens || 0;
 
-  const totalInputToken = usage.totalInputTokens || (usage as any).inputTokens;
+  const totalOutputTokens = usage.totalOutputTokens || (usage as any).outputTokens || 0;
 
-  const uncachedInputCredit = (
-    !!totalInputToken
-      ? calcCredit(totalInputToken - (usage.cachedTokens || 0), modelCard?.pricing?.input)
-      : 0
+  const outputReasoningTokens = usage.outputReasoningTokens || (usage as any).reasoningTokens || 0;
+
+  const outputTextTokens = usage.outputTextTokens
+    ? usage.outputTextTokens
+    : totalOutputTokens - outputReasoningTokens - (usage.outputAudioTokens || 0);
+
+  const inputCacheMissTokens = usage?.inputCacheMissTokens
+    ? usage?.inputCacheMissTokens
+    : totalInputTokens - (usage.inputCachedTokens || 0);
+
+  const inputCacheMissCredit = (
+    !!inputCacheMissTokens ? calcCredit(inputCacheMissTokens, modelCard?.pricing?.input) : 0
   ) as number;
 
-  const cachedInputCredit = (
-    !!usage.cachedTokens ? calcCredit(usage.cachedTokens, modelCard?.pricing?.cachedInput) : 0
+  const inputCachedCredit = (
+    !!usage.inputCachedTokens
+      ? calcCredit(usage.inputCachedTokens, modelCard?.pricing?.cachedInput)
+      : 0
   ) as number;
 
-  const totalOutput = (
-    !!usage.outputTokens ? calcCredit(usage.outputTokens, modelCard?.pricing?.output) : 0
+  const totalOutputCredit = (
+    !!totalOutputTokens ? calcCredit(totalOutputTokens, modelCard?.pricing?.output) : 0
+  ) as number;
+  const totalInputCredit = (
+    !!totalInputTokens ? calcCredit(totalInputTokens, modelCard?.pricing?.input) : 0
   ) as number;
 
-  const totalCredit = uncachedInputCredit + cachedInputCredit + totalOutput;
+  const totalCredit = inputCacheMissCredit + inputCachedCredit + totalOutputCredit;
 
   return {
-    cachedInput: !!usage.cachedTokens
-      ? {
-          credit: cachedInputCredit,
-          token: usage.cachedTokens,
-        }
-      : undefined,
     inputAudio: !!usage.inputAudioTokens
       ? {
          credit: calcCredit(usage.inputAudioTokens, modelCard?.pricing?.audioInput),
          token: usage.inputAudioTokens,
        }
      : undefined,
+    inputCacheMiss: !!inputCacheMissTokens
+      ? { credit: inputCacheMissCredit, token: inputCacheMissTokens }
+      : undefined,
+    inputCached: !!usage.inputCachedTokens
+      ? {
+          credit: inputCachedCredit,
+          token: usage.inputCachedTokens,
+        }
+      : undefined,
     inputCitation: !!usage.inputCitationTokens
       ? {
          credit: calcCredit(usage.inputCitationTokens, modelCard?.pricing?.input),
          token: usage.inputCitationTokens,
        }
      : undefined,
-    inputText: !!inputTextToken
+    inputText: !!inputTextTokens
       ? {
-          credit: calcCredit(inputTextToken, modelCard?.pricing?.input),
-          token: inputTextToken,
+          credit: calcCredit(inputTextTokens, modelCard?.pricing?.input),
+          token: inputTextTokens,
         }
       : undefined,
+
     outputAudio: !!usage.outputAudioTokens
       ? {
          credit: calcCredit(usage.outputAudioTokens, modelCard?.pricing?.audioOutput),
          id: 'outputAudio',
          token: usage.outputAudioTokens,
        }
      : undefined,
-
-    outputText: !!usage.outputTokens
+    outputReasoning: !!outputReasoningTokens
       ? {
-          credit: calcCredit(
-            usage.outputTokens - (usage.reasoningTokens || 0) - (usage.outputAudioTokens || 0),
-            modelCard?.pricing?.output,
-          ),
-          token: usage.outputTokens - (usage.reasoningTokens || 0) - (usage.outputAudioTokens || 0),
+          credit: calcCredit(outputReasoningTokens, modelCard?.pricing?.output),
+          token: outputReasoningTokens,
         }
       : undefined,
-    reasoning: !!usage.reasoningTokens
+    outputText: !!outputTextTokens
       ? {
-          credit: calcCredit(usage.reasoningTokens, modelCard?.pricing?.output),
-          token: usage.reasoningTokens,
+          credit: calcCredit(outputTextTokens, modelCard?.pricing?.output),
+          token: outputTextTokens,
         }
       : undefined,
-    totalOutput: !!usage.outputTokens
-      ? {
-          credit: totalOutput,
-          token: usage.outputTokens,
-        }
+    totalInput: !!totalInputTokens
+      ? { credit: totalInputCredit, token: totalInputTokens }
+      : undefined,
+    totalOutput: !!totalOutputTokens
+      ? { credit: totalOutputCredit, token: totalOutputTokens }
       : undefined,
     totalTokens: !!usage.totalTokens
      ? 
{ credit: totalCredit, token: usage.totalTokens } : undefined, - uncachedInput: !!totalInputToken - ? { - credit: uncachedInputCredit, - token: totalInputToken - (usage.cachedTokens || 0), - } - : undefined, }; }; diff --git a/src/libs/agent-runtime/perplexity/index.test.ts b/src/libs/agent-runtime/perplexity/index.test.ts index c8a186e267dde..814ddd549bd68 100644 --- a/src/libs/agent-runtime/perplexity/index.test.ts +++ b/src/libs/agent-runtime/perplexity/index.test.ts @@ -241,7 +241,7 @@ describe('LobePerplexityAI', () => { 'data: "天和未来几天的"\n', 'id: 506d64fb-e7f2-4d94-b80f-158369e9446d', 'event: usage', - 'data: {"inputCitationTokens":3058,"inputTextTokens":2,"outputTokens":685,"totalInputTokens":3060,"totalTokens":3745}\n', + 'data: {"inputCitationTokens":3058,"inputTextTokens":2,"outputTextTokens":685,"totalInputTokens":3060,"totalOutputTokens":685,"totalTokens":3745}\n', ].map((line) => `${line}\n`), ); diff --git a/src/libs/agent-runtime/utils/streams/anthropic.test.ts b/src/libs/agent-runtime/utils/streams/anthropic.test.ts index fd8282d2688a1..4e38eab7a5a13 100644 --- a/src/libs/agent-runtime/utils/streams/anthropic.test.ts +++ b/src/libs/agent-runtime/utils/streams/anthropic.test.ts @@ -225,7 +225,7 @@ describe('AnthropicStream', () => { 'id: msg_017aTuY86wNxth5TE544yqJq', 'event: usage', - 'data: {"inputTokens":457,"outputTokens":84,"totalInputTokens":457,"totalTokens":541}\n', + 'data: {"inputCacheMissTokens":457,"totalInputTokens":457,"totalOutputTokens":84,"totalTokens":541}\n', ].map((item) => `${item}\n`), ); @@ -381,8 +381,7 @@ describe('AnthropicStream', () => { 'id: msg_0175ryA67RbGrnRrGBXFQEYK', 'event: usage', - 'data: {"inputTokens":485,"outputTokens":154,"totalInputTokens":485,"totalTokens":639}\n', - + 'data: {"inputCacheMissTokens":485,"totalInputTokens":485,"totalOutputTokens":154,"totalTokens":639}\n', 'id: msg_0175ryA67RbGrnRrGBXFQEYK', 'event: stop', 'data: "message_stop"\n', @@ -392,6 +391,91 @@ describe('AnthropicStream', () => { expect(onToolCallMock).toHaveBeenCalledTimes(6); }); + it('should handle prompts context caching', async () => { + const streams = [ + { + type: 'message_start', + message: { + id: 'msg_01Vxc4yQTEjkDSba3N3BMbH8', + type: 'message', + role: 'assistant', + model: 'claude-3-7-sonnet-20250219', + content: [], + stop_reason: null, + stop_sequence: null, + usage: { + input_tokens: 6, + cache_creation_input_tokens: 457, + cache_read_input_tokens: 17918, + output_tokens: 2, + }, + }, + }, + { type: 'content_block_start', index: 0, content_block: { type: 'text', text: '' } }, + { type: 'content_block_delta', index: 0, delta: { type: 'text_delta', text: '\n\n根' } }, + { + type: 'content_block_delta', + index: 0, + delta: { type: 'text_delta', text: '/\n[^20]: https://s' }, + }, + { type: 'content_block_stop', index: 0 }, + { + type: 'message_delta', + delta: { stop_reason: 'end_turn', stop_sequence: null }, + usage: { output_tokens: 3222 }, + }, + { type: 'message_stop' }, + ]; + + const mockReadableStream = new ReadableStream({ + start(controller) { + streams.forEach((chunk) => { + controller.enqueue(chunk); + }); + controller.close(); + }, + }); + + const protocolStream = AnthropicStream(mockReadableStream); + + const decoder = new TextDecoder(); + const chunks = []; + + // @ts-ignore + for await (const chunk of protocolStream) { + chunks.push(decoder.decode(chunk, { stream: true })); + } + + expect(chunks).toEqual( + [ + 'id: msg_01Vxc4yQTEjkDSba3N3BMbH8', + 'event: data', + 'data: 
{"id":"msg_01Vxc4yQTEjkDSba3N3BMbH8","type":"message","role":"assistant","model":"claude-3-7-sonnet-20250219","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":6,"cache_creation_input_tokens":457,"cache_read_input_tokens":17918,"output_tokens":2}}\n', + 'id: msg_01Vxc4yQTEjkDSba3N3BMbH8', + 'event: data', + 'data: ""\n', + 'id: msg_01Vxc4yQTEjkDSba3N3BMbH8', + 'event: text', + 'data: "\\n\\n根"\n', + 'id: msg_01Vxc4yQTEjkDSba3N3BMbH8', + 'event: text', + 'data: "/\\n[^20]: https://s"\n', + 'id: msg_01Vxc4yQTEjkDSba3N3BMbH8', + 'event: data', + 'data: {"type":"content_block_stop","index":0}\n', + 'id: msg_01Vxc4yQTEjkDSba3N3BMbH8', + 'event: stop', + 'data: "end_turn"\n', + 'id: msg_01Vxc4yQTEjkDSba3N3BMbH8', + 'event: usage', + 'data: {"inputCacheMissTokens":6,"inputCachedTokens":17918,"inputWriteCacheTokens":457,"totalInputTokens":18381,"totalOutputTokens":3224,"totalTokens":21605}\n', + + 'id: msg_01Vxc4yQTEjkDSba3N3BMbH8', + 'event: stop', + 'data: "message_stop"\n', + ].map((item) => `${item}\n`), + ); + }); describe('thinking', () => { it('should handle normal thinking ', async () => { const streams = [ @@ -515,7 +599,7 @@ describe('AnthropicStream', () => { 'data: "end_turn"\n', 'id: msg_01MNsLe7n1uVLtu6W8rCFujD', 'event: usage', - 'data: {"inputTokens":46,"outputTokens":365,"totalInputTokens":46,"totalTokens":411}\n', + 'data: {"inputCacheMissTokens":46,"totalInputTokens":46,"totalOutputTokens":365,"totalTokens":411}\n', 'id: msg_01MNsLe7n1uVLtu6W8rCFujD', 'event: stop', 'data: "message_stop"\n', @@ -675,7 +759,7 @@ describe('AnthropicStream', () => { 'data: "end_turn"\n', 'id: msg_019q32esPvu3TftzZnL6JPys', 'event: usage', - 'data: {"inputTokens":92,"outputTokens":263,"totalInputTokens":92,"totalTokens":355}\n', + 'data: {"inputCacheMissTokens":92,"totalInputTokens":92,"totalOutputTokens":263,"totalTokens":355}\n', 'id: msg_019q32esPvu3TftzZnL6JPys', 'event: stop', 'data: "message_stop"\n', diff --git a/src/libs/agent-runtime/utils/streams/anthropic.ts b/src/libs/agent-runtime/utils/streams/anthropic.ts index 24b0c516a44a9..4e3af5ad9d66f 100644 --- a/src/libs/agent-runtime/utils/streams/anthropic.ts +++ b/src/libs/agent-runtime/utils/streams/anthropic.ts @@ -22,9 +22,24 @@ export const transformAnthropicStream = ( switch (chunk.type) { case 'message_start': { context.id = chunk.message.id; + let totalInputTokens = chunk.message.usage?.input_tokens; + + if ( + chunk.message.usage?.cache_creation_input_tokens || + chunk.message.usage?.cache_read_input_tokens + ) { + totalInputTokens = + chunk.message.usage?.input_tokens + + (chunk.message.usage.cache_creation_input_tokens || 0) + + (chunk.message.usage.cache_read_input_tokens || 0); + } + context.usage = { - outputTokens: chunk.message.usage?.output_tokens, - totalInputTokens: chunk.message.usage?.input_tokens, + inputCacheMissTokens: chunk.message.usage?.input_tokens, + inputCachedTokens: chunk.message.usage?.cache_read_input_tokens || undefined, + inputWriteCacheTokens: chunk.message.usage?.cache_creation_input_tokens || undefined, + totalInputTokens, + totalOutputTokens: chunk.message.usage?.output_tokens, }; return { data: chunk.message, id: chunk.message.id, type: 'data' }; @@ -140,18 +155,19 @@ export const transformAnthropicStream = ( } case 'message_delta': { - const outputTokens = chunk.usage?.output_tokens + (context.usage?.outputTokens || 0); + const totalOutputTokens = + chunk.usage?.output_tokens + (context.usage?.totalOutputTokens || 0); const totalInputTokens = context.usage?.totalInputTokens 
|| 0; - const totalTokens = totalInputTokens + outputTokens; + const totalTokens = totalInputTokens + totalOutputTokens; if (totalTokens > 0) { return [ { data: chunk.delta.stop_reason, id: context.id, type: 'stop' }, { data: { - inputTokens: totalInputTokens, - outputTokens: outputTokens, - totalInputTokens: totalInputTokens, + ...context.usage, + totalInputTokens, + totalOutputTokens, totalTokens, } as ModelTokensUsage, id: context.id, diff --git a/src/libs/agent-runtime/utils/streams/openai.test.ts b/src/libs/agent-runtime/utils/streams/openai.test.ts index 3c64765a4a187..6a7be86e917f8 100644 --- a/src/libs/agent-runtime/utils/streams/openai.test.ts +++ b/src/libs/agent-runtime/utils/streams/openai.test.ts @@ -434,7 +434,7 @@ describe('OpenAIStream', () => { `data: "stop"\n`, 'id: chatcmpl-B7CcnaeK3jqWBMOhxg7SSKFwlk7dC', 'event: usage', - `data: {"inputTextTokens":1646,"outputTokens":11,"totalInputTokens":1646,"totalTokens":1657}\n`, + `data: {"inputCacheMissTokens":1646,"inputTextTokens":1646,"outputTextTokens":11,"totalInputTokens":1646,"totalOutputTokens":11,"totalTokens":1657}\n`, ].map((i) => `${i}\n`), ); }); @@ -536,7 +536,7 @@ describe('OpenAIStream', () => { `data: {"delta":{},"id":"chatcmpl-c1f6a6a6-fcf8-463a-96bf-cf634d3e98a5","index":0}\n`, 'id: chatcmpl-c1f6a6a6-fcf8-463a-96bf-cf634d3e98a5', 'event: usage', - `data: {"inputTextTokens":1797,"outputTokens":1720,"totalInputTokens":1797,"totalTokens":3517}\n`, + `data: {"inputCacheMissTokens":1797,"inputTextTokens":1797,"outputTextTokens":1720,"totalInputTokens":1797,"totalOutputTokens":1720,"totalTokens":3517}\n`, ].map((i) => `${i}\n`), ); }); @@ -944,7 +944,7 @@ describe('OpenAIStream', () => { `data: "帮助。"\n`, 'id: 1', 'event: usage', - `data: {"inputCacheMissTokens":6,"inputTextTokens":6,"outputTokens":104,"totalInputTokens":6,"totalTokens":110}\n`, + `data: {"inputCacheMissTokens":6,"inputTextTokens":6,"outputReasoningTokens":70,"outputTextTokens":34,"totalInputTokens":6,"totalOutputTokens":104,"totalTokens":110}\n`, ].map((i) => `${i}\n`), ); }); @@ -1163,7 +1163,7 @@ describe('OpenAIStream', () => { `data: "帮助。"\n`, 'id: 1', 'event: usage', - `data: {"inputCacheMissTokens":6,"inputTextTokens":6,"outputTokens":104,"totalInputTokens":6,"totalTokens":110}\n`, + `data: {"inputCacheMissTokens":6,"inputTextTokens":6,"outputReasoningTokens":70,"outputTextTokens":34,"totalInputTokens":6,"totalOutputTokens":104,"totalTokens":110}\n`, ].map((i) => `${i}\n`), ); }); @@ -1364,7 +1364,7 @@ describe('OpenAIStream', () => { `data: "帮助。"\n`, 'id: 1', 'event: usage', - `data: {"inputCacheMissTokens":6,"inputTextTokens":6,"outputTokens":104,"totalInputTokens":6,"totalTokens":110}\n`, + `data: {"inputCacheMissTokens":6,"inputTextTokens":6,"outputReasoningTokens":70,"outputTextTokens":34,"totalInputTokens":6,"totalOutputTokens":104,"totalTokens":110}\n`, ].map((i) => `${i}\n`), ); }); @@ -1565,7 +1565,7 @@ describe('OpenAIStream', () => { `data: "帮助。"\n`, 'id: 1', 'event: usage', - `data: {"inputCacheMissTokens":6,"inputTextTokens":6,"outputTokens":104,"totalInputTokens":6,"totalTokens":110}\n`, + `data: {"inputCacheMissTokens":6,"inputTextTokens":6,"outputReasoningTokens":70,"outputTextTokens":34,"totalInputTokens":6,"totalOutputTokens":104,"totalTokens":110}\n`, ].map((i) => `${i}\n`), ); }); @@ -1766,7 +1766,7 @@ describe('OpenAIStream', () => { `data: "帮助。"\n`, 'id: 1', 'event: usage', - `data: {"inputCacheMissTokens":6,"inputTextTokens":6,"outputTokens":104,"totalInputTokens":6,"totalTokens":110}\n`, + `data: 
{"inputCacheMissTokens":6,"inputTextTokens":6,"outputReasoningTokens":70,"outputTextTokens":34,"totalInputTokens":6,"totalOutputTokens":104,"totalTokens":110}\n`, ].map((i) => `${i}\n`), ); }); diff --git a/src/libs/agent-runtime/utils/usageConverter.test.ts b/src/libs/agent-runtime/utils/usageConverter.test.ts new file mode 100644 index 0000000000000..5e55d06b19a4f --- /dev/null +++ b/src/libs/agent-runtime/utils/usageConverter.test.ts @@ -0,0 +1,249 @@ +import OpenAI from 'openai'; +import { describe, expect, it } from 'vitest'; + +import { convertUsage } from './usageConverter'; + +describe('convertUsage', () => { + it('should convert basic OpenAI usage data correctly', () => { + // Arrange + const openaiUsage: OpenAI.Completions.CompletionUsage = { + prompt_tokens: 100, + completion_tokens: 50, + total_tokens: 150, + }; + + // Act + const result = convertUsage(openaiUsage); + + // Assert + expect(result).toEqual({ + inputTextTokens: 100, + totalInputTokens: 100, + totalOutputTokens: 50, + outputTextTokens: 50, + totalTokens: 150, + }); + }); + + it('should handle PPLX citation tokens correctly', () => { + // Arrange + const pplxUsage = { + prompt_tokens: 80, + citation_tokens: 20, + completion_tokens: 50, + total_tokens: 150, + } as OpenAI.Completions.CompletionUsage; + + // Act + const result = convertUsage(pplxUsage); + + // Assert + expect(result).toEqual({ + inputTextTokens: 80, + inputCitationTokens: 20, + totalInputTokens: 100, + totalOutputTokens: 50, + outputTextTokens: 50, + totalTokens: 170, // 150 + 20 (citation tokens) + }); + }); + + it('should handle cached tokens correctly', () => { + // Arrange + const usageWithCache = { + prompt_tokens: 100, + prompt_cache_hit_tokens: 30, + prompt_cache_miss_tokens: 70, + completion_tokens: 50, + total_tokens: 150, + } as OpenAI.Completions.CompletionUsage; + + // Act + const result = convertUsage(usageWithCache); + + // Assert + expect(result).toEqual({ + inputTextTokens: 100, + inputCachedTokens: 30, + inputCacheMissTokens: 70, + totalInputTokens: 100, + totalOutputTokens: 50, + outputTextTokens: 50, + totalTokens: 150, + }); + }); + + it('should handle cached tokens using prompt_tokens_details', () => { + // Arrange + const usageWithTokenDetails = { + prompt_tokens: 100, + prompt_tokens_details: { + cached_tokens: 30, + }, + completion_tokens: 50, + total_tokens: 150, + } as OpenAI.Completions.CompletionUsage; + + // Act + const result = convertUsage(usageWithTokenDetails); + + // Assert + expect(result).toEqual({ + inputTextTokens: 100, + inputCachedTokens: 30, + inputCacheMissTokens: 70, // 100 - 30 + totalInputTokens: 100, + totalOutputTokens: 50, + outputTextTokens: 50, + totalTokens: 150, + }); + }); + + it('should handle audio tokens in input correctly', () => { + // Arrange + const usageWithAudioInput = { + prompt_tokens: 100, + prompt_tokens_details: { + audio_tokens: 20, + }, + completion_tokens: 50, + total_tokens: 150, + } as OpenAI.Completions.CompletionUsage; + + // Act + const result = convertUsage(usageWithAudioInput); + + // Assert + expect(result).toEqual({ + inputTextTokens: 100, + inputAudioTokens: 20, + totalInputTokens: 100, + totalOutputTokens: 50, + outputTextTokens: 50, + totalTokens: 150, + }); + }); + + it('should handle detailed output tokens correctly', () => { + // Arrange + const usageWithOutputDetails = { + prompt_tokens: 100, + completion_tokens: 100, + completion_tokens_details: { + reasoning_tokens: 30, + audio_tokens: 20, + }, + total_tokens: 200, + } as OpenAI.Completions.CompletionUsage; + + // 
Act + const result = convertUsage(usageWithOutputDetails); + + // Assert + expect(result).toEqual({ + inputTextTokens: 100, + totalInputTokens: 100, + totalOutputTokens: 100, + outputReasoningTokens: 30, + outputAudioTokens: 20, + outputTextTokens: 50, // 100 - 30 - 20 + totalTokens: 200, + }); + }); + + it('should handle prediction tokens correctly', () => { + // Arrange + const usageWithPredictions = { + prompt_tokens: 100, + completion_tokens: 80, + completion_tokens_details: { + accepted_prediction_tokens: 30, + rejected_prediction_tokens: 10, + }, + total_tokens: 180, + } as OpenAI.Completions.CompletionUsage; + + // Act + const result = convertUsage(usageWithPredictions); + + // Assert + expect(result).toEqual({ + inputTextTokens: 100, + totalInputTokens: 100, + totalOutputTokens: 80, + outputTextTokens: 80, + acceptedPredictionTokens: 30, + rejectedPredictionTokens: 10, + totalTokens: 180, + }); + }); + + it('should handle complex usage with all fields correctly', () => { + // Arrange + const complexUsage = { + prompt_tokens: 150, + prompt_tokens_details: { + audio_tokens: 50, + cached_tokens: 40, + }, + citation_tokens: 30, + completion_tokens: 120, + completion_tokens_details: { + reasoning_tokens: 40, + audio_tokens: 30, + accepted_prediction_tokens: 20, + rejected_prediction_tokens: 5, + }, + total_tokens: 300, + } as OpenAI.Completions.CompletionUsage; + + // Act + const result = convertUsage(complexUsage); + + // Assert + expect(result).toEqual({ + inputTextTokens: 150, + inputAudioTokens: 50, + inputCachedTokens: 40, + inputCacheMissTokens: 140, // 180 - 40 (totalInputTokens - cachedTokens) + inputCitationTokens: 30, + totalInputTokens: 180, // 150 + 30 + outputTextTokens: 50, // 120 - 40 - 30 + outputReasoningTokens: 40, + outputAudioTokens: 30, + totalOutputTokens: 120, + acceptedPredictionTokens: 20, + rejectedPredictionTokens: 5, + totalTokens: 330, // 300 + 30 (citation_tokens) + }); + }); + + it('should omit zero or undefined values in the final output', () => { + // Arrange + const usageWithZeros = { + prompt_tokens: 100, + completion_tokens: 50, + total_tokens: 150, + completion_tokens_details: { + reasoning_tokens: 0, + audio_tokens: undefined, + }, + } as OpenAI.Completions.CompletionUsage; + + // Act + const result = convertUsage(usageWithZeros); + + // Assert + expect(result).toEqual({ + inputTextTokens: 100, + totalInputTokens: 100, + totalOutputTokens: 50, + outputTextTokens: 50, + totalTokens: 150, + }); + + // These should not be present in the result + expect(result).not.toHaveProperty('outputReasoningTokens'); + expect(result).not.toHaveProperty('outputAudioTokens'); + }); +}); diff --git a/src/libs/agent-runtime/utils/usageConverter.ts b/src/libs/agent-runtime/utils/usageConverter.ts index e234797d0ab8f..8badc9bdfbee1 100644 --- a/src/libs/agent-runtime/utils/usageConverter.ts +++ b/src/libs/agent-runtime/utils/usageConverter.ts @@ -4,27 +4,38 @@ import { ModelTokensUsage } from '@/types/message'; export const convertUsage = (usage: OpenAI.Completions.CompletionUsage): ModelTokensUsage => { // 目前只有 pplx 才有 citation_tokens + const inputTextTokens = usage.prompt_tokens || 0; const inputCitationTokens = (usage as any).citation_tokens || 0; + const totalInputTokens = inputCitationTokens + inputTextTokens; - const totalInputTokens = inputCitationTokens + usage.prompt_tokens; + const cachedTokens = + (usage as any).prompt_cache_hit_tokens || usage.prompt_tokens_details?.cached_tokens; + + const inputCacheMissTokens = + (usage as any).prompt_cache_miss_tokens || 
totalInputTokens - cachedTokens; + + const totalOutputTokens = usage.completion_tokens; + const outputReasoning = usage.completion_tokens_details?.reasoning_tokens || 0; + const outputAudioTokens = usage.completion_tokens_details?.audio_tokens || 0; + const outputTextTokens = totalOutputTokens - outputReasoning - outputAudioTokens; const totalTokens = inputCitationTokens + usage.total_tokens; const data = { acceptedPredictionTokens: usage.completion_tokens_details?.accepted_prediction_tokens, - cachedTokens: - (usage as any).prompt_cache_hit_tokens || usage.prompt_tokens_details?.cached_tokens, inputAudioTokens: usage.prompt_tokens_details?.audio_tokens, - inputCacheMissTokens: (usage as any).prompt_cache_miss_tokens, + inputCacheMissTokens: inputCacheMissTokens, + inputCachedTokens: cachedTokens, inputCitationTokens: inputCitationTokens, - inputTextTokens: usage.prompt_tokens, - outputAudioTokens: usage.completion_tokens_details?.audio_tokens, - outputTokens: usage.completion_tokens, - reasoningTokens: usage.completion_tokens_details?.rejected_prediction_tokens, + inputTextTokens: inputTextTokens, + outputAudioTokens: outputAudioTokens, + outputReasoningTokens: outputReasoning, + outputTextTokens: outputTextTokens, rejectedPredictionTokens: usage.completion_tokens_details?.rejected_prediction_tokens, totalInputTokens, + totalOutputTokens: totalOutputTokens, totalTokens, - }; + } satisfies ModelTokensUsage; const finalData = {}; diff --git a/src/types/message/base.ts b/src/types/message/base.ts index 8e83694328fac..fde4b2166e2e5 100644 --- a/src/types/message/base.ts +++ b/src/types/message/base.ts @@ -15,9 +15,9 @@ export interface ModelReasoning { export interface ModelTokensUsage { acceptedPredictionTokens?: number; - cachedTokens?: number; inputAudioTokens?: number; inputCacheMissTokens?: number; + inputCachedTokens?: number; /** * currently only pplx has citation_tokens */ @@ -26,11 +26,13 @@ export interface ModelTokensUsage { * user prompt input */ inputTextTokens?: number; + inputWriteCacheTokens?: number; outputAudioTokens?: number; - outputTokens?: number; - reasoningTokens?: number; + outputReasoningTokens?: number; + outputTextTokens?: number; rejectedPredictionTokens?: number; totalInputTokens?: number; + totalOutputTokens?: number; totalTokens?: number; }
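
Reviewer note: the renamed fields encode two accounting identities used throughout this patch
(total input = cache miss + cache read + cache write; grand total = total input + total output).
The following is a minimal standalone TypeScript sketch, not part of the patch itself, that
replays the figures from the Anthropic `should handle prompts context caching` test above to
check those identities:

import { ModelTokensUsage } from '@/types/message';

// Figures taken from the context-caching test fixture above.
const usage: ModelTokensUsage = {
  inputCacheMissTokens: 6, // Anthropic `input_tokens` (the uncached portion)
  inputCachedTokens: 17_918, // `cache_read_input_tokens`
  inputWriteCacheTokens: 457, // `cache_creation_input_tokens`
  totalInputTokens: 18_381, // 6 + 17918 + 457
  totalOutputTokens: 3224, // 2 (message_start) + 3222 (message_delta)
  totalTokens: 21_605, // 18381 + 3224
};

// totalInputTokens = cache miss + cache read + cache write
console.assert(
  usage.totalInputTokens ===
    usage.inputCacheMissTokens! + usage.inputCachedTokens! + usage.inputWriteCacheTokens!,
);

// totalTokens = totalInputTokens + totalOutputTokens
console.assert(usage.totalTokens === usage.totalInputTokens! + usage.totalOutputTokens!);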