From 60d96dad9ab6dc9453209574f456706471851b8b Mon Sep 17 00:00:00 2001
From: arvinxx
Date: Fri, 7 Mar 2025 01:52:21 +0800
Subject: [PATCH] refactor token calc

---
 .../Extras/Usage/UsageDetail/index.tsx        |  16 +-
 .../Extras/Usage/UsageDetail/tokens.test.ts   | 253 ++++++++++++++++++
 .../Extras/Usage/UsageDetail/tokens.ts        |  90 ++++---
 .../agent-runtime/perplexity/index.test.ts    |   2 +-
 .../utils/streams/anthropic.test.ts           |  94 ++++++-
 .../agent-runtime/utils/streams/anthropic.ts  |  30 ++-
 .../utils/streams/openai.test.ts              |  14 +-
 .../utils/usageConverter.test.ts              | 249 +++++++++++++++++
 .../agent-runtime/utils/usageConverter.ts     |  29 +-
 src/types/message/base.ts                     |   8 +-
 10 files changed, 705 insertions(+), 80 deletions(-)
 create mode 100644 src/features/Conversation/Extras/Usage/UsageDetail/tokens.test.ts
 create mode 100644 src/libs/agent-runtime/utils/usageConverter.test.ts

diff --git a/src/features/Conversation/Extras/Usage/UsageDetail/index.tsx b/src/features/Conversation/Extras/Usage/UsageDetail/index.tsx
index 0f1aa438190b5..5837546490d49 100644
--- a/src/features/Conversation/Extras/Usage/UsageDetail/index.tsx
+++ b/src/features/Conversation/Extras/Usage/UsageDetail/index.tsx
@@ -52,11 +52,13 @@ const TokenDetail = memo(({ usage, model, provider }) => {
   ].filter(Boolean) as TokenProgressItem[];
 
   const outputDetails = [
-    !!detailTokens.reasoning && {
+    !!detailTokens.outputReasoning && {
       color: theme.pink,
       id: 'reasoning',
       title: t('messages.tokenDetails.reasoning'),
-      value: isShowCredit ? detailTokens.reasoning.credit : detailTokens.reasoning.token,
+      value: isShowCredit
+        ? detailTokens.outputReasoning.credit
+        : detailTokens.outputReasoning.token,
     },
     !!detailTokens.outputAudio && {
       color: theme.cyan9,
@@ -73,18 +75,18 @@ const TokenDetail = memo(({ usage, model, provider }) => {
   ].filter(Boolean) as TokenProgressItem[];
 
   const totalDetail = [
-    !!detailTokens.uncachedInput && {
+    !!detailTokens.inputCacheMiss && {
       color: theme.colorFill,
       id: 'uncachedInput',
       title: t('messages.tokenDetails.inputUncached'),
-      value: isShowCredit ? detailTokens.uncachedInput.credit : detailTokens.uncachedInput.token,
+      value: isShowCredit ? detailTokens.inputCacheMiss.credit : detailTokens.inputCacheMiss.token,
     },
-    !!detailTokens.cachedInput && {
+    !!detailTokens.inputCached && {
       color: theme.orange,
-      id: 'cachedInput',
+      id: 'inputCached',
       title: t('messages.tokenDetails.inputCached'),
-      value: isShowCredit ? detailTokens.cachedInput.credit : detailTokens.cachedInput.token,
+      value: isShowCredit ? detailTokens.inputCached.credit : detailTokens.inputCached.token,
     },
     !!detailTokens.totalOutput && {
       color: theme.colorSuccess,
diff --git a/src/features/Conversation/Extras/Usage/UsageDetail/tokens.test.ts b/src/features/Conversation/Extras/Usage/UsageDetail/tokens.test.ts
new file mode 100644
index 0000000000000..68dffe4fe8874
--- /dev/null
+++ b/src/features/Conversation/Extras/Usage/UsageDetail/tokens.test.ts
@@ -0,0 +1,253 @@
+import { describe, expect, it } from 'vitest';
+
+import { LobeDefaultAiModelListItem } from '@/types/aiModel';
+import { ModelTokensUsage } from '@/types/message';
+
+import { getDetailsToken } from './tokens';
+
+describe('getDetailsToken', () => {
+  // Basic test fixture
+  const mockModelCard: LobeDefaultAiModelListItem = {
+    pricing: {
+      input: 0.01,
+      output: 0.02,
+      cachedInput: 0.005,
+      audioInput: 0.03,
+      audioOutput: 0.04,
+    },
+  } as LobeDefaultAiModelListItem;
+
+  it('should return empty object when usage is empty', () => {
+    const usage: ModelTokensUsage = {};
+    const result = getDetailsToken(usage);
+
+    expect(result).toEqual({
+      inputAudio: undefined,
+      inputCacheMiss: undefined,
+      inputCached: undefined,
+      inputCitation: undefined,
+      inputText: undefined,
+      outputAudio: undefined,
+      outputReasoning: undefined,
+      outputText: undefined,
+      totalInput: undefined,
+      totalOutput: undefined,
+      totalTokens: undefined,
+    });
+  });
+
+  it('should handle inputTextTokens correctly', () => {
+    const usage: ModelTokensUsage = {
+      inputTextTokens: 100,
+    };
+
+    const result = getDetailsToken(usage, mockModelCard);
+
+    expect(result.inputText).toEqual({
+      credit: 1, // 100 * 0.01 = 1
+      token: 100,
+    });
+  });
+
+  it('should handle legacy inputTokens property', () => {
+    const usage = {
+      inputTokens: 100,
+    } as any;
+
+    const result = getDetailsToken(usage, mockModelCard);
+
+    expect(result.inputText).toEqual({
+      credit: 1, // 100 * 0.01 = 1
+      token: 100,
+    });
+  });
+
+  it('should handle cachedTokens correctly', () => {
+    const usage: ModelTokensUsage = {
+      totalInputTokens: 200,
+      inputCachedTokens: 50,
+    };
+
+    const result = getDetailsToken(usage, mockModelCard);
+
+    expect(result.inputCached).toEqual({
+      credit: 0, // 50 * 0.005 = 0.25, rounded to 0
+      token: 50,
+    });
+
+    expect(result.inputCacheMiss).toEqual({
+      credit: 2, // (200 - 50) * 0.01 = 1.5, rounded to 2
+      token: 150,
+    });
+  });
+
+  it('should handle outputTokens correctly', () => {
+    const usage = { outputTokens: 150 } as ModelTokensUsage;
+
+    const result = getDetailsToken(usage, mockModelCard);
+
+    expect(result.outputText).toEqual({
+      credit: 3, // 150 * 0.02 = 3
+      token: 150,
+    });
+
+    expect(result.totalOutput).toEqual({
+      credit: 3,
+      token: 150,
+    });
+  });
+
+  it('should handle reasoningTokens correctly', () => {
+    const usage = {
+      outputTokens: 200,
+      reasoningTokens: 50,
+    } as ModelTokensUsage;
+
+    const result = getDetailsToken(usage, mockModelCard);
+
+    expect(result.outputReasoning).toEqual({
+      credit: 1, // 50 * 0.02 = 1
+      token: 50,
+    });
+
+    expect(result.outputText).toEqual({
+      credit: 3, // (200 - 50) * 0.02 = 3
+      token: 150,
+    });
+  });
+
+  it('should handle audio tokens correctly', () => {
+    const usage = {
+      inputAudioTokens: 100,
+      outputAudioTokens: 50,
+      outputTokens: 150,
+    } as ModelTokensUsage;
+
+    const result = getDetailsToken(usage, mockModelCard);
+
+    expect(result.inputAudio).toEqual({
+      credit: 3, // 100 * 0.03 = 3
+      token: 100,
+    });
+
+    expect(result.outputAudio).toEqual({
+      credit: 2, // 50 * 0.04 = 2
+      id: 'outputAudio',
+      token: 50,
+    });
+
+    expect(result.outputText).toEqual({
+      credit: 2, // (150 - 50) * 0.02 = 2
+      token: 100,
+    });
+  });
+
+  it('should handle inputCitationTokens correctly', () => {
+    const usage: ModelTokensUsage = {
+      inputCitationTokens: 75,
+    };
+
+    const result = getDetailsToken(usage, mockModelCard);
+
+    expect(result.inputCitation).toEqual({
+      credit: 1, // 75 * 0.01 = 0.75, rounded to 1
+      token: 75,
+    });
+  });
+
+  it('should handle totalTokens correctly', () => {
+    const usage = {
+      totalTokens: 500,
+      totalInputTokens: 200,
+      inputCachedTokens: 50,
+      outputTokens: 300,
+    } as ModelTokensUsage;
+
+    const result = getDetailsToken(usage, mockModelCard);
+
+    // inputCacheMiss: (200 - 50) * 0.01 = 1.5 -> 2
+    // inputCached: 50 * 0.005 = 0.25 -> 0
+    // totalOutput: 300 * 0.02 = 6
+    // totalCredit = 2 + 0 + 6 = 8
+
+    expect(result.totalTokens).toEqual({
+      credit: 8,
+      token: 500,
+    });
+  });
+
+  it('should handle missing pricing information', () => {
+    const usage = { inputTextTokens: 100, outputTokens: 200 } as ModelTokensUsage;
+
+    const result = getDetailsToken(usage);
+
+    expect(result.inputText).toEqual({
+      credit: '-',
+      token: 100,
+    });
+
+    expect(result.outputText).toEqual({
+      credit: '-',
+      token: 200,
+    });
+  });
+
+  it('should handle complex scenario with all token types', () => {
+    const usage: ModelTokensUsage = {
+      totalTokens: 1000,
+      totalInputTokens: 400,
+      inputTextTokens: 300,
+      inputAudioTokens: 50,
+      inputCitationTokens: 50,
+      inputCachedTokens: 100,
+      totalOutputTokens: 600,
+      outputAudioTokens: 100,
+      outputReasoningTokens: 200,
+    };
+
+    const result = getDetailsToken(usage, mockModelCard);
+
+    expect(result).toMatchObject({
+      inputCached: {
+        credit: 1, // 100 * 0.005 = 0.5, rounded to 1
+        token: 100,
+      },
+      inputCacheMiss: {
+        credit: 3, // (400 - 100) * 0.01 = 3
+        token: 300,
+      },
+      inputText: {
+        credit: 3, // 300 * 0.01 = 3
+        token: 300,
+      },
+      inputAudio: {
+        credit: 2, // 50 * 0.03 = 1.5, rounded to 2
+        token: 50,
+      },
+      inputCitation: {
+        credit: 1, // 50 * 0.01 = 0.5, rounded to 1
+        token: 50,
+      },
+      outputAudio: {
+        credit: 4, // 100 * 0.04 = 4
+        id: 'outputAudio',
+        token: 100,
+      },
+      outputReasoning: {
+        credit: 4, // 200 * 0.02 = 4
+        token: 200,
+      },
+      outputText: {
+        credit: 6, // (600 - 200 - 100) * 0.02 = 6
+        token: 300,
+      },
+      totalOutput: {
+        credit: 12, // 600 * 0.02 = 12
+        token: 600,
+      },
+      totalTokens: {
+        credit: 16, // 3 + 1 + 12 = 16
+        token: 1000,
+      },
+    });
+  });
+});
diff --git a/src/features/Conversation/Extras/Usage/UsageDetail/tokens.ts b/src/features/Conversation/Extras/Usage/UsageDetail/tokens.ts
index cc92de6124182..89ad976139772 100644
--- a/src/features/Conversation/Extras/Usage/UsageDetail/tokens.ts
+++ b/src/features/Conversation/Extras/Usage/UsageDetail/tokens.ts
@@ -11,51 +11,69 @@ export const getDetailsToken = (
   usage: ModelTokensUsage,
   modelCard?: LobeDefaultAiModelListItem,
 ) => {
-  const inputTextToken = usage.inputTextTokens || (usage as any).inputTokens || 0;
+  const inputTextTokens = usage.inputTextTokens || (usage as any).inputTokens || 0;
+  const totalInputTokens = usage.totalInputTokens || (usage as any).inputTokens || 0;
 
-  const totalInputToken = usage.totalInputTokens || (usage as any).inputTokens;
+  const totalOutputTokens = usage.totalOutputTokens || (usage as any).outputTokens || 0;
 
-  const uncachedInputCredit = (
-    !!totalInputToken
-      ? calcCredit(totalInputToken - (usage.cachedTokens || 0), modelCard?.pricing?.input)
-      : 0
+  const outputReasoningTokens = usage.outputReasoningTokens || (usage as any).reasoningTokens || 0;
+
+  const outputTextTokens = usage.outputTextTokens
+    ? usage.outputTextTokens
+    : totalOutputTokens - outputReasoningTokens - (usage.outputAudioTokens || 0);
+
+  const inputCacheMissTokens = usage?.inputCacheMissTokens
+    ? usage?.inputCacheMissTokens
+    : totalInputTokens - (usage.inputCachedTokens || 0);
+
+  const inputCacheMissCredit = (
+    !!inputCacheMissTokens ? calcCredit(inputCacheMissTokens, modelCard?.pricing?.input) : 0
   ) as number;
 
-  const cachedInputCredit = (
-    !!usage.cachedTokens ? calcCredit(usage.cachedTokens, modelCard?.pricing?.cachedInput) : 0
+  const inputCachedCredit = (
+    !!usage.inputCachedTokens
+      ? calcCredit(usage.inputCachedTokens, modelCard?.pricing?.cachedInput)
+      : 0
   ) as number;
 
-  const totalOutput = (
-    !!usage.outputTokens ? calcCredit(usage.outputTokens, modelCard?.pricing?.output) : 0
+  const totalOutputCredit = (
+    !!totalOutputTokens ? calcCredit(totalOutputTokens, modelCard?.pricing?.output) : 0
+  ) as number;
+  const totalInputCredit = (
+    !!totalInputTokens ? calcCredit(totalInputTokens, modelCard?.pricing?.input) : 0
   ) as number;
 
-  const totalCredit = uncachedInputCredit + cachedInputCredit + totalOutput;
+  const totalCredit = inputCacheMissCredit + inputCachedCredit + totalOutputCredit;
 
   return {
-    cachedInput: !!usage.cachedTokens
-      ? {
-          credit: cachedInputCredit,
-          token: usage.cachedTokens,
-        }
-      : undefined,
     inputAudio: !!usage.inputAudioTokens
       ? {
          credit: calcCredit(usage.inputAudioTokens, modelCard?.pricing?.audioInput),
          token: usage.inputAudioTokens,
        }
      : undefined,
+    inputCacheMiss: !!inputCacheMissTokens
+      ? { credit: inputCacheMissCredit, token: inputCacheMissTokens }
+      : undefined,
+    inputCached: !!usage.inputCachedTokens
+      ? {
+          credit: inputCachedCredit,
+          token: usage.inputCachedTokens,
+        }
+      : undefined,
     inputCitation: !!usage.inputCitationTokens
       ? {
          credit: calcCredit(usage.inputCitationTokens, modelCard?.pricing?.input),
          token: usage.inputCitationTokens,
        }
      : undefined,
-    inputText: !!inputTextToken
+    inputText: !!inputTextTokens
       ? {
-          credit: calcCredit(inputTextToken, modelCard?.pricing?.input),
-          token: inputTextToken,
+          credit: calcCredit(inputTextTokens, modelCard?.pricing?.input),
+          token: inputTextTokens,
         }
       : undefined,
+
     outputAudio: !!usage.outputAudioTokens
       ? {
          credit: calcCredit(usage.outputAudioTokens, modelCard?.pricing?.audioOutput),
          id: 'outputAudio',
          token: usage.outputAudioTokens,
        }
      : undefined,
-
-    outputText: !!usage.outputTokens
+    outputReasoning: !!outputReasoningTokens
       ? {
-          credit: calcCredit(
-            usage.outputTokens - (usage.reasoningTokens || 0) - (usage.outputAudioTokens || 0),
-            modelCard?.pricing?.output,
-          ),
-          token: usage.outputTokens - (usage.reasoningTokens || 0) - (usage.outputAudioTokens || 0),
+          credit: calcCredit(outputReasoningTokens, modelCard?.pricing?.output),
+          token: outputReasoningTokens,
         }
       : undefined,
-    reasoning: !!usage.reasoningTokens
+    outputText: !!outputTextTokens
       ? {
-          credit: calcCredit(usage.reasoningTokens, modelCard?.pricing?.output),
-          token: usage.reasoningTokens,
+          credit: calcCredit(outputTextTokens, modelCard?.pricing?.output),
+          token: outputTextTokens,
         }
       : undefined,
-    totalOutput: !!usage.outputTokens
-      ? {
-          credit: totalOutput,
-          token: usage.outputTokens,
-        }
+    totalInput: !!totalInputTokens
+      ? { credit: totalInputCredit, token: totalInputTokens }
+      : undefined,
+    totalOutput: !!totalOutputTokens
+      ? { credit: totalOutputCredit, token: totalOutputTokens }
       : undefined,
     totalTokens: !!usage.totalTokens
      ? 
{ credit: totalCredit, token: usage.totalTokens } : undefined, - uncachedInput: !!totalInputToken - ? { - credit: uncachedInputCredit, - token: totalInputToken - (usage.cachedTokens || 0), - } - : undefined, }; }; diff --git a/src/libs/agent-runtime/perplexity/index.test.ts b/src/libs/agent-runtime/perplexity/index.test.ts index c8a186e267dde..814ddd549bd68 100644 --- a/src/libs/agent-runtime/perplexity/index.test.ts +++ b/src/libs/agent-runtime/perplexity/index.test.ts @@ -241,7 +241,7 @@ describe('LobePerplexityAI', () => { 'data: "天和未来几天的"\n', 'id: 506d64fb-e7f2-4d94-b80f-158369e9446d', 'event: usage', - 'data: {"inputCitationTokens":3058,"inputTextTokens":2,"outputTokens":685,"totalInputTokens":3060,"totalTokens":3745}\n', + 'data: {"inputCitationTokens":3058,"inputTextTokens":2,"outputTextTokens":685,"totalInputTokens":3060,"totalOutputTokens":685,"totalTokens":3745}\n', ].map((line) => `${line}\n`), ); diff --git a/src/libs/agent-runtime/utils/streams/anthropic.test.ts b/src/libs/agent-runtime/utils/streams/anthropic.test.ts index fd8282d2688a1..4e38eab7a5a13 100644 --- a/src/libs/agent-runtime/utils/streams/anthropic.test.ts +++ b/src/libs/agent-runtime/utils/streams/anthropic.test.ts @@ -225,7 +225,7 @@ describe('AnthropicStream', () => { 'id: msg_017aTuY86wNxth5TE544yqJq', 'event: usage', - 'data: {"inputTokens":457,"outputTokens":84,"totalInputTokens":457,"totalTokens":541}\n', + 'data: {"inputCacheMissTokens":457,"totalInputTokens":457,"totalOutputTokens":84,"totalTokens":541}\n', ].map((item) => `${item}\n`), ); @@ -381,8 +381,7 @@ describe('AnthropicStream', () => { 'id: msg_0175ryA67RbGrnRrGBXFQEYK', 'event: usage', - 'data: {"inputTokens":485,"outputTokens":154,"totalInputTokens":485,"totalTokens":639}\n', - + 'data: {"inputCacheMissTokens":485,"totalInputTokens":485,"totalOutputTokens":154,"totalTokens":639}\n', 'id: msg_0175ryA67RbGrnRrGBXFQEYK', 'event: stop', 'data: "message_stop"\n', @@ -392,6 +391,91 @@ describe('AnthropicStream', () => { expect(onToolCallMock).toHaveBeenCalledTimes(6); }); + it('should handle prompts context caching', async () => { + const streams = [ + { + type: 'message_start', + message: { + id: 'msg_01Vxc4yQTEjkDSba3N3BMbH8', + type: 'message', + role: 'assistant', + model: 'claude-3-7-sonnet-20250219', + content: [], + stop_reason: null, + stop_sequence: null, + usage: { + input_tokens: 6, + cache_creation_input_tokens: 457, + cache_read_input_tokens: 17918, + output_tokens: 2, + }, + }, + }, + { type: 'content_block_start', index: 0, content_block: { type: 'text', text: '' } }, + { type: 'content_block_delta', index: 0, delta: { type: 'text_delta', text: '\n\n根' } }, + { + type: 'content_block_delta', + index: 0, + delta: { type: 'text_delta', text: '/\n[^20]: https://s' }, + }, + { type: 'content_block_stop', index: 0 }, + { + type: 'message_delta', + delta: { stop_reason: 'end_turn', stop_sequence: null }, + usage: { output_tokens: 3222 }, + }, + { type: 'message_stop' }, + ]; + + const mockReadableStream = new ReadableStream({ + start(controller) { + streams.forEach((chunk) => { + controller.enqueue(chunk); + }); + controller.close(); + }, + }); + + const protocolStream = AnthropicStream(mockReadableStream); + + const decoder = new TextDecoder(); + const chunks = []; + + // @ts-ignore + for await (const chunk of protocolStream) { + chunks.push(decoder.decode(chunk, { stream: true })); + } + + expect(chunks).toEqual( + [ + 'id: msg_01Vxc4yQTEjkDSba3N3BMbH8', + 'event: data', + 'data: 
{"id":"msg_01Vxc4yQTEjkDSba3N3BMbH8","type":"message","role":"assistant","model":"claude-3-7-sonnet-20250219","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":6,"cache_creation_input_tokens":457,"cache_read_input_tokens":17918,"output_tokens":2}}\n', + 'id: msg_01Vxc4yQTEjkDSba3N3BMbH8', + 'event: data', + 'data: ""\n', + 'id: msg_01Vxc4yQTEjkDSba3N3BMbH8', + 'event: text', + 'data: "\\n\\n根"\n', + 'id: msg_01Vxc4yQTEjkDSba3N3BMbH8', + 'event: text', + 'data: "/\\n[^20]: https://s"\n', + 'id: msg_01Vxc4yQTEjkDSba3N3BMbH8', + 'event: data', + 'data: {"type":"content_block_stop","index":0}\n', + 'id: msg_01Vxc4yQTEjkDSba3N3BMbH8', + 'event: stop', + 'data: "end_turn"\n', + 'id: msg_01Vxc4yQTEjkDSba3N3BMbH8', + 'event: usage', + 'data: {"inputCacheMissTokens":6,"inputCachedTokens":17918,"inputWriteCacheTokens":457,"totalInputTokens":18381,"totalOutputTokens":3224,"totalTokens":21605}\n', + + 'id: msg_01Vxc4yQTEjkDSba3N3BMbH8', + 'event: stop', + 'data: "message_stop"\n', + ].map((item) => `${item}\n`), + ); + }); describe('thinking', () => { it('should handle normal thinking ', async () => { const streams = [ @@ -515,7 +599,7 @@ describe('AnthropicStream', () => { 'data: "end_turn"\n', 'id: msg_01MNsLe7n1uVLtu6W8rCFujD', 'event: usage', - 'data: {"inputTokens":46,"outputTokens":365,"totalInputTokens":46,"totalTokens":411}\n', + 'data: {"inputCacheMissTokens":46,"totalInputTokens":46,"totalOutputTokens":365,"totalTokens":411}\n', 'id: msg_01MNsLe7n1uVLtu6W8rCFujD', 'event: stop', 'data: "message_stop"\n', @@ -675,7 +759,7 @@ describe('AnthropicStream', () => { 'data: "end_turn"\n', 'id: msg_019q32esPvu3TftzZnL6JPys', 'event: usage', - 'data: {"inputTokens":92,"outputTokens":263,"totalInputTokens":92,"totalTokens":355}\n', + 'data: {"inputCacheMissTokens":92,"totalInputTokens":92,"totalOutputTokens":263,"totalTokens":355}\n', 'id: msg_019q32esPvu3TftzZnL6JPys', 'event: stop', 'data: "message_stop"\n', diff --git a/src/libs/agent-runtime/utils/streams/anthropic.ts b/src/libs/agent-runtime/utils/streams/anthropic.ts index 24b0c516a44a9..4e3af5ad9d66f 100644 --- a/src/libs/agent-runtime/utils/streams/anthropic.ts +++ b/src/libs/agent-runtime/utils/streams/anthropic.ts @@ -22,9 +22,24 @@ export const transformAnthropicStream = ( switch (chunk.type) { case 'message_start': { context.id = chunk.message.id; + let totalInputTokens = chunk.message.usage?.input_tokens; + + if ( + chunk.message.usage?.cache_creation_input_tokens || + chunk.message.usage?.cache_read_input_tokens + ) { + totalInputTokens = + chunk.message.usage?.input_tokens + + (chunk.message.usage.cache_creation_input_tokens || 0) + + (chunk.message.usage.cache_read_input_tokens || 0); + } + context.usage = { - outputTokens: chunk.message.usage?.output_tokens, - totalInputTokens: chunk.message.usage?.input_tokens, + inputCacheMissTokens: chunk.message.usage?.input_tokens, + inputCachedTokens: chunk.message.usage?.cache_read_input_tokens || undefined, + inputWriteCacheTokens: chunk.message.usage?.cache_creation_input_tokens || undefined, + totalInputTokens, + totalOutputTokens: chunk.message.usage?.output_tokens, }; return { data: chunk.message, id: chunk.message.id, type: 'data' }; @@ -140,18 +155,19 @@ export const transformAnthropicStream = ( } case 'message_delta': { - const outputTokens = chunk.usage?.output_tokens + (context.usage?.outputTokens || 0); + const totalOutputTokens = + chunk.usage?.output_tokens + (context.usage?.totalOutputTokens || 0); const totalInputTokens = context.usage?.totalInputTokens 
|| 0; - const totalTokens = totalInputTokens + outputTokens; + const totalTokens = totalInputTokens + totalOutputTokens; if (totalTokens > 0) { return [ { data: chunk.delta.stop_reason, id: context.id, type: 'stop' }, { data: { - inputTokens: totalInputTokens, - outputTokens: outputTokens, - totalInputTokens: totalInputTokens, + ...context.usage, + totalInputTokens, + totalOutputTokens, totalTokens, } as ModelTokensUsage, id: context.id, diff --git a/src/libs/agent-runtime/utils/streams/openai.test.ts b/src/libs/agent-runtime/utils/streams/openai.test.ts index 3c64765a4a187..6a7be86e917f8 100644 --- a/src/libs/agent-runtime/utils/streams/openai.test.ts +++ b/src/libs/agent-runtime/utils/streams/openai.test.ts @@ -434,7 +434,7 @@ describe('OpenAIStream', () => { `data: "stop"\n`, 'id: chatcmpl-B7CcnaeK3jqWBMOhxg7SSKFwlk7dC', 'event: usage', - `data: {"inputTextTokens":1646,"outputTokens":11,"totalInputTokens":1646,"totalTokens":1657}\n`, + `data: {"inputCacheMissTokens":1646,"inputTextTokens":1646,"outputTextTokens":11,"totalInputTokens":1646,"totalOutputTokens":11,"totalTokens":1657}\n`, ].map((i) => `${i}\n`), ); }); @@ -536,7 +536,7 @@ describe('OpenAIStream', () => { `data: {"delta":{},"id":"chatcmpl-c1f6a6a6-fcf8-463a-96bf-cf634d3e98a5","index":0}\n`, 'id: chatcmpl-c1f6a6a6-fcf8-463a-96bf-cf634d3e98a5', 'event: usage', - `data: {"inputTextTokens":1797,"outputTokens":1720,"totalInputTokens":1797,"totalTokens":3517}\n`, + `data: {"inputCacheMissTokens":1797,"inputTextTokens":1797,"outputTextTokens":1720,"totalInputTokens":1797,"totalOutputTokens":1720,"totalTokens":3517}\n`, ].map((i) => `${i}\n`), ); }); @@ -944,7 +944,7 @@ describe('OpenAIStream', () => { `data: "帮助。"\n`, 'id: 1', 'event: usage', - `data: {"inputCacheMissTokens":6,"inputTextTokens":6,"outputTokens":104,"totalInputTokens":6,"totalTokens":110}\n`, + `data: {"inputCacheMissTokens":6,"inputTextTokens":6,"outputReasoningTokens":70,"outputTextTokens":34,"totalInputTokens":6,"totalOutputTokens":104,"totalTokens":110}\n`, ].map((i) => `${i}\n`), ); }); @@ -1163,7 +1163,7 @@ describe('OpenAIStream', () => { `data: "帮助。"\n`, 'id: 1', 'event: usage', - `data: {"inputCacheMissTokens":6,"inputTextTokens":6,"outputTokens":104,"totalInputTokens":6,"totalTokens":110}\n`, + `data: {"inputCacheMissTokens":6,"inputTextTokens":6,"outputReasoningTokens":70,"outputTextTokens":34,"totalInputTokens":6,"totalOutputTokens":104,"totalTokens":110}\n`, ].map((i) => `${i}\n`), ); }); @@ -1364,7 +1364,7 @@ describe('OpenAIStream', () => { `data: "帮助。"\n`, 'id: 1', 'event: usage', - `data: {"inputCacheMissTokens":6,"inputTextTokens":6,"outputTokens":104,"totalInputTokens":6,"totalTokens":110}\n`, + `data: {"inputCacheMissTokens":6,"inputTextTokens":6,"outputReasoningTokens":70,"outputTextTokens":34,"totalInputTokens":6,"totalOutputTokens":104,"totalTokens":110}\n`, ].map((i) => `${i}\n`), ); }); @@ -1565,7 +1565,7 @@ describe('OpenAIStream', () => { `data: "帮助。"\n`, 'id: 1', 'event: usage', - `data: {"inputCacheMissTokens":6,"inputTextTokens":6,"outputTokens":104,"totalInputTokens":6,"totalTokens":110}\n`, + `data: {"inputCacheMissTokens":6,"inputTextTokens":6,"outputReasoningTokens":70,"outputTextTokens":34,"totalInputTokens":6,"totalOutputTokens":104,"totalTokens":110}\n`, ].map((i) => `${i}\n`), ); }); @@ -1766,7 +1766,7 @@ describe('OpenAIStream', () => { `data: "帮助。"\n`, 'id: 1', 'event: usage', - `data: {"inputCacheMissTokens":6,"inputTextTokens":6,"outputTokens":104,"totalInputTokens":6,"totalTokens":110}\n`, + `data: 
{"inputCacheMissTokens":6,"inputTextTokens":6,"outputReasoningTokens":70,"outputTextTokens":34,"totalInputTokens":6,"totalOutputTokens":104,"totalTokens":110}\n`, ].map((i) => `${i}\n`), ); }); diff --git a/src/libs/agent-runtime/utils/usageConverter.test.ts b/src/libs/agent-runtime/utils/usageConverter.test.ts new file mode 100644 index 0000000000000..5e55d06b19a4f --- /dev/null +++ b/src/libs/agent-runtime/utils/usageConverter.test.ts @@ -0,0 +1,249 @@ +import OpenAI from 'openai'; +import { describe, expect, it } from 'vitest'; + +import { convertUsage } from './usageConverter'; + +describe('convertUsage', () => { + it('should convert basic OpenAI usage data correctly', () => { + // Arrange + const openaiUsage: OpenAI.Completions.CompletionUsage = { + prompt_tokens: 100, + completion_tokens: 50, + total_tokens: 150, + }; + + // Act + const result = convertUsage(openaiUsage); + + // Assert + expect(result).toEqual({ + inputTextTokens: 100, + totalInputTokens: 100, + totalOutputTokens: 50, + outputTextTokens: 50, + totalTokens: 150, + }); + }); + + it('should handle PPLX citation tokens correctly', () => { + // Arrange + const pplxUsage = { + prompt_tokens: 80, + citation_tokens: 20, + completion_tokens: 50, + total_tokens: 150, + } as OpenAI.Completions.CompletionUsage; + + // Act + const result = convertUsage(pplxUsage); + + // Assert + expect(result).toEqual({ + inputTextTokens: 80, + inputCitationTokens: 20, + totalInputTokens: 100, + totalOutputTokens: 50, + outputTextTokens: 50, + totalTokens: 170, // 150 + 20 (citation tokens) + }); + }); + + it('should handle cached tokens correctly', () => { + // Arrange + const usageWithCache = { + prompt_tokens: 100, + prompt_cache_hit_tokens: 30, + prompt_cache_miss_tokens: 70, + completion_tokens: 50, + total_tokens: 150, + } as OpenAI.Completions.CompletionUsage; + + // Act + const result = convertUsage(usageWithCache); + + // Assert + expect(result).toEqual({ + inputTextTokens: 100, + inputCachedTokens: 30, + inputCacheMissTokens: 70, + totalInputTokens: 100, + totalOutputTokens: 50, + outputTextTokens: 50, + totalTokens: 150, + }); + }); + + it('should handle cached tokens using prompt_tokens_details', () => { + // Arrange + const usageWithTokenDetails = { + prompt_tokens: 100, + prompt_tokens_details: { + cached_tokens: 30, + }, + completion_tokens: 50, + total_tokens: 150, + } as OpenAI.Completions.CompletionUsage; + + // Act + const result = convertUsage(usageWithTokenDetails); + + // Assert + expect(result).toEqual({ + inputTextTokens: 100, + inputCachedTokens: 30, + inputCacheMissTokens: 70, // 100 - 30 + totalInputTokens: 100, + totalOutputTokens: 50, + outputTextTokens: 50, + totalTokens: 150, + }); + }); + + it('should handle audio tokens in input correctly', () => { + // Arrange + const usageWithAudioInput = { + prompt_tokens: 100, + prompt_tokens_details: { + audio_tokens: 20, + }, + completion_tokens: 50, + total_tokens: 150, + } as OpenAI.Completions.CompletionUsage; + + // Act + const result = convertUsage(usageWithAudioInput); + + // Assert + expect(result).toEqual({ + inputTextTokens: 100, + inputAudioTokens: 20, + totalInputTokens: 100, + totalOutputTokens: 50, + outputTextTokens: 50, + totalTokens: 150, + }); + }); + + it('should handle detailed output tokens correctly', () => { + // Arrange + const usageWithOutputDetails = { + prompt_tokens: 100, + completion_tokens: 100, + completion_tokens_details: { + reasoning_tokens: 30, + audio_tokens: 20, + }, + total_tokens: 200, + } as OpenAI.Completions.CompletionUsage; + + // 
Act + const result = convertUsage(usageWithOutputDetails); + + // Assert + expect(result).toEqual({ + inputTextTokens: 100, + totalInputTokens: 100, + totalOutputTokens: 100, + outputReasoningTokens: 30, + outputAudioTokens: 20, + outputTextTokens: 50, // 100 - 30 - 20 + totalTokens: 200, + }); + }); + + it('should handle prediction tokens correctly', () => { + // Arrange + const usageWithPredictions = { + prompt_tokens: 100, + completion_tokens: 80, + completion_tokens_details: { + accepted_prediction_tokens: 30, + rejected_prediction_tokens: 10, + }, + total_tokens: 180, + } as OpenAI.Completions.CompletionUsage; + + // Act + const result = convertUsage(usageWithPredictions); + + // Assert + expect(result).toEqual({ + inputTextTokens: 100, + totalInputTokens: 100, + totalOutputTokens: 80, + outputTextTokens: 80, + acceptedPredictionTokens: 30, + rejectedPredictionTokens: 10, + totalTokens: 180, + }); + }); + + it('should handle complex usage with all fields correctly', () => { + // Arrange + const complexUsage = { + prompt_tokens: 150, + prompt_tokens_details: { + audio_tokens: 50, + cached_tokens: 40, + }, + citation_tokens: 30, + completion_tokens: 120, + completion_tokens_details: { + reasoning_tokens: 40, + audio_tokens: 30, + accepted_prediction_tokens: 20, + rejected_prediction_tokens: 5, + }, + total_tokens: 300, + } as OpenAI.Completions.CompletionUsage; + + // Act + const result = convertUsage(complexUsage); + + // Assert + expect(result).toEqual({ + inputTextTokens: 150, + inputAudioTokens: 50, + inputCachedTokens: 40, + inputCacheMissTokens: 140, // 180 - 40 (totalInputTokens - cachedTokens) + inputCitationTokens: 30, + totalInputTokens: 180, // 150 + 30 + outputTextTokens: 50, // 120 - 40 - 30 + outputReasoningTokens: 40, + outputAudioTokens: 30, + totalOutputTokens: 120, + acceptedPredictionTokens: 20, + rejectedPredictionTokens: 5, + totalTokens: 330, // 300 + 30 (citation_tokens) + }); + }); + + it('should omit zero or undefined values in the final output', () => { + // Arrange + const usageWithZeros = { + prompt_tokens: 100, + completion_tokens: 50, + total_tokens: 150, + completion_tokens_details: { + reasoning_tokens: 0, + audio_tokens: undefined, + }, + } as OpenAI.Completions.CompletionUsage; + + // Act + const result = convertUsage(usageWithZeros); + + // Assert + expect(result).toEqual({ + inputTextTokens: 100, + totalInputTokens: 100, + totalOutputTokens: 50, + outputTextTokens: 50, + totalTokens: 150, + }); + + // These should not be present in the result + expect(result).not.toHaveProperty('outputReasoningTokens'); + expect(result).not.toHaveProperty('outputAudioTokens'); + }); +}); diff --git a/src/libs/agent-runtime/utils/usageConverter.ts b/src/libs/agent-runtime/utils/usageConverter.ts index e234797d0ab8f..8badc9bdfbee1 100644 --- a/src/libs/agent-runtime/utils/usageConverter.ts +++ b/src/libs/agent-runtime/utils/usageConverter.ts @@ -4,27 +4,38 @@ import { ModelTokensUsage } from '@/types/message'; export const convertUsage = (usage: OpenAI.Completions.CompletionUsage): ModelTokensUsage => { // 目前只有 pplx 才有 citation_tokens + const inputTextTokens = usage.prompt_tokens || 0; const inputCitationTokens = (usage as any).citation_tokens || 0; + const totalInputTokens = inputCitationTokens + inputTextTokens; - const totalInputTokens = inputCitationTokens + usage.prompt_tokens; + const cachedTokens = + (usage as any).prompt_cache_hit_tokens || usage.prompt_tokens_details?.cached_tokens; + + const inputCacheMissTokens = + (usage as any).prompt_cache_miss_tokens || 
totalInputTokens - cachedTokens; + + const totalOutputTokens = usage.completion_tokens; + const outputReasoning = usage.completion_tokens_details?.reasoning_tokens || 0; + const outputAudioTokens = usage.completion_tokens_details?.audio_tokens || 0; + const outputTextTokens = totalOutputTokens - outputReasoning - outputAudioTokens; const totalTokens = inputCitationTokens + usage.total_tokens; const data = { acceptedPredictionTokens: usage.completion_tokens_details?.accepted_prediction_tokens, - cachedTokens: - (usage as any).prompt_cache_hit_tokens || usage.prompt_tokens_details?.cached_tokens, inputAudioTokens: usage.prompt_tokens_details?.audio_tokens, - inputCacheMissTokens: (usage as any).prompt_cache_miss_tokens, + inputCacheMissTokens: inputCacheMissTokens, + inputCachedTokens: cachedTokens, inputCitationTokens: inputCitationTokens, - inputTextTokens: usage.prompt_tokens, - outputAudioTokens: usage.completion_tokens_details?.audio_tokens, - outputTokens: usage.completion_tokens, - reasoningTokens: usage.completion_tokens_details?.rejected_prediction_tokens, + inputTextTokens: inputTextTokens, + outputAudioTokens: outputAudioTokens, + outputReasoningTokens: outputReasoning, + outputTextTokens: outputTextTokens, rejectedPredictionTokens: usage.completion_tokens_details?.rejected_prediction_tokens, totalInputTokens, + totalOutputTokens: totalOutputTokens, totalTokens, - }; + } satisfies ModelTokensUsage; const finalData = {}; diff --git a/src/types/message/base.ts b/src/types/message/base.ts index 8e83694328fac..fde4b2166e2e5 100644 --- a/src/types/message/base.ts +++ b/src/types/message/base.ts @@ -15,9 +15,9 @@ export interface ModelReasoning { export interface ModelTokensUsage { acceptedPredictionTokens?: number; - cachedTokens?: number; inputAudioTokens?: number; inputCacheMissTokens?: number; + inputCachedTokens?: number; /** * currently only pplx has citation_tokens */ @@ -26,11 +26,13 @@ export interface ModelTokensUsage { * user prompt input */ inputTextTokens?: number; + inputWriteCacheTokens?: number; outputAudioTokens?: number; - outputTokens?: number; - reasoningTokens?: number; + outputReasoningTokens?: number; + outputTextTokens?: number; rejectedPredictionTokens?: number; totalInputTokens?: number; + totalOutputTokens?: number; totalTokens?: number; }
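
Reviewer note: the renamed fields encode two accounting identities used throughout this patch
(total input = cache miss + cache read + cache write; grand total = total input + total output).
The following is a minimal standalone TypeScript sketch, not part of the patch itself, that
replays the figures from the Anthropic `should handle prompts context caching` test above to
check those identities:

import { ModelTokensUsage } from '@/types/message';

// Figures taken from the context-caching test fixture above.
const usage: ModelTokensUsage = {
  inputCacheMissTokens: 6, // Anthropic `input_tokens` (the uncached portion)
  inputCachedTokens: 17_918, // `cache_read_input_tokens`
  inputWriteCacheTokens: 457, // `cache_creation_input_tokens`
  totalInputTokens: 18_381, // 6 + 17918 + 457
  totalOutputTokens: 3224, // 2 (message_start) + 3222 (message_delta)
  totalTokens: 21_605, // 18381 + 3224
};

// totalInputTokens = cache miss + cache read + cache write
console.assert(
  usage.totalInputTokens ===
    usage.inputCacheMissTokens! + usage.inputCachedTokens! + usage.inputWriteCacheTokens!,
);

// totalTokens = totalInputTokens + totalOutputTokens
console.assert(usage.totalTokens === usage.totalInputTokens! + usage.totalOutputTokens!);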