Skip to content

Commit

Permalink
support context caching
Browse files Browse the repository at this point in the history
fix tests
  • Loading branch information
arvinxx committed Mar 6, 2025
1 parent 9f09952 commit 3e8cd2e
Show file tree
Hide file tree
Showing 5 changed files with 109 additions and 13 deletions.
36 changes: 31 additions & 5 deletions src/libs/agent-runtime/anthropic/index.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,12 @@ describe('LobeAnthropicAI', () => {
expect(instance['client'].messages.create).toHaveBeenCalledWith(
{
max_tokens: 4096,
messages: [{ content: 'Hello', role: 'user' }],
messages: [
{
content: [{ cache_control: { type: 'ephemeral' }, text: 'Hello', type: 'text' }],
role: 'user',
},
],
model: 'claude-3-haiku-20240307',
stream: true,
temperature: 0,
Expand Down Expand Up @@ -117,10 +122,21 @@ describe('LobeAnthropicAI', () => {
expect(instance['client'].messages.create).toHaveBeenCalledWith(
{
max_tokens: 4096,
messages: [{ content: 'Hello', role: 'user' }],
messages: [
{
content: [{ cache_control: { type: 'ephemeral' }, text: 'Hello', type: 'text' }],
role: 'user',
},
],
model: 'claude-3-haiku-20240307',
stream: true,
system: 'You are an awesome greeter',
system: [
{
cache_control: { type: 'ephemeral' },
type: 'text',
text: 'You are an awesome greeter',
},
],
temperature: 0,
},
{},
Expand Down Expand Up @@ -152,7 +168,12 @@ describe('LobeAnthropicAI', () => {
expect(instance['client'].messages.create).toHaveBeenCalledWith(
{
max_tokens: 2048,
messages: [{ content: 'Hello', role: 'user' }],
messages: [
{
content: [{ cache_control: { type: 'ephemeral' }, text: 'Hello', type: 'text' }],
role: 'user',
},
],
model: 'claude-3-haiku-20240307',
stream: true,
temperature: 0.25,
Expand Down Expand Up @@ -189,7 +210,12 @@ describe('LobeAnthropicAI', () => {
expect(instance['client'].messages.create).toHaveBeenCalledWith(
{
max_tokens: 2048,
messages: [{ content: 'Hello', role: 'user' }],
messages: [
{
content: [{ cache_control: { type: 'ephemeral' }, text: 'Hello', type: 'text' }],
role: 'user',
},
],
model: 'claude-3-haiku-20240307',
stream: true,
temperature: 0.25,
Expand Down
38 changes: 30 additions & 8 deletions src/libs/agent-runtime/anthropic/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -97,10 +97,33 @@ export class LobeAnthropicAI implements LobeRuntimeAI {
}

private async buildAnthropicPayload(payload: ChatStreamPayload) {
const { messages, model, max_tokens, temperature, top_p, tools, thinking } = payload;
const {
messages,
model,
max_tokens,
temperature,
top_p,
tools,
thinking,
enabledContextCaching = true,
} = payload;
const system_message = messages.find((m) => m.role === 'system');
const user_messages = messages.filter((m) => m.role !== 'system');

const systemPrompts = !!system_message?.content
? ([
{
cache_control: enabledContextCaching ? { type: 'ephemeral' } : undefined,
text: system_message?.content as string,
type: 'text',
},
] as Anthropic.TextBlockParam[])
: undefined;

const postMessages = await buildAnthropicMessages(user_messages, { enabledContextCaching });

const postTools = buildAnthropicTools(tools);

if (!!thinking) {
const maxTokens =
max_tokens ?? (thinking?.budget_tokens ? thinking?.budget_tokens + 4096 : 4096);
Expand All @@ -109,22 +132,21 @@ export class LobeAnthropicAI implements LobeRuntimeAI {
// `top_p` must be unset when thinking is enabled.
return {
max_tokens: maxTokens,
messages: await buildAnthropicMessages(user_messages),
messages: postMessages,
model,
system: system_message?.content as string,

system: systemPrompts,
thinking,
tools: buildAnthropicTools(tools),
tools: postTools,
} satisfies Anthropic.MessageCreateParams;
}

return {
max_tokens: max_tokens ?? 4096,
messages: await buildAnthropicMessages(user_messages),
messages: postMessages,
model,
system: system_message?.content as string,
system: systemPrompts,
temperature: payload.temperature !== undefined ? temperature / 2 : undefined,
tools: buildAnthropicTools(tools),
tools: postTools,
top_p,
} satisfies Anthropic.MessageCreateParams;
}
Expand Down
4 changes: 4 additions & 0 deletions src/libs/agent-runtime/types/chat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ export interface OpenAIChatMessage {
* @title Chat Stream Payload
*/
export interface ChatStreamPayload {
/**
* 开启上下文缓存
*/
enabledContextCaching?: boolean;
/**
* 是否开启搜索
*/
Expand Down
20 changes: 20 additions & 0 deletions src/libs/agent-runtime/utils/anthropicHelpers.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -619,6 +619,26 @@ describe('anthropicHelpers', () => {
{ content: '继续', role: 'user' },
]);
});

// With `enabledContextCaching` switched on, this case expects only the final
// message of the conversation to be rewritten into a text block carrying an
// ephemeral `cache_control` marker; the earlier messages stay as plain strings.
it('should enable cache control', async () => {
  const conversation: OpenAIChatMessage[] = [
    { content: 'Hello', role: 'user' },
    { content: 'Hello', role: 'user' },
    { content: 'Hi', role: 'assistant' },
  ];

  // Expected shape of the last message after the cache marker has been attached.
  const cachedTail = {
    content: [{ cache_control: { type: 'ephemeral' }, text: 'Hi', type: 'text' }],
    role: 'assistant',
  };

  const built = await buildAnthropicMessages(conversation, { enabledContextCaching: true });

  // All three input messages must be present in the output.
  expect(built).toHaveLength(3);
  expect(built).toEqual([
    { content: 'Hello', role: 'user' },
    { content: 'Hello', role: 'user' },
    cachedTail,
  ]);
});
});

describe('buildAnthropicTools', () => {
Expand Down
24 changes: 24 additions & 0 deletions src/libs/agent-runtime/utils/anthropicHelpers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ export const buildAnthropicMessage = async (

export const buildAnthropicMessages = async (
oaiMessages: OpenAIChatMessage[],
options: { enabledContextCaching?: boolean } = {},
): Promise<Anthropic.Messages.MessageParam[]> => {
const messages: Anthropic.Messages.MessageParam[] = [];
let pendingToolResults: Anthropic.ToolResultBlockParam[] = [];
Expand Down Expand Up @@ -180,8 +181,31 @@ export const buildAnthropicMessages = async (
}
}

const lastMessage = messages.at(-1);
if (options.enabledContextCaching && !!lastMessage) {
if (typeof lastMessage.content === 'string') {
lastMessage.content = [
{
cache_control: { type: 'ephemeral' },
text: lastMessage.content as string,
type: 'text',
},
];
} else {
const lastContent = lastMessage.content.at(-1);

if (
lastContent &&
lastContent.type !== 'thinking' &&
lastContent.type !== 'redacted_thinking'
) {
lastContent.cache_control = { type: 'ephemeral' };
}
}
}
return messages;
};

export const buildAnthropicTools = (tools?: OpenAI.ChatCompletionTool[]) =>
tools?.map(
(tool): Anthropic.Tool => ({
Expand Down

0 comments on commit 3e8cd2e

Please sign in to comment.