Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
498 changes: 498 additions & 0 deletions docs/docs/prompts/prompt-creation/cache-control.md

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions docs/docs/prompts/prompt-creation/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -493,3 +493,4 @@ This creates a new prompt that includes all messages from `basePrompt` and the n
- Learn how to work with [multimodal content](multimodal-content.md).
- Run prompts with [LLM clients](../llm-clients.md) if you work with a single LLM provider.
- Run prompts with [prompt executors](../prompt-executors.md) if you work with multiple LLM providers.
- Learn how to use LLM prompt caching with [cache control](cache-control.md).
4 changes: 3 additions & 1 deletion docs/mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ nav:
- prompts/index.md
- Creating prompts:
- prompts/prompt-creation/index.md
- Multimodal content: prompts/prompt-creation/multimodal-content.md
- Multimodal content: prompts/prompt-creation/multimodal-content.md
- Cache control: prompts/prompt-creation/cache-control.md
- Running prompts:
- LLM clients: prompts/llm-clients.md
- Prompt executors: prompts/prompt-executors.md
Expand Down Expand Up @@ -168,6 +169,7 @@ plugins:
- prompts/prompt-creation/multimodal-content.md: This page provides details about multimodal inputs, which allow you to send text, images, audio, video, and documents in your prompts.
- prompts/handling-failures.md: This page provides details about handling failures in prompts.
- prompts/llm-response-caching.md: This page provides details about prompt caching, which lets you cache responses from LLMs to avoid repeated requests.
- prompts/prompt-creation/cache-control.md: This page provides details about provider-side prompt caching control for Anthropic and Amazon Bedrock, including automatic and block-level cache breakpoints and TTL options.
Running prompts:
- prompts/llm-clients.md: This page provides details about LLM clients, which are used to run prompts and receive responses from LLMs. They allow working with a single LLM provider only.
- prompts/prompt-executors.md: This page provides details about prompt executors, which wrap LLM clients and are used to run prompts and receive responses from LLMs. They allow working with multiple LLM providers in a unified way.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
package ai.koog.integration.tests.executor

import ai.koog.agents.core.tools.ToolDescriptor
import ai.koog.agents.core.tools.ToolParameterDescriptor
import ai.koog.agents.core.tools.ToolParameterType
import ai.koog.integration.tests.utils.PromptUtils
import ai.koog.integration.tests.utils.RetryUtils
import ai.koog.integration.tests.utils.annotations.Retry
import ai.koog.integration.tests.utils.getLLMClientForProvider
import ai.koog.prompt.dsl.Prompt
import ai.koog.prompt.executor.clients.InternalLLMClientApi
import ai.koog.prompt.executor.clients.anthropic.AnthropicCacheControl
import ai.koog.prompt.executor.clients.anthropic.AnthropicModels
import ai.koog.prompt.executor.clients.anthropic.AnthropicParams
import ai.koog.prompt.executor.llms.MultiLLMPromptExecutor
import ai.koog.prompt.llm.LLModel
import ai.koog.prompt.message.Message
import io.kotest.assertions.withClue
import io.kotest.matchers.booleans.shouldBeTrue
import io.kotest.matchers.collections.shouldNotBeEmpty
import io.kotest.matchers.nulls.shouldNotBeNull
import kotlinx.coroutines.test.runTest
import kotlinx.serialization.json.JsonObject
import kotlinx.serialization.json.intOrNull
import kotlinx.serialization.json.jsonPrimitive
import org.junit.jupiter.api.Test
import org.junit.jupiter.params.ParameterizedTest
import org.junit.jupiter.params.provider.MethodSource
import java.util.stream.Stream
import kotlin.time.Duration.Companion.seconds

/**
 * Integration tests for Anthropic cache control.
 *
 * Caching requires a minimum prompt length (usually ≥ 1024 tokens).
 * https://platform.claude.com/docs/en/build-with-claude/prompt-caching#cache-limitations
 * Tests use [ai.koog.integration.tests.utils.PromptUtils.assistantPromptOfAtLeastLength] to ensure
 * the prompt is long enough for the API to accept the cache breakpoint.
 */
@OptIn(InternalLLMClientApi::class)
class AnthropicCacheControlIntegrationTest {

    companion object {
        private val model = AnthropicModels.Sonnet_4_5
        private val client = getLLMClientForProvider(model.provider)
        private val executor = MultiLLMPromptExecutor(client)

        /**
         * Asserts that the response metadata shows cache was used (write or read).
         * On the first cached request `cacheCreationInputTokens` > 0.
         * On a subsequent request hitting the same prefix `cacheReadInputTokens` > 0.
         */
        private fun JsonObject.assertCacheWasUsed() {
            val cacheWrite = this["cacheCreationInputTokens"]?.jsonPrimitive?.intOrNull ?: 0
            val cacheRead = this["cacheReadInputTokens"]?.jsonPrimitive?.intOrNull ?: 0
            withClue("Expected cacheCreationInputTokens or cacheReadInputTokens > 0 in metadata $this") {
                (cacheWrite > 0 || cacheRead > 0).shouldBeTrue()
            }
        }

        /**
         * Runs [prompt] against [model] via [executor] and asserts that the response
         * carries metadata indicating a cache write or a cache read.
         *
         * Prefers the first assistant message; when [tools] are supplied the model may
         * answer with a tool call instead, which is accepted as a fallback since it also
         * carries usage metadata.
         */
        private suspend fun testCacheControl(
            executor: MultiLLMPromptExecutor,
            prompt: Prompt,
            model: LLModel,
            tools: List<ToolDescriptor> = emptyList()
        ) {
            val messages = executor.execute(prompt, model, tools)
                .shouldNotBeNull()
                .shouldNotBeEmpty()

            // Fall back to a tool call only when tools were actually offered;
            // otherwise a missing assistant message is a failure (null below).
            val responseWithMetadata = messages.filterIsInstance<Message.Assistant>().firstOrNull()
                ?: messages.filterIsInstance<Message.Tool.Call>().firstOrNull().takeIf { tools.isNotEmpty() }

            responseWithMetadata.shouldNotBeNull {
                metaInfo.metadata
                    .shouldNotBeNull()
                    .assertCacheWasUsed()
            }
        }

        /** Cache-control strategies exercised by the parameterized automatic-caching test. */
        @JvmStatic
        fun cacheControlType(): Stream<AnthropicCacheControl> = Stream.of(
            AnthropicCacheControl.Default,
            AnthropicCacheControl.OneHour,
        )
    }

    /**
     * Verifies automatic (params-level) cache control for each supported TTL variant.
     * The prompt id includes the variant so test artifacts are distinguishable
     * (previously hard-coded to "1h", which was misleading for the Default case).
     */
    @ParameterizedTest
    @MethodSource("cacheControlType")
    fun integration_testAutomaticCacheControl(cacheControl: AnthropicCacheControl) = runTest(timeout = 120.seconds) {
        val params = AnthropicParams(cacheControl = cacheControl)
        val prompt = Prompt.build("test-auto-cache-$cacheControl", params = params) {
            system(PromptUtils.assistantPromptOfAtLeastLength(1200))
            user("What is the capital of Italy?")
        }

        RetryUtils.withRetry(
            times = 3,
            testName = "integration_testAutomaticCacheControl"
        ) {
            testCacheControl(executor, prompt, model)
        }
    }

    /** Verifies a block-level cache breakpoint placed on the system message. */
    @Retry
    @Test
    fun integration_testCacheControlOnSystemMessage() = runTest(timeout = 120.seconds) {
        val prompt = Prompt.build("test-cache-system-msg") {
            system(PromptUtils.assistantPromptOfAtLeastLength(1200), AnthropicCacheControl.Default)
            user("What is the capital of France?")
        }
        testCacheControl(executor, prompt, model)
    }

    /** Verifies a block-level cache breakpoint placed on the user message. */
    @Retry
    @Test
    fun integration_testCacheControlOnUserMessage() = runTest(timeout = 120.seconds) {
        val prompt = Prompt.build("test-cache-user-msg") {
            system(PromptUtils.assistantPromptOfAtLeastLength(1200))
            user("What is the capital of France?", AnthropicCacheControl.Default)
        }
        testCacheControl(executor, prompt, model)
    }

    /**
     * Verifies a cache breakpoint attached to a tool definition. The tool description is
     * padded to the minimum cacheable length because, for tools, the length requirement
     * appears to apply to the tool section rather than the prompt as a whole.
     */
    @Retry
    @Test
    fun integration_testCacheControlOnToolDefinition() = runTest(timeout = 120.seconds) {
        val cachedTool = ToolDescriptor(
            name = "calculator",
            description = PromptUtils.assistantPromptOfAtLeastLength(1600, "A calculator tool"),
            requiredParameters = listOf(
                ToolParameterDescriptor("expression", "Math expression to evaluate", ToolParameterType.String)
            ),
            cacheControl = AnthropicCacheControl.Default
        )
        val prompt = Prompt.build("test-cache-tool") {
            system(PromptUtils.assistantPromptOfAtLeastLength(1200))
            user("What is 2 + 2?")
        }

        testCacheControl(executor, prompt, model, listOf(cachedTool))
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import ai.koog.integration.tests.utils.tools.CalculatorTool
import ai.koog.prompt.dsl.Prompt
import ai.koog.prompt.executor.clients.LLMClient
import ai.koog.prompt.executor.clients.bedrock.BedrockAPIMethod
import ai.koog.prompt.executor.clients.bedrock.BedrockCacheControl
import ai.koog.prompt.executor.clients.bedrock.BedrockClientSettings
import ai.koog.prompt.executor.clients.bedrock.BedrockGuardrailsSettings
import ai.koog.prompt.executor.clients.bedrock.BedrockLLMClient
Expand All @@ -26,7 +27,6 @@ import ai.koog.prompt.executor.model.PromptExecutor
import ai.koog.prompt.llm.LLMCapability
import ai.koog.prompt.llm.LLMProvider
import ai.koog.prompt.llm.LLModel
import ai.koog.prompt.message.CacheControl
import ai.koog.prompt.message.ContentPart
import ai.koog.prompt.message.Message
import ai.koog.prompt.params.LLMParams
Expand Down Expand Up @@ -335,7 +335,7 @@ class BedrockConverseApiIntegrationTest : ExecutorIntegrationTestBase() {

val prompt = Prompt.build("test-cache-system") {
// Caching requires a minimum prompt length to work.
system(assistantPromptOfAtLeastLength(1600), CacheControl.Bedrock.Default)
system(assistantPromptOfAtLeastLength(1600), BedrockCacheControl.Default)
user("What is the capital of France?")
}

Expand All @@ -358,7 +358,7 @@ class BedrockConverseApiIntegrationTest : ExecutorIntegrationTestBase() {
val prompt = Prompt.build("test-cache-user") {
// Caching requires a minimum prompt length to work.
system(assistantPromptOfAtLeastLength(1600))
user(listOf(ContentPart.Text("What is the capital of France?")), CacheControl.Bedrock.Default)
user(listOf(ContentPart.Text("What is the capital of France?")), BedrockCacheControl.Default)
}

withRetry(times = 3, testName = "integration_testCacheControlOnUserMessage[${model.id}]") {
Expand All @@ -378,7 +378,7 @@ class BedrockConverseApiIntegrationTest : ExecutorIntegrationTestBase() {
Models.assumeAvailable(model.provider)
assumeTrue(model.capabilities?.contains(LLMCapability.Tools) ?: false, "Model $model does not support tools")

val cachedDescriptor = CalculatorTool.descriptor.withCacheControl(CacheControl.Bedrock.Default).copy(
val cachedDescriptor = CalculatorTool.descriptor.withCacheControl(BedrockCacheControl.Default).copy(
// Caching requires a minimum prompt length to work - in the case of tools, this appears to apply specifically to the tool section
// rather than the prompt as a whole.
description = assistantPromptOfAtLeastLength(1600, CalculatorTool.descriptor.description)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package ai.koog.prompt.executor.clients.anthropic

import ai.koog.prompt.message.CacheControl
import kotlinx.serialization.Serializable

/**
* Represents caching strategies specific to Anthropic's LLM provider.
*/
/**
 * Represents caching strategies specific to Anthropic's LLM provider.
 *
 * Attach one of these to a prompt message or tool definition to place a cache
 * breakpoint there; the variant selects the cache TTL sent to the Anthropic API.
 */
@Serializable
public sealed interface AnthropicCacheControl : CacheControl {
    /** Cache with the default TTL (no explicit TTL sent to Anthropic, caches for 5 minutes by default). */
    @Serializable
    public data object Default : AnthropicCacheControl

    /** Cache for 1 hour (sends an explicit 1-hour TTL to Anthropic). */
    @Serializable
    public data object OneHour : AnthropicCacheControl
}
Loading