Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
498 changes: 498 additions & 0 deletions docs/docs/prompts/prompt-creation/cache-control.md

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions docs/docs/prompts/prompt-creation/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -493,3 +493,4 @@ This creates a new prompt that includes all messages from `basePrompt` and the n
- Learn how to work with [multimodal content](multimodal-content.md).
- Run prompts with [LLM clients](../llm-clients.md) if you work with a single LLM provider.
- Run prompts with [prompt executors](../prompt-executors.md) if you work with multiple LLM providers.
- Learn how to use LLM prompt caching with [cache control](cache-control.md).
4 changes: 3 additions & 1 deletion docs/mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ nav:
- prompts/index.md
- Creating prompts:
- prompts/prompt-creation/index.md
- Multimodal content: prompts/prompt-creation/multimodal-content.md
- Multimodal content: prompts/prompt-creation/multimodal-content.md
- Cache control: prompts/prompt-creation/cache-control.md
- Running prompts:
- LLM clients: prompts/llm-clients.md
- Prompt executors: prompts/prompt-executors.md
Expand Down Expand Up @@ -168,6 +169,7 @@ plugins:
- prompts/prompt-creation/multimodal-content.md: This page provides details about multimodal inputs, which allow you to send text, images, audio, video, and documents in your prompts.
- prompts/handling-failures.md: This page provides details about handling failures in prompts.
- prompts/llm-response-caching.md: This page provides details about prompt caching, which lets you cache responses from LLMs to avoid repeated requests.
- prompts/prompt-creation/cache-control.md: This page provides details about provider-side prompt caching control for Anthropic and Amazon Bedrock, including automatic and block-level cache breakpoints and TTL options.
Running prompts:
- prompts/llm-clients.md: This page provides details about LLM clients, which are used to run prompts and receive responses from LLMs. They allow working with a single LLM provider only.
- prompts/prompt-executors.md: This page provides details about prompt executors, which wrap LLM clients and are used to run prompts and receive responses from LLMs. They allow working with multiple LLM providers in a unified way.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
package ai.koog.integration.tests.executor

import ai.koog.agents.core.tools.ToolDescriptor
import ai.koog.agents.core.tools.ToolParameterDescriptor
import ai.koog.agents.core.tools.ToolParameterType
import ai.koog.integration.tests.utils.PromptUtils
import ai.koog.integration.tests.utils.RetryUtils
import ai.koog.integration.tests.utils.annotations.Retry
import ai.koog.integration.tests.utils.getLLMClientForProvider
import ai.koog.prompt.dsl.Prompt
import ai.koog.prompt.executor.clients.InternalLLMClientApi
import ai.koog.prompt.executor.clients.anthropic.AnthropicCacheControl
import ai.koog.prompt.executor.clients.anthropic.AnthropicModels
import ai.koog.prompt.executor.clients.anthropic.AnthropicParams
import ai.koog.prompt.executor.llms.MultiLLMPromptExecutor
import ai.koog.prompt.llm.LLModel
import ai.koog.prompt.message.Message
import io.kotest.assertions.withClue
import io.kotest.matchers.booleans.shouldBeTrue
import io.kotest.matchers.collections.shouldNotBeEmpty
import io.kotest.matchers.nulls.shouldNotBeNull
import kotlinx.coroutines.test.runTest
import kotlinx.serialization.json.JsonObject
import kotlinx.serialization.json.intOrNull
import kotlinx.serialization.json.jsonPrimitive
import org.junit.jupiter.api.Test
import org.junit.jupiter.params.ParameterizedTest
import org.junit.jupiter.params.provider.MethodSource
import java.util.stream.Stream
import kotlin.time.Duration.Companion.seconds

/**
 * Integration tests for Anthropic cache control.
 *
 * Caching requires a minimum prompt length (usually ≥ 1024 tokens).
 * https://platform.claude.com/docs/en/build-with-claude/prompt-caching#cache-limitations
 * Tests use [ai.koog.integration.tests.utils.PromptUtils.assistantPromptOfAtLeastLength] to ensure
 * the prompt is long enough for the API to accept the cache breakpoint.
 */
@OptIn(InternalLLMClientApi::class)
class AnthropicCacheControlIntegrationTest {

    companion object {
        private val model = AnthropicModels.Sonnet_4_5
        private val client = getLLMClientForProvider(model.provider)
        private val executor = MultiLLMPromptExecutor(client)

        /**
         * Asserts that the response metadata shows cache was used (write or read).
         * On the first cached request `cacheCreationInputTokens` > 0.
         * On a subsequent request hitting the same prefix `cacheReadInputTokens` > 0.
         */
        private fun JsonObject.assertCacheWasUsed() {
            val cacheWrite = this["cacheCreationInputTokens"]?.jsonPrimitive?.intOrNull ?: 0
            val cacheRead = this["cacheReadInputTokens"]?.jsonPrimitive?.intOrNull ?: 0
            withClue("Expected cacheCreationInputTokens or cacheReadInputTokens > 0 in metadata $this") {
                (cacheWrite > 0 || cacheRead > 0).shouldBeTrue()
            }
        }

        /**
         * Runs [prompt] against [model] via [executor] and asserts that the response
         * carries metadata indicating a cache write or a cache read.
         *
         * Prefers the first assistant message; when [tools] are supplied the model may
         * answer with a tool call instead, which is accepted as a fallback since it also
         * carries usage metadata.
         */
        private suspend fun testCacheControl(
            executor: MultiLLMPromptExecutor,
            prompt: Prompt,
            model: LLModel,
            tools: List<ToolDescriptor> = emptyList()
        ) {
            val messages = executor.execute(prompt, model, tools)
                .shouldNotBeNull()
                .shouldNotBeEmpty()

            // Fall back to a tool call only when tools were actually offered;
            // otherwise a missing assistant message is a failure (null below).
            val responseWithMetadata = messages.filterIsInstance<Message.Assistant>().firstOrNull()
                ?: messages.filterIsInstance<Message.Tool.Call>().firstOrNull().takeIf { tools.isNotEmpty() }

            responseWithMetadata.shouldNotBeNull {
                metaInfo.metadata
                    .shouldNotBeNull()
                    .assertCacheWasUsed()
            }
        }

        /** Cache-control strategies exercised by the parameterized automatic-caching test. */
        @JvmStatic
        fun cacheControlType(): Stream<AnthropicCacheControl> = Stream.of(
            AnthropicCacheControl.Default,
            AnthropicCacheControl.OneHour,
        )
    }

    /**
     * Verifies automatic (params-level) cache control for each supported TTL variant.
     * The prompt id includes the variant so test artifacts are distinguishable
     * (previously hard-coded to "1h", which was misleading for the Default case).
     */
    @ParameterizedTest
    @MethodSource("cacheControlType")
    fun integration_testAutomaticCacheControl(cacheControl: AnthropicCacheControl) = runTest(timeout = 120.seconds) {
        val params = AnthropicParams(cacheControl = cacheControl)
        val prompt = Prompt.build("test-auto-cache-$cacheControl", params = params) {
            system(PromptUtils.assistantPromptOfAtLeastLength(1200))
            user("What is the capital of Italy?")
        }

        RetryUtils.withRetry(
            times = 3,
            testName = "integration_testAutomaticCacheControl"
        ) {
            testCacheControl(executor, prompt, model)
        }
    }

    /** Verifies a block-level cache breakpoint placed on the system message. */
    @Retry
    @Test
    fun integration_testCacheControlOnSystemMessage() = runTest(timeout = 120.seconds) {
        val prompt = Prompt.build("test-cache-system-msg") {
            system(PromptUtils.assistantPromptOfAtLeastLength(1200), AnthropicCacheControl.Default)
            user("What is the capital of France?")
        }
        testCacheControl(executor, prompt, model)
    }

    /** Verifies a block-level cache breakpoint placed on the user message. */
    @Retry
    @Test
    fun integration_testCacheControlOnUserMessage() = runTest(timeout = 120.seconds) {
        val prompt = Prompt.build("test-cache-user-msg") {
            system(PromptUtils.assistantPromptOfAtLeastLength(1200))
            user("What is the capital of France?", AnthropicCacheControl.Default)
        }
        testCacheControl(executor, prompt, model)
    }

    /**
     * Verifies a cache breakpoint attached to a tool definition. The tool description is
     * padded to the minimum cacheable length because, for tools, the length requirement
     * appears to apply to the tool section rather than the prompt as a whole.
     */
    @Retry
    @Test
    fun integration_testCacheControlOnToolDefinition() = runTest(timeout = 120.seconds) {
        val cachedTool = ToolDescriptor(
            name = "calculator",
            description = PromptUtils.assistantPromptOfAtLeastLength(1600, "A calculator tool"),
            requiredParameters = listOf(
                ToolParameterDescriptor("expression", "Math expression to evaluate", ToolParameterType.String)
            ),
            cacheControl = AnthropicCacheControl.Default
        )
        val prompt = Prompt.build("test-cache-tool") {
            system(PromptUtils.assistantPromptOfAtLeastLength(1200))
            user("What is 2 + 2?")
        }

        testCacheControl(executor, prompt, model, listOf(cachedTool))
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import ai.koog.integration.tests.utils.tools.CalculatorTool
import ai.koog.prompt.dsl.Prompt
import ai.koog.prompt.executor.clients.LLMClient
import ai.koog.prompt.executor.clients.bedrock.BedrockAPIMethod
import ai.koog.prompt.executor.clients.bedrock.BedrockCacheControl
import ai.koog.prompt.executor.clients.bedrock.BedrockClientSettings
import ai.koog.prompt.executor.clients.bedrock.BedrockGuardrailsSettings
import ai.koog.prompt.executor.clients.bedrock.BedrockLLMClient
Expand All @@ -26,7 +27,6 @@ import ai.koog.prompt.executor.model.PromptExecutor
import ai.koog.prompt.llm.LLMCapability
import ai.koog.prompt.llm.LLMProvider
import ai.koog.prompt.llm.LLModel
import ai.koog.prompt.message.CacheControl
import ai.koog.prompt.message.ContentPart
import ai.koog.prompt.message.Message
import ai.koog.prompt.params.LLMParams
Expand Down Expand Up @@ -335,7 +335,7 @@ class BedrockConverseApiIntegrationTest : ExecutorIntegrationTestBase() {

val prompt = Prompt.build("test-cache-system") {
// Caching requires a minimum prompt length to work.
system(assistantPromptOfAtLeastLength(1600), CacheControl.Bedrock.Default)
system(assistantPromptOfAtLeastLength(1600), BedrockCacheControl.Default)
user("What is the capital of France?")
}

Expand All @@ -358,7 +358,7 @@ class BedrockConverseApiIntegrationTest : ExecutorIntegrationTestBase() {
val prompt = Prompt.build("test-cache-user") {
// Caching requires a minimum prompt length to work.
system(assistantPromptOfAtLeastLength(1600))
user(listOf(ContentPart.Text("What is the capital of France?")), CacheControl.Bedrock.Default)
user(listOf(ContentPart.Text("What is the capital of France?")), BedrockCacheControl.Default)
}

withRetry(times = 3, testName = "integration_testCacheControlOnUserMessage[${model.id}]") {
Expand All @@ -378,7 +378,7 @@ class BedrockConverseApiIntegrationTest : ExecutorIntegrationTestBase() {
Models.assumeAvailable(model.provider)
assumeTrue(model.capabilities?.contains(LLMCapability.Tools) ?: false, "Model $model does not support tools")

val cachedDescriptor = CalculatorTool.descriptor.withCacheControl(CacheControl.Bedrock.Default).copy(
val cachedDescriptor = CalculatorTool.descriptor.withCacheControl(BedrockCacheControl.Default).copy(
// Caching requires a minimum prompt length to work - in the case of tools, this appears to apply specifically to the tool section
// rather than the prompt as a whole.
description = assistantPromptOfAtLeastLength(1600, CalculatorTool.descriptor.description)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package ai.koog.prompt.executor.clients.anthropic

import ai.koog.prompt.message.CacheControl
import kotlinx.serialization.Serializable

/**
* Represents caching strategies specific to Anthropic's LLM provider.
*/
/**
 * Represents caching strategies specific to Anthropic's LLM provider.
 *
 * Attach one of these to a prompt message or tool definition to place a cache
 * breakpoint there; the variant selects the cache TTL sent to the Anthropic API.
 */
@Serializable
public sealed interface AnthropicCacheControl : CacheControl {
    /** Cache with the default TTL (no explicit TTL sent to Anthropic, caches for 5 minutes by default). */
    @Serializable
    public data object Default : AnthropicCacheControl

    /** Cache for 1 hour (sends an explicit 1-hour TTL to Anthropic). */
    @Serializable
    public data object OneHour : AnthropicCacheControl
}
Loading