diff --git a/src/google/adk/models/anthropic_llm.py b/src/google/adk/models/anthropic_llm.py
index a14c767f23..e8b91fe651 100644
--- a/src/google/adk/models/anthropic_llm.py
+++ b/src/google/adk/models/anthropic_llm.py
@@ -36,6 +36,7 @@
 from anthropic import AsyncAnthropicVertex
 from anthropic import NOT_GIVEN
 from anthropic import NotGiven
+from anthropic import RateLimitError
 from anthropic import types as anthropic_types
 from google.genai import types
 from pydantic import BaseModel
@@ -53,6 +54,28 @@
 logger = logging.getLogger("google_adk." + __name__)
 
 
+_RATE_LIMIT_POSSIBLE_FIX_MESSAGE = """
+On how to mitigate this issue, please refer to:
+
+https://docs.anthropic.com/en/api/errors#http-errors
+"""
+
+
+class _AnthropicRateLimitError(RateLimitError):  # type: ignore[misc]
+  """Represents a rate limit error received from Anthropic."""
+
+  def __init__(self, rate_limit_error: RateLimitError):
+    super().__init__(
+        str(rate_limit_error),
+        response=rate_limit_error.response,
+        body=getattr(rate_limit_error, "body", None),
+    )
+
+  def __str__(self) -> str:
+    base_message = super().__str__()
+    return f"{_RATE_LIMIT_POSSIBLE_FIX_MESSAGE}\n\n{base_message}"
+
+
 @dataclasses.dataclass
 class _ToolUseAccumulator:
   """Accumulates streamed tool_use content block data."""
@@ -402,21 +425,24 @@ async def generate_content_async(
         else NOT_GIVEN
     )
 
-    if not stream:
-      message = await self._anthropic_client.messages.create(
-          model=model_to_use,
-          system=llm_request.config.system_instruction,
-          messages=messages,
-          tools=tools,
-          tool_choice=tool_choice,
-          max_tokens=self.max_tokens,
-      )
-      yield message_to_generate_content_response(message)
-    else:
-      async for response in self._generate_content_streaming(
-          llm_request, messages, tools, tool_choice
-      ):
-        yield response
+    try:
+      if not stream:
+        message = await self._anthropic_client.messages.create(
+            model=model_to_use,
+            system=llm_request.config.system_instruction,
+            messages=messages,
+            tools=tools,
+            tool_choice=tool_choice,
+            max_tokens=self.max_tokens,
+        )
+        yield message_to_generate_content_response(message)
+      else:
+        async for response in self._generate_content_streaming(
+            llm_request, messages, tools, tool_choice
+        ):
+          yield response
+    except RateLimitError as rate_limit_error:
+      raise _AnthropicRateLimitError(rate_limit_error) from rate_limit_error
 
   async def _generate_content_streaming(
       self,
diff --git a/tests/unittests/models/test_anthropic_llm.py b/tests/unittests/models/test_anthropic_llm.py
index fb44d5c8e7..86702bb50d 100644
--- a/tests/unittests/models/test_anthropic_llm.py
+++ b/tests/unittests/models/test_anthropic_llm.py
@@ -20,9 +20,11 @@
 from unittest.mock import AsyncMock
 from unittest.mock import MagicMock
 
+from anthropic import RateLimitError
 from anthropic import types as anthropic_types
 from google.adk import version as adk_version
 from google.adk.models import anthropic_llm
+from google.adk.models.anthropic_llm import _AnthropicRateLimitError
 from google.adk.models.anthropic_llm import AnthropicLlm
 from google.adk.models.anthropic_llm import Claude
 from google.adk.models.anthropic_llm import content_to_message_param
@@ -34,6 +36,7 @@
 from google.genai import version as genai_version
 from google.genai.types import Content
 from google.genai.types import Part
+import httpx
 import pytest
 
 
@@ -1350,3 +1353,52 @@
   mock_client.messages.create.assert_called_once()
   _, kwargs = mock_client.messages.create.call_args
   assert "stream" not in kwargs
+
+
+def _make_rate_limit_error() -> RateLimitError:
+  request = httpx.Request("POST", "https://api.anthropic.com/v1/messages")
+  response = httpx.Response(429, request=request)
+  return RateLimitError(
+      "rate limited",
+      response=response,
+      body={"type": "error", "error": {"type": "rate_limit_error"}},
+  )
+
+
+@pytest.mark.asyncio
+async def test_non_streaming_wraps_anthropic_rate_limit_error():
+  llm = AnthropicLlm(model="claude-sonnet-4-20250514")
+  mock_client = MagicMock()
+  mock_client.messages.create = AsyncMock(side_effect=_make_rate_limit_error())
+
+  llm_request = LlmRequest(
+      model="claude-sonnet-4-20250514",
+      contents=[Content(role="user", parts=[Part.from_text(text="Hi")])],
+      config=types.GenerateContentConfig(system_instruction="Test"),
+  )
+
+  with mock.patch.object(llm, "_anthropic_client", mock_client):
+    with pytest.raises(_AnthropicRateLimitError) as excinfo:
+      _ = [r async for r in llm.generate_content_async(llm_request)]
+
+  assert "docs.anthropic.com/en/api/errors#http-errors" in str(excinfo.value)
+  assert "rate limited" in str(excinfo.value)
+
+
+@pytest.mark.asyncio
+async def test_streaming_wraps_anthropic_rate_limit_error():
+  llm = AnthropicLlm(model="claude-sonnet-4-20250514")
+  mock_client = MagicMock()
+  mock_client.messages.create = AsyncMock(side_effect=_make_rate_limit_error())
+
+  llm_request = LlmRequest(
+      model="claude-sonnet-4-20250514",
+      contents=[Content(role="user", parts=[Part.from_text(text="Hi")])],
+      config=types.GenerateContentConfig(system_instruction="Test"),
+  )
+
+  with mock.patch.object(llm, "_anthropic_client", mock_client):
+    with pytest.raises(_AnthropicRateLimitError):
+      _ = [
+          r async for r in llm.generate_content_async(llm_request, stream=True)
+      ]
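
Caller impact: because `_AnthropicRateLimitError` subclasses `RateLimitError`, existing `except RateLimitError` handlers keep matching; the only visible change is that `str(error)` now leads with the mitigation link. Below is a minimal caller-side sketch of that behavior. The model name and prompt are illustrative, and the `google.adk.models.llm_request` import path is an assumption based on how the tests use `LlmRequest`; it is not part of this diff.

```python
# Hypothetical caller-side sketch: code that already catches the upstream
# anthropic.RateLimitError continues to work, since the wrapper subclasses it.
import asyncio

from anthropic import RateLimitError
from google.adk.models.anthropic_llm import AnthropicLlm
from google.adk.models.llm_request import LlmRequest  # assumed import path
from google.genai import types
from google.genai.types import Content
from google.genai.types import Part


async def main() -> None:
  llm = AnthropicLlm(model="claude-sonnet-4-20250514")  # illustrative model
  request = LlmRequest(
      model="claude-sonnet-4-20250514",
      contents=[Content(role="user", parts=[Part.from_text(text="Hi")])],
      config=types.GenerateContentConfig(system_instruction="Be brief."),
  )
  try:
    async for response in llm.generate_content_async(request):
      print(response)
  except RateLimitError as e:
    # With this change, str(e) starts with _RATE_LIMIT_POSSIBLE_FIX_MESSAGE,
    # pointing at https://docs.anthropic.com/en/api/errors#http-errors,
    # followed by the original error text.
    print(f"Rate limited:\n{e}")


if __name__ == "__main__":
  asyncio.run(main())
```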