Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 41 additions & 15 deletions src/google/adk/models/anthropic_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
from anthropic import AsyncAnthropicVertex
from anthropic import NOT_GIVEN
from anthropic import NotGiven
from anthropic import RateLimitError
from anthropic import types as anthropic_types
from google.genai import types
from pydantic import BaseModel
Expand All @@ -53,6 +54,28 @@
logger = logging.getLogger("google_adk." + __name__)


# Guidance text prefixed to wrapped Anthropic rate-limit errors (see
# _AnthropicRateLimitError.__str__), pointing users at Anthropic's own
# documentation on HTTP 429 handling and mitigation.
_RATE_LIMIT_POSSIBLE_FIX_MESSAGE = """
On how to mitigate this issue, please refer to:

https://docs.anthropic.com/en/api/errors#http-errors
"""


class _AnthropicRateLimitError(RateLimitError):  # type: ignore[misc]
  """Rate limit error from Anthropic, augmented with mitigation guidance.

  Wraps an ``anthropic.RateLimitError`` so that callers catching the SDK's
  exception type keep working, while the rendered message additionally points
  at Anthropic's HTTP-error documentation.
  """

  def __init__(self, rate_limit_error: RateLimitError):
    """Copy message, response, and (optional) body from the wrapped error."""
    error_body = getattr(rate_limit_error, "body", None)
    super().__init__(
        str(rate_limit_error),
        response=rate_limit_error.response,
        body=error_body,
    )

  def __str__(self) -> str:
    """Render the mitigation pointer followed by the original error text."""
    return "\n\n".join((_RATE_LIMIT_POSSIBLE_FIX_MESSAGE, super().__str__()))


@dataclasses.dataclass
class _ToolUseAccumulator:
"""Accumulates streamed tool_use content block data."""
Expand Down Expand Up @@ -402,21 +425,24 @@ async def generate_content_async(
else NOT_GIVEN
)

if not stream:
message = await self._anthropic_client.messages.create(
model=model_to_use,
system=llm_request.config.system_instruction,
messages=messages,
tools=tools,
tool_choice=tool_choice,
max_tokens=self.max_tokens,
)
yield message_to_generate_content_response(message)
else:
async for response in self._generate_content_streaming(
llm_request, messages, tools, tool_choice
):
yield response
try:
if not stream:
message = await self._anthropic_client.messages.create(
model=model_to_use,
system=llm_request.config.system_instruction,
messages=messages,
tools=tools,
tool_choice=tool_choice,
max_tokens=self.max_tokens,
)
yield message_to_generate_content_response(message)
else:
async for response in self._generate_content_streaming(
llm_request, messages, tools, tool_choice
):
yield response
except RateLimitError as rate_limit_error:
raise _AnthropicRateLimitError(rate_limit_error) from rate_limit_error

async def _generate_content_streaming(
self,
Expand Down
52 changes: 52 additions & 0 deletions tests/unittests/models/test_anthropic_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,11 @@
from unittest.mock import AsyncMock
from unittest.mock import MagicMock

from anthropic import RateLimitError
from anthropic import types as anthropic_types
from google.adk import version as adk_version
from google.adk.models import anthropic_llm
from google.adk.models.anthropic_llm import _AnthropicRateLimitError
from google.adk.models.anthropic_llm import AnthropicLlm
from google.adk.models.anthropic_llm import Claude
from google.adk.models.anthropic_llm import content_to_message_param
Expand All @@ -34,6 +36,7 @@
from google.genai import version as genai_version
from google.genai.types import Content
from google.genai.types import Part
import httpx
import pytest


Expand Down Expand Up @@ -1350,3 +1353,52 @@ async def test_non_streaming_does_not_pass_stream_param():
mock_client.messages.create.assert_called_once()
_, kwargs = mock_client.messages.create.call_args
assert "stream" not in kwargs


def _make_rate_limit_error() -> RateLimitError:
  """Build a minimal Anthropic RateLimitError backed by a synthetic 429 response."""
  fake_request = httpx.Request("POST", "https://api.anthropic.com/v1/messages")
  fake_response = httpx.Response(429, request=fake_request)
  error_body = {"type": "error", "error": {"type": "rate_limit_error"}}
  return RateLimitError(
      "rate limited",
      response=fake_response,
      body=error_body,
  )


@pytest.mark.asyncio
async def test_non_streaming_wraps_anthropic_rate_limit_error():
  """Non-streaming requests surface RateLimitError as _AnthropicRateLimitError."""
  claude_llm = AnthropicLlm(model="claude-sonnet-4-20250514")
  fake_client = MagicMock()
  fake_client.messages.create = AsyncMock(side_effect=_make_rate_limit_error())

  request = LlmRequest(
      model="claude-sonnet-4-20250514",
      contents=[Content(role="user", parts=[Part.from_text(text="Hi")])],
      config=types.GenerateContentConfig(system_instruction="Test"),
  )

  with mock.patch.object(claude_llm, "_anthropic_client", fake_client):
    with pytest.raises(_AnthropicRateLimitError) as excinfo:
      async for _ in claude_llm.generate_content_async(request):
        pass

  # The wrapped error must carry both the mitigation pointer and the
  # original SDK message.
  rendered = str(excinfo.value)
  assert "docs.anthropic.com/en/api/errors#http-errors" in rendered
  assert "rate limited" in rendered


@pytest.mark.asyncio
async def test_streaming_wraps_anthropic_rate_limit_error():
  """Streaming requests also surface RateLimitError as _AnthropicRateLimitError."""
  claude_llm = AnthropicLlm(model="claude-sonnet-4-20250514")
  fake_client = MagicMock()
  fake_client.messages.create = AsyncMock(side_effect=_make_rate_limit_error())

  request = LlmRequest(
      model="claude-sonnet-4-20250514",
      contents=[Content(role="user", parts=[Part.from_text(text="Hi")])],
      config=types.GenerateContentConfig(system_instruction="Test"),
  )

  with mock.patch.object(claude_llm, "_anthropic_client", fake_client):
    with pytest.raises(_AnthropicRateLimitError):
      # Consuming the generator is what triggers the wrapped error.
      async for _ in claude_llm.generate_content_async(request, stream=True):
        pass