From d6c765cf3e604d74cb3cfb70734ba7061f887e58 Mon Sep 17 00:00:00 2001
From: Sandesh Veerani <sandeshveerani4@gmail.com>
Date: Mon, 20 Apr 2026 16:00:55 +0200
Subject: [PATCH 1/3] fix: yield tool_call_parts immediately in live mode
 receive()

Tool call parts received via LiveServerToolCall were accumulated in
tool_call_parts but only yielded when turn_complete arrived or the
receive loop exited. Gemini 3.1 Flash Live models send tool calls
via LiveServerToolCall and do not emit turn_complete until the tool
response is received, causing a deadlock where the framework never
executes the tool because it never sees the function call event.

Yield tool_call_parts immediately after receiving them so the
framework can execute tools and send responses back to the model.
---
 src/google/adk/models/gemini_llm_connection.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/src/google/adk/models/gemini_llm_connection.py b/src/google/adk/models/gemini_llm_connection.py
index 3eb2fe5b3f..7dbc294b40 100644
--- a/src/google/adk/models/gemini_llm_connection.py
+++ b/src/google/adk/models/gemini_llm_connection.py
@@ -366,6 +366,21 @@ async def receive(self) -> AsyncGenerator[LlmResponse, None]:
               types.Part(function_call=function_call)
               for function_call in message.tool_call.function_calls
           ])
+          # Yield tool call parts immediately so the framework can execute
+          # the tools and send responses back to the model. Models like
+          # Gemini 3.1 send tool calls via LiveServerToolCall and do not
+          # emit turn_complete until they receive the tool response, so
+          # deferring the yield would deadlock the conversation.
+          if tool_call_parts:
+            logger.debug(
+                'Yielding tool_call_parts immediately for live tool call'
+            )
+            yield LlmResponse(
+                content=types.Content(role='model', parts=tool_call_parts),
+                model_version=self._model_version,
+                live_session_id=live_session_id,
+            )
+            tool_call_parts = []
         if message.session_resumption_update:
           logger.debug('Received session resumption message: %s', message)
           yield (

From cb41c33690f60f486c2b529d4a9267f6b6daeee9 Mon Sep 17 00:00:00 2001
From: Sandesh Veerani <sandeshveerani4@gmail.com>
Date: Mon, 20 Apr 2026 16:08:56 +0200
Subject: [PATCH 2/3] chore: trigger CLA re-check


From 9dcced1195ea17a8f07cb97e9d432ad85d58c18f Mon Sep 17 00:00:00 2001
From: Sandesh Veerani <sandeshveerani4@gmail.com>
Date: Tue, 21 Apr 2026 14:26:41 +0200
Subject: [PATCH 3/3] fix: only yield tool calls immediately for Gemini 3.1
 live models

Other models (e.g. 2.5-pro, native-audio) send turn_complete after tool
calls, so their tool call parts should stay buffered and be merged into a
single response at turn_complete. Gemini 3.1 Flash Live does not send
turn_complete until a tool response is received, so its tool calls must be
yielded immediately to avoid a deadlock.
---
 src/google/adk/models/gemini_llm_connection.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/src/google/adk/models/gemini_llm_connection.py b/src/google/adk/models/gemini_llm_connection.py
index 7dbc294b40..c708ace928 100644
--- a/src/google/adk/models/gemini_llm_connection.py
+++ b/src/google/adk/models/gemini_llm_connection.py
@@ -366,14 +366,16 @@ async def receive(self) -> AsyncGenerator[LlmResponse, None]:
               types.Part(function_call=function_call)
               for function_call in message.tool_call.function_calls
           ])
-          # Yield tool call parts immediately so the framework can execute
-          # the tools and send responses back to the model. Models like
-          # Gemini 3.1 send tool calls via LiveServerToolCall and do not
-          # emit turn_complete until they receive the tool response, so
-          # deferring the yield would deadlock the conversation.
-          if tool_call_parts:
+          # Gemini 3.1 does not emit turn_complete until it receives the
+          # tool response, so yield tool calls immediately to avoid
+          # deadlocking the conversation. Other models (e.g. 2.5-pro,
+          # native-audio) send turn_complete after tool calls, so buffer
+          # and merge them into a single response at turn_complete.
+          if model_name_utils.is_gemini_3_1_flash_live(
+              self._model_version
+          ) and tool_call_parts:
             logger.debug(
-                'Yielding tool_call_parts immediately for live tool call'
+                'Yielding tool_call_parts immediately for Gemini 3.1 live tool call'
             )
             yield LlmResponse(
                 content=types.Content(role='model', parts=tool_call_parts),