Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions examples/evaluation/callbacks/.env
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Set TRPC_AGENT_API_KEY、TRPC_AGENT_BASE_URL、TRPC_AGENT_MODEL_NAME
22 changes: 22 additions & 0 deletions examples/evaluation/callbacks/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# 回调 Callbacks 示例

在评测中注册 **Callbacks**:在推理集/用例推理、打分集/用例打分的 8 个生命周期节点挂载钩子,用于打点、日志、采样或上报。

## 目录结构

- `callbacks/`:示例根目录
- `agent/`:内含 `agent.py`、`callbacks_example.evalset.json`、`test_config.json`、`config.py`
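- `test_callbacks.py`:调用 `AgentEvaluator.evaluate(..., callbacks=callbacks)`,注册全部 8 个生命周期回调(`before_inference_set`、`after_inference_set`、`before_inference_case`、`after_inference_case`、`before_evaluate_set`、`after_evaluate_set`、`before_evaluate_case`、`after_evaluate_case`)并打日志,最后断言回调的触发顺序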

## 环境要求

Python 3.10+。需配置 `TRPC_AGENT_API_KEY` 等环境变量(同 quickstart)。

## 运行

```bash
cd examples/evaluation/callbacks
pytest test_callbacks.py -v --tb=short -s
```

`-s` 可看到回调中的 print 输出。
6 changes: 6 additions & 0 deletions examples/evaluation/callbacks/agent/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright @ 2025 Tencent.com
from .agent import root_agent

__all__ = ["root_agent"]
36 changes: 36 additions & 0 deletions examples/evaluation/callbacks/agent/agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# -*- coding: utf-8 -*-
#
# Copyright @ 2025 Tencent.com
"""Weather agent for callbacks example."""

from typing import Any, Dict

from trpc_agent_sdk.agents import LlmAgent
from trpc_agent_sdk.models import OpenAIModel
from trpc_agent_sdk.tools import FunctionTool

from .config import get_model_config


def get_weather(city: str) -> Dict[str, Any]:
    """Query the current weather for the specified city."""
    # NOTE(review): FunctionTool presumably exposes this docstring to the model
    # as the tool description — confirm before expanding it.
    # Canned readings; any city not listed here gets the placeholder below.
    known_cities = {
        "上海": {"temperature": 18, "condition": "多云"},
        "北京": {"temperature": 15, "condition": "晴"},
    }
    if city in known_cities:
        reading = known_cities[city]
    else:
        reading = {"temperature": 20, "condition": "未知"}
    # The requested city name is always echoed back next to the reading.
    weather: Dict[str, Any] = {"city": city}
    weather.update(reading)
    return weather


def create_agent() -> LlmAgent:
    """Build the weather ``LlmAgent`` used by this example.

    The API key, base URL and model name come from ``get_model_config``; the
    agent is given ``get_weather`` (wrapped in a ``FunctionTool``) as its only
    tool.
    """
    api_key, base_url, model_name = get_model_config()
    model = OpenAIModel(model_name=model_name, api_key=api_key, base_url=base_url)
    weather_tool = FunctionTool(get_weather)
    return LlmAgent(
        name="weather_agent",
        description="天气查询助手",
        model=model,
        instruction="你是天气助手,用 get_weather 查询城市天气并简要回答。",
        tools=[weather_tool],
    )


# Built once at import time and re-exported by the package __init__. Importing
# this module therefore raises ValueError (from get_model_config) when no API
# key is configured in the environment.
root_agent = create_agent()
37 changes: 37 additions & 0 deletions examples/evaluation/callbacks/agent/callbacks_example.evalset.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{
"eval_set_id": "callbacks_example",
"name": "回调示例",
"description": "演示在评测中注册 Callbacks,在推理/打分生命周期打日志或计数",
"eval_cases": [
{
"eval_id": "callbacks_001",
"conversation": [
{
"invocation_id": "e-1",
"user_content": {
"parts": [{"text": "上海天气怎么样"}],
"role": "user"
},
"final_response": {
"parts": [{"text": "18"}],
"role": "model"
},
"intermediate_data": {
"tool_uses": [
{
"id": "t1",
"name": "get_weather",
"args": {"city": "上海"}
}
]
}
}
],
"session_input": {
"app_name": "weather_agent",
"user_id": "user",
"state": {}
}
}
]
}
21 changes: 21 additions & 0 deletions examples/evaluation/callbacks/agent/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# -*- coding: utf-8 -*-
#
# Copyright @ 2025 Tencent.com
"""Agent config module."""

import os


def get_model_config() -> tuple[str, str, str]:
    """Read the model connection settings from environment variables.

    Unset and empty variables are treated alike, so a blank entry (for
    example ``TRPC_AGENT_BASE_URL=`` left in a ``.env`` file) falls back to
    the default instead of producing an empty URL or model name.

    Returns:
        A ``(api_key, url, model_name)`` tuple.

    Raises:
        ValueError: If neither ``TRPC_AGENT_API_KEY`` nor ``API_KEY`` holds a
            non-empty value.
    """
    env = os.environ
    # TRPC_AGENT_API_KEY takes precedence; API_KEY is the fallback name.
    api_key = env.get("TRPC_AGENT_API_KEY") or env.get("API_KEY") or ""
    if not api_key:
        raise ValueError(
            "TRPC_AGENT_API_KEY or API_KEY must be set in environment variables"
        )
    # `or` (rather than a .get default) so empty values also use the defaults,
    # matching how the API key lookup above already treats empty strings.
    url = env.get("TRPC_AGENT_BASE_URL") or "http://v2.open.venus.woa.com/llmproxy"
    model_name = env.get("TRPC_AGENT_MODEL_NAME") or "glm-4.7"
    return api_key, url, model_name
27 changes: 27 additions & 0 deletions examples/evaluation/callbacks/agent/test_config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
{
"metrics": [
{
"metric_name": "tool_trajectory_avg_score",
"threshold": 0.8,
"criterion": {
"tool_trajectory": {
"default": {
"name": {"match": "exact", "case_insensitive": false},
"arguments": {"match": "exact"}
},
"order_sensitive": false,
"subset_matching": false
}
}
},
{
"metric_name": "final_response_avg_score",
"threshold": 0.6,
"criterion": {
"final_response": {
"text": {"match": "contains", "case_insensitive": true}
}
}
}
]
}
116 changes: 116 additions & 0 deletions examples/evaluation/callbacks/test_callbacks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
# -*- coding: utf-8 -*-
#
# Copyright @ 2025 Tencent.com
"""回调示例:在评测中注册 8 个生命周期 Callbacks,打日志并演示 context 传递。"""

import os
import pytest
from trpc_agent_sdk.evaluation import (
AgentEvaluator,
Callbacks,
Callback,
CallbackResult,
BeforeInferenceSetArgs,
AfterInferenceSetArgs,
BeforeInferenceCaseArgs,
AfterInferenceCaseArgs,
BeforeEvaluateSetArgs,
AfterEvaluateSetArgs,
BeforeEvaluateCaseArgs,
AfterEvaluateCaseArgs,
)

# Names of the callbacks in the order they fired. Every callback below appends
# its own name; the test clears the list first and compares it with the
# expected lifecycle order at the end.
triggered: list[str] = []


def before_inference_set(ctx, args: BeforeInferenceSetArgs):
    """Record the hook and print the eval set id taken from the request.

    Returns ``None`` implicitly; ``ctx`` is not used.
    """
    triggered.append("before_inference_set")
    eval_set_id = args.request.eval_set_id
    print("[callback] 推理集开始", eval_set_id, flush=True)


def after_inference_set(ctx, args: AfterInferenceSetArgs):
    """Record the hook and print how many inference results came back.

    A falsy ``args.results`` is reported as zero cases. Returns ``None``
    implicitly; ``ctx`` is not used.
    """
    triggered.append("after_inference_set")
    if args.results:
        case_count = len(args.results)
    else:
        case_count = 0
    print("[callback] 推理集结束,共", case_count, "个用例", flush=True)


def before_inference_case(ctx, args: BeforeInferenceCaseArgs):
    """Record the hook and print the id of the case about to be inferred.

    Returns ``None`` implicitly; ``ctx`` is not used.
    """
    triggered.append("before_inference_case")
    case_id = args.eval_case_id
    print("[callback] 用例推理开始", case_id, flush=True)


def after_inference_case(ctx, args: AfterInferenceCaseArgs):
    """Record the hook and print the case id carried by the inference result.

    Returns ``None`` implicitly; ``ctx`` is not used.
    """
    triggered.append("after_inference_case")
    case_id = args.result.eval_case_id
    print("[callback] 用例推理结束", case_id, flush=True)


def before_evaluate_set(ctx, args: BeforeEvaluateSetArgs):
    """Record the hook, print the number of inference results, and return context.

    Returns a ``CallbackResult`` whose context is ``{"phase": "evaluate"}``.
    NOTE(review): presumably the evaluator hands this context to later hooks
    (``after_evaluate_set`` reads a "phase" value) — confirm against the SDK.
    """
    triggered.append("before_evaluate_set")
    case_count = len(args.request.inference_results)
    print("[callback] 打分集开始 cases=", case_count, flush=True)
    shared_context = {"phase": "evaluate"}
    return CallbackResult(context=shared_context)


def after_evaluate_set(ctx, args: AfterEvaluateSetArgs):
    """Record the hook and print the scored-case count plus the context phase.

    A falsy ``args.result`` is reported as zero cases. The phase is read from
    ``ctx["context"]["phase"]`` and defaults to "?" when either level is
    missing or empty. Returns ``None`` implicitly.
    """
    triggered.append("after_evaluate_set")
    if args.result:
        case_count = len(args.result.eval_case_results)
    else:
        case_count = 0
    shared_context = ctx.get("context") or {}
    phase = shared_context.get("phase", "?")
    print("[callback] 打分集结束,共", case_count, "个用例,ctx.phase=", phase, flush=True)


def before_evaluate_case(ctx, args: BeforeEvaluateCaseArgs):
    """Record the hook and print the id of the case about to be scored.

    Returns ``None`` implicitly; ``ctx`` is not used.
    """
    triggered.append("before_evaluate_case")
    case_id = args.eval_case_id
    print("[callback] 用例打分开始", case_id, flush=True)


def after_evaluate_case(ctx, args: AfterEvaluateCaseArgs):
    """Record the hook and print the eval id carried by the scoring result.

    Returns ``None`` implicitly; ``ctx`` is not used.
    """
    triggered.append("after_evaluate_case")
    eval_id = args.result.eval_id
    print("[callback] 用例打分结束", eval_id, flush=True)


@pytest.mark.asyncio
async def test_with_callbacks():
    """Run the example eval set with all eight hooks and check firing order."""
    # Start from a clean log so earlier runs in the same process cannot leak in.
    triggered.clear()
    here = os.path.dirname(os.path.abspath(__file__))
    eval_set_path = os.path.join(here, "agent", "callbacks_example.evalset.json")

    callbacks = Callbacks()
    demo_callback = Callback(
        before_inference_set=before_inference_set,
        after_inference_set=after_inference_set,
        before_inference_case=before_inference_case,
        after_inference_case=after_inference_case,
        before_evaluate_set=before_evaluate_set,
        after_evaluate_set=after_evaluate_set,
        before_evaluate_case=before_evaluate_case,
        after_evaluate_case=after_evaluate_case,
    )
    callbacks.register("demo", demo_callback)

    # agent_module is the bare name "agent"; the README runs pytest from this
    # example's directory.
    await AgentEvaluator.evaluate(
        agent_module="agent",
        agent_name="weather_agent",
        eval_dataset_file_path_or_dir=eval_set_path,
        callbacks=callbacks,
    )

    # Inference hooks first (set wraps case), then the evaluate hooks likewise.
    expected = [
        "before_inference_set",
        "before_inference_case",
        "after_inference_case",
        "after_inference_set",
        "before_evaluate_set",
        "before_evaluate_case",
        "after_evaluate_case",
        "after_evaluate_set",
    ]
    assert triggered == expected, triggered
1 change: 1 addition & 0 deletions examples/evaluation/context_messages/.env
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Set TRPC_AGENT_API_KEY、TRPC_AGENT_BASE_URL、TRPC_AGENT_MODEL_NAME
20 changes: 20 additions & 0 deletions examples/evaluation/context_messages/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# 上下文注入示例

在评测用例上配置 **context_messages**:评估服务在每轮推理前会将其中内容注入会话上下文,用于传递系统说明、领域知识或格式约束。

## 目录结构

- `context_messages/`:示例根目录
- `agent/`:内含 `agent.py`、`context_example.evalset.json`(含带 context_messages 的用例)、`test_config.json`、`config.py`
- `test_context_messages.py`:调用 `AgentEvaluator.evaluate` 跑评测

## 环境要求

Python 3.10+。需配置 `TRPC_AGENT_API_KEY` 等环境变量(同 quickstart)。

## 运行

```bash
cd examples/evaluation/context_messages
pytest test_context_messages.py -v --tb=short -s
```
6 changes: 6 additions & 0 deletions examples/evaluation/context_messages/agent/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright @ 2025 Tencent.com
from .agent import root_agent

__all__ = ["root_agent"]
36 changes: 36 additions & 0 deletions examples/evaluation/context_messages/agent/agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# -*- coding: utf-8 -*-
#
# Copyright @ 2025 Tencent.com
"""Weather agent for context_messages example."""

from typing import Any, Dict

from trpc_agent_sdk.agents import LlmAgent
from trpc_agent_sdk.models import OpenAIModel
from trpc_agent_sdk.tools import FunctionTool

from .config import get_model_config


def get_weather(city: str) -> Dict[str, Any]:
    """Query the current weather for the specified city."""
    # NOTE(review): FunctionTool presumably exposes this docstring to the model
    # as the tool description — confirm before expanding it.
    # Canned readings; any city not listed here gets the placeholder below.
    known_cities = {
        "上海": {"temperature": 18, "condition": "多云"},
        "北京": {"temperature": 15, "condition": "晴"},
    }
    if city in known_cities:
        reading = known_cities[city]
    else:
        reading = {"temperature": 20, "condition": "未知"}
    # The requested city name is always echoed back next to the reading.
    weather: Dict[str, Any] = {"city": city}
    weather.update(reading)
    return weather


def create_agent() -> LlmAgent:
    """Build the weather ``LlmAgent`` used by this example.

    The API key, base URL and model name come from ``get_model_config``; the
    agent is given ``get_weather`` (wrapped in a ``FunctionTool``) as its only
    tool.
    """
    api_key, base_url, model_name = get_model_config()
    model = OpenAIModel(model_name=model_name, api_key=api_key, base_url=base_url)
    weather_tool = FunctionTool(get_weather)
    return LlmAgent(
        name="weather_agent",
        description="天气查询助手",
        model=model,
        instruction="你是天气助手,用 get_weather 查询城市天气并简要回答。",
        tools=[weather_tool],
    )


# Built once at import time and re-exported by the package __init__. Importing
# this module therefore raises ValueError (from get_model_config) when no API
# key is configured in the environment.
root_agent = create_agent()
21 changes: 21 additions & 0 deletions examples/evaluation/context_messages/agent/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# -*- coding: utf-8 -*-
#
# Copyright @ 2025 Tencent.com
"""Agent config module."""

import os


def get_model_config() -> tuple[str, str, str]:
    """Read the model connection settings from environment variables.

    Unset and empty variables are treated alike, so a blank entry (for
    example ``TRPC_AGENT_BASE_URL=`` left in a ``.env`` file) falls back to
    the default instead of producing an empty URL or model name.

    Returns:
        A ``(api_key, url, model_name)`` tuple.

    Raises:
        ValueError: If neither ``TRPC_AGENT_API_KEY`` nor ``API_KEY`` holds a
            non-empty value.
    """
    env = os.environ
    # TRPC_AGENT_API_KEY takes precedence; API_KEY is the fallback name.
    api_key = env.get("TRPC_AGENT_API_KEY") or env.get("API_KEY") or ""
    if not api_key:
        raise ValueError(
            "TRPC_AGENT_API_KEY or API_KEY must be set in environment variables"
        )
    # `or` (rather than a .get default) so empty values also use the defaults,
    # matching how the API key lookup above already treats empty strings.
    url = env.get("TRPC_AGENT_BASE_URL") or "http://v2.open.venus.woa.com/llmproxy"
    model_name = env.get("TRPC_AGENT_MODEL_NAME") or "glm-4.7"
    return api_key, url, model_name
Loading
Loading