Initial Checks
- I confirm that I'm using the latest version of Pydantic AI
- I confirm that I searched for my issue in https://github.com/pydantic/pydantic-ai/issues before opening this issue
Description
Running the example `MyEvaluator` and logging to Logfire leads to an error:

- Example from https://ai.pydantic.dev/evals/#evaluators:~:text=run%20%22as%20is%22)-,Evaluation%20Process,-The%20evaluation%20process

> Failed to render details panel. Most likely, the record attributes do not match the expected panel data type.

In my console I can see the expected output:
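Roughly like this (a plain-text approximation of the rich table, reconstructed from the span data below; the actual formatting differs cosmetically):

```
Evaluation Summary: guess_city
Case ID     | Inputs                         | Outputs | Scores            | Assertions
simple_case | What is the capital of France? | Paris   | MyEvaluator: 1.00 | ✔
Averages    |                                |         | MyEvaluator: 1.00 | 100.0% ✔
```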

Full Raw Data from Logfire
{ "created_at": 1745330098358.305, "start_timestamp": "2025-04-22T13:54:56.700515Z", "end_timestamp": "2025-04-22T13:54:56.758743Z", "trace_id": "01965dc7ca3c485d375962d4b734f177", "span_id": "8c46ea45d9e9801e", "kind": "span", "level": 9, "parent_span_id": null, "span_name": "evaluate {name}", "message": "evaluate guess_city", "is_exception": false, "exception_type": null, "tags": [], "otel_scope_name": "pydantic-evals", "otel_scope_version": "3.14.0", "service_name": "evals", "service_version": null, "http_response_status_code": null, "gen_ai_operation_name": null, "gen_ai_request_model": null, "gen_ai_response_model": null, "gen_ai_system": null, "gen_ai_usage_input_tokens": 0, "gen_ai_usage_output_tokens": 0, "matched_filter": true, "is_extra_span": false, "day": "2025-04-22", "duration": 0.058228, "otel_status_code": "UNSET", "otel_status_message": null, "otel_links": [], "otel_events": [], "url_path": null, "url_query": null, "url_full": null, "http_route": null, "http_method": null, "attributes": { "averages": { "name": "Averages", "scores": { "MyEvaluator": 1 }, "labels": {}, "metrics": {}, "assertions": 1, "task_duration": 0.000249, "total_duration": 0.001984 }, "cases": [ { "name": "simple_case", "inputs": "What is the capital of France?", "metadata": { "difficulty": "easy" }, "expected_output": "Paris", "output": "Paris", "metrics": {}, "attributes": {}, "scores": { "MyEvaluator": { "name": "MyEvaluator", "value": 1, "reason": null, "source": { "name": "MyEvaluator", "arguments": null } } }, "labels": {}, "assertions": { "IsInstance": { "name": "IsInstance", "value": true, "reason": null, "source": { "name": "IsInstance", "arguments": [ "str" ] } } }, "task_duration": 0.000249, "total_duration": 0.001984, "trace_id": "01965dc7ca3c485d375962d4b734f177", "span_id": "6310642f2d6c41d0" } ], "code.filepath": "test.py", "code.lineno": 45, "logfire.msg_template": "evaluate {name}", "name": "guess_city" }, "attributes_json_schema": "{\"type\":\"object\",\"properties\":{\"averages\":{\"type\":\"object\",\"title\":\"ReportCaseAggregate\",\"x-python-datatype\":\"PydanticModel\"},\"cases\":{\"type\":\"array\",\"items\":{\"type\":\"object\",\"title\":\"ReportCase\",\"x-python-datatype\":\"PydanticModel\",\"properties\":{\"scores\":{\"type\":\"object\",\"properties\":{\"MyEvaluator\":{\"type\":\"object\",\"title\":\"EvaluationResult\",\"x-python-datatype\":\"dataclass\",\"properties\":{\"source\":{\"type\":\"object\",\"title\":\"MyEvaluator\",\"x-python-datatype\":\"dataclass\"}}}}},\"assertions\":{\"type\":\"object\",\"properties\":{\"IsInstance\":{\"type\":\"object\",\"title\":\"EvaluationResult\",\"x-python-datatype\":\"dataclass\",\"properties\":{\"source\":{\"type\":\"object\",\"title\":\"IsInstance\",\"x-python-datatype\":\"dataclass\"}}}}}}}},\"name\":{}}}", "otel_scope_attributes": {}, "service_namespace": "", "service_instance_id": "737e6cc97d7e4feeada8e3ac031618b1", "process_pid": 97902, "otel_resource_attributes": { "process.pid": 97902, "process.runtime.description": "3.13.0 (main, Oct 16 2024, 08:05:40) [Clang 18.1.8 ]", "process.runtime.name": "cpython", "process.runtime.version": "3.13.0", "service.instance.id": "737e6cc97d7e4feeada8e3ac031618b1", "service.name": "evals", "telemetry.sdk.language": "python", "telemetry.sdk.name": "opentelemetry", "telemetry.sdk.version": "1.32.1" }, "telemetry_sdk_name": "opentelemetry", "telemetry_sdk_language": "python", "telemetry_sdk_version": "1.32.1", "deployment_environment": null}
Example Code
```python
import logfire
from dotenv import load_dotenv
from pydantic_evals import Case, Dataset
from pydantic_evals.evaluators import Evaluator, EvaluatorContext, IsInstance

load_dotenv()

# Configure logging to logfire if LOGFIRE_TOKEN is set in environment
logfire.configure(
    send_to_logfire="if-token-present",
    environment="development",
    service_name="evals",
)

case1 = Case(
    name="simple_case",
    inputs="What is the capital of France?",
    expected_output="Paris",
    metadata={"difficulty": "easy"},
)


class MyEvaluator(Evaluator[str, str]):
    def evaluate(self, ctx: EvaluatorContext[str, str]) -> float:
        if ctx.output == ctx.expected_output:
            return 1.0
        elif (
            isinstance(ctx.output, str)
            and ctx.expected_output.lower() in ctx.output.lower()
        ):
            return 0.8
        else:
            return 0.0


dataset = Dataset(
    cases=[case1],
    evaluators=[IsInstance(type_name="str"), MyEvaluator()],
)


async def guess_city(question: str) -> str:
    return "Paris"


report = dataset.evaluate_sync(guess_city)
report.print(include_input=True, include_output=True, include_durations=False)
```
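For comparing what is sent to Logfire with what the details panel tries to render, the spans can also be mirrored to the local console. A minimal sketch of the `logfire.configure` call above, assuming the Logfire SDK's `ConsoleOptions` (adjust fields as needed):

```python
import logfire

# Sketch: additionally print spans to the local console, so the span
# attributes can be inspected alongside the panel error in the UI.
logfire.configure(
    send_to_logfire="if-token-present",
    environment="development",
    service_name="evals",
    console=logfire.ConsoleOptions(verbose=True),
)
```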
Python, Pydantic AI & LLM client version
"logfire>=3.14.0","pydantic-ai-slim[mcp]>=0.1.3","pydantic-evals[logfire]>=0.1.3",