"""Codeflash-generated regression tests for ``_map_usage``.

Two generated test modules are concatenated here.  The tests build mock
usage/message objects, call ``_map_usage`` and store the result in
``codeflash_output``; they contain no ``assert`` statements.  The
``expected`` values document the intended mapping only.  The trailing
timing comments are the measurements recorded in the original report.
"""
from types import SimpleNamespace

# imports
import pytest  # used for our unit tests
from pydantic_ai.models.anthropic import _map_usage

# function to test
# --- Mocking the required classes and usage.Usage for testability ---
# In real usage, these would be imported as in the provided code, but for testing, we define mocks.


class Usage:
    """Mock of usage.Usage, stores token counts and details."""

    def __init__(self, request_tokens=None, response_tokens=None, total_tokens=None, details=None):
        self.request_tokens = request_tokens
        self.response_tokens = response_tokens
        self.total_tokens = total_tokens
        self.details = details

    def __eq__(self, other):
        if not isinstance(other, Usage):
            return False
        return (
            self.request_tokens == other.request_tokens
            and self.response_tokens == other.response_tokens
            and self.total_tokens == other.total_tokens
            and self.details == other.details
        )

    def __repr__(self):
        return (
            f"Usage(request_tokens={self.request_tokens}, "
            f"response_tokens={self.response_tokens}, "
            f"total_tokens={self.total_tokens}, "
            f"details={self.details})"
        )


# Helper for usage-like objects
class UsageData:
    """Attribute bag whose ``model_dump()`` returns a copy of its fields."""

    def __init__(self, **kwargs):
        self.__dict__.update(kwargs)

    def model_dump(self):
        return self.__dict__.copy()


# Mock BetaMessage and event classes
class BetaMessage:
    def __init__(self, usage):
        self.usage = usage


class BetaRawMessageStartEvent:
    def __init__(self, message):
        self.message = message


class BetaRawMessageDeltaEvent:
    def __init__(self, usage):
        self.usage = usage


# Simulate other BetaRawMessageStreamEvent types (not containing usage)
class BetaRawMessageStreamEvent:
    pass


from pydantic_ai.models.anthropic import _map_usage

# ------------------- UNIT TESTS -------------------

# 1. BASIC TEST CASES


def test_basic_beta_message_all_tokens():
    """Test BetaMessage with all token types present."""
    usage_obj = UsageData(
        input_tokens=10,
        output_tokens=20,
        cache_creation_input_tokens=0,
        cache_read_input_tokens=0,
        other_info=123,  # int-valued extra field; the expected details below keep it
    )
    msg = BetaMessage(usage_obj)
    expected = Usage(
        request_tokens=10,
        response_tokens=20,
        total_tokens=30,
        details={
            'input_tokens': 10,
            'output_tokens': 20,
            'cache_creation_input_tokens': 0,
            'cache_read_input_tokens': 0,
            'other_info': 123,
        },
    )
    codeflash_output = _map_usage(msg)  # 2.96μs -> 567ns (422% faster)
    result = codeflash_output


def test_basic_start_event():
    """Test BetaRawMessageStartEvent with usage present."""
    usage_obj = UsageData(
        input_tokens=5,
        output_tokens=7,
        cache_creation_input_tokens=3,
        cache_read_input_tokens=2,
    )
    msg = BetaRawMessageStartEvent(BetaMessage(usage_obj))
    expected = Usage(
        request_tokens=5 + 3 + 2,
        response_tokens=7,
        total_tokens=5 + 3 + 2 + 7,
        details={
            'input_tokens': 5,
            'output_tokens': 7,
            'cache_creation_input_tokens': 3,
            'cache_read_input_tokens': 2,
        },
    )
    codeflash_output = _map_usage(msg)  # 2.65μs -> 552ns (380% faster)
    result = codeflash_output


def test_basic_delta_event():
    """Test BetaRawMessageDeltaEvent with minimal usage info."""
    usage_obj = UsageData(output_tokens=4)
    msg = BetaRawMessageDeltaEvent(usage_obj)
    expected = Usage(
        request_tokens=None,
        response_tokens=4,
        total_tokens=0 + 4,
        details={'output_tokens': 4},
    )
    codeflash_output = _map_usage(msg)  # 2.86μs -> 551ns (420% faster)
    result = codeflash_output


# 2. EDGE TEST CASES


def test_edge_zero_tokens():
    """Test with all token counts zero."""
    usage_obj = UsageData(
        input_tokens=0,
        output_tokens=0,
        cache_creation_input_tokens=0,
        cache_read_input_tokens=0,
    )
    msg = BetaMessage(usage_obj)
    expected = Usage(
        request_tokens=None,
        response_tokens=0,
        total_tokens=0,
        details={
            'input_tokens': 0,
            'output_tokens': 0,
            'cache_creation_input_tokens': 0,
            'cache_read_input_tokens': 0,
        },
    )
    codeflash_output = _map_usage(msg)  # 2.58μs -> 524ns (391% faster)
    result = codeflash_output


def test_edge_missing_input_tokens():
    """Test when input_tokens are missing, but cache tokens present."""
    usage_obj = UsageData(
        output_tokens=9,
        cache_creation_input_tokens=4,
        cache_read_input_tokens=0,
    )
    msg = BetaMessage(usage_obj)
    expected = Usage(
        request_tokens=4,
        response_tokens=9,
        total_tokens=13,
        details={
            'output_tokens': 9,
            'cache_creation_input_tokens': 4,
            'cache_read_input_tokens': 0,
        },
    )
    codeflash_output = _map_usage(msg)  # 2.63μs -> 570ns (361% faster)
    result = codeflash_output


def test_edge_missing_all_input_tokens():
    """Test when all input token fields are missing."""
    usage_obj = UsageData(
        output_tokens=12,
    )
    msg = BetaMessage(usage_obj)
    expected = Usage(
        request_tokens=None,
        response_tokens=12,
        total_tokens=12,
        details={'output_tokens': 12},
    )
    codeflash_output = _map_usage(msg)  # 2.69μs -> 536ns (403% faster)
    result = codeflash_output


def test_edge_non_integer_fields():
    """Test that non-integer fields are ignored in details."""
    usage_obj = UsageData(
        input_tokens=1,
        output_tokens=2,
        info="not_an_int",
        another=None,
    )
    msg = BetaMessage(usage_obj)
    expected = Usage(
        request_tokens=1,
        response_tokens=2,
        total_tokens=3,
        details={'input_tokens': 1, 'output_tokens': 2},
    )
    codeflash_output = _map_usage(msg)  # 2.55μs -> 539ns (372% faster)
    result = codeflash_output


def test_edge_negative_tokens():
    """Test negative token counts (should be included as-is in details)."""
    usage_obj = UsageData(
        input_tokens=-5,
        output_tokens=-10,
        cache_creation_input_tokens=-2,
        cache_read_input_tokens=-3,
    )
    msg = BetaMessage(usage_obj)
    expected = Usage(
        request_tokens=-5 + -2 + -3,
        response_tokens=-10,
        total_tokens=-5 + -2 + -3 + -10,
        details={
            'input_tokens': -5,
            'output_tokens': -10,
            'cache_creation_input_tokens': -2,
            'cache_read_input_tokens': -3,
        },
    )
    codeflash_output = _map_usage(msg)  # 2.60μs -> 573ns (354% faster)
    result = codeflash_output


def test_edge_no_usage_info():
    """Test BetaRawMessageStreamEvent with no usage info (should return empty Usage)."""
    msg = BetaRawMessageStreamEvent()
    expected = Usage()
    codeflash_output = _map_usage(msg)  # 2.76μs -> 545ns (406% faster)
    result = codeflash_output


def test_edge_details_none_when_empty():
    """Test that details is None when there are no int fields."""
    usage_obj = UsageData(
        info="abc",
        data=None,
    )
    msg = BetaMessage(usage_obj)
    expected = Usage(
        request_tokens=None,
        response_tokens=None,
        total_tokens=None,
        details=None,
    )
    codeflash_output = _map_usage(msg)  # 2.72μs -> 584ns (365% faster)
    result = codeflash_output


def test_edge_output_tokens_zero_but_input_present():
    """Test output_tokens is zero but input tokens are present."""
    usage_obj = UsageData(
        input_tokens=7,
        output_tokens=0,
    )
    msg = BetaMessage(usage_obj)
    expected = Usage(
        request_tokens=7,
        response_tokens=0,
        total_tokens=7,
        details={'input_tokens': 7, 'output_tokens': 0},
    )
    codeflash_output = _map_usage(msg)  # 2.57μs -> 565ns (355% faster)
    result = codeflash_output


# 3. LARGE SCALE TEST CASES


def test_large_many_fields():
    """Test with a large number of integer fields in usage."""
    # 100 fields: input_tokens, output_tokens, cache_creation_input_tokens, cache_read_input_tokens, plus 96 extras
    usage_kwargs = {
        'input_tokens': 100,
        'output_tokens': 200,
        'cache_creation_input_tokens': 300,
        'cache_read_input_tokens': 400,
    }
    # Add 96 more integer fields
    for i in range(1, 97):
        usage_kwargs[f'extra_field_{i}'] = i
    usage_obj = UsageData(**usage_kwargs)
    msg = BetaMessage(usage_obj)
    expected_details = usage_kwargs.copy()
    expected = Usage(
        request_tokens=100 + 300 + 400,
        response_tokens=200,
        total_tokens=100 + 300 + 400 + 200,
        details=expected_details,
    )
    codeflash_output = _map_usage(msg)  # 2.78μs -> 572ns (386% faster)
    result = codeflash_output


def test_large_high_token_counts():
    """Test with very high token counts to check for integer overflow or performance issues."""
    big = 10**9
    usage_obj = UsageData(
        input_tokens=big,
        output_tokens=big,
        cache_creation_input_tokens=big,
        cache_read_input_tokens=big,
    )
    msg = BetaMessage(usage_obj)
    expected = Usage(
        request_tokens=big * 3,
        response_tokens=big,
        total_tokens=big * 4,
        details={
            'input_tokens': big,
            'output_tokens': big,
            'cache_creation_input_tokens': big,
            'cache_read_input_tokens': big,
        },
    )
    codeflash_output = _map_usage(msg)  # 2.71μs -> 515ns (427% faster)
    result = codeflash_output


def test_large_delta_event_no_input_tokens():
    """Test BetaRawMessageDeltaEvent with large output_tokens and no input token fields."""
    usage_obj = UsageData(output_tokens=999_999_999)
    msg = BetaRawMessageDeltaEvent(usage_obj)
    expected = Usage(
        request_tokens=None,
        response_tokens=999_999_999,
        total_tokens=999_999_999,
        details={'output_tokens': 999_999_999},
    )
    codeflash_output = _map_usage(msg)  # 2.62μs -> 518ns (407% faster)
    result = codeflash_output


def test_large_many_stream_events():
    """Test mapping over a list of 1000 BetaMessage objects."""
    # Only test the function's scalability, not correctness of aggregation
    messages = [
        BetaMessage(
            UsageData(
                input_tokens=i,
                output_tokens=i * 2,
                cache_creation_input_tokens=0,
                cache_read_input_tokens=0,
            )
        )
        for i in range(1000)
    ]
    for i, msg in enumerate(messages):
        expected = Usage(
            request_tokens=i,
            response_tokens=i * 2,
            total_tokens=i + i * 2,
            details={
                'input_tokens': i,
                'output_tokens': i * 2,
                'cache_creation_input_tokens': 0,
                'cache_read_input_tokens': 0,
            },
        )
        codeflash_output = _map_usage(msg)  # 592μs -> 140μs (320% faster)
        result = codeflash_output


def test_large_sparse_fields():
    """Test with 1000 fields, only a few are integers."""
    usage_kwargs = {f'field_{i}': 'x' for i in range(1000)}
    usage_kwargs['input_tokens'] = 42
    usage_kwargs['output_tokens'] = 99
    usage_obj = UsageData(**usage_kwargs)
    msg = BetaMessage(usage_obj)
    expected = Usage(
        request_tokens=42,
        response_tokens=99,
        total_tokens=141,
        details={'input_tokens': 42, 'output_tokens': 99},
    )
    codeflash_output = _map_usage(msg)  # 2.89μs -> 566ns (411% faster)
    result = codeflash_output


# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.

# ---------------------------------------------------------------------------
# Second generated test module (originally a separate listing).
# ---------------------------------------------------------------------------
import pytest
from pydantic_ai.models.anthropic import _map_usage


# Simulate minimal versions of the required classes and usage.Usage for testing purposes
class Usage:
    def __init__(self, request_tokens=None, response_tokens=None, total_tokens=None, details=None):
        self.request_tokens = request_tokens
        self.response_tokens = response_tokens
        self.total_tokens = total_tokens
        self.details = details

    def __eq__(self, other):
        if not isinstance(other, Usage):
            return False
        return (
            self.request_tokens == other.request_tokens
            and self.response_tokens == other.response_tokens
            and self.total_tokens == other.total_tokens
            and self.details == other.details
        )

    def __repr__(self):
        return (
            f"Usage(request_tokens={self.request_tokens}, "
            f"response_tokens={self.response_tokens}, "
            f"total_tokens={self.total_tokens}, "
            f"details={self.details})"
        )


class DummyUsage:
    def __init__(self, **kwargs):
        self.__dict__.update(kwargs)

    def model_dump(self):
        return dict(self.__dict__)


# BetaMessage: has .usage
class BetaMessage:
    def __init__(self, usage):
        self.usage = usage


# BetaRawMessageStartEvent: has .message.usage
class BetaRawMessageStartEvent:
    def __init__(self, message):
        self.message = message


# BetaRawMessageDeltaEvent: has .usage
class BetaRawMessageDeltaEvent:
    def __init__(self, usage):
        self.usage = usage


# BetaRawMessageStreamEvent: parent class, not directly used except for type checking
class BetaRawMessageStreamEvent:
    pass


# Other event types (simulate as subclasses of BetaRawMessageStreamEvent)
class RawMessageStopEvent(BetaRawMessageStreamEvent):
    pass


class RawContentBlockStartEvent(BetaRawMessageStreamEvent):
    pass


class RawContentBlockDeltaEvent(BetaRawMessageStreamEvent):
    pass


class RawContentBlockStopEvent(BetaRawMessageStreamEvent):
    pass


# Patch the usage module for test
class usage:
    Usage = Usage


from pydantic_ai.models.anthropic import _map_usage

# -----------------------------
# Unit Tests
# -----------------------------

# 1. Basic Test Cases


def test_basic_betamessage_minimal():
    # Basic BetaMessage with only input_tokens and output_tokens
    usage_obj = DummyUsage(input_tokens=10, output_tokens=5)
    msg = BetaMessage(usage_obj)
    expected = Usage(
        request_tokens=10,
        response_tokens=5,
        total_tokens=15,
        details={'input_tokens': 10, 'output_tokens': 5},
    )
    codeflash_output = _map_usage(msg)  # 2.90μs -> 571ns (407% faster)


def test_basic_betarawmessagestartevent():
    # BetaRawMessageStartEvent with nested message.usage
    usage_obj = DummyUsage(input_tokens=20, output_tokens=7)
    msg = BetaRawMessageStartEvent(BetaMessage(usage_obj))
    expected = Usage(
        request_tokens=20,
        response_tokens=7,
        total_tokens=27,
        details={'input_tokens': 20, 'output_tokens': 7},
    )
    codeflash_output = _map_usage(msg)  # 2.92μs -> 623ns (368% faster)


def test_basic_betarawmessagedeltaevent():
    # BetaRawMessageDeltaEvent with only output_tokens, no input tokens
    usage_obj = DummyUsage(output_tokens=12)
    msg = BetaRawMessageDeltaEvent(usage_obj)
    expected = Usage(
        request_tokens=None,
        response_tokens=12,
        total_tokens=12,
        details={'output_tokens': 12},
    )
    codeflash_output = _map_usage(msg)  # 2.58μs -> 551ns (368% faster)


def test_basic_multiple_token_types():
    # BetaMessage with input_tokens, cache_creation_input_tokens, cache_read_input_tokens, output_tokens
    usage_obj = DummyUsage(
        input_tokens=5,
        cache_creation_input_tokens=7,
        cache_read_input_tokens=3,
        output_tokens=2,
    )
    msg = BetaMessage(usage_obj)
    # request_tokens = 5 + 7 + 3 = 15
    expected = Usage(
        request_tokens=15,
        response_tokens=2,
        total_tokens=17,
        details={
            'input_tokens': 5,
            'cache_creation_input_tokens': 7,
            'cache_read_input_tokens': 3,
            'output_tokens': 2,
        },
    )
    codeflash_output = _map_usage(msg)  # 2.69μs -> 557ns (382% faster)


# 2. Edge Test Cases


# Renamed from test_edge_zero_tokens: the same name is already defined above,
# and a second definition would shadow it so only one of the two would run.
def test_edge_zero_tokens_input_output_only():
    # All tokens are zero
    usage_obj = DummyUsage(input_tokens=0, output_tokens=0)
    msg = BetaMessage(usage_obj)
    expected = Usage(
        request_tokens=None,  # 0 or None, but function uses None if 0
        response_tokens=0,
        total_tokens=0,
        details={'input_tokens': 0, 'output_tokens': 0},
    )
    codeflash_output = _map_usage(msg)  # 2.68μs -> 636ns (322% faster)


def test_edge_non_integer_usage_values():
    # Details should only keep integer values
    usage_obj = DummyUsage(
        input_tokens=10,
        output_tokens=2,
        foo="bar",
        bar=3.14,
    )
    msg = BetaMessage(usage_obj)
    expected = Usage(
        request_tokens=10,
        response_tokens=2,
        total_tokens=12,
        details={'input_tokens': 10, 'output_tokens': 2},
    )
    codeflash_output = _map_usage(msg)  # 2.93μs -> 600ns (388% faster)


def test_edge_extra_keys_in_usage():
    # Extra keys in usage, only integers are kept in details
    usage_obj = DummyUsage(
        input_tokens=8,
        output_tokens=4,
        irrelevant=None,
        another='string',
    )
    msg = BetaMessage(usage_obj)
    expected = Usage(
        request_tokens=8,
        response_tokens=4,
        total_tokens=12,
        details={'input_tokens': 8, 'output_tokens': 4},
    )
    codeflash_output = _map_usage(msg)  # 2.74μs -> 532ns (415% faster)


def test_edge_no_usage_in_event():
    # Event types with no usage info should return Usage() with all None
    for event_cls in [
        RawMessageStopEvent,
        RawContentBlockStartEvent,
        RawContentBlockDeltaEvent,
        RawContentBlockStopEvent,
    ]:
        msg = event_cls()
        expected = Usage()
        codeflash_output = _map_usage(msg)  # 4.91μs -> 1.04μs (371% faster)


# Renamed from test_edge_negative_tokens: the same name is already defined
# above, and a second definition would shadow it.
def test_edge_negative_tokens_input_output_only():
    # Negative tokens (should be included as-is)
    usage_obj = DummyUsage(input_tokens=-3, output_tokens=-2)
    msg = BetaMessage(usage_obj)
    expected = Usage(
        request_tokens=-3,
        response_tokens=-2,
        total_tokens=-5,
        details={'input_tokens': -3, 'output_tokens': -2},
    )
    codeflash_output = _map_usage(msg)  # 2.93μs -> 581ns (405% faster)


def test_edge_large_token_values():
    # Very large token counts
    usage_obj = DummyUsage(input_tokens=10**9, output_tokens=10**9)
    msg = BetaMessage(usage_obj)
    expected = Usage(
        request_tokens=10**9,
        response_tokens=10**9,
        total_tokens=2 * 10**9,
        details={'input_tokens': 10**9, 'output_tokens': 10**9},
    )
    codeflash_output = _map_usage(msg)  # 2.67μs -> 503ns (430% faster)


def test_edge_all_token_types_missing():
    # No input_tokens, no cache tokens, only output_tokens
    usage_obj = DummyUsage(output_tokens=7)
    msg = BetaMessage(usage_obj)
    expected = Usage(
        request_tokens=None,
        response_tokens=7,
        total_tokens=7,
        details={'output_tokens': 7},
    )
    codeflash_output = _map_usage(msg)  # 2.78μs -> 517ns (438% faster)


# 3. Large Scale Test Cases


def test_large_scale_many_token_types():
    # Simulate a usage object with many integer keys (simulate up to 1000 keys)
    usage_dict = {f'key_{i}': i for i in range(1000)}
    usage_dict['input_tokens'] = 500
    usage_dict['output_tokens'] = 300
    usage_obj = DummyUsage(**usage_dict)
    msg = BetaMessage(usage_obj)
    # request_tokens = input_tokens + (cache creation/read tokens, which are 0)
    expected_details = {k: v for k, v in usage_dict.items() if isinstance(v, int)}
    expected = Usage(
        request_tokens=500,
        response_tokens=300,
        total_tokens=800,
        details=expected_details,
    )
    codeflash_output = _map_usage(msg)  # 2.67μs -> 586ns (356% faster)


def test_large_scale_high_token_counts():
    # Simulate high token counts for all three input token types
    usage_obj = DummyUsage(
        input_tokens=300,
        cache_creation_input_tokens=400,
        cache_read_input_tokens=200,
        output_tokens=1000,
    )
    msg = BetaMessage(usage_obj)
    expected = Usage(
        request_tokens=300 + 400 + 200,
        response_tokens=1000,
        total_tokens=1900,
        details={
            'input_tokens': 300,
            'cache_creation_input_tokens': 400,
            'cache_read_input_tokens': 200,
            'output_tokens': 1000,
        },
    )
    codeflash_output = _map_usage(msg)  # 2.69μs -> 562ns (379% faster)


def test_large_scale_batch_of_events():
    # Test a batch of 100 different BetaMessages
    for i in range(1, 101):
        usage_obj = DummyUsage(input_tokens=i, output_tokens=2 * i)
        msg = BetaMessage(usage_obj)
        expected = Usage(
            request_tokens=i,
            response_tokens=2 * i,
            total_tokens=3 * i,
            details={'input_tokens': i, 'output_tokens': 2 * i},
        )
        codeflash_output = _map_usage(msg)  # 62.1μs -> 14.8μs (320% faster)


def test_large_scale_betarawmessagedeltaevent_batch():
    # Test a batch of BetaRawMessageDeltaEvent with only output_tokens
    for i in range(1, 101):
        usage_obj = DummyUsage(output_tokens=i)
        msg = BetaRawMessageDeltaEvent(usage_obj)
        expected = Usage(
            request_tokens=None,
            response_tokens=i,
            total_tokens=i,
            details={'output_tokens': i},
        )
        codeflash_output = _map_usage(msg)  # 62.0μs -> 14.6μs (324% faster)
Saurabh's comments: The type checks are expensive, and this function appears to be called very frequently.
This speeds up the test `models/test_anthropic.py::test_usage` by 25%.

📄 172% (1.72x) speedup for `_map_usage` in `pydantic_ai_slim/pydantic_ai/models/anthropic.py`
⏱️ Runtime: 1.12 milliseconds → 413 microseconds (best of 76 runs)

📝 Explanation and details
Here is an optimized version of the provided program.
Key optimizations with rationale:
- Fewer `isinstance()` checks by merging logic.
- Fewer `details.get()` calls and reuse of variables.
- Avoids constructing a new `Usage()` when possible.
- Moves `model_dump().items()` out of the dict comprehension when it's not needed.

Summary of changes:
- Uses `_EMPTY_USAGE` for "no details" paths—eliminates unnecessary object allocations.

This implementation should provide improved (lower) latency per call, specifically for high-throughput scenarios.
✅ Correctness verification report:
⚙️ Existing Unit Tests and Runtime
models/test_anthropic.py::test_usage
test_pytest_inlinesnapshotdisable_testsproviderstest_bedrock_py_testsproviderstest_google_gla_py_teststes__replay_test_0.py::test_pydantic_ai_models_anthropic__map_usage
test_pytest_inlinesnapshotdisable_teststest_messages_py_teststest_mcp_py_teststest_deps_py__replay_test_0.py::test_pydantic_ai_models_anthropic__map_usage
🌀 Generated Regression Tests and Runtime
⏪ Replay Tests and Runtime
models/test_anthropic.py::test_usage
test_pytest_inlinesnapshotdisable_testsproviderstest_bedrock_py_testsproviderstest_google_gla_py_teststes__replay_test_0.py::test_pydantic_ai_models_anthropic__map_usage
test_pytest_inlinesnapshotdisable_teststest_messages_py_teststest_mcp_py_teststest_deps_py__replay_test_0.py::test_pydantic_ai_models_anthropic__map_usage
To edit these changes, run `git checkout codeflash/optimize-_map_usage-md0d6dgh` and push.