class LitellmModel(Model):
    """This class enables using any model via LiteLLM. LiteLLM allows you to access OpenAI,
    Anthropic, Gemini, Mistral, and many other models.
    See supported models here: [litellm models](https://docs.litellm.ai/docs/providers).
    """

    def __init__(
        self,
        model: str,
        base_url: str | None = None,
        api_key: str | None = None,
    ):
        self.model = model
        self.base_url = base_url
        self.api_key = api_key

    async def get_response(
        self,
        system_instructions: str | None,
        input: str | list[TResponseInputItem],
        model_settings: ModelSettings,
        tools: list[Tool],
        output_schema: AgentOutputSchemaBase | None,
        handoffs: list[Handoff],
        tracing: ModelTracing,
        previous_response_id: str | None = None,  # unused
        conversation_id: str | None = None,  # unused
        prompt: Any | None = None,
    ) -> ModelResponse:
        with generation_span(
            model=str(self.model),
            model_config=model_settings.to_json_dict()
            | {"base_url": str(self.base_url or ""), "model_impl": "litellm"},
            disabled=tracing.is_disabled(),
        ) as span_generation:
            response = await self._fetch_response(
                system_instructions,
                input,
                model_settings,
                tools,
                output_schema,
                handoffs,
                span_generation,
                tracing,
                stream=False,
                prompt=prompt,
            )

            message: litellm.types.utils.Message | None = None
            first_choice: litellm.types.utils.Choices | None = None
            if response.choices and len(response.choices) > 0:
                choice = response.choices[0]
                if isinstance(choice, litellm.types.utils.Choices):
                    first_choice = choice
                    message = first_choice.message

            if _debug.DONT_LOG_MODEL_DATA:
                logger.debug("Received model response")
            else:
                if message is not None:
                    logger.debug(
                        f"""LLM resp:\n{json.dumps(message.model_dump(), indent=2, ensure_ascii=False)}\n"""
                    )
                else:
                    finish_reason = first_choice.finish_reason if first_choice else "-"
                    logger.debug(f"LLM resp had no message. finish_reason: {finish_reason}")

            if hasattr(response, "usage"):
                response_usage = response.usage
                usage = (
                    Usage(
                        requests=1,
                        input_tokens=response_usage.prompt_tokens,
                        output_tokens=response_usage.completion_tokens,
                        total_tokens=response_usage.total_tokens,
                        input_tokens_details=InputTokensDetails(
                            cached_tokens=getattr(
                                response_usage.prompt_tokens_details, "cached_tokens", 0
                            )
                            or 0
                        ),
                        output_tokens_details=OutputTokensDetails(
                            reasoning_tokens=getattr(
                                response_usage.completion_tokens_details, "reasoning_tokens", 0
                            )
                            or 0
                        ),
                    )
                    if response.usage
                    else Usage()
                )
            else:
                usage = Usage()
                logger.warning("No usage information returned from Litellm")

            if tracing.include_data():
                span_generation.span_data.output = (
                    [message.model_dump()] if message is not None else []
                )
            span_generation.span_data.usage = {
                "input_tokens": usage.input_tokens,
                "output_tokens": usage.output_tokens,
            }

            items = (
                Converter.message_to_output_items(
                    LitellmConverter.convert_message_to_openai(message)
                )
                if message is not None
                else []
            )

            return ModelResponse(
                output=items,
                usage=usage,
                response_id=None,
            )

    async def stream_response(
        self,
        system_instructions: str | None,
        input: str | list[TResponseInputItem],
        model_settings: ModelSettings,
        tools: list[Tool],
        output_schema: AgentOutputSchemaBase | None,
        handoffs: list[Handoff],
        tracing: ModelTracing,
        previous_response_id: str | None = None,  # unused
        conversation_id: str | None = None,  # unused
        prompt: Any | None = None,
    ) -> AsyncIterator[TResponseStreamEvent]:
        with generation_span(
            model=str(self.model),
            model_config=model_settings.to_json_dict()
            | {"base_url": str(self.base_url or ""), "model_impl": "litellm"},
            disabled=tracing.is_disabled(),
        ) as span_generation:
            response, stream = await self._fetch_response(
                system_instructions,
                input,
                model_settings,
                tools,
                output_schema,
                handoffs,
                span_generation,
                tracing,
                stream=True,
                prompt=prompt,
            )

            final_response: Response | None = None
            async for chunk in ChatCmplStreamHandler.handle_stream(response, stream):
                yield chunk

                if chunk.type == "response.completed":
                    final_response = chunk.response

            if tracing.include_data() and final_response:
                span_generation.span_data.output = [final_response.model_dump()]

            if final_response and final_response.usage:
                span_generation.span_data.usage = {
                    "input_tokens": final_response.usage.input_tokens,
                    "output_tokens": final_response.usage.output_tokens,
                }

    @overload
    async def _fetch_response(
        self,
        system_instructions: str | None,
        input: str | list[TResponseInputItem],
        model_settings: ModelSettings,
        tools: list[Tool],
        output_schema: AgentOutputSchemaBase | None,
        handoffs: list[Handoff],
        span: Span[GenerationSpanData],
        tracing: ModelTracing,
        stream: Literal[True],
        prompt: Any | None = None,
    ) -> tuple[Response, AsyncStream[ChatCompletionChunk]]: ...

    @overload
    async def _fetch_response(
        self,
        system_instructions: str | None,
        input: str | list[TResponseInputItem],
        model_settings: ModelSettings,
        tools: list[Tool],
        output_schema: AgentOutputSchemaBase | None,
        handoffs: list[Handoff],
        span: Span[GenerationSpanData],
        tracing: ModelTracing,
        stream: Literal[False],
        prompt: Any | None = None,
    ) -> litellm.types.utils.ModelResponse: ...

    async def _fetch_response(
        self,
        system_instructions: str | None,
        input: str | list[TResponseInputItem],
        model_settings: ModelSettings,
        tools: list[Tool],
        output_schema: AgentOutputSchemaBase | None,
        handoffs: list[Handoff],
        span: Span[GenerationSpanData],
        tracing: ModelTracing,
        stream: bool = False,
        prompt: Any | None = None,
    ) -> litellm.types.utils.ModelResponse | tuple[Response, AsyncStream[ChatCompletionChunk]]:
        # Preserve reasoning messages for tool calls when reasoning is on
        # This is needed for models like Claude 4 Sonnet/Opus which support interleaved thinking
        preserve_thinking_blocks = (
            model_settings.reasoning is not None and model_settings.reasoning.effort is not None
        )

        converted_messages = Converter.items_to_messages(
            input,
            preserve_thinking_blocks=preserve_thinking_blocks,
        )

        # Fix for interleaved thinking bug: reorder messages to ensure tool_use comes before tool_result  # noqa: E501
        if "anthropic" in self.model.lower() or "claude" in self.model.lower():
            converted_messages = self._fix_tool_message_ordering(converted_messages)

        if system_instructions:
            converted_messages.insert(
                0,
                {
                    "content": system_instructions,
                    "role": "system",
                },
            )
        converted_messages = _to_dump_compatible(converted_messages)

        if tracing.include_data():
            span.span_data.input = converted_messages

        parallel_tool_calls = (
            True
            if model_settings.parallel_tool_calls and tools and len(tools) > 0
            else False
            if model_settings.parallel_tool_calls is False
            else None
        )
        tool_choice = Converter.convert_tool_choice(model_settings.tool_choice)
        response_format = Converter.convert_response_format(output_schema)

        converted_tools = [Converter.tool_to_openai(tool) for tool in tools] if tools else []

        for handoff in handoffs:
            converted_tools.append(Converter.convert_handoff_tool(handoff))

        converted_tools = _to_dump_compatible(converted_tools)

        if _debug.DONT_LOG_MODEL_DATA:
            logger.debug("Calling LLM")
        else:
            messages_json = json.dumps(
                converted_messages,
                indent=2,
                ensure_ascii=False,
            )
            tools_json = json.dumps(
                converted_tools,
                indent=2,
                ensure_ascii=False,
            )
            logger.debug(
                f"Calling Litellm model: {self.model}\n"
                f"{messages_json}\n"
                f"Tools:\n{tools_json}\n"
                f"Stream: {stream}\n"
                f"Tool choice: {tool_choice}\n"
                f"Response format: {response_format}\n"
            )

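        # Example values for the reasoning_effort resolution below (illustrative, assumed):
        #   reasoning_effort = "high"                                  # plain string, any provider
        #   reasoning_effort = {"effort": "high", "summary": "auto"}   # dict, OpenAI summary feature
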
        # Build reasoning_effort - use dict only when summary is present (OpenAI feature)
        # Otherwise pass string for backward compatibility with all providers
        reasoning_effort: dict[str, Any] | str | None = None
        if model_settings.reasoning:
            if model_settings.reasoning.summary is not None:
                # Dict format when summary is needed (OpenAI only)
                reasoning_effort = {
                    "effort": model_settings.reasoning.effort,
                    "summary": model_settings.reasoning.summary,
                }
            elif model_settings.reasoning.effort is not None:
                # String format for compatibility with all providers
                reasoning_effort = model_settings.reasoning.effort

        # Enable developers to pass non-OpenAI compatible reasoning_effort data like "none"
        # Priority order:
        # 1. model_settings.reasoning (effort + summary)
        # 2. model_settings.extra_body["reasoning_effort"]
        # 3. model_settings.extra_args["reasoning_effort"]
        if (
            reasoning_effort is None  # Unset in model_settings
            and isinstance(model_settings.extra_body, dict)
            and "reasoning_effort" in model_settings.extra_body
        ):
            reasoning_effort = model_settings.extra_body["reasoning_effort"]
        if (
            reasoning_effort is None  # Unset in both model_settings and model_settings.extra_body
            and model_settings.extra_args
            and "reasoning_effort" in model_settings.extra_args
        ):
            reasoning_effort = model_settings.extra_args["reasoning_effort"]

        stream_options = None
        if stream and model_settings.include_usage is not None:
            stream_options = {"include_usage": model_settings.include_usage}

        extra_kwargs = {}
        if model_settings.extra_query:
            extra_kwargs["extra_query"] = copy(model_settings.extra_query)
        if model_settings.metadata:
            extra_kwargs["metadata"] = copy(model_settings.metadata)
        if model_settings.extra_body and isinstance(model_settings.extra_body, dict):
            extra_kwargs.update(model_settings.extra_body)

        # Add kwargs from model_settings.extra_args, filtering out None values
        if model_settings.extra_args:
            extra_kwargs.update(model_settings.extra_args)
        # Prevent duplicate reasoning_effort kwargs when it was promoted to a top-level argument.
        extra_kwargs.pop("reasoning_effort", None)

        ret = await litellm.acompletion(
            model=self.model,
            messages=converted_messages,
            tools=converted_tools or None,
            temperature=model_settings.temperature,
            top_p=model_settings.top_p,
            frequency_penalty=model_settings.frequency_penalty,
            presence_penalty=model_settings.presence_penalty,
            max_tokens=model_settings.max_tokens,
            tool_choice=self._remove_not_given(tool_choice),
            response_format=self._remove_not_given(response_format),
            parallel_tool_calls=parallel_tool_calls,
            stream=stream,
            stream_options=stream_options,
            reasoning_effort=reasoning_effort,
            top_logprobs=model_settings.top_logprobs,
            extra_headers=self._merge_headers(model_settings),
            api_key=self.api_key,
            base_url=self.base_url,
            **extra_kwargs,
        )

        if isinstance(ret, litellm.types.utils.ModelResponse):
            return ret

        responses_tool_choice = OpenAIResponsesConverter.convert_tool_choice(
            model_settings.tool_choice
        )
        if responses_tool_choice is None or responses_tool_choice is omit:
            responses_tool_choice = "auto"

        response = Response(
            id=FAKE_RESPONSES_ID,
            created_at=time.time(),
            model=self.model,
            object="response",
            output=[],
            tool_choice=responses_tool_choice,  # type: ignore[arg-type]
            top_p=model_settings.top_p,
            temperature=model_settings.temperature,
            tools=[],
            parallel_tool_calls=parallel_tool_calls or False,
            reasoning=model_settings.reasoning,
        )
        return response, ret

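    # Illustrative example of the reordering performed by _fix_tool_message_ordering below
    # (hypothetical, abbreviated message dicts; shown only to document the intent):
    #
    #   before: [{"role": "tool", "tool_call_id": "call_1", ...},
    #            {"role": "assistant", "tool_calls": [{"id": "call_1", ...}]}]
    #   after:  [{"role": "assistant", "tool_calls": [{"id": "call_1", ...}]},
    #            {"role": "tool", "tool_call_id": "call_1", ...}]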
    def _fix_tool_message_ordering(
        self, messages: list[ChatCompletionMessageParam]
    ) -> list[ChatCompletionMessageParam]:
        """
        Fix the ordering of tool messages to ensure tool_use messages come before
        tool_result messages.

        This addresses the interleaved thinking bug where conversation histories may contain
        tool results before their corresponding tool calls, causing the Anthropic API to
        reject the request.
        """
        if not messages:
            return messages

        # Collect all tool calls and tool results
        tool_call_messages = {}  # tool_id -> (index, message)
        tool_result_messages = {}  # tool_id -> (index, message)
        other_messages = []  # (index, message) for non-tool messages

        for i, message in enumerate(messages):
            if not isinstance(message, dict):
                other_messages.append((i, message))
                continue

            role = message.get("role")

            if role == "assistant" and message.get("tool_calls"):
                # Extract tool calls from this assistant message
                tool_calls = message.get("tool_calls", [])
                if isinstance(tool_calls, list):
                    for tool_call in tool_calls:
                        if isinstance(tool_call, dict):
                            tool_id = tool_call.get("id")
                            if tool_id:
                                # Create a separate assistant message for each tool call
                                single_tool_msg = cast(dict[str, Any], message.copy())
                                single_tool_msg["tool_calls"] = [tool_call]
                                tool_call_messages[tool_id] = (
                                    i,
                                    cast(ChatCompletionMessageParam, single_tool_msg),
                                )
            elif role == "tool":
                tool_call_id = message.get("tool_call_id")
                if tool_call_id:
                    tool_result_messages[tool_call_id] = (i, message)
                else:
                    other_messages.append((i, message))
            else:
                other_messages.append((i, message))

        # First, identify which tool results will be paired to avoid duplicates
        paired_tool_result_indices = set()
        for tool_id in tool_call_messages:
            if tool_id in tool_result_messages:
                tool_result_idx, _ = tool_result_messages[tool_id]
                paired_tool_result_indices.add(tool_result_idx)

        # Create the fixed message sequence
        fixed_messages: list[ChatCompletionMessageParam] = []
        used_indices = set()

        # Add messages in their original order, but ensure tool_use → tool_result pairing
        for i, original_message in enumerate(messages):
            if i in used_indices:
                continue

            if not isinstance(original_message, dict):
                fixed_messages.append(original_message)
                used_indices.add(i)
                continue

            role = original_message.get("role")

            if role == "assistant" and original_message.get("tool_calls"):
                # Process each tool call in this assistant message
                tool_calls = original_message.get("tool_calls", [])
                if isinstance(tool_calls, list):
                    for tool_call in tool_calls:
                        if isinstance(tool_call, dict):
                            tool_id = tool_call.get("id")
                            if (
                                tool_id
                                and tool_id in tool_call_messages
                                and tool_id in tool_result_messages
                            ):
                                # Add tool_use → tool_result pair
                                _, tool_call_msg = tool_call_messages[tool_id]
                                tool_result_idx, tool_result_msg = tool_result_messages[tool_id]

                                fixed_messages.append(tool_call_msg)
                                fixed_messages.append(tool_result_msg)

                                # Mark both as used
                                used_indices.add(tool_call_messages[tool_id][0])
                                used_indices.add(tool_result_idx)
                            elif tool_id and tool_id in tool_call_messages:
                                # Tool call without result - add just the tool call
                                _, tool_call_msg = tool_call_messages[tool_id]
                                fixed_messages.append(tool_call_msg)
                                used_indices.add(tool_call_messages[tool_id][0])

                used_indices.add(i)  # Mark original multi-tool message as used
            elif role == "tool":
                # Only preserve unmatched tool results to avoid duplicates
                if i not in paired_tool_result_indices:
                    fixed_messages.append(original_message)
                    used_indices.add(i)
            else:
                # Regular message - add it normally
                fixed_messages.append(original_message)
                used_indices.add(i)

        return fixed_messages

    def _remove_not_given(self, value: Any) -> Any:
        if value is omit or isinstance(value, NotGiven):
            return None
        return value

    def _merge_headers(self, model_settings: ModelSettings):
        return {
            **HEADERS,
            **(model_settings.extra_headers or {}),
            **(HEADERS_OVERRIDE.get() or {}),
        }
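

# Illustrative usage sketch (not part of the library API): shows one way to plug LitellmModel
# into an Agent. It assumes the `agents` package is installed with its litellm extra, that a
# provider key such as ANTHROPIC_API_KEY is set in the environment, and that the model name
# below is replaced with any LiteLLM-supported "provider/model" string you have access to.
if __name__ == "__main__":
    from agents import Agent, Runner

    demo_agent = Agent(
        name="assistant",
        instructions="Reply concisely.",
        model=LitellmModel(model="anthropic/claude-3-5-sonnet-20240620"),
    )
    demo_result = Runner.run_sync(demo_agent, "Say hello in one short sentence.")
    print(demo_result.final_output)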