Fix Exception #21

Merged

sonichi merged 7 commits into main from fixexception on Sep 13, 2024
Changes from 1 commit
Commit: update (213c18d22a7d28373e81fc17130e7c55719ed87d)
yiranwu0 committed Sep 2, 2024
autogen/oai/bedrock.py (8 changes: 1 addition & 7 deletions)
@@ -204,13 +204,7 @@ def create(self, params):
if len(tool_config["tools"]) > 0:
request_args["toolConfig"] = tool_config

try:
response = self.bedrock_runtime.converse(
**request_args,
)
except Exception as e:
raise RuntimeError(f"Failed to get response from Bedrock: {e}")

response = self.bedrock_runtime.converse(**request_args)
if response is None:
raise RuntimeError(f"Failed to get response from Bedrock after retrying {self._retries} times.")

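In plain terms, this hunk deletes the local try/except so that any SDK error from converse() propagates to the caller instead of being rewrapped as a RuntimeError. A minimal sketch of the resulting call site, assuming a boto3 bedrock-runtime client (the client setup and model id here are illustrative, not part of the diff):

import boto3

# Illustrative client setup; in autogen this lives on the BedrockClient instance.
bedrock_runtime = boto3.client("bedrock-runtime", region_name="us-east-1")

request_args = {
    "modelId": "anthropic.claude-3-sonnet-20240229-v1:0",  # illustrative model id
    "messages": [{"role": "user", "content": [{"text": "Hello"}]}],
}

# After this change the call is made once, with no wrapping try/except:
# throttling or validation errors from boto3 simply propagate upward.
response = bedrock_runtime.converse(**request_args)
if response is None:
    raise RuntimeError("Failed to get response from Bedrock.")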
autogen/oai/client.py (7 changes: 6 additions & 1 deletion)
@@ -50,9 +50,10 @@

try:
from autogen.oai.gemini import GeminiClient

from google.api_core.exceptions import InternalServerError, ResourceExhausted
gemini_import_exception: Optional[ImportError] = None
except ImportError as e:
InternalServerError = ResourceExhausted = None
gemini_import_exception = e

try:
@@ -758,6 +759,10 @@ def yes_or_no_filter(context, response):
raise TimeoutError(
"OpenAI API call timed out. This could be due to congestion or too small a timeout value. The timeout can be specified by setting the 'timeout' value (in seconds) in the llm_config (if you are using agents) or the OpenAIWrapper constructor (if you are using the OpenAIWrapper directly)."
) from err
except (InternalServerError, ResourceExhausted) as err:
logger.debug(f"config {i} failed", exc_info=True)
if i == last:
raise
except APIError as err:
error_code = getattr(err, "code", None)
if logging_enabled():
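The net effect of the two client.py hunks: InternalServerError and ResourceExhausted are imported from google.api_core.exceptions (set to None if the import fails), and the config loop in OpenAIWrapper.create now logs these transient Gemini errors and falls through to the next config, re-raising only on the last one. A minimal sketch of that fallback pattern; names outside the diff are illustrative, and placeholder exception classes are used instead of None so the except tuple stays valid even without google-api-core installed:

import logging

logger = logging.getLogger(__name__)

try:
    from google.api_core.exceptions import InternalServerError, ResourceExhausted
except ImportError:
    # The diff assigns None on ImportError; placeholders keep this sketch runnable.
    class InternalServerError(Exception): ...

    class ResourceExhausted(Exception): ...


def create_with_fallback(clients, params):
    """Try each configured client in order; illustrative helper, not the autogen API."""
    last = len(clients) - 1
    for i, client in enumerate(clients):
        try:
            return client.create(params)
        except (InternalServerError, ResourceExhausted):
            # Transient Gemini failure: log it and move on to the next config,
            # unless this was the last one.
            logger.debug(f"config {i} failed", exc_info=True)
            if i == last:
                raise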
autogen/oai/cohere.py (93 changes: 42 additions & 51 deletions)
@@ -173,60 +173,51 @@ def create(self, params: Dict) -> ChatCompletion:
streaming = True if "stream" in params and params["stream"] else False
cohere_finish = ""

max_retries = 5
for attempt in range(max_retries):
ans = None
try:
if streaming:
response = client.chat_stream(**cohere_params)
else:
response = client.chat(**cohere_params)
except CohereRateLimitError as e:
raise RuntimeError(f"Cohere exception occurred: {e}")
else:
ans = None
if streaming:
response = client.chat_stream(**cohere_params)
# Streaming...
ans = ""
for event in response:
if event.event_type == "text-generation":
ans = ans + event.text
elif event.event_type == "tool-calls-generation":
# When streaming, tool calls are compiled at the end into a single event_type
ans = event.text
cohere_finish = "tool_calls"
tool_calls = []
for tool_call in event.tool_calls:
tool_calls.append(
ChatCompletionMessageToolCall(
id=str(random.randint(0, 100000)),
function={
"name": tool_call.name,
"arguments": (
"" if tool_call.parameters is None else json.dumps(tool_call.parameters)
),
},
type="function",
)
)

if streaming:
# Streaming...
ans = ""
for event in response:
if event.event_type == "text-generation":
ans = ans + event.text
elif event.event_type == "tool-calls-generation":
# When streaming, tool calls are compiled at the end into a single event_type
ans = event.text
cohere_finish = "tool_calls"
tool_calls = []
for tool_call in event.tool_calls:
tool_calls.append(
ChatCompletionMessageToolCall(
id=str(random.randint(0, 100000)),
function={
"name": tool_call.name,
"arguments": (
"" if tool_call.parameters is None else json.dumps(tool_call.parameters)
),
},
type="function",
)
)

# Not using billed_units, but that may be better for cost purposes
prompt_tokens = event.response.meta.tokens.input_tokens
completion_tokens = event.response.meta.tokens.output_tokens
total_tokens = prompt_tokens + completion_tokens

response_id = event.response.response_id
else:
# Non-streaming finished
ans: str = response.text
# Not using billed_units, but that may be better for cost purposes
prompt_tokens = event.response.meta.tokens.input_tokens
completion_tokens = event.response.meta.tokens.output_tokens
total_tokens = prompt_tokens + completion_tokens

response_id = event.response.response_id
else:
response = client.chat(**cohere_params)
# Non-streaming finished
ans: str = response.text

# Not using billed_units, but that may be better for cost purposes
prompt_tokens = response.meta.tokens.input_tokens
completion_tokens = response.meta.tokens.output_tokens
total_tokens = prompt_tokens + completion_tokens
# Not using billed_units, but that may be better for cost purposes
prompt_tokens = response.meta.tokens.input_tokens
completion_tokens = response.meta.tokens.output_tokens
total_tokens = prompt_tokens + completion_tokens

response_id = response.response_id
break
response_id = response.response_id


if response is not None:

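This hunk drops the max_retries loop and the CohereRateLimitError wrapper: chat_stream / chat are called exactly once and any SDK exception propagates to the caller. A condensed sketch of the new control flow, with the tool-call handling from the diff omitted for brevity; client, streaming, and cohere_params are assumed to be set up earlier in create, as in the original file:

ans = None
if streaming:
    response = client.chat_stream(**cohere_params)
    ans = ""
    for event in response:
        if event.event_type == "text-generation":
            ans = ans + event.text
    # Token usage is read off the final event's attached response.
    prompt_tokens = event.response.meta.tokens.input_tokens
    completion_tokens = event.response.meta.tokens.output_tokens
    response_id = event.response.response_id
else:
    response = client.chat(**cohere_params)
    ans = response.text
    prompt_tokens = response.meta.tokens.input_tokens
    completion_tokens = response.meta.tokens.output_tokens
    response_id = response.response_id
total_tokens = prompt_tokens + completion_tokens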
autogen/oai/gemini.py (25 changes: 2 additions & 23 deletions)
@@ -51,7 +51,6 @@
import requests
import vertexai
from google.ai.generativelanguage import Content, Part
from google.api_core.exceptions import InternalServerError
from google.auth.credentials import Credentials
from openai.types.chat import ChatCompletion
from openai.types.chat.chat_completion import ChatCompletionMessage, Choice
@@ -222,30 +221,10 @@ def create(self, params: Dict) -> ChatCompletion:
)
genai.configure(api_key=self.api_key)
chat = model.start_chat(history=gemini_messages[:-1])
max_retries = 5
for attempt in range(max_retries):
ans = None
try:
response = chat.send_message(

response = chat.send_message(
gemini_messages[-1].parts, stream=stream, safety_settings=safety_settings
)
except InternalServerError:
delay = 5 * (2**attempt)
warnings.warn(
f"InternalServerError `500` occurs when calling Gemini's chat model. Retry in {delay} seconds...",
UserWarning,
)
time.sleep(delay)
except Exception as e:
raise RuntimeError(f"Google GenAI exception occurred while calling Gemini API: {e}")
else:
# `ans = response.text` is unstable. Use the following code instead.
ans: str = chat.history[-1].parts[0].text
break

if ans is None:
raise RuntimeError(f"Fail to get response from Google AI after retrying {attempt + 1} times.")

prompt_tokens = model.count_tokens(chat.history[:-1]).total_tokens
completion_tokens = model.count_tokens(ans).total_tokens
elif model_name == "gemini-pro-vision":
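Here the per-call retry/backoff on InternalServerError disappears, along with gemini.py's import of that exception; a 500 or quota error from the Gemini SDK now reaches OpenAIWrapper.create, which owns the retry-with-next-config behavior (see the client.py hunk above). The surviving lines, with comments added, look roughly like this:

response = chat.send_message(
    gemini_messages[-1].parts, stream=stream, safety_settings=safety_settings
)
# `ans = response.text` is unstable, so the reply is read back from the chat history.
ans: str = chat.history[-1].parts[0].text

prompt_tokens = model.count_tokens(chat.history[:-1]).total_tokens
completion_tokens = model.count_tokens(ans).total_tokens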
autogen/oai/groq.py (67 changes: 30 additions & 37 deletions)
@@ -157,47 +157,40 @@ def create(self, params: Dict) -> ChatCompletion:
streaming_tool_calls = []

ans = None
try:
response = client.chat.completions.create(**groq_params)
except Exception as e:
raise RuntimeError(f"Groq exception occurred: {e}")
else:

if groq_params["stream"]:
# Read in the chunks as they stream, taking in tool_calls which may be across
# multiple chunks if more than one suggested
ans = ""
for chunk in response:
ans = ans + (chunk.choices[0].delta.content or "")

if chunk.choices[0].delta.tool_calls:
# We have a tool call recommendation
for tool_call in chunk.choices[0].delta.tool_calls:
streaming_tool_calls.append(
ChatCompletionMessageToolCall(
id=tool_call.id,
function={
"name": tool_call.function.name,
"arguments": tool_call.function.arguments,
},
type="function",
)
response = client.chat.completions.create(**groq_params)
if groq_params["stream"]:
# Read in the chunks as they stream, taking in tool_calls which may be across
# multiple chunks if more than one suggested
ans = ""
for chunk in response:
ans = ans + (chunk.choices[0].delta.content or "")

if chunk.choices[0].delta.tool_calls:
# We have a tool call recommendation
for tool_call in chunk.choices[0].delta.tool_calls:
streaming_tool_calls.append(
ChatCompletionMessageToolCall(
id=tool_call.id,
function={
"name": tool_call.function.name,
"arguments": tool_call.function.arguments,
},
type="function",
)
)

if chunk.choices[0].finish_reason:
prompt_tokens = chunk.x_groq.usage.prompt_tokens
completion_tokens = chunk.x_groq.usage.completion_tokens
total_tokens = chunk.x_groq.usage.total_tokens
else:
# Non-streaming finished
ans: str = response.choices[0].message.content

prompt_tokens = response.usage.prompt_tokens
completion_tokens = response.usage.completion_tokens
total_tokens = response.usage.total_tokens
if chunk.choices[0].finish_reason:
prompt_tokens = chunk.x_groq.usage.prompt_tokens
completion_tokens = chunk.x_groq.usage.completion_tokens
total_tokens = chunk.x_groq.usage.total_tokens
else:
# Non-streaming finished
ans: str = response.choices[0].message.content
prompt_tokens = response.usage.prompt_tokens
completion_tokens = response.usage.completion_tokens
total_tokens = response.usage.total_tokens

if response is not None:

if isinstance(response, Stream):
# Streaming response
if chunk.choices[0].finish_reason == "tool_calls":
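As with Bedrock, the try/except around the Groq SDK call is removed and the body is de-indented; the streaming and non-streaming branches themselves are unchanged. A condensed sketch, with the tool-call accumulation omitted; client and groq_params come from earlier in create:

response = client.chat.completions.create(**groq_params)
if groq_params["stream"]:
    ans = ""
    for chunk in response:
        ans = ans + (chunk.choices[0].delta.content or "")
        if chunk.choices[0].finish_reason:
            # Usage arrives on the final streamed chunk via the x_groq extension.
            prompt_tokens = chunk.x_groq.usage.prompt_tokens
            completion_tokens = chunk.x_groq.usage.completion_tokens
            total_tokens = chunk.x_groq.usage.total_tokens
else:
    ans: str = response.choices[0].message.content
    prompt_tokens = response.usage.prompt_tokens
    completion_tokens = response.usage.completion_tokens
    total_tokens = response.usage.total_tokens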
autogen/oai/together.py (41 changes: 16 additions & 25 deletions)
@@ -157,32 +157,23 @@ def create(self, params: Dict) -> ChatCompletion:
completion_tokens = 0
total_tokens = 0

max_retries = 5
for attempt in range(max_retries):
ans = None
try:
response = client.chat.completions.create(**together_params)
except Exception as e:
raise RuntimeError(f"Together.AI exception occurred: {e}")
else:

if together_params["stream"]:
# Read in the chunks as they stream
ans = ""
for chunk in response:
ans = ans + (chunk.choices[0].delta.content or "")

prompt_tokens = chunk.usage.prompt_tokens
completion_tokens = chunk.usage.completion_tokens
total_tokens = chunk.usage.total_tokens
else:
ans: str = response.choices[0].message.content

prompt_tokens = response.usage.prompt_tokens
completion_tokens = response.usage.completion_tokens
total_tokens = response.usage.total_tokens
break
response = client.chat.completions.create(**together_params)
if together_params["stream"]:
# Read in the chunks as they stream
ans = ""
for chunk in response:
ans = ans + (chunk.choices[0].delta.content or "")

prompt_tokens = chunk.usage.prompt_tokens
completion_tokens = chunk.usage.completion_tokens
total_tokens = chunk.usage.total_tokens
else:
ans: str = response.choices[0].message.content

prompt_tokens = response.usage.prompt_tokens
completion_tokens = response.usage.completion_tokens
total_tokens = response.usage.total_tokens

if response is not None:
# If we have tool calls as the response, populate completed tool calls for our return OAI response
if response.choices[0].finish_reason == "tool_calls":
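together.py gets the same treatment as the other clients: the max_retries loop and the blanket except Exception are gone, so Together.AI SDK errors propagate and no retries are attempted locally. A condensed sketch of what remains, assuming client and together_params are prepared earlier in create:

response = client.chat.completions.create(**together_params)
if together_params["stream"]:
    ans = ""
    for chunk in response:
        ans = ans + (chunk.choices[0].delta.content or "")
    # For streamed responses, usage is taken from the last chunk seen.
    prompt_tokens = chunk.usage.prompt_tokens
    completion_tokens = chunk.usage.completion_tokens
    total_tokens = chunk.usage.total_tokens
else:
    ans: str = response.choices[0].message.content
    prompt_tokens = response.usage.prompt_tokens
    completion_tokens = response.usage.completion_tokens
    total_tokens = response.usage.total_tokens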