Automatically switch provider based on environment variables, Ollama support: closes #13 #50

Open

Le09 wants to merge 2 commits into The-Pocket:main from Le09:main
8 changes: 1 addition & 7 deletions README.md
@@ -77,13 +77,7 @@ This is a tutorial project of [Pocket Flow](https://github.com/The-Pocket/Pocket
pip install -r requirements.txt
```

-3. Set up LLM in [`utils/call_llm.py`](./utils/call_llm.py) by providing credentials. By default, you can use the AI Studio key with this client for Gemini Pro 2.5:
-
-   ```python
-   client = genai.Client(
-       api_key=os.getenv("GEMINI_API_KEY", "your-api_key"),
-   )
-   ```
+3. Set up LLM in [`utils/call_llm.py`](./utils/call_llm.py) by providing credentials. To do so, you can put the values in a `.env` file. By default, you can use an AI Studio key for Gemini 2.5 Pro by setting the `GEMINI_API_KEY` environment variable. If you want to use another LLM, set the `LLM_PROVIDER` environment variable (e.g. `XAI`), then set the model, base URL, and API key (e.g. `XAI_MODEL`, `XAI_BASE_URL`, `XAI_API_KEY`). If using Ollama, the base URL is `http://localhost:11434/` and the API key can be omitted.
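
For illustration only (not part of the diff), a `.env` file combining these variables might look like the sketch below; the Ollama model name is an assumed example:

```bash
# Default provider: Gemini via an AI Studio key
GEMINI_API_KEY=your-api-key

# Alternative: any OpenAI-compatible endpoint, e.g. a local Ollama server
LLM_PROVIDER=OLLAMA
# assumed example model name
OLLAMA_MODEL=llama3.2
OLLAMA_BASE_URL=http://localhost:11434/
# OLLAMA_API_KEY can be omitted for Ollama
```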

You can use your own models. We highly recommend the latest models with thinking capabilities (Claude 3.7 with thinking, O1). You can verify that it is correctly set up by running:
182 changes: 136 additions & 46 deletions utils/call_llm.py
@@ -2,6 +2,7 @@
import os
import logging
import json
import requests
from datetime import datetime

# Configure logging
@@ -20,69 +21,158 @@
# Simple cache configuration
cache_file = "llm_cache.json"


def load_cache():
    try:
        with open(cache_file, 'r') as f:
            return json.load(f)
    except:
        logger.warning(f"Failed to load cache.")
        return {}


def save_cache(cache):
    try:
        with open(cache_file, 'w') as f:
            json.dump(cache, f)
    except:
        logger.warning(f"Failed to save cache")


def get_llm_provider():
    provider = os.getenv("LLM_PROVIDER")
    if not provider and (os.getenv("GEMINI_PROJECT_ID") or os.getenv("GEMINI_API_KEY")):
        provider = "GEMINI"
    # if necessary, add ANTHROPIC/OPENAI
    return provider


def _call_llm_provider(prompt: str) -> str:
    """
    Call an LLM provider based on environment variables.
    Environment variables:
    - LLM_PROVIDER: "OLLAMA" or "XAI"
    - <provider>_MODEL: Model name (e.g., OLLAMA_MODEL, XAI_MODEL)
    - <provider>_BASE_URL: Base URL without endpoint (e.g., OLLAMA_BASE_URL, XAI_BASE_URL)
    - <provider>_API_KEY: API key (e.g., OLLAMA_API_KEY, XAI_API_KEY; optional for providers that don't require it)
    The endpoint /v1/chat/completions will be appended to the base URL.
    """
    logger.info(f"PROMPT: {prompt}")  # log the prompt

    # Read the provider from environment variable
    provider = os.environ.get("LLM_PROVIDER")
    if not provider:
        raise ValueError("LLM_PROVIDER environment variable is required")

    # Construct the names of the other environment variables
    model_var = f"{provider}_MODEL"
    base_url_var = f"{provider}_BASE_URL"
    api_key_var = f"{provider}_API_KEY"

    # Read the provider-specific variables
    model = os.environ.get(model_var)
    base_url = os.environ.get(base_url_var)
    api_key = os.environ.get(api_key_var, "")  # API key is optional, default to empty string

    # Validate required variables
    if not model:
        raise ValueError(f"{model_var} environment variable is required")
    if not base_url:
        raise ValueError(f"{base_url_var} environment variable is required")

    # Append the endpoint to the base URL
    url = f"{base_url.rstrip('/')}/v1/chat/completions"

    # Configure headers and payload based on provider
    headers = {
        "Content-Type": "application/json",
    }
    if api_key:  # Only add Authorization header if API key is provided
        headers["Authorization"] = f"Bearer {api_key}"

    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
    }

    try:
        response = requests.post(url, headers=headers, json=payload)
        response_json = response.json()  # Log the response
        logger.info("RESPONSE:\n%s", json.dumps(response_json, indent=2))
        # logger.info(f"RESPONSE: {response.json()}")
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]
    except requests.exceptions.HTTPError as e:
        error_message = f"HTTP error occurred: {e}"
        try:
            error_details = response.json().get("error", "No additional details")
            error_message += f" (Details: {error_details})"
        except:
            pass
        raise Exception(error_message)
    except requests.exceptions.ConnectionError:
        raise Exception(f"Failed to connect to {provider} API. Check your network connection.")
    except requests.exceptions.Timeout:
        raise Exception(f"Request to {provider} API timed out.")
    except requests.exceptions.RequestException as e:
        raise Exception(f"An error occurred while making the request to {provider}: {e}")
    except ValueError:
        raise Exception(f"Failed to parse response as JSON from {provider}. The server might have returned an invalid response.")


# By default, we use Google Gemini 2.5 Pro, as it shows great performance for code understanding
def call_llm(prompt: str, use_cache: bool = True) -> str:
    # Log the prompt
    logger.info(f"PROMPT: {prompt}")

    # Check cache if enabled
    if use_cache:
-        # Load cache from disk
-        cache = {}
-        if os.path.exists(cache_file):
-            try:
-                with open(cache_file, 'r') as f:
-                    cache = json.load(f)
-            except:
-                logger.warning(f"Failed to load cache, starting with empty cache")
+        cache = load_cache()
        # Return from cache if exists
        if prompt in cache:
            logger.info(f"RESPONSE: {cache[prompt]}")
            return cache[prompt]

    # Call the LLM if not in cache or cache disabled
-    client = genai.Client(
-        vertexai=True,
-        # TODO: change to your own project id and location
-        project=os.getenv("GEMINI_PROJECT_ID", "your-project-id"),
-        location=os.getenv("GEMINI_LOCATION", "us-central1")
-    )
-    # You can comment the previous line and use the AI Studio key instead:
-    # client = genai.Client(
-    #     api_key=os.getenv("GEMINI_API_KEY", "your-api_key"),
-    # )
-    model = os.getenv("GEMINI_MODEL", "gemini-2.5-pro-exp-03-25")
-    response = client.models.generate_content(
-        model=model,
-        contents=[prompt]
-    )
-    response_text = response.text
+    provider = get_llm_provider()
+    if provider == "GEMINI":
+        response_text = _call_llm_gemini(prompt)
+    else:  # generic method using an OpenAI-compatible API (Ollama, ...)
+        response_text = _call_llm_provider(prompt)

    # Log the response
    logger.info(f"RESPONSE: {response_text}")

    # Update cache if enabled
    if use_cache:
        # Load cache again to avoid overwrites
-        cache = {}
-        if os.path.exists(cache_file):
-            try:
-                with open(cache_file, 'r') as f:
-                    cache = json.load(f)
-            except:
-                pass
+        cache = load_cache()
        # Add to cache and save
        cache[prompt] = response_text
-        try:
-            with open(cache_file, 'w') as f:
-                json.dump(cache, f)
-        except Exception as e:
-            logger.error(f"Failed to save cache: {e}")
+        save_cache(cache)

    return response_text


def _call_llm_gemini(prompt: str) -> str:
    if os.getenv("GEMINI_PROJECT_ID"):
        client = genai.Client(
            vertexai=True,
            project=os.getenv("GEMINI_PROJECT_ID"),
            location=os.getenv("GEMINI_LOCATION", "us-central1")
        )
    elif os.getenv("GEMINI_API_KEY"):
        client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
    else:
        raise ValueError("Either GEMINI_PROJECT_ID or GEMINI_API_KEY must be set in the environment")
    model = os.getenv("GEMINI_MODEL", "gemini-2.5-pro-exp-03-25")
    response = client.models.generate_content(
        model=model,
        contents=[prompt]
    )
    return response.text

# # Use Anthropic Claude 3.7 Sonnet Extended Thinking
# def call_llm(prompt, use_cache: bool = True):
# from anthropic import Anthropic
@@ -101,7 +191,7 @@ def call_llm(prompt: str, use_cache: bool = True) -> str:
# return response.content[1].text

# # Use OpenAI o1
# def call_llm(prompt, use_cache: bool = True):
# from openai import OpenAI
# client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", "your-api-key"))
# r = client.chat.completions.create(
@@ -117,9 +207,9 @@ def call_llm(prompt: str, use_cache: bool = True) -> str:

if __name__ == "__main__":
    test_prompt = "Hello, how are you?"

    # First call - should hit the API
    print("Making call...")
    response1 = call_llm(test_prompt, use_cache=False)
    print(f"Response: {response1}")

