- Notifications
You must be signed in to change notification settings - Fork328
Added rag-retrieval-timing-tests#1361
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to ourterms of service andprivacy statement. We’ll occasionally send you account related emails.
Already on GitHub?Sign in to your account
Open
SilasMarvin wants to merge1 commit intomasterChoose a base branch fromsilas-rag-retrieval-timing-tests
base:master
Could not load branches
Branch not found:{{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline, and old review comments may become outdated.
Uh oh!
There was an error while loading.Please reload this page.
Open
Changes fromall commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Uh oh!
There was an error while loading.Please reload this page.
Jump to
Jump to file
Failed to load files.
Loading
Uh oh!
There was an error while loading.Please reload this page.
Diff view
Diff view
There are no files selected for viewing
6 changes: 6 additions & 0 deletionspgml-apps/rag-retrieval-timing-tests/.env.development
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
PINECONE_API_KEY= | ||
QDRANT_API_KEY= | ||
ZILLIZ_API_KEY= | ||
WCS_API_KEY= | ||
OPENAI_API_KEY= | ||
HF_TOKEN= |
7 changes: 7 additions & 0 deletionspgml-apps/rag-retrieval-timing-tests/README.md
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# RAG Timing Tests | ||
This script runs timing tests for common RAG (retrieval-augmented generation) systems. | ||
To run it, copy `.env.development` to `.env` and make sure to set the appropriate variables in the `.env` file, install the dependencies in `requirements.txt` and run `python3 __main__.py`. | ||
Notice that this script assumes certain actions to create databases or setup "collections" have been performed for each cloud provider. See the script for more details. |
161 changes: 161 additions & 0 deletionspgml-apps/rag-retrieval-timing-tests/__main__.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,161 @@ | ||
import time | ||
import asyncio | ||
import postgresml as pgl | ||
import zilliz_local as zl | ||
import pinecone_local as pl | ||
import qdrant_local as ql | ||
import openai_local as al | ||
import huggingface as hf | ||
import weaviate_local as wl | ||
# Number of timed RAG round-trips per provider; final results are averaged
# over this many trials.
TRIAL_COUNT = 2

# The pairs we are testing with
# Flag semantics (consumed by do_data_upsert / do_normal_rag_test):
# - "rag++": the vector store embeds, searches AND completes in one call
# - "rag+": the vector store embeds + searches; completion via chatbot_service
# - neither: embedding_service embeds, vector_store searches, chatbot_service
#   completes — three separate calls
# - "async": the store's functions are coroutines and are run via asyncio.run
tests = [
    {
        "name": "PostgresML",
        "vector_store": pgl,
        "rag+": True,
        "chatbot_service": al,
        "async": True,
    },
    {"name": "Weaviate", "vector_store": wl, "chatbot_service": al, "rag++": True},
    {
        "name": "Zilliz",
        "vector_store": zl,
        "embedding_service": hf,
        "chatbot_service": al,
    },
    {
        "name": "Pinecone",
        "vector_store": pl,
        "embedding_service": hf,
        "chatbot_service": al,
    },
    {
        "name": "Qdrant",
        "vector_store": ql,
        "embedding_service": hf,
        "chatbot_service": al,
    },
]

# Our documents
# We only really need to test on 2. When we search we are trying to get the first document back
documents = [
    {"id": "0", "metadata": {"text": "The hidden value is 1000"}},
    {
        "id": "1",
        "metadata": {"text": "This is just some random text"},
    },
]
def maybe_do_async(func, check_dict, *args):
    """Invoke *func* with *args*, driving it on an event loop when the test
    config (*check_dict*) marks the provider as async.

    Returns whatever *func* produces (awaited first if async).
    """
    run_on_loop = check_dict.get("async", False)
    if run_on_loop:
        # func is a coroutine function for async providers (e.g. PostgresML)
        return asyncio.run(func(*args))
    return func(*args)
def do_data_upsert(name, vector_store, **kwargs):
    """Load the shared test documents into *vector_store*.

    Stores flagged "rag++"/"rag+" embed server-side and receive the raw
    documents; everything else gets client-side embeddings computed via the
    configured embedding_service.
    """
    print(f"Doing Data Upsert For: {name}")
    embeds_server_side = "rag++" in kwargs or "rag+" in kwargs
    if embeds_server_side:
        maybe_do_async(vector_store.upsert_data, kwargs, documents)
    else:
        texts = [doc["metadata"]["text"] for doc in documents]
        (embeddings, _embed_seconds) = kwargs["embedding_service"].get_embeddings(texts)
        maybe_do_async(vector_store.upsert_data, kwargs, documents, embeddings)
    print(f"Done Doing Data Upsert For: {name}\n")
def do_normal_rag_test(name, vector_store, **kwargs):
    """Run one timed RAG round-trip against *vector_store* and print/return
    the timing breakdown.

    Returns a dict with time_to_embed, time_to_search, time_for_retrieval,
    time_to_complete and total_time (seconds).
    """
    print(f"Doing RAG Test For: {name}")
    query = "What is the hidden value?"
    if "rag++" in kwargs:
        # Store embeds, searches and completes in a single call.
        (result, time_to_complete) = maybe_do_async(
            vector_store.get_llm_response, kwargs, query
        )
        time_to_embed = 0
        time_to_search = 0
    elif "rag+" in kwargs:
        # Store embeds + searches; completion is a separate chatbot call.
        time_to_embed = 0
        (context, time_to_search) = maybe_do_async(vector_store.do_search, kwargs, query)
        (result, time_to_complete) = kwargs["chatbot_service"].get_llm_response(
            query, context
        )
    else:
        # Fully separate embed / search / complete pipeline.
        (embeddings, time_to_embed) = kwargs["embedding_service"].get_embeddings([query])
        (context, time_to_search) = vector_store.do_search(embeddings[0])
        (result, time_to_complete) = kwargs["chatbot_service"].get_llm_response(
            query, context
        )
    print(f"\tThe LLM Said: {result}")
    time_for_retrieval = time_to_embed + time_to_search
    total_time = time_for_retrieval + time_to_complete
    print(f"Done Doing RAG Test For: {name}")
    print(f"- Time to Embed: {time_to_embed}")
    print(f"- Time to Search: {time_to_search}")
    print(f"- Total Time for Retrieval: {time_for_retrieval}")
    print(f"- Time for Chatbot Completion: {time_to_complete}")
    print(f"- Total Time Taken: {total_time}\n")
    return {
        "time_to_embed": time_to_embed,
        "time_to_search": time_to_search,
        "time_for_retrieval": time_for_retrieval,
        "time_to_complete": time_to_complete,
        "total_time": total_time,
    }
if __name__ == "__main__":
    # Phase 1: load the shared documents into every vector store.
    print("----------Doing Data Setup-------------------------\n")
    for test in tests:
        do_data_upsert(**test)
    print("\n----------Done Doing Data Setup------------------\n\n")

    # Phase 2: run TRIAL_COUNT timed RAG round-trips per provider.
    print("----------Doing Rag Tests-------------------------\n")
    stats = {}
    for _ in range(TRIAL_COUNT):
        for test in tests:
            times = do_normal_rag_test(**test)
            # setdefault replaces the non-idiomatic `if not name in stats` check
            stats.setdefault(test["name"], []).append(times)
    print("\n----------Done Doing Rag Tests---------------------\n")

    # Phase 3: report per-provider averages across the trials.
    print("------------Final Results---------------------------\n")
    for test in tests:
        trials = stats[test["name"]]
        totals = {
            key: sum(trial[key] for trial in trials)
            for key in (
                "time_to_embed",
                "time_to_search",
                "time_for_retrieval",
                "time_to_complete",
                "total_time",
            )
        }
        print(f'Done Doing RAG Test For: {test["name"]}')
        print(f"- Average Time to Embed: {(totals['time_to_embed'] / TRIAL_COUNT):0.4f}")
        print(f"- Average Time to Search: {(totals['time_to_search'] / TRIAL_COUNT):0.4f}")
        print(
            f"- Average Total Time for Retrieval: {(totals['time_for_retrieval'] / TRIAL_COUNT):0.4f}"
        )
        print(
            f"- Average Time for Chatbot Completion: {(totals['time_to_complete'] / TRIAL_COUNT):0.4f}"
        )
        print(f"- Average Total Time Taken: {(totals['total_time'] / TRIAL_COUNT):0.4f}\n")
29 changes: 29 additions & 0 deletionspgml-apps/rag-retrieval-timing-tests/huggingface.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
import requests | ||
import time | ||
import os | ||
import sys | ||
from dotenv import load_dotenv | ||
# Load our environment variables
load_dotenv()

# HuggingFace Inference API token; None if HF_TOKEN is not set in .env
HF_TOKEN = os.getenv("HF_TOKEN")
# Get the embedding from HuggingFace
def get_embeddings(inputs):
    """Embed *inputs* (a list of strings) via the HuggingFace Inference API.

    Returns a tuple (embeddings, seconds_taken). Exits the process if the API
    responds with an error payload (e.g. bad token or model still loading).
    """
    print("\tGetting embeddings from HuggingFace")
    tic = time.perf_counter()
    headers = {"Authorization": f"Bearer {HF_TOKEN}"}
    payload = {"inputs": inputs}
    response = requests.post(
        "https://api-inference.huggingface.co/pipeline/feature-extraction/intfloat/e5-small",
        headers=headers,
        json=payload,
        # Without a timeout, requests can block forever on a stalled
        # connection and silently wedge the whole benchmark run.
        timeout=60,
    )
    toc = time.perf_counter()
    time_taken = toc - tic
    print(f"\tDone getting embeddings: {toc - tic:0.4f}\n")
    response = response.json()
    if "error" in response:
        sys.exit(response)
    return (response, time_taken)
26 changes: 26 additions & 0 deletionspgml-apps/rag-retrieval-timing-tests/openai_local.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
from openai import OpenAI | ||
import time | ||
# Create our OpenAI client
# NOTE(review): no API key is passed and this module never calls load_dotenv()
# itself — presumably the OpenAI SDK reads OPENAI_API_KEY from the environment
# after another module's load_dotenv() has run; confirm import order in
# __main__.py keeps this working.
client = OpenAI()
# Get LLM response from OpenAI
def get_llm_response(query, context):
    """Ask gpt-3.5-turbo to answer *query* given retrieved *context*.

    Returns a tuple (response_text, seconds_taken).
    """
    print("\tGetting LLM response from OpenAI")
    start = time.perf_counter()
    messages = [
        {
            "role": "system",
            "content": f"You are a helpful assistant. Given the context, provide an answer to the user: \n{context}",
        },
        {"role": "user", "content": query},
    ]
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages,
    )
    elapsed = time.perf_counter() - start
    print(f"\tDone getting the LLM response: {elapsed:0.4f}")
    answer = completion.choices[0].message.content
    return (answer, elapsed)
43 changes: 43 additions & 0 deletionspgml-apps/rag-retrieval-timing-tests/pinecone_local.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
from pinecone import Pinecone, ServerlessSpec | ||
from dotenv import load_dotenv | ||
import time | ||
import os | ||
# Load our environment variables
load_dotenv()

# Pinecone API key; None if PINECONE_API_KEY is not set in .env
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")

# Create our Pinecone client
# Note we created their default index using their gcp-start region and us-central1 region
# NOTE(review): assumes an index named "test" already exists in the account —
# creation is done out of band (see README).
pc = Pinecone(api_key=PINECONE_API_KEY)
index = pc.Index("test")
# Store some initial documents to retrieve
def upsert_data(documents, embeddings):
    """Upsert *documents* (paired with precomputed *embeddings*) into the
    "test" index under namespace "ns1".

    Builds per-document copies with the embedding attached as "values" instead
    of mutating the caller's dicts — the originals are the module-level
    documents shared by every vector-store test, so in-place mutation would
    leak Pinecone-specific keys into the other providers' runs.

    Returns the number of seconds the upsert call took.
    """
    vectors = [
        {**document, "values": embedding}
        for document, embedding in zip(documents, embeddings)
    ]
    print("\tStarting PineCone upsert")
    tic = time.perf_counter()
    index.upsert(vectors, namespace="ns1")
    toc = time.perf_counter()
    time_taken_to_upsert = toc - tic
    print(f"\tDone PineCone upsert: {time_taken_to_upsert:0.4f}")
    return time_taken_to_upsert
# Do cosine similarity search over our pinecone index
def do_search(vector):
    """Run a top-1 cosine similarity query against the "test" index.

    Returns a tuple (matched document text, seconds the query took).
    """
    print("\tDoing cosine similarity search with PineCone")
    start = time.perf_counter()
    matches = index.query(
        namespace="ns1",
        vector=vector,
        top_k=1,
        include_metadata=True,
    )
    elapsed = time.perf_counter() - start
    print(f"\tDone doing cosine similarity search: {elapsed:0.4f}\n")
    best_text = matches["matches"][0]["metadata"]["text"]
    return (best_text, elapsed)
62 changes: 62 additions & 0 deletionspgml-apps/rag-retrieval-timing-tests/postgresml.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
from pgml import Collection, Pipeline | ||
from dotenv import load_dotenv | ||
import time | ||
# Load our environment variables
load_dotenv()

# Initialize our Collection and Pipeline
# The pipeline configures server-side semantic search over the "text" field
# with the intfloat/e5-small model, so no separate embedding service is needed
# for this provider.
collection = Collection("test_collection")
pipeline = Pipeline(
    "test_pipeline",
    {
        "text": {
            "semantic_search": {
                "model": "intfloat/e5-small",
            },
        }
    },
)
# Add the Pipeline to our collection
# We only need to do this once
# NOTE(review): not called from __main__.py — assumed to have been run once
# out of band (see README).
async def setup_pipeline():
    """Register the pipeline on the collection (one-time setup)."""
    await collection.add_pipeline(pipeline)
async def upsert_data(documents):
    """Upsert the test documents into the PostgresML collection.

    PostgresML embeds server-side, so each document is reduced to just its id
    and raw text before upserting.
    """
    payload = [
        {"id": doc["id"], "text": doc["metadata"]["text"]} for doc in documents
    ]
    print("Starting PostgresML upsert")
    start = time.perf_counter()
    await collection.upsert_documents(payload)
    elapsed = time.perf_counter() - start
    print(f"Done PostgresML upsert: {elapsed:0.4f}\n")
async def do_search(query):
    """Embed *query* server-side and run a limit-1 vector search over the
    collection.

    Returns a tuple (best matching chunk text, seconds the call took) — the
    timing covers both the embedding and the search since PostgresML does them
    in one round-trip.
    """
    print(
        "\tDoing embedding and cosine similarity search over our PostgresML Collection"
    )
    start = time.perf_counter()
    search_request = {
        "query": {
            "fields": {
                "text": {
                    "query": query,
                },
            }
        },
        "limit": 1,
    }
    results = await collection.vector_search(search_request, pipeline)
    elapsed = time.perf_counter() - start
    print(f"\tDone doing embedding and cosine similarity search: {elapsed:0.4f}\n")
    return (results[0]["chunk"], elapsed)
49 changes: 49 additions & 0 deletionspgml-apps/rag-retrieval-timing-tests/qdrant_local.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
from qdrant_client import QdrantClient | ||
from qdrant_client.models import Distance, VectorParams, PointStruct | ||
from dotenv import load_dotenv | ||
import time | ||
import os | ||
# Load our environment variables
load_dotenv()

# Qdrant Cloud API key; None if QDRANT_API_KEY is not set in .env
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")

# Create our Qdrant client
# NOTE(review): the cluster URL is hard-coded to one specific Qdrant Cloud
# instance.
qdrant = QdrantClient(
    url="https://059364f6-62c5-4f80-9f19-cf6d6394caae.us-east4-0.gcp.cloud.qdrant.io:6333",
    api_key=QDRANT_API_KEY,
)

# Create our Qdrant collection
# Vector size 384 presumably matches the intfloat/e5-small embeddings from
# huggingface.py — TODO confirm.
# NOTE(review): recreate_collection runs at import time, so importing this
# module drops and recreates the collection (and hits the network).
qdrant.recreate_collection(
    collection_name="test",
    vectors_config=VectorParams(size=384, distance=Distance.COSINE),
)
# Store some initial documents to retrieve
def upsert_data(documents, embeddings):
    """Upsert *documents* (paired with precomputed *embeddings*) into the
    "test" collection.

    Each document's metadata dict becomes the point payload. Returns the
    number of seconds the upsert call took.
    """
    points = []
    for document, embedding in zip(documents, embeddings):
        points.append(
            PointStruct(
                id=int(document["id"]),
                vector=embedding,
                payload=document["metadata"],
            )
        )
    print("\tStarting Qdrant upsert")
    start = time.perf_counter()
    qdrant.upsert(collection_name="test", points=points)
    elapsed = time.perf_counter() - start
    print(f"\tDone Qdrant upsert: {elapsed:0.4f}")
    return elapsed
# Do cosine similarity search over our Qdrant collection
def do_search(vector):
    """Run a limit-1 cosine similarity search against the "test" collection.

    Returns a tuple (matched document text, seconds the query took), matching
    the contract of the other vector-store modules.
    """
    print("\tDoing cosine similarity search with Qdrant")
    tic = time.perf_counter()
    results = qdrant.search(collection_name="test", query_vector=vector, limit=1)
    toc = time.perf_counter()
    time_done = toc - tic
    print(f"\tDone doing cosine similarity search: {time_done:0.4f}\n")
    # Bug fix: previously returned the raw result list, so __main__ passed a
    # list of ScoredPoint objects to the LLM as "context". Extract the matched
    # document's text from the payload (set to document["metadata"] at upsert
    # time) like pinecone_local/postgresml do.
    result = results[0].payload["text"]
    return (result, time_done)
Oops, something went wrong.
Uh oh!
There was an error while loading.Please reload this page.
Add this suggestion to a batch that can be applied as a single commit.This suggestion is invalid because no changes were made to the code.Suggestions cannot be applied while the pull request is closed.Suggestions cannot be applied while viewing a subset of changes.Only one suggestion per line can be applied in a batch.Add this suggestion to a batch that can be applied as a single commit.Applying suggestions on deleted lines is not supported.You must change the existing code in this line in order to create a valid suggestion.Outdated suggestions cannot be applied.This suggestion has been applied or marked resolved.Suggestions cannot be applied from pending reviews.Suggestions cannot be applied on multi-line comments.Suggestions cannot be applied while the pull request is queued to merge.Suggestion cannot be applied right now. Please check back later.