Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit5f1a2dc

Browse files
committed
Added rag-retrieval-timing-tests
1 parent0842673 commit5f1a2dc

File tree

11 files changed

+555
-0
lines changed

11 files changed

+555
-0
lines changed
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
PINECONE_API_KEY=
2+
QDRANT_API_KEY=
3+
ZILLIZ_API_KEY=
4+
WCS_API_KEY=
5+
OPENAI_API_KEY=
6+
HF_TOKEN=
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# RAG Timing Tests
2+
3+
This script runs timing tests for common RAG systems.
4+
5+
To run it, copy `.env.deveopment` to `.env` and set the appropriate variables in the `.env` file, install the dependencies in `requirements.txt`, and run `python3 __main__.py`.
6+
7+
Note that this script assumes the required databases or "collections" have already been set up for each cloud provider. See the script for more details.
Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
importtime
2+
importasyncio
3+
4+
importpostgresmlaspgl
5+
importzilliz_localaszl
6+
importpinecone_localaspl
7+
importqdrant_localasql
8+
importopenai_localasal
9+
importhuggingfaceashf
10+
importweaviate_localaswl
11+
12+
TRIAL_COUNT=2
13+
14+
# The pairs we are testing with
15+
tests= [
16+
{
17+
"name":"PostgresML",
18+
"vector_store":pgl,
19+
"rag+":True,
20+
"chatbot_service":al,
21+
"async":True,
22+
},
23+
{"name":"Weaviate","vector_store":wl,"chatbot_service":al,"rag++":True},
24+
{
25+
"name":"Zilliz",
26+
"vector_store":zl,
27+
"embedding_service":hf,
28+
"chatbot_service":al,
29+
},
30+
{
31+
"name":"Pinecone",
32+
"vector_store":pl,
33+
"embedding_service":hf,
34+
"chatbot_service":al,
35+
},
36+
{
37+
"name":"Qdrant",
38+
"vector_store":ql,
39+
"embedding_service":hf,
40+
"chatbot_service":al,
41+
},
42+
]
43+
44+
45+
# Our documents
46+
# We only really need to test on 2. When we search we are trying to get the first document back
47+
documents= [
48+
{"id":"0","metadata": {"text":"The hidden value is 1000"}},
49+
{
50+
"id":"1",
51+
"metadata": {"text":"This is just some random text"},
52+
},
53+
]
54+
55+
56+
def maybe_do_async(func, check_dict, *args):
    """Call ``func(*args)``, running it under ``asyncio.run`` when flagged async.

    Args:
        func: Callable to invoke. It must return an awaitable when
            ``check_dict["async"]`` is truthy.
        check_dict: Test configuration mapping; only its optional ``"async"``
            key is consulted.
        *args: Positional arguments forwarded to ``func``.

    Returns:
        Whatever ``func`` returns (the awaited result in the async case).
    """
    # .get() covers both "key missing" and "key present but falsy" in one lookup.
    if check_dict.get("async"):
        return asyncio.run(func(*args))
    return func(*args)
61+
62+
63+
def do_data_upsert(name, vector_store, **kwargs):
    """Load the module-level ``documents`` into one vector store.

    Args:
        name: Display name of the system under test (used only for logging).
        vector_store: Module exposing ``upsert_data``.
        **kwargs: Remaining keys of the test configuration. ``"rag+"`` /
            ``"rag++"`` mark stores that take the raw documents;
            otherwise ``"embedding_service"`` must be present and is used to
            embed the document texts first. ``"async"`` is honoured through
            ``maybe_do_async``.
    """
    print(f"Doing Data Upsert For:{name}")
    # Test the flag *values*: a config carrying "rag+": False must not be
    # treated as if the flag were switched on.
    if kwargs.get("rag++") or kwargs.get("rag+"):
        maybe_do_async(vector_store.upsert_data, kwargs, documents)
    else:
        texts = [d["metadata"]["text"] for d in documents]
        # The embedding timing is not reported during setup, so discard it.
        embeddings, _ = kwargs["embedding_service"].get_embeddings(texts)
        maybe_do_async(vector_store.upsert_data, kwargs, documents, embeddings)
    print(f"Done Doing Data Upsert For:{name}\n")
72+
73+
74+
def do_normal_rag_test(name, vector_store, **kwargs):
    """Run one RAG round trip for a single system and report its timings.

    Three modes are selected from the test configuration in ``kwargs``:

    * ``"rag++"`` truthy: ``vector_store.get_llm_response(query)`` does
      everything; embed and search times are reported as 0.
    * ``"rag+"`` truthy: ``vector_store.do_search(query)`` returns the context
      and ``chatbot_service`` produces the answer; embed time is reported as 0.
    * otherwise: ``embedding_service`` embeds the query, ``vector_store``
      searches with the first embedding, and ``chatbot_service`` answers.

    Args:
        name: Display name of the system under test (used only for logging).
        vector_store: Module exposing ``do_search`` or ``get_llm_response``.
        **kwargs: Remaining keys of the test configuration
            (``"embedding_service"``, ``"chatbot_service"``, ``"async"``,
            ``"rag+"``, ``"rag++"``).

    Returns:
        dict with keys ``time_to_embed``, ``time_to_search``,
        ``time_for_retrieval``, ``time_to_complete`` and ``total_time``.
    """
    print(f"Doing RAG Test For:{name}")
    query = "What is the hidden value?"
    # Test the flag *values* rather than key presence so that a config with
    # "rag+": False falls through to the plain-RAG branch.
    if kwargs.get("rag++"):
        (result, time_to_complete) = maybe_do_async(
            vector_store.get_llm_response, kwargs, query
        )
        time_to_embed = 0
        time_to_search = 0
    elif kwargs.get("rag+"):
        time_to_embed = 0
        (context, time_to_search) = maybe_do_async(
            vector_store.do_search, kwargs, query
        )
        (result, time_to_complete) = kwargs["chatbot_service"].get_llm_response(
            query, context
        )
    else:
        (embeddings, time_to_embed) = kwargs["embedding_service"].get_embeddings(
            [query]
        )
        (context, time_to_search) = vector_store.do_search(embeddings[0])
        (result, time_to_complete) = kwargs["chatbot_service"].get_llm_response(
            query, context
        )
    print(f"\tThe LLM Said:{result}")
    time_for_retrieval = time_to_embed + time_to_search
    total_time = time_to_embed + time_to_search + time_to_complete
    print(f"Done Doing RAG Test For:{name}")
    print(f"- Time to Embed:{time_to_embed}")
    print(f"- Time to Search:{time_to_search}")
    print(f"- Total Time for Retrieval:{time_for_retrieval}")
    print(f"- Time for Chatbot Completion:{time_to_complete}")
    print(f"- Total Time Taken:{total_time}\n")
    return {
        "time_to_embed": time_to_embed,
        "time_to_search": time_to_search,
        "time_for_retrieval": time_for_retrieval,
        "time_to_complete": time_to_complete,
        "total_time": total_time,
    }
115+
116+
117+
if __name__ == "__main__":
    # Phase 1: load the sample documents into every vector store.
    print("----------Doing Data Setup-------------------------\n")
    for test in tests:
        do_data_upsert(**test)
    print("\n----------Done Doing Data Setup------------------\n\n")

    # Phase 2: run every system TRIAL_COUNT times, collecting the per-trial
    # timing dicts under the system's name.
    print("----------Doing Rag Tests-------------------------\n")
    stats = {}
    for _ in range(TRIAL_COUNT):
        for test in tests:
            times = do_normal_rag_test(**test)
            stats.setdefault(test["name"], []).append(times)
    print("\n----------Done Doing Rag Tests---------------------\n")

    # Phase 3: sum each metric over the trials and print the averages.
    print("------------Final Results---------------------------\n")
    metric_keys = (
        "time_to_embed",
        "time_to_search",
        "time_for_retrieval",
        "time_to_complete",
        "total_time",
    )
    for test in tests:
        trials = stats[test["name"]]
        totals = {key: sum(trial[key] for trial in trials) for key in metric_keys}
        print(f'Done Doing RAG Test For:{test["name"]}')
        print(
            f"- Average Time to Embed:{(totals['time_to_embed'] / TRIAL_COUNT):0.4f}"
        )
        print(
            f"- Average Time to Search:{(totals['time_to_search'] / TRIAL_COUNT):0.4f}"
        )
        print(
            f"- Average Total Time for Retrieval:{(totals['time_for_retrieval'] / TRIAL_COUNT):0.4f}"
        )
        print(
            f"- Average Time for Chatbot Completion:{(totals['time_to_complete'] / TRIAL_COUNT):0.4f}"
        )
        print(
            f"- Average Total Time Taken:{(totals['total_time'] / TRIAL_COUNT):0.4f}\n"
        )
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
importrequests
2+
importtime
3+
importos
4+
importsys
5+
fromdotenvimportload_dotenv
6+
7+
# Load our environment variables
8+
load_dotenv()
9+
HF_TOKEN=os.getenv("HF_TOKEN")
10+
11+
12+
# Get the embedding from HuggingFace
13+
def get_embeddings(inputs):
    """Request embeddings for ``inputs`` from the HuggingFace inference API.

    Args:
        inputs: Value sent as the ``"inputs"`` field of the JSON payload
            (the callers in this project pass a list of strings).

    Returns:
        Tuple ``(response, time_taken)``: the decoded JSON body and the
        ``time.perf_counter`` delta measured around the HTTP POST.

    Exits the process via ``sys.exit`` when the decoded body is a mapping
    containing an ``"error"`` key.
    """
    print("\tGetting embeddings from HuggingFace")
    tic = time.perf_counter()
    # The bearer scheme needs a space between "Bearer" and the token.
    headers = {"Authorization": f"Bearer {HF_TOKEN}"}
    payload = {"inputs": inputs}
    response = requests.post(
        "https://api-inference.huggingface.co/pipeline/feature-extraction/intfloat/e5-small",
        headers=headers,
        json=payload,
    )
    toc = time.perf_counter()
    time_taken = toc - tic
    print(f"\tDone getting embeddings:{time_taken:0.4f}\n")
    response = response.json()
    # Only a mapping can carry an "error" key; a successful response that is
    # a list is passed straight through.
    if isinstance(response, dict) and "error" in response:
        sys.exit(response)
    return (response, time_taken)
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
fromopenaiimportOpenAI
2+
importtime
3+
4+
# Create our OpenAI client
5+
client=OpenAI()
6+
7+
8+
# Get LLM response from OpenAI
9+
def get_llm_response(query, context):
    """Ask the OpenAI chat model to answer ``query`` given ``context``.

    Args:
        query: The user question, sent as the user message.
        context: Retrieved text, interpolated into the system message.

    Returns:
        Tuple ``(response, time_taken)``: the content of the first choice's
        message and the ``time.perf_counter`` delta around the API call.
    """
    print("\tGetting LLM response from OpenAI")
    started = time.perf_counter()
    system_message = {
        "role": "system",
        "content": f"You are a helpful assistant. Given the context, provide an answer to the user:\n{context}",
    }
    user_message = {"role": "user", "content": query}
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[system_message, user_message],
    )
    time_taken = time.perf_counter() - started
    print(f"\tDone getting the LLM response:{time_taken:0.4f}")
    first_choice = completion.choices[0]
    return (first_choice.message.content, time_taken)
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
frompineconeimportPinecone,ServerlessSpec
2+
fromdotenvimportload_dotenv
3+
importtime
4+
importos
5+
6+
# Load our environment variables
7+
load_dotenv()
8+
PINECONE_API_KEY=os.getenv("PINECONE_API_KEY")
9+
10+
# Create our Pinecone client
11+
# Note we created their default index using their gcp-start region and us-central1 region
12+
pc=Pinecone(api_key=PINECONE_API_KEY)
13+
index=pc.Index("test")
14+
15+
16+
# Store some initial documents to retrieve
17+
def upsert_data(documents, embeddings):
    """Upsert ``documents`` with their ``embeddings`` into the Pinecone index.

    Args:
        documents: Iterable of dicts describing the records to store.
        embeddings: Iterable of vectors, paired positionally with
            ``documents``.

    Returns:
        The ``time.perf_counter`` delta measured around ``index.upsert``.
    """
    # Build new records instead of writing "values" into the caller's dicts:
    # the same document list is handed to every vector store under test.
    vectors = [
        {**document, "values": embedding}
        for document, embedding in zip(documents, embeddings)
    ]
    print("\tStarting PineCone upsert")
    tic = time.perf_counter()
    index.upsert(vectors, namespace="ns1")
    toc = time.perf_counter()
    time_taken_to_upsert = toc - tic
    print(f"\tDone PineCone upsert:{time_taken_to_upsert:0.4f}")
    return time_taken_to_upsert
27+
28+
29+
# Do cosine similarity search over our pinecone index
30+
def do_search(vector):
    """Query the Pinecone index for the single best match to ``vector``.

    Args:
        vector: Query embedding passed to ``index.query``.

    Returns:
        Tuple ``(result, time_done)``: the ``metadata["text"]`` of the first
        match and the ``time.perf_counter`` delta around the query.
    """
    print("\tDoing cosine similarity search with PineCone")
    started = time.perf_counter()
    query_args = {
        "namespace": "ns1",
        "vector": vector,
        "top_k": 1,
        "include_metadata": True,
    }
    response = index.query(**query_args)
    elapsed = time.perf_counter() - started
    print(f"\tDone doing cosine similarity search:{elapsed:0.4f}\n")
    best_match = response["matches"][0]
    return (best_match["metadata"]["text"], elapsed)
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
frompgmlimportCollection,Pipeline
2+
fromdotenvimportload_dotenv
3+
importtime
4+
5+
# Load our environment variables
6+
load_dotenv()
7+
8+
# Initialize our Collection and Pipeline
9+
collection=Collection("test_collection")
10+
pipeline=Pipeline(
11+
"test_pipeline",
12+
{
13+
"text": {
14+
"semantic_search": {
15+
"model":"intfloat/e5-small",
16+
},
17+
}
18+
},
19+
)
20+
21+
22+
# Add the Pipeline to our collection
23+
# We only need to do this once
24+
async def setup_pipeline():
    """Attach the module-level ``pipeline`` to the module-level ``collection``.

    Per the comment above this function, this only needs to be run once.
    """
    await collection.add_pipeline(pipeline)
26+
27+
28+
async def upsert_data(documents):
    """Upsert ``documents`` into the PostgresML collection.

    Each input document is reshaped to ``{"id": ..., "text": ...}``, taking
    the text from ``document["metadata"]["text"]``, before being sent.

    Args:
        documents: Iterable of dicts with ``"id"`` and ``"metadata"`` keys.
    """
    rows = []
    for document in documents:
        rows.append({"id": document["id"], "text": document["metadata"]["text"]})
    print("Starting PostgresML upsert")
    started = time.perf_counter()
    await collection.upsert_documents(rows)
    elapsed = time.perf_counter() - started
    print(f"Done PostgresML upsert:{elapsed:0.4f}\n")
39+
40+
41+
async def do_search(query):
    """Run a one-result vector search for ``query`` over the collection.

    Args:
        query: Query text placed under the ``"text"`` field of the search
            request.

    Returns:
        Tuple ``(chunk, time_taken)``: the ``"chunk"`` entry of the first
        result and the ``time.perf_counter`` delta around the search call.
    """
    print(
        "\tDoing embedding and cosine similarity search over our PostgresML Collection"
    )
    started = time.perf_counter()
    search_spec = {
        "query": {"fields": {"text": {"query": query}}},
        "limit": 1,
    }
    hits = await collection.vector_search(search_spec, pipeline)
    elapsed = time.perf_counter() - started
    print(f"\tDone doing embedding and cosine similarity search:{elapsed:0.4f}\n")
    top_hit = hits[0]
    return (top_hit["chunk"], elapsed)
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
fromqdrant_clientimportQdrantClient
2+
fromqdrant_client.modelsimportDistance,VectorParams,PointStruct
3+
fromdotenvimportload_dotenv
4+
importtime
5+
importos
6+
7+
# Load our environment variables
8+
load_dotenv()
9+
QDRANT_API_KEY=os.getenv("QDRANT_API_KEY")
10+
11+
# Create our Qdrant client
12+
qdrant=QdrantClient(
13+
url="https://059364f6-62c5-4f80-9f19-cf6d6394caae.us-east4-0.gcp.cloud.qdrant.io:6333",
14+
api_key=QDRANT_API_KEY,
15+
)
16+
17+
# Create our Qdrant collection
18+
qdrant.recreate_collection(
19+
collection_name="test",
20+
vectors_config=VectorParams(size=384,distance=Distance.COSINE),
21+
)
22+
23+
24+
# Store some initial documents to retrieve
25+
def upsert_data(documents, embeddings):
    """Upsert ``documents`` with their ``embeddings`` into the Qdrant collection.

    Args:
        documents: Iterable of dicts with ``"id"`` (converted with ``int``)
            and ``"metadata"`` (stored as the point payload).
        embeddings: Iterable of vectors, paired positionally with
            ``documents``.

    Returns:
        The ``time.perf_counter`` delta measured around ``qdrant.upsert``.
    """
    points = []
    for document, embedding in zip(documents, embeddings):
        point = PointStruct(
            id=int(document["id"]),
            vector=embedding,
            payload=document["metadata"],
        )
        points.append(point)
    print("\tStarting Qdrant upsert")
    started = time.perf_counter()
    qdrant.upsert(collection_name="test", points=points)
    elapsed = time.perf_counter() - started
    print(f"\tDone Qdrant upsert:{elapsed:0.4f}")
    return elapsed
39+
40+
41+
# Do cosine similarity search over our Qdrant collection
42+
def do_search(vector):
    """Query the Qdrant collection for the single best match to ``vector``.

    Args:
        vector: Query embedding passed as ``query_vector``.

    Returns:
        Tuple ``(result, time_done)``: the ``"text"`` payload of the top hit
        and the ``time.perf_counter`` delta around the search call.
    """
    print("\tDoing cosine similarity search with Qdrant")
    tic = time.perf_counter()
    results = qdrant.search(collection_name="test", query_vector=vector, limit=1)
    toc = time.perf_counter()
    time_done = toc - tic
    print(f"\tDone doing cosine similarity search:{time_done:0.4f}\n")
    # Return the stored text, matching the other vector-store modules, so the
    # chatbot receives the document text rather than the raw hit list.
    # NOTE(review): relies on the search returning payloads by default - confirm
    # against the installed qdrant-client version.
    result = results[0].payload["text"]
    return (result, time_done)

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp