NotificationsYou must be signed in to change notification settings
Fork352
Star6.6k

Commit5f1a2dc

committed

Added rag-retrieval-timing-tests

1 parent0842673 commit5f1a2dcCopy full SHA for 5f1a2dc

File tree

11 files changed

+555

-0

lines changed

pgml-apps/rag-retrieval-timing-tests

11 files changed

+555

-0

lines changed

`‎pgml-apps/rag-retrieval-timing-tests/.env.development‎`

Lines changed: 6 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,6 @@`
	`1`	`+PINECONE_API_KEY=`
	`2`	`+QDRANT_API_KEY=`
	`3`	`+ZILLIZ_API_KEY=`
	`4`	`+WCS_API_KEY=`
	`5`	`+OPENAI_API_KEY=`
	`6`	`+HF_TOKEN=`

`‎pgml-apps/rag-retrieval-timing-tests/README.md‎`

Lines changed: 7 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,7 @@`
	`1`	`+# RAG Timing Tests`
	`2`	`+`
	`3`	`+This script runs timing tests for common RAG systems.`
	`4`	`+`
	`5`	+To run it, copy `.env.development` to `.env`, set the appropriate variables in the `.env` file, install the dependencies in `requirements.txt`, and run `python3 __main__.py`.
	`6`	`+`
	`7`	`+Notice that this script assumes certain actions to create databases or setup "collections" have been performed for each cloud provider. See the script for more details.`

`‎pgml-apps/rag-retrieval-timing-tests/main.py‎`

Lines changed: 161 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,161 @@`
	`1`	`+importtime`
	`2`	`+importasyncio`
	`3`	`+`
	`4`	`+importpostgresmlaspgl`
	`5`	`+importzilliz_localaszl`
	`6`	`+importpinecone_localaspl`
	`7`	`+importqdrant_localasql`
	`8`	`+importopenai_localasal`
	`9`	`+importhuggingfaceashf`
	`10`	`+importweaviate_localaswl`
	`11`	`+`
	`12`	`+TRIAL_COUNT=2`
	`13`	`+`
	`14`	`+# The pairs we are testing with`
	`15`	`+tests= [`
	`16`	`+ {`
	`17`	`+"name":"PostgresML",`
	`18`	`+"vector_store":pgl,`
	`19`	`+"rag+":True,`
	`20`	`+"chatbot_service":al,`
	`21`	`+"async":True,`
	`22`	`+ },`
	`23`	`+ {"name":"Weaviate","vector_store":wl,"chatbot_service":al,"rag++":True},`
	`24`	`+ {`
	`25`	`+"name":"Zilliz",`
	`26`	`+"vector_store":zl,`
	`27`	`+"embedding_service":hf,`
	`28`	`+"chatbot_service":al,`
	`29`	`+ },`
	`30`	`+ {`
	`31`	`+"name":"Pinecone",`
	`32`	`+"vector_store":pl,`
	`33`	`+"embedding_service":hf,`
	`34`	`+"chatbot_service":al,`
	`35`	`+ },`
	`36`	`+ {`
	`37`	`+"name":"Qdrant",`
	`38`	`+"vector_store":ql,`
	`39`	`+"embedding_service":hf,`
	`40`	`+"chatbot_service":al,`
	`41`	`+ },`
	`42`	`+]`
	`43`	`+`
	`44`	`+`
	`45`	`+# Our documents`
	`46`	`+# We only really need to test on 2. When we search we are trying to get the first document back`
	`47`	`+documents= [`
	`48`	`+ {"id":"0","metadata": {"text":"The hidden value is 1000"}},`
	`49`	`+ {`
	`50`	`+"id":"1",`
	`51`	`+"metadata": {"text":"This is just some random text"},`
	`52`	`+ },`
	`53`	`+]`
	`54`	`+`
	`55`	`+`
	`56`	`+defmaybe_do_async(func,check_dict,*args):`
	`57`	`+if"async"incheck_dictandcheck_dict["async"]:`
	`58`	`+returnasyncio.run(func(*args))`
	`59`	`+else:`
	`60`	`+returnfunc(*args)`
	`61`	`+`
	`62`	`+`
	`63`	`+defdo_data_upsert(name,vector_store,**kwargs):`
	`64`	`+print(f"Doing Data Upsert For:{name}")`
	`65`	`+if"rag++"inkwargsor"rag+"inkwargs:`
	`66`	`+maybe_do_async(vector_store.upsert_data,kwargs,documents)`
	`67`	`+else:`
	`68`	`+texts= [d["metadata"]["text"]fordindocuments]`
	`69`	`+ (embeddings,time_to_embed)=kwargs["embedding_service"].get_embeddings(texts)`
	`70`	`+maybe_do_async(vector_store.upsert_data,kwargs,documents,embeddings)`
	`71`	`+print(f"Done Doing Data Upsert For:{name}\n")`
	`72`	`+`
	`73`	`+`
	`74`	`+defdo_normal_rag_test(name,vector_store,**kwargs):`
	`75`	`+print(f"Doing RAG Test For:{name}")`
	`76`	`+query="What is the hidden value?"`
	`77`	`+if"rag++"inkwargs:`
	`78`	`+ (result,time_to_complete)=maybe_do_async(`
	`79`	`+vector_store.get_llm_response,kwargs,query`
	`80`	`+ )`
	`81`	`+time_to_embed=0`
	`82`	`+time_to_search=0`
	`83`	`+elif"rag+"inkwargs:`
	`84`	`+time_to_embed=0`
	`85`	`+ (context,time_to_search)=maybe_do_async(`
	`86`	`+vector_store.do_search,kwargs,query`
	`87`	`+ )`
	`88`	`+ (result,time_to_complete)=kwargs["chatbot_service"].get_llm_response(`
	`89`	`+query,context`
	`90`	`+ )`
	`91`	`+else:`
	`92`	`+ (embeddings,time_to_embed)=kwargs["embedding_service"].get_embeddings(`
	`93`	`+ [query]`
	`94`	`+ )`
	`95`	`+ (context,time_to_search)=vector_store.do_search(embeddings[0])`
	`96`	`+ (result,time_to_complete)=kwargs["chatbot_service"].get_llm_response(`
	`97`	`+query,context`
	`98`	`+ )`
	`99`	`+print(f"\tThe LLM Said:{result}")`
	`100`	`+time_for_retrieval=time_to_embed+time_to_search`
	`101`	`+total_time=time_to_embed+time_to_search+time_to_complete`
	`102`	`+print(f"Done Doing RAG Test For:{name}")`
	`103`	`+print(f"- Time to Embed:{time_to_embed}")`
	`104`	`+print(f"- Time to Search:{time_to_search}")`
	`105`	`+print(f"- Total Time for Retrieval:{time_for_retrieval}")`
	`106`	`+print(f"- Time for Chatbot Completion:{time_to_complete}")`
	`107`	`+print(f"- Total Time Taken:{total_time}\n")`
	`108`	`+return {`
	`109`	`+"time_to_embed":time_to_embed,`
	`110`	`+"time_to_search":time_to_search,`
	`111`	`+"time_for_retrieval":time_for_retrieval,`
	`112`	`+"time_to_complete":time_to_complete,`
	`113`	`+"total_time":total_time,`
	`114`	`+ }`
	`115`	`+`
	`116`	`+`
	`117`	`+if__name__=="__main__":`
	`118`	`+print("----------Doing Data Setup-------------------------\n")`
	`119`	`+fortestintests:`
	`120`	`+do_data_upsert(**test)`
	`121`	`+print("\n----------Done Doing Data Setup------------------\n\n")`
	`122`	`+`
	`123`	`+print("----------Doing Rag Tests-------------------------\n")`
	`124`	`+stats= {}`
	`125`	`+foriinrange(TRIAL_COUNT):`
	`126`	`+fortestintests:`
	`127`	`+times=do_normal_rag_test(**test)`
	`128`	`+ifnottest["name"]instats:`
	`129`	`+stats[test["name"]]= []`
	`130`	`+stats[test["name"]].append(times)`
	`131`	`+print("\n----------Done Doing Rag Tests---------------------\n")`
	`132`	`+`
	`133`	`+print("------------Final Results---------------------------\n")`
	`134`	`+fortestintests:`
	`135`	`+trials=stats[test["name"]]`
	`136`	`+ (`
	`137`	`+time_to_embed,`
	`138`	`+time_to_search,`
	`139`	`+time_for_retrieval,`
	`140`	`+time_to_complete,`
	`141`	`+total_time,`
	`142`	`+ )= [`
	`143`	`+sum(trial[key]fortrialintrials)`
	`144`	`+forkeyin [`
	`145`	`+"time_to_embed",`
	`146`	`+"time_to_search",`
	`147`	`+"time_for_retrieval",`
	`148`	`+"time_to_complete",`
	`149`	`+"total_time",`
	`150`	`+ ]`
	`151`	`+ ]`
	`152`	`+print(f'Done Doing RAG Test For:{test["name"]}')`
	`153`	`+print(f"- Average Time to Embed:{(time_to_embed/TRIAL_COUNT):0.4f}")`
	`154`	`+print(f"- Average Time to Search:{(time_to_search/TRIAL_COUNT):0.4f}")`
	`155`	`+print(`
	`156`	`+f"- Average Total Time for Retrieval:{(time_for_retrieval/TRIAL_COUNT):0.4f}"`
	`157`	`+ )`
	`158`	`+print(`
	`159`	`+f"- Average Time for Chatbot Completion:{(time_to_complete/TRIAL_COUNT):0.4f}"`
	`160`	`+ )`
	`161`	`+print(f"- Average Total Time Taken:{(total_time/TRIAL_COUNT):0.4f}\n")`

`‎pgml-apps/rag-retrieval-timing-tests/huggingface.py‎`

Lines changed: 29 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,29 @@`
	`1`	`+importrequests`
	`2`	`+importtime`
	`3`	`+importos`
	`4`	`+importsys`
	`5`	`+fromdotenvimportload_dotenv`
	`6`	`+`
	`7`	`+# Load our environment variables`
	`8`	`+load_dotenv()`
	`9`	`+HF_TOKEN=os.getenv("HF_TOKEN")`
	`10`	`+`
	`11`	`+`
	`12`	`+# Get the embedding from HuggingFace`
	`13`	`+defget_embeddings(inputs):`
	`14`	`+print("\tGetting embeddings from HuggingFace")`
	`15`	`+tic=time.perf_counter()`
	`16`	`+headers= {"Authorization":f"Bearer{HF_TOKEN}"}`
	`17`	`+payload= {"inputs":inputs}`
	`18`	`+response=requests.post(`
	`19`	`+"https://api-inference.huggingface.co/pipeline/feature-extraction/intfloat/e5-small",`
	`20`	`+headers=headers,`
	`21`	`+json=payload,`
	`22`	`+ )`
	`23`	`+toc=time.perf_counter()`
	`24`	`+time_taken=toc-tic`
	`25`	`+print(f"\tDone getting embeddings:{toc-tic:0.4f}\n")`
	`26`	`+response=response.json()`
	`27`	`+if"error"inresponse:`
	`28`	`+sys.exit(response)`
	`29`	`+return (response,time_taken)`

`‎pgml-apps/rag-retrieval-timing-tests/openai_local.py‎`

Lines changed: 26 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,26 @@`
	`1`	`+fromopenaiimportOpenAI`
	`2`	`+importtime`
	`3`	`+`
	`4`	`+# Create our OpenAI client`
	`5`	`+client=OpenAI()`
	`6`	`+`
	`7`	`+`
	`8`	`+# Get LLM response from OpenAI`
	`9`	`+defget_llm_response(query,context):`
	`10`	`+print("\tGetting LLM response from OpenAI")`
	`11`	`+tic=time.perf_counter()`
	`12`	`+completion=client.chat.completions.create(`
	`13`	`+model="gpt-3.5-turbo",`
	`14`	`+messages=[`
	`15`	`+ {`
	`16`	`+"role":"system",`
	`17`	`+"content":f"You are a helpful assistant. Given the context, provide an answer to the user:\n{context}",`
	`18`	`+ },`
	`19`	`+ {"role":"user","content":query},`
	`20`	`+ ],`
	`21`	`+ )`
	`22`	`+toc=time.perf_counter()`
	`23`	`+time_taken=toc-tic`
	`24`	`+print(f"\tDone getting the LLM response:{time_taken:0.4f}")`
	`25`	`+response=completion.choices[0].message.content`
	`26`	`+return (response,time_taken)`

`‎pgml-apps/rag-retrieval-timing-tests/pinecone_local.py‎`

Lines changed: 43 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,43 @@`
	`1`	`+frompineconeimportPinecone,ServerlessSpec`
	`2`	`+fromdotenvimportload_dotenv`
	`3`	`+importtime`
	`4`	`+importos`
	`5`	`+`
	`6`	`+# Load our environment variables`
	`7`	`+load_dotenv()`
	`8`	`+PINECONE_API_KEY=os.getenv("PINECONE_API_KEY")`
	`9`	`+`
	`10`	`+# Create our Pinecone client`
	`11`	`+# Note we created their default index using their gcp-start region and us-central1 region`
	`12`	`+pc=Pinecone(api_key=PINECONE_API_KEY)`
	`13`	`+index=pc.Index("test")`
	`14`	`+`
	`15`	`+`
	`16`	`+# Store some initial documents to retrieve`
	`17`	`+defupsert_data(documents,embeddings):`
	`18`	`+fordocument,embeddinginzip(documents,embeddings):`
	`19`	`+document["values"]=embedding`
	`20`	`+print("\tStarting PineCone upsert")`
	`21`	`+tic=time.perf_counter()`
	`22`	`+index.upsert(documents,namespace="ns1")`
	`23`	`+toc=time.perf_counter()`
	`24`	`+time_taken_to_upsert=toc-tic`
	`25`	`+print(f"\tDone PineCone upsert:{time_taken_to_upsert:0.4f}")`
	`26`	`+returntime_taken_to_upsert`
	`27`	`+`
	`28`	`+`
	`29`	`+# Do cosine similarity search over our pinecone index`
	`30`	`+defdo_search(vector):`
	`31`	`+print("\tDoing cosine similarity search with PineCone")`
	`32`	`+tic=time.perf_counter()`
	`33`	`+results=index.query(`
	`34`	`+namespace="ns1",`
	`35`	`+vector=vector,`
	`36`	`+top_k=1,`
	`37`	`+include_metadata=True,`
	`38`	`+ )`
	`39`	`+toc=time.perf_counter()`
	`40`	`+time_done=toc-tic`
	`41`	`+print(f"\tDone doing cosine similarity search:{time_done:0.4f}\n")`
	`42`	`+result=results["matches"][0]["metadata"]["text"]`
	`43`	`+return (result,time_done)`

`‎pgml-apps/rag-retrieval-timing-tests/postgresml.py‎`

Lines changed: 62 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,62 @@`
	`1`	`+frompgmlimportCollection,Pipeline`
	`2`	`+fromdotenvimportload_dotenv`
	`3`	`+importtime`
	`4`	`+`
	`5`	`+# Load our environment variables`
	`6`	`+load_dotenv()`
	`7`	`+`
	`8`	`+# Initialize our Collection and Pipeline`
	`9`	`+collection=Collection("test_collection")`
	`10`	`+pipeline=Pipeline(`
	`11`	`+"test_pipeline",`
	`12`	`+ {`
	`13`	`+"text": {`
	`14`	`+"semantic_search": {`
	`15`	`+"model":"intfloat/e5-small",`
	`16`	`+ },`
	`17`	`+ }`
	`18`	`+ },`
	`19`	`+)`
	`20`	`+`
	`21`	`+`
	`22`	`+# Add the Pipeline to our collection`
	`23`	`+# We only need to do this once`
	`24`	`+asyncdefsetup_pipeline():`
	`25`	`+awaitcollection.add_pipeline(pipeline)`
	`26`	`+`
	`27`	`+`
	`28`	`+asyncdefupsert_data(documents):`
	`29`	`+documents= [`
	`30`	`+ {"id":document["id"],"text":document["metadata"]["text"]}`
	`31`	`+fordocumentindocuments`
	`32`	`+ ]`
	`33`	`+print("Starting PostgresML upsert")`
	`34`	`+tic=time.perf_counter()`
	`35`	`+awaitcollection.upsert_documents(documents)`
	`36`	`+toc=time.perf_counter()`
	`37`	`+time_taken=toc-tic`
	`38`	`+print(f"Done PostgresML upsert:{time_taken:0.4f}\n")`
	`39`	`+`
	`40`	`+`
	`41`	`+asyncdefdo_search(query):`
	`42`	`+print(`
	`43`	`+"\tDoing embedding and cosine similarity search over our PostgresML Collection"`
	`44`	`+ )`
	`45`	`+tic=time.perf_counter()`
	`46`	`+results=awaitcollection.vector_search(`
	`47`	`+ {`
	`48`	`+"query": {`
	`49`	`+"fields": {`
	`50`	`+"text": {`
	`51`	`+"query":query,`
	`52`	`+ },`
	`53`	`+ }`
	`54`	`+ },`
	`55`	`+"limit":1,`
	`56`	`+ },`
	`57`	`+pipeline,`
	`58`	`+ )`
	`59`	`+toc=time.perf_counter()`
	`60`	`+time_taken=toc-tic`
	`61`	`+print(f"\tDone doing embedding and cosine similarity search:{time_taken:0.4f}\n")`
	`62`	`+return (results[0]["chunk"],time_taken)`

`‎pgml-apps/rag-retrieval-timing-tests/qdrant_local.py‎`

Lines changed: 49 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,49 @@`
	`1`	`+fromqdrant_clientimportQdrantClient`
	`2`	`+fromqdrant_client.modelsimportDistance,VectorParams,PointStruct`
	`3`	`+fromdotenvimportload_dotenv`
	`4`	`+importtime`
	`5`	`+importos`
	`6`	`+`
	`7`	`+# Load our environment variables`
	`8`	`+load_dotenv()`
	`9`	`+QDRANT_API_KEY=os.getenv("QDRANT_API_KEY")`
	`10`	`+`
	`11`	`+# Create our Qdrant client`
	`12`	`+qdrant=QdrantClient(`
	`13`	`+url="https://059364f6-62c5-4f80-9f19-cf6d6394caae.us-east4-0.gcp.cloud.qdrant.io:6333",`
	`14`	`+api_key=QDRANT_API_KEY,`
	`15`	`+)`
	`16`	`+`
	`17`	`+# Create our Qdrant collection`
	`18`	`+qdrant.recreate_collection(`
	`19`	`+collection_name="test",`
	`20`	`+vectors_config=VectorParams(size=384,distance=Distance.COSINE),`
	`21`	`+)`
	`22`	`+`
	`23`	`+`
	`24`	`+# Store some initial documents to retrieve`
	`25`	`+defupsert_data(documents,embeddings):`
	`26`	`+points= [`
	`27`	`+PointStruct(`
	`28`	`+id=int(document["id"]),vector=embedding,payload=document["metadata"]`
	`29`	`+ )`
	`30`	`+fordocument,embeddinginzip(documents,embeddings)`
	`31`	`+ ]`
	`32`	`+print("\tStarting Qdrant upsert")`
	`33`	`+tic=time.perf_counter()`
	`34`	`+qdrant.upsert(collection_name="test",points=points)`
	`35`	`+toc=time.perf_counter()`
	`36`	`+time_taken_to_upsert=toc-tic`
	`37`	`+print(f"\tDone Qdrant upsert:{time_taken_to_upsert:0.4f}")`
	`38`	`+returntime_taken_to_upsert`
	`39`	`+`
	`40`	`+`
	`41`	`+# Do cosine similarity search over our Qdrant collection`
	`42`	`+defdo_search(vector):`
	`43`	`+print("\tDoing cosine similarity search with Qdrant")`
	`44`	`+tic=time.perf_counter()`
	`45`	`+results=qdrant.search(collection_name="test",query_vector=vector,limit=1)`
	`46`	`+toc=time.perf_counter()`
	`47`	`+time_done=toc-tic`
	`48`	`+print(f"\tDone doing cosine similarity search:{time_done:0.4f}\n")`
	`49`	`+return (results,time_done)`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit5f1a2dc

File tree

11 files changed

11 files changed

`‎pgml-apps/rag-retrieval-timing-tests/.env.development‎`

`‎pgml-apps/rag-retrieval-timing-tests/README.md‎`

`‎pgml-apps/rag-retrieval-timing-tests/main.py‎`

`‎pgml-apps/rag-retrieval-timing-tests/huggingface.py‎`

`‎pgml-apps/rag-retrieval-timing-tests/openai_local.py‎`

`‎pgml-apps/rag-retrieval-timing-tests/pinecone_local.py‎`

`‎pgml-apps/rag-retrieval-timing-tests/postgresml.py‎`

`‎pgml-apps/rag-retrieval-timing-tests/qdrant_local.py‎`

0 commit comments