from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_pinecone import PineconeVectorStore
from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from uuid import uuid4
from dotenv import load_dotenv
import os

from prompt import *  # provides the `template` prompt string used in generate_stream
load_dotenv()
index_name = os.environ.get("INDEX_NAME")

# Global initialization: one shared embedding model and Pinecone-backed store.
embedding_model = "text-embedding-3-small"
embedding = OpenAIEmbeddings(model=embedding_model)
# Pass the index *name* via `index_name`; the `index=` kwarg expects a
# pinecone.Index object, not a string.
vector_store = PineconeVectorStore(index_name=index_name, embedding=embedding)
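
# A minimal `.env` sketch of the variables this module assumes; the key names
# follow the standard conventions of langchain_openai and langchain_pinecone,
# and the index name is a placeholder:
#
#   OPENAI_API_KEY=sk-...
#   PINECONE_API_KEY=...
#   INDEX_NAME=my-rag-index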

def get_vectorstore(text_chunk, index, title, model="text-embedding-3-small"):
    """Embed `text_chunk` and upsert it into the Pinecone index named `index`."""
    try:
        embedding = OpenAIEmbeddings(model=model)
        print("loaded embedding")
        vector_store = PineconeVectorStore(index_name=index, embedding=embedding)
        print("loaded vector store")
        document = Document(
            page_content=text_chunk,
            metadata={"title": title},
        )
        print("loaded document")
        # Prefix the UUID with the title so ids stay traceable to their source.
        doc_id = f"{title}_{uuid4()}"
        vector_store.add_documents(documents=[document], ids=[doc_id])
        print("added document")
        return {"filename_id": doc_id}
    except Exception as e:
        print(e)
        return False

def retrieve_context(query: str, index: str, model="text-embedding-3-small", vector_store=None):
    """Return the top chunks relevant to `query`, or False on failure."""
    try:
        # Fall back to a fresh store if the caller did not pass one in.
        if vector_store is None:
            vector_store = PineconeVectorStore(index_name=index, embedding=OpenAIEmbeddings(model=model))
        retriever = vector_store.as_retriever(
            search_type="similarity_score_threshold",
            search_kwargs={"k": 3, "score_threshold": 0.5},
        )
        return retriever.invoke(query)
    except Exception as e:
        print(e)
        return False

# Module-level default chat model, kept for callers that import `llm` directly.
llm = ChatOpenAI(model="gpt-4o-mini", max_tokens=300, temperature=0.5)


def generate_stream(query: str, messages=None, model="gpt-4o-mini", max_tokens=300,
                    temperature=0.5, index_name="", stream=True, vector_store=None):
    """Retrieve context for `query` and stream (or return) the model's answer."""
    try:
        messages = messages or []  # avoid the shared mutable-default pitfall
        print("init chat")
        chat = ChatOpenAI(model=model, max_tokens=max_tokens, temperature=temperature)
        print("init template")
        prompt = PromptTemplate.from_template(template)
        print("retrieving context")
        context = retrieve_context(query=query, index=index_name, vector_store=vector_store)
        print(f"Context: {context}")
        llm_chain = prompt | chat | StrOutputParser()
        print("streaming")
        inputs = {"context": context, "history": messages, "query": query}
        if stream:
            return llm_chain.stream(inputs)
        return llm_chain.invoke(inputs)
    except Exception as e:
        print(e)
        return False
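

# A minimal smoke-test sketch, assuming the `.env` above is populated and the
# Pinecone index already exists; the sample chunk, title, and query are placeholders.
if __name__ == "__main__":
    result = get_vectorstore(
        text_chunk="Pinecone is a managed vector database for similarity search.",
        index=index_name,
        title="sample-doc",
    )
    print(result)  # e.g. {"filename_id": "sample-doc_<uuid>"} on success

    chunks = generate_stream(
        query="What is Pinecone?",
        index_name=index_name,
        vector_store=vector_store,
        stream=True,
    )
    if chunks is not False:  # generate_stream returns False on error
        for token in chunks:
            print(token, end="", flush=True)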