from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_pinecone import PineconeVectorStore
from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from uuid import uuid4

from dotenv import load_dotenv
import os

# Prompt template string for the RAG chain (local module).
from prompt import template

load_dotenv()
index_name = os.environ.get("INDEX_NAME")

# Global initialization: one embedding model and one vector store shared by default.
embedding_model = "text-embedding-3-small"
embedding = OpenAIEmbeddings(model=embedding_model)
# PineconeVectorStore takes the index *name* via the index_name keyword;
# the bare `index` keyword expects an already-constructed pinecone.Index object.
vector_store = PineconeVectorStore(index_name=index_name, embedding=embedding)

def get_vectorstore(text_chunk, index, title, model="text-embedding-3-small"):
    """Embed a text chunk and upsert it into the named Pinecone index.

    Returns {"filename_id": <document id>} on success, False on failure.
    """
    try:
        embedding = OpenAIEmbeddings(model=model)
        vector_store = PineconeVectorStore(index_name=index, embedding=embedding)
        document = Document(
            page_content=text_chunk,
            metadata={"title": title},
        )
        # Prefix the UUID with the title so chunks from the same file are easy to spot.
        doc_id = f"{title}_{uuid4()}"
        vector_store.add_documents(documents=[document], ids=[doc_id])
        return {"filename_id": doc_id}

    except Exception as e:
        print(e)
        return False
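
# Usage sketch (assumes the Pinecone index behind INDEX_NAME exists and
# OPENAI_API_KEY / PINECONE_API_KEY are set; chunk text and title are made up):
#
#     result = get_vectorstore("First chunk of the paper...", index_name, "my_paper")
#     if result:
#         print(result["filename_id"])  # e.g. "my_paper_6f1c..."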


def retrieve_context(query: str, index: str, model="text-embedding-3-small", vector_store=None):
    """Retrieve the chunks most similar to `query`.

    Builds a vector store from the named `index` when none is passed in.
    Returns a list of Documents (possibly empty), or False on failure.
    """
    try:
        if vector_store is None:
            vector_store = PineconeVectorStore(
                index_name=index, embedding=OpenAIEmbeddings(model=model)
            )
        # Return at most three chunks, and only those clearing the 0.5 relevance score.
        retriever = vector_store.as_retriever(
            search_type="similarity_score_threshold",
            search_kwargs={"k": 3, "score_threshold": 0.5},
        )
        return retriever.invoke(query)

    except Exception as e:
        print(e)
        return False
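
# Usage sketch (hypothetical query; reuses the module-level vector_store):
#
#     docs = retrieve_context("What does the paper conclude?", index_name,
#                             vector_store=vector_store)
#     for doc in docs or []:
#         print(doc.metadata["title"], doc.page_content[:80])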

# Module-level default LLM, kept for code that imports it from here;
# generate_stream configures its own instance from its arguments.
llm = ChatOpenAI(model="gpt-4o-mini", max_tokens=300, temperature=0.5)
def generate_stream(query: str, messages=None, model="gpt-4o-mini", max_tokens=300,
                    temperature=0.5, index_name="", stream=True, vector_store=None):
    """Answer `query` over retrieved context.

    Returns a token generator when stream=True, the full string when
    stream=False, and False on failure.
    """
    try:
        # Avoid the mutable-default-argument pitfall of `messages=[]`.
        messages = messages or []
        prompt = PromptTemplate.from_template(template)
        context = retrieve_context(query=query, index=index_name, vector_store=vector_store)
        # Build the LLM from the arguments so model/max_tokens/temperature
        # are honored rather than silently ignored.
        chat = ChatOpenAI(model=model, max_tokens=max_tokens, temperature=temperature)
        llm_chain = prompt | chat | StrOutputParser()
        inputs = {"context": context, "history": messages, "query": query}
        if stream:
            return llm_chain.stream(inputs)
        return llm_chain.invoke(inputs)

    except Exception as e:
        print(e)
        return False
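

# Minimal end-to-end sketch (assumes the index and API keys are in place;
# the chunk text, title, and question below are illustrative only):
if __name__ == "__main__":
    get_vectorstore("RAG pairs retrieval with generation.", index_name, "demo_doc")
    chunks = generate_stream("What is RAG?", index_name=index_name,
                             vector_store=vector_store)
    if chunks:
        for token in chunks:
            print(token, end="", flush=True)
        print()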