from sentence_transformers import SentenceTransformer
import pinecone
import openai
import streamlit as st
from indic_transliteration import sanscript
from indic_transliteration.sanscript import transliterate

# Set OpenAI API key (the Completion API below requires openai<1.0)
openai.api_key = "YOUR_OPENAI_API_KEY"

# Initialize SentenceTransformer model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Initialize Pinecone index
pinecone.init(api_key='YOUR_PINECONE_API_KEY', environment='gcp-starter')
index = pinecone.Index('langchain-chatbot')


# Function to find the most relevant matches in the Pinecone index
def find_match(query_text):
    query_embedding = model.encode(query_text).tolist()
    result = index.query(vector=query_embedding, top_k=2, include_metadata=True)
    return "\n".join(match['metadata']['text'] for match in result['matches'])


# Function to refine a user query using OpenAI's Completion API
def query_refiner(conversation, query):
    response = openai.Completion.create(
        model="text-davinci-003",
        prompt=f"Given the following user query and conversation log, formulate a question that would be the most relevant to provide the user with an answer from a knowledge base.\n\nCONVERSATION LOG: \n{conversation}\n\nQuery: {query}\n\nRefined Query:",
        temperature=0.7,
        max_tokens=256,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )
    return response['choices'][0]['text']


# Function to build the conversation string from the Streamlit session state
def get_conversation_string():
    conversation_string = ""
    for i in range(len(st.session_state['responses']) - 1):
        conversation_string += "Human: " + st.session_state['requests'][i] + "\n"
        conversation_string += "Bot: " + st.session_state['responses'][i + 1] + "\n"
    return conversation_string


# Function to transliterate Tamil script into a romanized (ITRANS) form
def transliterate_tamil_to_english(text):
    return transliterate(text, sanscript.TAMIL, sanscript.ITRANS)


# Streamlit app
def main():
    st.title("LangChain Chatbot")

    # Initialize the conversation history used by get_conversation_string
    if 'responses' not in st.session_state:
        st.session_state['responses'] = ["How can I assist you?"]
    if 'requests' not in st.session_state:
        st.session_state['requests'] = []

    # User input for the conversation in Tamil
    user_input_tamil = st.text_input("User Input (Tamil):", "")

    if user_input_tamil:
        # Transliterate the Tamil input to a romanized form for processing
        user_input_english = transliterate_tamil_to_english(user_input_tamil)

        # Refine the query using OpenAI, given the conversation so far
        refined_query = query_refiner(get_conversation_string(), user_input_english)

        # Find the most relevant matches using Sentence Transformers and Pinecone
        match_result = find_match(refined_query)

        # Display results
        st.text("User Input (Tamil): " + user_input_tamil)
        st.text("User Input (English): " + user_input_english)
        st.text("Refined Query: " + refined_query)
        st.text("Top Matches:")
        st.text(match_result)

        # Record the exchange so later turns see the conversation log
        st.session_state['requests'].append(user_input_english)
        st.session_state['responses'].append(match_result)


# Run the Streamlit app
if __name__ == "__main__":
    main()
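

# --------------------------------------------------------------------------
# Optional helper: a minimal sketch of how the 'langchain-chatbot' index could
# be populated so that find_match's metadata['text'] lookups succeed. It reuses
# the `model` and `index` objects defined above; the document IDs and the
# sample texts in the usage note are hypothetical placeholders, not taken from
# any existing knowledge base.
def build_index(documents):
    # Embed each document with the same SentenceTransformer used at query time
    # and store the raw text in metadata['text'], which find_match reads back.
    vectors = []
    for i, text in enumerate(documents):
        embedding = model.encode(text).tolist()
        vectors.append((f"doc-{i}", embedding, {"text": text}))
    index.upsert(vectors=vectors)

# Example usage (hypothetical documents):
# build_index([
#     "Tamil Nadu is a state in southern India.",
#     "Chennai is the capital of Tamil Nadu.",
# ])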