# URL Research Tool: a Streamlit app that loads web pages, splits their text
# into chunks, embeds the chunks into a FAISS index, and answers questions
# about the content (with sources) via OpenAI or Google Gemini.

import os

import streamlit as st
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.document_loaders import UnstructuredURLLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings

# LLM config: API keys are read from the 'openaiapi' and 'geminiapi' secrets
os.environ['OPENAI_API_KEY'] = os.getenv('openaiapi')
os.environ['GOOGLE_API_KEY'] = os.getenv('geminiapi')

llm_openai = OpenAI(temperature=0.7, max_tokens=300)  # defaults to gpt-3.5-turbo-instruct
llm_gemini = ChatGoogleGenerativeAI(model="gemini-pro")

# Page config
st.title("URL Research Tool")

# Model selection
model_selection = st.radio(label='Choose LLM👇', options=['Gemini', 'OpenAI'])
st.write(f"Selected Model: :rainbow[{model_selection}]")

# Sidebar config
st.sidebar.title("Enter URLs:")
num_url_inputs = 3
urls = []
file_name = 'all_url_data_vectors'  # local path for the saved FAISS index

# Sidebar text inputs for the URLs; skip any left empty
for i in range(num_url_inputs):
    url = st.sidebar.text_input(f"URL {i+1}")
    if url:
        urls.append(url)

# Placeholders for the query box and progress messages
query_placeholder = st.empty()
user_query = query_placeholder.text_input("Question: ")
query_button = st.button("Submit Query")
progress_placeholder = st.empty()

if query_button:  # on button click
    progress_placeholder.text("Work in Progress...")

    # Load the URL contents as text documents
    url_loader = UnstructuredURLLoader(urls=urls)
    url_data = url_loader.load()

    # Split the loaded documents into chunks
    text_splitter = RecursiveCharacterTextSplitter(
        separators=['\n\n', '\n', '.', ' '],
        chunk_size=1000,
    )
    progress_placeholder.text("Work in Progress: Text Splitting")
    chunked_url_data = text_splitter.split_documents(url_data)

    # Pick the LLM and the matching embedding model
    if model_selection == "OpenAI":
        selected_model = llm_openai
        embedding_creator = OpenAIEmbeddings()
    else:
        selected_model = llm_gemini
        embedding_creator = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

    progress_placeholder.text("Work in Progress: Creating Embeddings")
    data_vectors = FAISS.from_documents(chunked_url_data, embedding_creator)

    # Save the FAISS index locally
    data_vectors.save_local(file_name)

    if os.path.exists(file_name):  # confirm the index was written to disk
        progress_placeholder.text("Work in Progress: Loading Results")

        # Reload the saved vectors
        data_vectors_loaded = FAISS.load_local(
            file_name, embedding_creator, allow_dangerous_deserialization=True
        )

        # Build the retrieval QA chain and query the selected LLM
        main_chain = RetrievalQAWithSourcesChain.from_llm(
            llm=selected_model, retriever=data_vectors_loaded.as_retriever()
        )
        llm_result = main_chain({'question': user_query})

        progress_placeholder.text("Task Completed: Displaying Results")

        # Display the LLM's answer
        st.header('Answer:')
        st.write(llm_result['answer'])

        # Display the source URL(s) for the answer, if the chain returned any
        answer_sources = llm_result.get('sources', '')
        if answer_sources:
            answer_sources_list = answer_sources.split('\n')
            st.subheader('Sources:')
            for source in answer_sources_list:
                st.write(source)
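
# Usage note (a sketch, assuming this file is saved as app.py and the
# 'openaiapi' and 'geminiapi' secrets are set in the hosting environment,
# e.g. as Hugging Face Space secrets or shell environment variables):
#   streamlit run app.py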