# Hugging Face Spaces app — URL Research Tool
# (Space build status at time of export: Build error)
import os

import langchain
import spaces
import streamlit as st
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.document_loaders import UnstructuredURLLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
| # LLM config | |
| # api_key = secrets.get(geminiapi) | |
| os.environ['OPENAI_API_KEY'] = os.getenv('openaiapi') | |
| os.environ['GOOGLE_API_KEY'] = os.getenv('geminiapi') | |
| llm_openai = OpenAI(temperature=0.7, max_tokens=300) # using gpt-3.5-turbo-instruct | |
| llm_gemini = ChatGoogleGenerativeAI(model="gemini-pro") | |
| # Page config | |
| st.title("URL Research Tool") | |
| # adding model selection choice | |
| model_selection = st.radio(label='Choose LLM👇', options=['OpenAI','Gemini']) | |
| # display model selection | |
| st.write(f"Selected Model: :rainbow[{model_selection}]") | |
| # Sidebar config | |
| st.sidebar.title("Enter URLs:") | |
| no_of_sidebars = 3 | |
| urls = [] | |
| file_name = 'all_url_data_vectors' | |
| # Sidebars for URL input | |
| for i in range(no_of_sidebars): | |
| url = st.sidebar.text_input(f"URL {i+1}") | |
| urls.append(url) | |
| # Placeholders for query and progress | |
| query_placeholder = st.empty() | |
| user_query = query_placeholder.text_input("Question: ") | |
| query_button = st.button("Submit Query") | |
| progress_placeholder = st.empty() | |
| if query_button: # on button click | |
| progress_placeholder.text("Work in Progress...") | |
| # Loading URL Data in form of Text | |
| url_loader = UnstructuredURLLoader(urls=urls) | |
| url_data = url_loader.load() | |
| # Splitting loaded data into chunks | |
| text_splitter = RecursiveCharacterTextSplitter( | |
| separators=['\n\n', '\n', '.', ' '], | |
| chunk_size=1000, | |
| ) | |
| progress_placeholder.text("Work in Progress: Text Splitting") | |
| chunked_url_data = text_splitter.split_documents(url_data) | |
| # Create Embeddings | |
| if model_selection=="OpenAI": | |
| selected_model = llm_openai | |
| embedding_creator = OpenAIEmbeddings() | |
| else: | |
| selected_model = llm_gemini | |
| embedding_creator = GoogleGenerativeAIEmbeddings(model="models/embedding-001") | |
| progress_placeholder.text("Work in Progress: Creating Embeddings") | |
| data_vectors = FAISS.from_documents(chunked_url_data, embedding_creator) | |
| # Save Embeddings | |
| data_vectors.save_local(file_name) | |
| if os.path.exists(file_name): # check for testing file saving | |
| progress_placeholder.text("Work in Progress: Loading Results") | |
| # fetching data vectors | |
| data_vectors_loaded = FAISS.load_local(file_name, embedding_creator, allow_dangerous_deserialization=True) | |
| # querying LLM | |
| main_chain = RetrievalQAWithSourcesChain.from_llm(llm=selected_model, retriever=data_vectors_loaded.as_retriever()) | |
| llm_result = main_chain({'question': user_query}) | |
| progress_placeholder.text("Task Completed: Displaying Results") | |
| st.header('Answer:') | |
| # fetching and printing LLM's answer | |
| st.write(llm_result['answer']) | |
| # getting source(s) of answer from llm | |
| answer_sources = llm_result.get('sources','') # check for no sources | |
| if answer_sources: | |
| answer_sources_list = answer_sources.split('\n') | |
| st.subheader('Sources:') | |
| for source in answer_sources_list: | |
| st.write(source) | |