the-confused-coder committed on
Commit
f1f1e0a
·
verified ·
1 Parent(s): f9c5302

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -0
app.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit app that answers a question about the content of a few web pages.

Flow on "Submit Query":
  1. Download the pages entered in the sidebar and convert them to text.
  2. Split the text into chunks and embed them with the selected provider.
  3. Store the vectors in a FAISS index (also saved to disk under ``file_name``).
  4. Run a retrieval QA chain with the selected LLM and show answer + sources.
"""
import os

import streamlit as st
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.document_loaders import UnstructuredURLLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings

from vish_api_keys import openaiapi, geminiapi  # for api key, modify accordingly


# LLM config
os.environ['OPENAI_API_KEY'] = openaiapi  # insert your api key here
os.environ['GOOGLE_API_KEY'] = geminiapi
llm_openai = OpenAI(temperature=0.7, max_tokens=500)  # using gpt-3.5-turbo-instruct
llm_gemini = ChatGoogleGenerativeAI(model="gemini-pro")

# Page config
st.title("URL Research Tool")
# adding model selection choice
model_selection = st.radio(label='Choose LLM👇', options=['OpenAI','Gemini'])
# display model selection
st.write(f"Selected Model: :rainbow[{model_selection}]")

# Sidebar config
st.sidebar.title("Enter URLs:")
no_of_sidebars = 3  # number of URL input boxes shown in the sidebar
file_name = 'all_url_data_vectors'  # directory the FAISS index is saved to
# One text input per URL; fields the user leaves untouched come back as ''.
urls = []
for i in range(no_of_sidebars):
    url = st.sidebar.text_input(f"URL {i+1}")
    urls.append(url)

# Placeholders for query and progress
query_placeholder = st.empty()
user_query = query_placeholder.text_input("Question: ")
query_button = st.button("Submit Query")
progress_placeholder = st.empty()


if query_button:  # on button click
    # Drop blank sidebar fields so the loader is never asked to fetch ''.
    submitted_urls = [u.strip() for u in urls if u and u.strip()]
    if not submitted_urls:
        st.warning("Please enter at least one URL in the sidebar.")
        st.stop()
    if not user_query or not user_query.strip():
        st.warning("Please enter a question.")
        st.stop()

    progress_placeholder.text("Work in Progress...")

    # Loading URL Data in form of Text
    url_loader = UnstructuredURLLoader(urls=submitted_urls)
    url_data = url_loader.load()

    # Splitting loaded data into chunks
    text_splitter = RecursiveCharacterTextSplitter(
        separators=['\n\n', '\n', '.', ' '],
        chunk_size=1000,
    )
    progress_placeholder.text("Work in Progress: Text Splitting")
    chunked_url_data = text_splitter.split_documents(url_data)
    if not chunked_url_data:
        # Nothing was extracted (e.g. every URL failed to load); an index
        # cannot be built from zero chunks, so report it and stop here.
        progress_placeholder.empty()
        st.error("No text could be extracted from the given URLs.")
        st.stop()

    # Create Embeddings: the LLM and the embedding model come from the same provider
    if model_selection == "OpenAI":
        selected_model = llm_openai
        embedding_creator = OpenAIEmbeddings()
    else:
        selected_model = llm_gemini
        embedding_creator = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

    progress_placeholder.text("Work in Progress: Creating Embeddings")
    data_vectors = FAISS.from_documents(chunked_url_data, embedding_creator)
    # Save Embeddings
    data_vectors.save_local(file_name)

    if os.path.exists(file_name):  # check for testing file saving
        progress_placeholder.text("Work in Progress: Loading Results")
        # fetching data vectors
        # NOTE(security): load_local unpickles data from disk, hence the explicit
        # opt-in flag. The index read here was written by this app a few lines
        # above; never point this at an index from an untrusted source.
        data_vectors_loaded = FAISS.load_local(
            file_name,
            embedding_creator,
            allow_dangerous_deserialization=True,
        )
        # querying LLM
        main_chain = RetrievalQAWithSourcesChain.from_llm(
            llm=selected_model,
            retriever=data_vectors_loaded.as_retriever(),
        )
        llm_result = main_chain({'question': user_query})
        progress_placeholder.text("Task Completed: Displaying Results")
        st.header('Answer:')
        # fetching and printing LLM's answer
        st.write(llm_result['answer'])
        # getting source(s) of answer from llm
        answer_sources = llm_result.get('sources', '')  # check for no sources
        # One source per line; skip blank lines so no empty entries are rendered.
        answer_sources_list = [s for s in answer_sources.split('\n') if s.strip()]
        if answer_sources_list:
            st.subheader('Sources:')
            for source in answer_sources_list:
                st.write(source)
    else:
        # Previously a missing index directory was silently ignored.
        progress_placeholder.empty()
        st.error("The vector index could not be saved; no answer was generated.")