File size: 7,138 Bytes
2e3951e 60ab212 23dedaa 2a104f1 23dedaa 2a104f1 23dedaa 2a104f1 2e3951e 23dedaa 2e3951e 65df949 a9f3a9e 2e3951e 65df949 2e3951e 23dedaa 2e3951e c8a1528 2e3951e eea3603 2e3951e 55c15b5 2e3951e 23dedaa 2e3951e 03e1590 0e25d1f 03e1590 60ab212 23dedaa 2a104f1 23dedaa 60ab212 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 |
import streamlit as st
import os
from llama_index.core.indices.vector_store.base import VectorStoreIndex
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.embeddings.fastembed import FastEmbedEmbedding
from llama_index.core import Settings
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext
import qdrant_client
from llama_index.core.indices.query.schema import QueryBundle
from llama_index.llms.gemini import Gemini
from llama_index.embeddings.gemini import GeminiEmbedding
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.readers.web import FireCrawlWebReader
from llama_index.core import SummaryIndex
#import streamlit_analytics2 as streamlit_analytics
import time
import dotenv
dotenv.load_dotenv()
# Set page config
#st.set_page_config(page_title="Talk to Software Documentation", page_icon="📚", layout="wide")
# Initialize session state: give every key the app reads a starting value,
# but only when that key is not already present in st.session_state.
_SESSION_DEFAULTS = {
    'setup_complete': False,   # flipped to True once the index is ready
    'documents': None,         # documents returned by the last ingestion
    'chat_history': [],        # list of (role, message) tuples
    'index': None,             # the VectorStoreIndex used for querying
    'url': "",
    'collection_name': "",
    'query': "",
}
for _state_key, _initial_value in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
# Make sure GOOGLE_API_KEY is exported in the process environment.
# os.environ only accepts string values, so the assignment is skipped when the
# variable is not configured: writing None would raise TypeError and stop the
# app before any UI is rendered.
_google_api_key = os.getenv("GOOGLE_API_KEY")
if _google_api_key is not None:
    os.environ["GOOGLE_API_KEY"] = _google_api_key
# Setup functions
def embed_setup():
    """Configure the global llama-index ``Settings`` for this app.

    Installs a FastEmbed embedding model (``BAAI/bge-small-en-v1.5``) and a
    Gemini LLM (``models/gemini-pro``, temperature 0.1) on ``Settings``.
    Returns nothing.
    """
    embedding_model = FastEmbedEmbedding(model_name="BAAI/bge-small-en-v1.5")
    Settings.embed_model = embedding_model

    gemini_llm = Gemini(model_name="models/gemini-pro", temperature=0.1)
    Settings.llm = gemini_llm
def qdrant_setup():
    """Create a Qdrant client from environment configuration.

    ``QDRANT_URL`` is passed as the client's first positional argument and
    ``QDRANT_API_KEY`` as its ``api_key``.

    Returns:
        The constructed ``qdrant_client.QdrantClient``.
    """
    qdrant_location = os.getenv("QDRANT_URL")
    qdrant_api_key = os.getenv("QDRANT_API_KEY")
    return qdrant_client.QdrantClient(qdrant_location, api_key=qdrant_api_key)
def llm_setup():
    """Build the Gemini LLM used by the app.

    The API key is read from the ``GOOGLE_API_KEY`` environment variable.

    Returns:
        A ``Gemini`` instance for ``models/gemini-pro`` with temperature 0.1.
    """
    gemini_options = {
        "api_key": os.getenv("GOOGLE_API_KEY"),
        "temperature": 0.1,
        "model_name": "models/gemini-pro",
    }
    return Gemini(**gemini_options)
def query_index(index, streaming=True):
    """Create a context-mode chat engine on top of ``index``.

    Args:
        index: Object exposing ``as_chat_engine`` (the app passes the index
            stored in ``st.session_state['index']``).
        streaming: Accepted for interface compatibility; the body does not
            use it.

    Returns:
        Whatever ``index.as_chat_engine`` returns for ``chat_mode="context"``.
    """
    # NOTE(review): the prompt contains {context_str} / {query_str}
    # placeholders -- confirm that the "context" chat mode substitutes them.
    developer_docs_prompt = """You are an AI assistant for developers, specializing in technical documentation. Your task is to provide accurate, detailed, and helpful responses based on the given documentation context.
Context information is below:
{context_str}
Always answer based on the information in the context and general knowledge and be precise
Given this context, please respond to the following user query:
{query_str}
Your response should:
Directly address the query using information from the context
Include relevant code examples or direct quotes if applicable
Mention specific sections or pages of the documentation
Highlight any best practices or potential pitfalls related to the query
After your response, suggest 3 follow-up questions based on the context that the user might find helpful for deeper understanding.
ALWAYS SUGGEST FOLLOW UP QUESTIONS
Your response:"""

    # A new memory buffer is built on every call, so each engine returned
    # here starts without any earlier conversation turns.
    conversation_memory = ChatMemoryBuffer.from_defaults(token_limit=4000)

    return index.as_chat_engine(
        chat_mode="context",
        memory=conversation_memory,
        system_prompt=developer_docs_prompt,
    )
# Document ingestion function
def ingest_documents(url):
    """Scrape ``url`` with FireCrawl and return the loaded documents.

    The reader is created in ``"scrape"`` mode and authenticated with the
    ``FIRECRAWL_API_KEY`` environment variable.

    Args:
        url: Address of the page to scrape.

    Returns:
        The documents returned by ``FireCrawlWebReader.load_data``.
    """
    firecrawl_reader = FireCrawlWebReader(
        api_key=os.getenv("FIRECRAWL_API_KEY"),
        mode="scrape",
    )
    documents = firecrawl_reader.load_data(url=url)
    # Debug aid: report the document type on stdout. Guarded because indexing
    # an empty result with documents[0] would raise IndexError.
    if documents:
        print(type(documents[0]))
    return documents
# Streamlit app: page title and short usage instructions.
st.title("Talk to Software Documentation")
intro_markdown = """
Be the programmer you've always wanted to be.
1. Paste doc link
2. Enter a Collection name
3. Ask any question you want
"""
st.markdown(intro_markdown)

# URL input for document ingestion; the widget is pre-filled with the value
# kept in session state and its current value is written back there.
entered_url = st.text_input(
    "Enter URL to crawl and ingest documents (optional):",
    value=st.session_state['url'],
)
st.session_state['url'] = entered_url

# Collection name input, stored the same way as the URL.
entered_collection_name = st.text_input(
    "Enter collection name for vector store (compulsory):",
    value=st.session_state['collection_name'],
)
st.session_state['collection_name'] = entered_collection_name
# Combined Ingest and Setup button.
# With a URL: scrape it and index the documents into the named collection.
# Without a URL: attach to the named collection as it already exists.
if st.button("Ingest and Setup"):
    if not st.session_state['collection_name'].strip():
        # The collection name is labelled compulsory in the UI, so reject a
        # blank value here instead of handing it to the vector store.
        st.error("Please enter a collection name")
    else:
        with st.spinner("Setting up query engine..."):
            embed_setup()
            client = qdrant_setup()
            # Install the explicitly keyed LLM on Settings rather than
            # building it and discarding the result.
            Settings.llm = llm_setup()
            vector_store = QdrantVectorStore(
                client=client,
                collection_name=st.session_state['collection_name'],
            )
            storage_context = StorageContext.from_defaults(vector_store=vector_store)
            if st.session_state['url']:
                st.session_state['documents'] = ingest_documents(st.session_state['url'])
                st.session_state['index'] = VectorStoreIndex.from_documents(
                    st.session_state['documents'],
                    vector_store=vector_store,
                    storage_context=storage_context,
                )
                st.success(f"Documents ingested from {st.session_state['url']} and query engine setup completed successfully!")
            else:
                st.session_state['index'] = VectorStoreIndex.from_vector_store(
                    vector_store=vector_store,
                    storage_context=storage_context,
                )
                st.success(f"Query engine setup completed successfully using existing collection: {st.session_state['collection_name']}")
            # Only reached when every step above succeeded.
            st.session_state['setup_complete'] = True
# Query input; pre-filled from session state and written back to it.
typed_query = st.text_input("Enter your query:", value=st.session_state['query'])
st.session_state['query'] = typed_query

# Search button: requires a completed setup and a non-empty query.
if st.button("Search"):
    if not st.session_state['setup_complete']:
        st.error("Please complete the setup first")
    elif not st.session_state['query']:
        st.error("Please enter a query")
    else:
        with st.spinner("Searching..."):
            # Two attempts in total: the first failure is reported and
            # followed by a 120 second pause, the second one stops the script.
            for is_final_attempt in (False, True):
                try:
                    chat_engine = query_index(st.session_state['index'])
                    response = chat_engine.chat(st.session_state['query'])
                except Exception as error:
                    if is_final_attempt:
                        st.error(f"Retry failed. Error: {str(error)}")
                        st.stop()
                    else:
                        st.error(f"An error occurred: {str(error)}")
                        st.info("Retrying in 120 seconds...")
                        time.sleep(120)
                else:
                    break

        # Add the query and response to chat history
        history = st.session_state['chat_history']
        history.append(("User", st.session_state['query']))
        history.append(("Assistant", str(response.response)))

        # Display the most recent response prominently
        st.subheader("Assistant's Response:")
        st.write(response.response)
# Sidebar for chat history.
st.sidebar.title("Chat History")

# Reserve the history area first so it stays above the button in the layout,
# but fill it only after the button has been handled. Filling it first would
# keep the just-cleared messages on screen next to the "cleared" notice.
history_area = st.sidebar.container()

# Clear chat history button in sidebar
if st.sidebar.button("Clear Chat History"):
    st.session_state['chat_history'] = []
    st.sidebar.success("Chat history cleared!")

for role, message in st.session_state['chat_history']:
    history_area.text(f"{role}: {message}")