import base64

import requests
import streamlit as st
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import Ollama
from langchain_community.vectorstores import FAISS
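
# Assumptions: a local Ollama server is running with the llama3.2 model pulled
# (`ollama pull llama3.2`), and the unauthenticated GitHub API rate limit of
# 60 requests/hour is acceptable for this single-repo demo.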


def fetch_github_data(repo_url):
    """Fetch a GitHub repo's description and README as one text blob."""
    # Parse the owner and repo name from the URL, tolerating a trailing slash.
    owner, repo = repo_url.rstrip('/').split('/')[-2:]

    headers = {'Accept': 'application/vnd.github.v3+json'}
    base_url = 'https://api.github.com'

    content = ""
    repo_response = requests.get(f"{base_url}/repos/{owner}/{repo}", headers=headers)
    if repo_response.status_code == 200:
        repo_data = repo_response.json()
        content += f"Description: {repo_data.get('description') or ''}\n"
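
    # The README endpoint returns the file base64-encoded in the 'content'
    # field of the JSON payload, so it has to be decoded before use.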
    readme_response = requests.get(f"{base_url}/repos/{owner}/{repo}/readme", headers=headers)
    if readme_response.status_code == 200:
        readme_data = readme_response.json()
        content += base64.b64decode(readme_data['content']).decode('utf-8') + "\n"

    return content


def create_vector_store(text_data):
    """Split the text into overlapping chunks and index them in FAISS."""
    # 1000-character chunks with 200 characters of overlap keep related
    # sentences together across chunk boundaries.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
    )
    chunks = text_splitter.split_text(text_data)
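
    # all-MiniLM-L6-v2 is a small, fast sentence-transformers model that
    # produces 384-dimensional embeddings; any other sentence-transformers
    # model name could be substituted here.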
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vector_store = FAISS.from_texts(chunks, embeddings)
    return vector_store


def main():
    st.title("Project Resilience Q&A Assistant")
    st.write("Ask anything about Project Resilience - answers always come from repo data!")

    github_url = 'https://github.com/Project-Resilience/platform'

    # Build the index and the LLM once per session: Streamlit reruns this
    # script on every interaction, so caching in session_state avoids hitting
    # the GitHub API and re-embedding the repo text on each rerun.
    if 'vector_store' not in st.session_state:
        repo_data = fetch_github_data(github_url)
        st.session_state.vector_store = create_vector_store(repo_data)
        st.session_state.llm = Ollama(model="llama3.2", temperature=0.7)

    question = st.text_input("Ask a question about the project")
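
    # Retrieval-augmented generation: find the chunks most similar to the
    # question and pass them to the LLM as grounding context.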
    if question:
        with st.spinner("Generating answer..."):
            k = 3
            docs = st.session_state.vector_store.similarity_search(question, k=k)

            context = "\n\n".join(doc.page_content for doc in docs)
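
            # The prompt grounds the model in the retrieved chunks and tells
            # it to admit when the context does not contain the answer.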
            prompt = (
                "You are a helpful assistant answering questions about Project Resilience.\n"
                "Based on the following context, answer the question:\n\n"
                f"Context:\n{context}\n\n"
                f"Question: {question}\n\n"
                "If the question cannot be answered from the context, say so.\n\n"
                "Answer:"
            )
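
            # Stream the answer token by token so the user sees output
            # immediately instead of waiting for the full response.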
            answer_container = st.empty()
            stream = st.session_state.llm.stream(prompt)
            answer = ""
            for chunk in stream:
                answer += chunk
                answer_container.write(answer)

    st.sidebar.header("Project Resilience Assistant")
    st.sidebar.write("""
    Project Resilience's platform for decision makers, data scientists, and the public.

    Project Resilience, initiated under the Global Initiative on AI and Data Commons, is a collaborative effort to build a public AI utility that could inform and help address global decision-augmentation challenges.

    The project empowers a global community of innovators, thought leaders, and the public to enhance and use a shared collection of data and AI tools, improving preparedness, intervention, and response to environmental, health, information, or economic threats in our communities. It also supports broader efforts toward achieving the Sustainable Development Goals (SDGs).
    """)


if __name__ == "__main__":
    main()
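
# To try it out (assuming this file is saved as app.py and the prerequisites
# noted above are available):
#   streamlit run app.py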