import base64

import requests
import streamlit as st
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import Ollama
from langchain_community.vectorstores import FAISS
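
# Assumed setup (package names, model, and filename are assumptions inferred from the imports above):
#   pip install streamlit langchain langchain-community faiss-cpu sentence-transformers requests
#   ollama pull llama3.2        # a local Ollama server must be running
#   streamlit run <this_file>.py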

# Function to fetch GitHub repo data
def fetch_github_data(repo_url):
    # Derive owner and repo name from a URL like https://github.com/<owner>/<repo>
    parts = repo_url.rstrip('/').split('/')
    owner, repo = parts[-2], parts[-1]
    
    headers = {'Accept': 'application/vnd.github.v3+json'}
    base_url = 'https://api.github.com'
    
    content = ""
    repo_response = requests.get(f"{base_url}/repos/{owner}/{repo}", headers=headers)
    if repo_response.status_code == 200:
        repo_data = repo_response.json()
        content += f"Description: {repo_data.get('description', '')}\n"
    
    # README contents are returned base64-encoded by the GitHub API
    readme_response = requests.get(f"{base_url}/repos/{owner}/{repo}/readme", headers=headers, timeout=10)
    if readme_response.status_code == 200:
        readme_data = readme_response.json()
        content += base64.b64decode(readme_data['content']).decode('utf-8') + "\n"
    
    return content
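
# Note: the calls above are unauthenticated GitHub API requests, which are rate-limited
# (currently 60 requests/hour per IP); adding an Authorization header with a personal
# access token is one way to raise the limit if needed (omitted here as an assumption
# about how the app is deployed).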

# Function to create vector store
def create_vector_store(text_data):
    # Split the repo text into overlapping chunks sized for retrieval
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200
    )
    chunks = text_splitter.split_text(text_data)
    
    # Embed each chunk locally and index the vectors in FAISS for similarity search
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vector_store = FAISS.from_texts(chunks, embeddings)
    return vector_store

# Streamlit app
def main():
    st.title("Project Resilience Q&A Assistant")
    st.write("Ask anything about Project Resilience - answers always come from repo data!")
    
    # Hardcoded GitHub URL for the Project Resilience platform repository
    github_url = 'https://github.com/Project-Resilience/platform'
    
    # Streamlit reruns this script on every interaction, so fetch the repo data and
    # build the vector store and LLM only once, keeping them in session state
    if 'vector_store' not in st.session_state:
        repo_data = fetch_github_data(github_url)
        st.session_state.vector_store = create_vector_store(repo_data)
        st.session_state.llm = Ollama(model="llama3.2", temperature=0.7)
    
    # Question input
    question = st.text_input("Ask a question about the project")
    
    # Get and display answer
    if question:
        with st.spinner("Generating answer..."):
            # Retrieve top-k documents
            k = 3
            docs = st.session_state.vector_store.similarity_search(question, k=k)
            
            # Extract the text from the documents
            context = "\n\n".join([doc.page_content for doc in docs])
            
            # Build a zero-shot prompt that grounds the answer in the retrieved context
            prompt = (
                f"You are a helpful assistant that answers questions about Project Resilience.\n"
                f"Based on the following context, answer the question:\n\n"
                f"Context:\n{context}\n\n"
                f"Question: {question}\n\n"
                f"If the question cannot be answered from the context, say so.\n\n"
                f"Answer:"
            )
            
            # Generate the answer using the language model
            answer_container = st.empty()
            stream = st.session_state.llm.stream(prompt)
            answer = ""
            for chunk in stream:
                answer += chunk
                answer_container.write(answer)
    
    # Sidebar with additional info
    st.sidebar.header("Project Resilience Assistant")
    st.sidebar.write("""
    Project Resilience provides a platform for decision makers, data scientists, and the public.

    Project Resilience, initiated under the Global Initiative on AI and Data Commons, is a collaborative effort to build a public AI utility that could inform and help address global decision-augmentation challenges.

    The project empowers a global community of innovators, thought leaders, and the public to enhance and use a shared collection of data and AI tools, improving preparedness, intervention, and response to environmental, health, information, or economic threats in our communities. It also supports broader efforts toward achieving the Sustainable Development Goals (SDGs).
    """)

if __name__ == "__main__":
    main()