Spaces:

Haseeb-001
/

GenomiXplorer

Sleeping

File size: 6,529 Bytes

c51117f

import json
import os
from dataclasses import dataclass, field

import chromadb
import pandas as pd
import streamlit as st
from fpdf import FPDF
from groq import Groq
from langgraph.graph import StateGraph

# Read the Groq credential from the process environment (never hard-coded).
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Without a key nothing below can work: tell the user and halt this script run.
if not GROQ_API_KEY:
    st.error("Please set GROQ_API_KEY environment variable.")
    st.stop()

# Groq API Setup: `client` is the shared handle used for every LLM request.
try:
    client = Groq(api_key=GROQ_API_KEY)
except Exception as groq_error:
    st.error(f"Error initializing Groq API: {groq_error}")
    st.stop()

# ChromaDB Setup: open (or create) the on-disk store and its collection.
# NOTE(review): `collection` is not referenced anywhere else in the visible
# source -- confirm whether it is still needed.
try:
    chroma_client = chromadb.PersistentClient(path="./chromadb_store")
    collection = chroma_client.get_or_create_collection(name="dna_analysis")
except Exception as chroma_error:
    st.error(f"Error initializing ChromaDB: {chroma_error}")
    st.stop()

def load_and_preprocess(file):
    """Load an uploaded genomic data file into a DataFrame.

    The parser is picked from the file name's extension, compared
    case-insensitively: ``.csv`` is read as comma-separated text, ``.xlsx``
    as an Excel workbook and ``.txt`` as tab-separated text. No further
    preprocessing is applied to the loaded table.

    Args:
        file: File-like object exposing a ``name`` attribute (for example a
            Streamlit uploaded file).

    Returns:
        The parsed ``pandas.DataFrame``, or ``None`` when the extension is
        not supported or loading fails. A loading failure is additionally
        reported through ``st.error``.
    """
    try:
        # Normalise case so that "DATA.CSV" is handled like "data.csv".
        filename = file.name.lower()
        if filename.endswith(".csv"):
            return pd.read_csv(file)
        if filename.endswith(".xlsx"):
            return pd.read_excel(file)
        if filename.endswith(".txt"):
            return pd.read_csv(file, delimiter="\t")
        # Unknown extension: signal "not loadable" without raising.
        return None
    except Exception as e:
        # Deliberately broad: this is the UI boundary, and pandas can raise
        # many unrelated error types for malformed uploads.
        st.error(f"Error loading file: {e}")
        return None

def query_llm(category, data):
    """Ask the Groq chat model to analyse ``data`` under one ``category``.

    A single user message embedding both arguments is sent as a
    non-streaming chat completion.

    Returns:
        The text content of the first returned choice. If anything raises,
        the error is shown through ``st.error`` and the fixed string
        ``"Error occurred during analysis."`` is returned instead.
    """
    try:
        user_message = {
            "role": "user",
            "content": f"Analyze the following DNA data under the category {category}: {data}",
        }
        response = client.chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=[user_message],
            stream=False,
        )
        first_choice = response.choices[0]
        return first_choice.message.content
    except Exception as exc:
        st.error(f"Error querying LLM: {exc}")
        return "Error occurred during analysis."

# Define Graph for LangGraph
@dataclass(eq=False)
class DNAAnalysisState:
    """State passed between the DNA analysis steps.

    Declared as a dataclass with annotated fields so that ``StateGraph`` has
    a state schema to work from. ``eq=False`` keeps identity-based equality
    and hashing, as a plain class would have.

    The constructor still accepts ``DNAAnalysisState(data)`` and
    ``DNAAnalysisState(data, results)``.
    """

    # Genomic table serialised to JSON (callers pass ``DataFrame.to_json()``).
    data: str
    # Maps a category label to the insight text produced for that category.
    results: dict = field(default_factory=dict)

    def __post_init__(self):
        # Keep the historical contract: a falsy ``results`` (including an
        # explicit ``None``) means "start from an empty dict".
        if not self.results:
            self.results = {}


graph = StateGraph(DNAAnalysisState)


def _record_insight(state, category):
    """Query the LLM for ``category`` and store the reply in ``state.results``."""
    state.results[category] = query_llm(category, state.data)
    return state


# Define Analysis Nodes
def analyze_genomic_disorders(state):
    """Store the "Genomic Disorders" insight on ``state`` and return ``state``."""
    return _record_insight(state, "Genomic Disorders")


def analyze_physical_traits(state):
    """Store the "Physical Characteristics" insight on ``state`` and return ``state``."""
    return _record_insight(state, "Physical Characteristics")


def analyze_disease_risk(state):
    """Store the "Future Disease Risks" insight on ``state`` and return ``state``."""
    return _record_insight(state, "Future Disease Risks")


def analyze_ancestry(state):
    """Store the "Ancestry & Heritage" insight on ``state`` and return ``state``."""
    return _record_insight(state, "Ancestry & Heritage")


def analyze_dna_matching(state, second_data):
    """Analyze relationship between two DNA datasets.

    Sends ``state.data`` and ``second_data`` to the Groq chat model in one
    prompt and stores the reply under ``state.results["DNA Matching"]``.

    Returns:
        The same ``state`` object. On any exception the error is shown
        through ``st.error`` and the stored value is
        ``"Error during comparison."``.
    """
    try:
        prompt = f"Compare the following two DNA datasets and determine the relationship: {state.data} and {second_data}"
        chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": prompt,
                }
            ],
            model="llama-3.3-70b-versatile",
            stream=False,
        )
        insights = chat_completion.choices[0].message.content
        state.results["DNA Matching"] = insights
        return state
    except Exception as e:
        st.error(f"Error comparing DNA: {e}")
        state.results["DNA Matching"] = "Error during comparison."
        return state


def _as_graph_node(analysis_step):
    """Adapt an in-place analysis step to the graph-node contract.

    The analysis functions mutate and return the state object, whereas a
    graph node reports its changes as a mapping of state keys to new values.
    The wrapper runs the step and returns a copy of the accumulated
    ``results`` dict as that update.
    """
    def node(state):
        updated = analysis_step(state)
        return {"results": dict(updated.results)}

    return node


# Add Nodes to Graph
graph.add_node("genomic_disorders", _as_graph_node(analyze_genomic_disorders))
graph.add_node("physical_traits", _as_graph_node(analyze_physical_traits))
graph.add_node("disease_risk", _as_graph_node(analyze_disease_risk))
graph.add_node("ancestry", _as_graph_node(analyze_ancestry))

# Linear pipeline: each step runs after the previous one has finished.
graph.add_edge("genomic_disorders", "physical_traits")
graph.add_edge("physical_traits", "disease_risk")
graph.add_edge("disease_risk", "ancestry")

graph.set_entry_point("genomic_disorders")
# Mark the last step as terminal so the run ends after "ancestry".
graph.set_finish_point("ancestry")

# A StateGraph is only a builder and has no run() method; compile() produces
# the runnable object that is executed with invoke().
analysis_pipeline = graph.compile()

# Streamlit UI
st.title("DNA Analysis Using AI")
uploaded_file = st.file_uploader("Upload your genomic data (CSV, XLSX, TXT)", type=["csv", "xlsx", "txt"])

if uploaded_file:
    df = load_and_preprocess(uploaded_file)
    if df is not None:
        st.dataframe(df.head())
        if st.button("Start Analysis"):
            initial_state = {"data": df.to_json(), "results": {}}
            try:
                final_state = analysis_pipeline.invoke(initial_state)
                # Persist across Streamlit reruns so the report section can
                # render the results on later interactions.
                st.session_state["analysis_results"] = final_state["results"]
                st.success("Analysis completed!")
            except Exception as e:
                st.error(f"Error during analysis: {e}")
    else:
        st.error("Invalid file format.")

def _latin1_safe(text):
    """Return ``text`` as a string that can be encoded as Latin-1.

    The report is rendered with FPDF's built-in "Arial" font, which cannot
    encode characters outside Latin-1. Such characters are replaced with
    "?" so that one unusual character in an insight does not make the whole
    export fail. Non-string values (for example ``None``) are converted with
    ``str`` first.
    """
    return str(text).encode("latin-1", "replace").decode("latin-1")


if "analysis_results" in st.session_state:
    results = st.session_state["analysis_results"]
    # One collapsible section per analysis category.
    for category, insight in results.items():
        with st.expander(f"{category}"):
            st.write(insight)

    if st.button("Download Report as PDF"):
        pdf_path = "DNA_Analysis_Report.pdf"
        # Building the document is inside the try as well, so a rendering
        # failure is reported to the user instead of crashing the page.
        try:
            pdf = FPDF()
            pdf.set_auto_page_break(auto=True, margin=15)
            pdf.add_page()
            pdf.set_font("Arial", size=12)
            pdf.cell(200, 10, "DNA Analysis Report", ln=True, align="C")
            # Each category starts on its own page: heading, then the text.
            for category, insight in results.items():
                pdf.add_page()
                pdf.cell(200, 10, _latin1_safe(category), ln=True, align="C")
                pdf.multi_cell(0, 10, _latin1_safe(insight))
            pdf.output(pdf_path)
            with open(pdf_path, "rb") as f:
                st.download_button("Download PDF", f, file_name=pdf_path, mime="application/pdf")
        except Exception as e:
            st.error(f"Error creating PDF: {e}")

# DNA matching: compare two independently uploaded datasets.
st.header("DNA Matching")
file1 = st.file_uploader("Upload First DNA Dataset", type=["csv", "xlsx", "txt"], key="file1")
file2 = st.file_uploader("Upload Second DNA Dataset", type=["csv", "xlsx", "txt"], key="file2")

if file1 and file2:
    df1 = load_and_preprocess(file1)
    df2 = load_and_preprocess(file2)
    both_loaded = df1 is not None and df2 is not None
    # The button is only rendered once both datasets have loaded.
    if both_loaded and st.button("Compare DNA"):
        state = DNAAnalysisState(df1.to_json())
        result = analyze_dna_matching(state, df2.to_json())
        # Keep the outcome in session state so it survives Streamlit reruns.
        st.session_state["dna_matching_result"] = result.results["DNA Matching"]
        st.success("DNA Matching completed!")

# Show the most recent comparison stored for this session, if any.
if "dna_matching_result" in st.session_state:
    st.subheader("DNA Matching Results")
    st.write(st.session_state["dna_matching_result"])