|
import streamlit as st |
|
import os |
|
import google.generativeai as genai |
|
from huggingface_hub import hf_hub_download |
|
import base64 |
|
from PIL import Image |
|
|
|
# Identifier of the Gemini model used for every request made by this app.
MODEL_ID = "gemini-2.0-flash-exp"

# Configure the Gemini client with the key from the GEMINI_API_KEY
# environment variable. On failure, report the error in the UI and halt this
# script run so nothing below touches an unconfigured client.
try:
    api_key = os.getenv("GEMINI_API_KEY")
    model_id = MODEL_ID
    genai.configure(api_key=api_key)
except Exception as e:
    st.error(f"Error: {e}")
    # st.stop must be *called*; a bare `st.stop` reference is a no-op and the
    # script would carry on after the error.
    st.stop()

# Model handle and chat session shared by the rest of the script.
# NOTE(review): Streamlit re-executes the script on each interaction, so this
# chat session is presumably recreated every run — confirm whether multi-turn
# context is expected to survive between questions.
model = genai.GenerativeModel(MODEL_ID)
chat = model.start_chat()
|
|
|
def download_pdf():
    """Fetch the source PDF from the Hugging Face Hub.

    The access token is read from the ``HF_TOKEN`` environment variable and
    the file is requested from a dataset repository.

    Returns:
        The value returned by ``hf_hub_download`` (the downloaded file's path).

    If the download raises, the error is shown in the Streamlit UI and the
    current script run is halted with ``st.stop()``.
    """
    request = {
        "repo_id": "wvsuaidev/authorship_verfication_dataset",
        "filename": "Authorship_Verification_Linguistic_Divergence.pdf",
        "token": os.getenv("HF_TOKEN"),
        "repo_type": "dataset",
    }
    try:
        return hf_hub_download(**request)
    except Exception as e:
        st.error(f"Failed to download PDF from Hugging Face Hub: {e}")
        st.stop()
|
|
|
|
|
# Seed the per-session state. Each key is only written when it is missing, so
# values that already exist in the session are left untouched.

# Log of user prompts and model answers.
if "conversation_history" not in st.session_state:
    st.session_state["conversation_history"] = []

# Handle of the PDF once it has been uploaded to Gemini (None until then).
if "uploaded_file_part" not in st.session_state:
    st.session_state["uploaded_file_part"] = None

# Local path of the PDF; the download only happens when the key is absent.
if "uploaded_pdf_path" not in st.session_state:
    st.session_state["uploaded_pdf_path"] = download_pdf()
|
|
|
def multimodal_prompt(pdf_path, text_prompt):
    """Ask Gemini a question about the PDF and return its answer.

    The PDF is uploaded through ``genai.upload_file`` only when no upload
    handle is stored in ``st.session_state.uploaded_file_part``; otherwise the
    stored handle is reused. Both the prompt and the answer are appended to
    ``st.session_state.conversation_history``.

    Args:
        pdf_path: The path to the PDF file.
        text_prompt: The text prompt for the model.

    Returns:
        The model's response text, or a string of the form
        ``"An error occurred: ..."`` if any step raised.
    """
    try:
        state = st.session_state

        # Upload once per session, then keep reusing the stored handle.
        if state.uploaded_file_part is None:
            state.uploaded_file_part = genai.upload_file(pdf_path, mime_type="application/pdf")

        reply = chat.send_message([text_prompt, state.uploaded_file_part])

        # The user entry is recorded before the reply text is read, so it is
        # kept in the history even if reading the text fails.
        state.conversation_history.append({"role": "user", "content": text_prompt, "has_pdf": True})
        answer = reply.text
        state.conversation_history.append({"role": "assistant", "content": answer})
        return answer

    except Exception as e:
        return f"An error occurred: {e}"
|
|
|
def display_download_button(file_path, file_name):
    """Render an HTML link that lets the user download a local PDF.

    The file is read in binary mode, base64-encoded and embedded in the page
    as a ``data:application/pdf`` URI inside an ``<a>`` tag.

    Args:
        file_path: Path of the file to read.
        file_name: Name placed in the link's ``download`` attribute.

    A missing file, or any other failure, is reported with ``st.error``
    instead of being raised.
    """
    try:
        with open(file_path, "rb") as pdf_file:
            encoded = base64.b64encode(pdf_file.read()).decode()

        link = f'<a href="data:application/pdf;base64,{encoded}" download="{file_name}">Download the source document (PDF)</a>'
        # Raw HTML is required here because the link is a hand-built <a> tag.
        st.markdown(link, unsafe_allow_html=True)
    except FileNotFoundError:
        st.error("File not found for download.")
    except Exception as e:
        st.error(f"Error during download: {e}")
|
|
|
|
|
|
|
# Page title.
# NOTE(review): the leading "π" looks like a mis-decoded emoji — confirm the
# intended character before changing the string.
st.title("π VQA on the Authorship Attribution and Verification Paper")

# Usage instructions, shown as Markdown inside a collapsible panel.
about = (
    "\n"
    "**How to use this App**\n"
    "This app leverages Gemini 2.0 to provide insights on the provided document.\n"
    "Select a question from the dropdown menu or enter your own question to get\n"
    "Gemini's generated response based on the provided document.\n"
)

with st.expander("How to use this App"):
    st.markdown(about)

# Banner image, opened from a path relative to the working directory.
image = Image.open("authorship.png")
st.image(image, width=400)
|
|
|
|
|
st.header("Questions and Answers")

# Preset questions offered in the dropdown.
questions = [
    "What are the key differences between Authorship Attribution (AA) and Authorship Verification (AV)?",
    "What is the 'non-comparability problem' in authorship verification, and why is it significant?",
    "How does the proposed DV-Distance metric address the non-comparability problem?",
    "Explain the concept of Normal Writing Style (NWS) and its role in the proposed method.",
    "How are Deviation Vectors (DVs) calculated, and what do they represent?",
    "Describe the two main methods proposed in the paper: DV-Distance and DV-Projection.",
    "What are the advantages and limitations of the unsupervised DV-Distance method?",
    "How does the supervised DV-Projection method improve upon the DV-Distance method?",
    "What language models were used in the study, and why?",
    "What datasets were used to evaluate the proposed methods?",
    "What evaluation metrics were used in the study?",
    "How did the proposed methods perform compared to the baselines and state-of-the-art methods?",
    "What were the key findings and trends observed in the experiments?",
    "Why did the AWD-LSTM based DV-Distance method consistently outperform the RoBERTa based DV-Distance method?",
    "For what types of documents were the proposed methods most suitable?",
    "How do the authors explain the performance differences across different document types?",
    "What are the potential real-world applications of this research?",
    "What are the limitations of the proposed methods?",
    "What are some possible directions for future research in this area?",
    "How does this research contribute to the broader field of Natural Language Processing (NLP)?",
]

# The dropdown choice is the default question...
selected_question = st.selectbox("Choose a question", questions)

# ...unless the user opts in to typing their own, which then replaces it.
wants_custom_question = st.checkbox("Check this box to ask a question not listed above")
if wants_custom_question:
    selected_question = st.text_input("Enter a question")
|
|
|
# Handle a click on "Ask AI": validate the question, query Gemini with the
# PDF attached, and render the answer.
if st.button("Ask AI"):
    # A checked "custom question" box with an empty text field yields a blank
    # question; normalise it so that case can be detected.
    question = (selected_question or "").strip()

    if not question:
        # Avoid spending a model call on an empty prompt.
        st.warning("Please enter a question before asking the AI.")
    else:
        with st.spinner("AI is thinking..."):
            # Download the PDF again only if no local path is stored.
            if st.session_state.uploaded_pdf_path is None:
                st.session_state.uploaded_pdf_path = download_pdf()

            filepath = st.session_state.uploaded_pdf_path
            # The citation instruction refers to "the document"; the previous
            # wording named an "IRR", which is not what this app serves.
            text_prompt = (
                "Use the provided document to answer the following question: "
                f"{question}. Cite the relevant sections of the document."
            )
            response = multimodal_prompt(filepath, text_prompt)
            st.markdown(f"**Response:** {response}")

# Offer the source PDF for download whenever a local copy is available.
if st.session_state.uploaded_pdf_path:
    display_download_button(st.session_state.uploaded_pdf_path, "Visual_Understanding.pdf")

# Footer.
# NOTE(review): the copyright line looks mis-encoded (e.g. "Β©" is presumably
# "©"); confirm the intended characters before changing the string.
st.markdown("[Visit our Hugging Face Space!](https://huggingface.co/wvsuaidev)")
st.markdown("Β© 2025 WVSU AI Dev Team π€ β¨")