import io
import re

import streamlit as st
from langdetect import detect
from transformers import pipeline
from docx import Document
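# Runtime dependencies implied by the imports above: streamlit, langdetect,
# transformers (with a PyTorch backend for the pipelines), and python-docx.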
# Load the zero-shot classification pipelines once; st.cache_resource keeps them
# in memory across Streamlit reruns instead of reloading on every interaction.
@st.cache_resource
def load_pipeline(model_name):
    return pipeline("zero-shot-classification", model=model_name)

try:
    tone_model = load_pipeline("cross-encoder/nli-deberta-v3-large")
except OSError:
    tone_model = None
    st.error("Failed to load tone analysis model. Please check your internet connection or model availability.")

try:
    frame_model = load_pipeline("facebook/bart-large-mnli")
except OSError:
    frame_model = None
    st.error("Failed to load frame classification model. Please check your internet connection or model availability.")
# Tone categories used as candidate labels for zero-shot classification
tone_categories = [
    "Emotional & Urgent", "Harsh & Critical", "Negative & Somber",
    "Empowering & Motivational", "Neutral & Informative", "Hopeful & Positive"
]

# Frame categories used as candidate labels for zero-shot classification
frame_categories = [
    "Human Rights & Justice", "Political & State Accountability", "Gender & Patriarchy",
    "Religious Freedom & Persecution", "Grassroots Mobilization", "Environmental Crisis & Activism",
    "Anti-Extremism & Anti-Violence", "Social Inequality & Economic Disparities"
]
# Detect the language of a caption
def detect_language(text):
    """Return the ISO 639-1 code reported by langdetect, or "unknown" on failure."""
    try:
        return detect(text)
    except Exception:
        return "unknown"
# Analyze tone with the DeBERTa-based zero-shot classifier
def analyze_tone(text):
    if tone_model is None:
        return ["Unknown"]
    try:
        model_result = tone_model(text, candidate_labels=tone_categories)
        return model_result["labels"][:2]  # Top 2 tone labels
    except Exception as e:
        st.error(f"Error analyzing tone: {e}")
        return ["Unknown"]
# Extract frames with the BART-based zero-shot classifier
def extract_frames(text):
    if frame_model is None:
        return ["Unknown"]
    try:
        model_result = frame_model(text, candidate_labels=frame_categories)
        return model_result["labels"][:2]  # Top 2 frame labels
    except Exception as e:
        st.error(f"Error extracting frames: {e}")
        return ["Unknown"]
# Extract hashtags ("#" followed by word characters)
def extract_hashtags(text):
    return re.findall(r"#\w+", text)
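# Example: extract_hashtags("Stand together #JusticeNow #Equality") returns
# ["#JusticeNow", "#Equality"]. Since \w+ stops at punctuation, "#We-Rise"
# would be captured only as "#We".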
# Extract captions from a DOCX file where each post is introduced by a "Post N" line
def extract_captions_from_docx(docx_file):
    doc = Document(docx_file)
    captions = {}
    current_post = None
    for para in doc.paragraphs:
        text = para.text.strip()
        if re.match(r"Post \d+", text, re.IGNORECASE):
            current_post = text
            captions[current_post] = []
        elif current_post and text:  # skip empty paragraphs
            captions[current_post].append(text)
    return {post: " ".join(lines) for post, lines in captions.items() if lines}
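# Expected DOCX layout (illustrative), with each caption introduced by a
# "Post N" paragraph; anything before the first "Post N" line is ignored:
#   Post 1
#   First caption text ...
#   Post 2
#   Second caption text ...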
# Generate a DOCX report in memory
def generate_docx(output_data):
    doc = Document()
    doc.add_heading('Activism Message Analysis', 0)
    for index, (caption, result) in enumerate(output_data.items(), start=1):
        doc.add_heading(f"{index}. {caption}", level=1)
        doc.add_paragraph("Full Caption:")
        doc.add_paragraph(result['Full Caption'], style="Quote")
        doc.add_paragraph(f"Language: {result['Language']}")
        doc.add_paragraph(f"Tone of Caption: {', '.join(result['Tone of Caption'])}")
        doc.add_paragraph(f"Number of Hashtags: {result['Hashtag Count']}")
        doc.add_paragraph(f"Hashtags Found: {', '.join(result['Hashtags'])}")
        doc.add_heading('Frames:', level=2)
        for frame in result['Frames']:
            doc.add_paragraph(frame)
    doc_io = io.BytesIO()
    doc.save(doc_io)
    doc_io.seek(0)
    return doc_io
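# The report lives in a BytesIO buffer so it can be passed directly to
# st.download_button below; nothing is written to disk.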
# Streamlit app UI
st.title('AI-Powered Activism Message Analyzer')
st.write("Enter the text to analyze or upload a DOCX file containing captions:")
# Text Input
input_text = st.text_area("Input Text", height=200)
# File Upload
uploaded_file = st.file_uploader("Upload a DOCX file", type=["docx"])
# Initialize output dictionary
output_data = {}
if input_text:
    language = detect_language(input_text)
    tone = analyze_tone(input_text)
    hashtags = extract_hashtags(input_text)
    frames = extract_frames(input_text)
    output_data["Manual Input"] = {
        'Full Caption': input_text,
        'Language': language,
        'Tone of Caption': tone,
        'Hashtags': hashtags,
        'Hashtag Count': len(hashtags),
        'Frames': frames
    }
    st.success("Analysis completed for text input.")
if uploaded_file:
    captions = extract_captions_from_docx(uploaded_file)
    for caption, text in captions.items():
        language = detect_language(text)
        tone = analyze_tone(text)
        hashtags = extract_hashtags(text)
        frames = extract_frames(text)
        output_data[caption] = {
            'Full Caption': text,
            'Language': language,
            'Tone of Caption': tone,
            'Hashtags': hashtags,
            'Hashtag Count': len(hashtags),
            'Frames': frames
        }
    st.success(f"Analysis completed for {len(captions)} posts from the DOCX file.")
# Display results
if output_data:
    with st.expander("Generated Output"):
        st.subheader("Analysis Results")
        for index, (caption, result) in enumerate(output_data.items(), start=1):
            st.write(f"### {index}. {caption}")
            st.write("**Full Caption:**")
            st.write(f"> {result['Full Caption']}")
            st.write(f"**Language:** {result['Language']}")
            st.write(f"**Tone of Caption:** {', '.join(result['Tone of Caption'])}")
            st.write(f"**Number of Hashtags:** {result['Hashtag Count']}")
            st.write(f"**Hashtags Found:** {', '.join(result['Hashtags'])}")
            st.write("**Frames:**")
            for frame in result['Frames']:
                st.write(f"- {frame}")

    docx_file = generate_docx(output_data)
    st.download_button(
        label="Download Analysis as DOCX",
        data=docx_file,
        file_name="activism_message_analysis.docx",
        mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    )
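
# To run the app locally (with the dependencies above installed):
#   streamlit run app.py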