Spaces:

Sobit
/

DocuMentorAI

Sleeping

App Files Files Community

Sobit committed 29 days ago

Commit

a35fb23

verified ·

1 Parent(s): 1ad6ea2

Update app.py

Browse files

Files changed (1) hide show

app.py +150 -181

app.py CHANGED Viewed

@@ -2,51 +2,43 @@ import streamlit as st
 from langchain.chains import LLMChain
 from langchain.prompts import PromptTemplate
 from langchain.llms import HuggingFaceHub
-import fitz  # PyMuPDF for PDF extraction
 from PIL import Image
 import os
 import pytesseract
 import re
-# Set Hugging Face API Key (Set this in Hugging Face Secrets)
 os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets["HF_TOKEN"]
-# Load Free LLM from Hugging Face
 llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-Instruct-v0.3", model_kwargs={"temperature": 0.5})
-# Streamlit App Configuration
 st.set_page_config(page_title="DocuMentorAI", layout="wide", page_icon="📄")
 st.title("📄 DocuMentorAI")
-st.write("Generate professional application documents with ease!")
-# Custom CSS for better UI
 st.markdown("""
 <style>
-    .stTextArea textarea { font-size: 16px !important; }
-    .stButton button { width: 100%; background-color: #4CAF50; color: white; }
-    .stDownloadButton button { width: 100%; background-color: #008CBA; color: white; }
-    .stMarkdown { font-size: 18px; }
-    .stSpinner div { margin: auto; }
 </style>
 """, unsafe_allow_html=True)
-# Text Input for Job Opening Details
-st.subheader("📢 Enter Opening Details")
-job_opening_text = st.text_area(
-    "Paste the job/research opening details here...",
-    height=150,
-    placeholder="Example: 'We are hiring a Research Assistant at XYZ University. The ideal candidate has experience in machine learning and data analysis...'"
-)
-# Upload CV/Resume
-st.subheader("📄 Upload CV/Resume")
-cv_resume_file = st.file_uploader(
-    "Upload your CV/Resume (PDF or Image)",
-    type=["pdf", "png", "jpg", "jpeg"],
-    help="Upload a PDF or image of your CV/Resume for text extraction."
-)
-# Function to extract text from PDF
 def extract_text_from_pdf(pdf_file):
     try:
         pdf_bytes = pdf_file.read()
@@ -56,7 +48,6 @@ def extract_text_from_pdf(pdf_file):
         st.error(f"Error extracting text from PDF: {e}")
         return ""
-# Function to extract text from Image using OCR
 def extract_text_from_image(image_file):
     try:
         image = Image.open(image_file)
@@ -65,212 +56,190 @@ def extract_text_from_image(image_file):
         st.error(f"Error extracting text from image: {e}")
         return ""
-# Function to extract text from uploaded files
 def extract_text(uploaded_file):
-    if uploaded_file:
-        file_type = uploaded_file.type
-        if file_type == "application/pdf":
-            return extract_text_from_pdf(uploaded_file)
-        else:
-            return extract_text_from_image(uploaded_file)
-    return ""
-# Extract text from CV/Resume
-cv_resume_text = extract_text(cv_resume_file)
-# Display Extracted Text
-if job_opening_text:
-    with st.expander("🔍 View Entered Opening Details"):
-        st.markdown(f"**Job Opening Details:**\n\n{job_opening_text}")
-if cv_resume_text:
-    with st.expander("🔍 View Extracted CV/Resume Details"):
-        st.markdown(f"**CV/Resume Details:**\n\n{cv_resume_text}")
-# Function to extract professor name, designation, and university
 def extract_professor_details(text):
     professor_pattern = r"(Dr\.|Professor|Prof\.?)\s+([A-Z][a-z]+\s[A-Z][a-z]+)"
     university_pattern = r"(University|Institute|College|School of [A-Za-z]+)"
     professor_match = re.search(professor_pattern, text)
     university_match = re.search(university_pattern, text)
-    professor_name = professor_match.group(0) if professor_match else "Not Found"
-    university_name = university_match.group(0) if university_match else "Not Found"
-    return professor_name, university_name
-# Extract professor details if job opening is uploaded
-professor_name, university_name = extract_professor_details(job_opening_text)
-# LLM Prompt Templates
-email_template = PromptTemplate.from_template("""
 Write a professional cold email for a research position.
-- Address the professor formally.
-- Introduce yourself and academic background.
-- Express interest in their research.
-- Highlight key skills from your CV.
-- Conclude with a polite request.
-### Input:
-- Professor: {professor_name}
-- University: {university_name}
-- Research Interests: {research_interests}
-- Why This Lab: {reason}
-- CV Highlights: {resume_text}
-### Output:
-A well-structured, professional cold email.
-""")
-cover_letter_template = PromptTemplate.from_template("""
 Write a compelling job application cover letter.
-- Address the employer formally.
-- Mention job title and where you found it.
-- Highlight key skills and experiences.
-- Relate background to the company.
-- Conclude with enthusiasm.
-### Input:
-- Job Title: {job_title}
-- Company: {company}
-- Key Skills: {key_skills}
-- CV Highlights: {resume_text}
-### Output:
-A strong, well-formatted cover letter.
-""")
-research_statement_template = PromptTemplate.from_template("""
 Write a research statement for Ph.D. applications.
-- Discuss research background and motivation.
-- Explain key research experiences and findings.
-- Outline future research interests and goals.
-- Highlight contributions to the field.
-### Input:
-- Research Background: {research_background}
-- Key Research Projects: {key_projects}
-- Future Goals: {future_goals}
-### Output:
-A well-structured, professional research statement.
-""")
-sop_template = PromptTemplate.from_template("""
 Write a compelling Statement of Purpose (SOP).
-- Introduce motivation for graduate studies.
-- Discuss academic background.
-- Explain relevant experiences and research.
-- Outline career goals.
-- Justify fit for the program.
-### Input:
-- Motivation: {motivation}
-- Academic Background: {academic_background}
-- Research & Projects: {research_experiences}
-- Career Goals: {career_goals}
-- Why This Program: {why_this_program}
-### Output:
-A well-structured SOP.
 """)
-# LangChain Chains
-email_chain = LLMChain(llm=llm, prompt=email_template)
-cover_letter_chain = LLMChain(llm=llm, prompt=cover_letter_template)
-research_statement_chain = LLMChain(llm=llm, prompt=research_statement_template)
-sop_chain = LLMChain(llm=llm, prompt=sop_template)
-# User Inputs
-st.subheader("📩 Generate Application Documents")
 tab1, tab2, tab3, tab4 = st.tabs(["Cold Email", "Cover Letter", "Research Statement", "SOP"])
-# Cold Email Generation
 with tab1:
-    st.write(f"🧑‍🏫 **Detected Professor:** {professor_name} at {university_name}")
-    research_interests = st.text_area("Research Interests", placeholder="Example: Machine Learning, Data Analysis, etc.")
-    reason = st.text_area("Why this professor/lab?", placeholder="Example: I am particularly interested in your work on...")
-    if st.button("Generate Cold Email"):
-        if not job_opening_text or not cv_resume_text:
-            st.error("Please provide job opening details and upload your CV/Resume.")
-        else:
-            with st.spinner("Generating Cold Email..."):
                 try:
-                    email = email_chain.run({
                         "professor_name": professor_name,
                         "university_name": university_name,
                         "research_interests": research_interests,
                         "reason": reason,
                         "resume_text": cv_resume_text
                     })
-                    st.markdown("**Generated Cold Email:**")
-                    st.markdown(email)
-                    st.download_button("Download Email", email, file_name="cold_email.txt")
                 except Exception as e:
-                    st.error(f"Error generating cold email: {e}")
-# Cover Letter Generation
-with tab2:
-    job_title = st.text_input("Job Title", placeholder="Example: Research Assistant")
-    company_name = university_name if university_name != "Not Found" else st.text_input("Company/University", placeholder="Example: XYZ University")
-    key_skills = st.text_area("Key Skills", placeholder="Example: Python, Machine Learning, Data Analysis")
-    if st.button("Generate Cover Letter"):
-        if not job_opening_text or not cv_resume_text:
-            st.error("Please provide job opening details and upload your CV/Resume.")
-        else:
-            with st.spinner("Generating Cover Letter..."):
                 try:
-                    cover_letter = cover_letter_chain.run({
                         "job_title": job_title,
                         "company": company_name,
                         "key_skills": key_skills,
                         "resume_text": cv_resume_text
                     })
-                    st.markdown("**Generated Cover Letter:**")
-                    st.markdown(cover_letter)
-                    st.download_button("Download Cover Letter", cover_letter, file_name="cover_letter.txt")
                 except Exception as e:
-                    st.error(f"Error generating cover letter: {e}")
-# Research Statement Generation
-with tab3:
-    research_background = st.text_area("Research Background", placeholder="Example: My research focuses on...")
-    key_projects = st.text_area("Key Research Projects", placeholder="Example: Developed a machine learning model for...")
-    future_goals = st.text_area("Future Research Goals", placeholder="Example: I aim to explore...")
-    if st.button("Generate Research Statement"):
-        with st.spinner("Generating Research Statement..."):
             try:
-                research_statement = research_statement_chain.run({
                     "research_background": research_background,
                     "key_projects": key_projects,
                     "future_goals": future_goals
                 })
-                st.markdown("**Generated Research Statement:**")
-                st.markdown(research_statement)
-                st.download_button("Download Research Statement", research_statement, file_name="research_statement.txt")
             except Exception as e:
-                st.error(f"Error generating research statement: {e}")
-# SOP Generation
-with tab4:
-    motivation = st.text_area("Motivation for Graduate Studies", placeholder="Example: I have always been passionate about...")
-    academic_background = st.text_area("Academic Background", placeholder="Example: I completed my undergraduate degree in...")
-    research_experiences = st.text_area("Research & Projects", placeholder="Example: During my undergraduate studies, I worked on...")
-    career_goals = st.text_area("Career Goals", placeholder="Example: My long-term goal is to...")
-    why_this_program = st.text_area("Why This Program", placeholder="Example: This program aligns with my research interests because...")
-    if st.button("Generate SOP"):
-        with st.spinner("Generating SOP..."):
             try:
-                sop = sop_chain.run({
                     "motivation": motivation,
                     "academic_background": academic_background,
                     "research_experiences": research_experiences,
                     "career_goals": career_goals,
                     "why_this_program": why_this_program
                 })
-                st.markdown("**Generated SOP:**")
-                st.markdown(sop)
-                st.download_button("Download SOP", sop, file_name="sop.txt")
             except Exception as e:
-                st.error(f"Error generating SOP: {e}")
 # Reset Button
-if st.button("🔄 Reset All Inputs and Outputs"):
     st.experimental_rerun()

 from langchain.chains import LLMChain
 from langchain.prompts import PromptTemplate
 from langchain.llms import HuggingFaceHub
+import fitz
 from PIL import Image
 import os
 import pytesseract
 import re
+# Set Hugging Face API Key
 os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets["HF_TOKEN"]
+# Initialize LLM
 llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-Instruct-v0.3", model_kwargs={"temperature": 0.5})
+# App Configuration
 st.set_page_config(page_title="DocuMentorAI", layout="wide", page_icon="📄")
 st.title("📄 DocuMentorAI")
+# Improved CSS with dedicated output styling
 st.markdown("""
 <style>
+    .output-container {
+        background-color: #f0f2f6;
+        padding: 20px;
+        border-radius: 10px;
+        margin-top: 20px;
+    }
+    .generated-content {
+        font-size: 16px;
+        line-height: 1.6;
+        white-space: pre-wrap;
+    }
+    .download-button {
+        margin-top: 10px;
+    }
 </style>
 """, unsafe_allow_html=True)
+# Helper Functions
 def extract_text_from_pdf(pdf_file):
     try:
         pdf_bytes = pdf_file.read()
         st.error(f"Error extracting text from PDF: {e}")
         return ""
 def extract_text_from_image(image_file):
     try:
         image = Image.open(image_file)
         st.error(f"Error extracting text from image: {e}")
         return ""
 def extract_text(uploaded_file):
     """Return the text content of an uploaded CV/resume file.

     Uploads whose MIME type is "application/pdf" are passed to
     extract_text_from_pdf; every other upload is passed to
     extract_text_from_image. A missing upload yields an empty string.
     """
     if uploaded_file:
         if uploaded_file.type == "application/pdf":
             return extract_text_from_pdf(uploaded_file)
         return extract_text_from_image(uploaded_file)
     return ""
 def extract_professor_details(text):
     """Find a professor's name and an institution name in free text.

     Args:
         text: Opening description to scan. May be empty or None.

     Returns:
         A ``(professor, institution)`` tuple of strings. Each element is the
         matched text, or the literal ``"Not Found"`` when nothing matched.
     """
     not_found = "Not Found"
     if not text:
         # re.search raises TypeError on None, and "" has nothing to scan.
         return not_found, not_found

     professor_pattern = r"(Dr\.|Professor|Prof\.?)\s+([A-Z][a-z]+\s[A-Z][a-z]+)"
     # Matching only the keyword would return just "University" for
     # "XYZ University". The result is displayed and reused as the company
     # name, so also capture the capitalised words in front of the keyword
     # and an optional "of <Name>" tail ("University of Manchester").
     university_pattern = (
         r"(?:[A-Z][A-Za-z&'-]*\s+)*"
         r"(?:University|Institute|College)"
         r"(?:\s+of\s+[A-Z][A-Za-z'-]*(?:\s+[A-Z][A-Za-z'-]*)*)?"
         r"|School of [A-Za-z]+"
     )

     professor_match = re.search(professor_pattern, text)
     university_match = re.search(university_pattern, text)
     professor_name = professor_match.group(0) if professor_match else not_found
     university_name = university_match.group(0) if university_match else not_found
     return professor_name, university_name
+# Sidebar for Input Collection
+with st.sidebar:
+    st.subheader("📝 Input Details")
+    job_opening_text = st.text_area("Job/Research Opening Details", height=150)
+    cv_resume_file = st.file_uploader("Upload CV/Resume", type=["pdf", "png", "jpg", "jpeg"])
+    cv_resume_text = extract_text(cv_resume_file)
+# Initialize session state for generated content
+if 'generated_content' not in st.session_state:
+    st.session_state.generated_content = {
+        'email': None,
+        'cover_letter': None,
+        'research_statement': None,
+        'sop': None
+    }
+# Template Definitions
+templates = {
+    'email': PromptTemplate.from_template("""
 Write a professional cold email for a research position.
+Output only the email content without any additional text or formatting.
+Professor: {professor_name}
+University: {university_name}
+Research Interests: {research_interests}
+Why This Lab: {reason}
+CV Highlights: {resume_text}
+"""),
+    'cover_letter': PromptTemplate.from_template("""
 Write a compelling job application cover letter.
+Output only the letter content without any additional text or formatting.
+Job Title: {job_title}
+Company: {company}
+Key Skills: {key_skills}
+CV Highlights: {resume_text}
+"""),
+    'research_statement': PromptTemplate.from_template("""
 Write a research statement for Ph.D. applications.
+Output only the statement content without any additional text or formatting.
+Research Background: {research_background}
+Key Research Projects: {key_projects}
+Future Goals: {future_goals}
+"""),
+    'sop': PromptTemplate.from_template("""
 Write a compelling Statement of Purpose (SOP).
+Output only the SOP content without any additional text or formatting.
+Motivation: {motivation}
+Academic Background: {academic_background}
+Research & Projects: {research_experiences}
+Career Goals: {career_goals}
+Why This Program: {why_this_program}
 """)
+}
+# Create LangChain instances
+chains = {key: LLMChain(llm=llm, prompt=template) for key, template in templates.items()}
+# Tab Layout
 tab1, tab2, tab3, tab4 = st.tabs(["Cold Email", "Cover Letter", "Research Statement", "SOP"])
+# Cold Email Tab
 with tab1:
+    professor_name, university_name = extract_professor_details(job_opening_text)
+    research_interests = st.text_input("Research Interests")
+    reason = st.text_input("Why this professor/lab?")
+    if st.button("Generate Email", key="email_btn"):
+        if job_opening_text and cv_resume_text:
+            with st.spinner("Generating..."):
                 try:
+                    st.session_state.generated_content['email'] = chains['email'].run({
                         "professor_name": professor_name,
                         "university_name": university_name,
                         "research_interests": research_interests,
                         "reason": reason,
                         "resume_text": cv_resume_text
                     })
                 except Exception as e:
+                    st.error(f"Generation error: {e}")
+        else:
+            st.error("Please provide all required inputs")
+    if st.session_state.generated_content['email']:
+        st.markdown('<div class="output-container">', unsafe_allow_html=True)
+        st.markdown(st.session_state.generated_content['email'])
+        st.download_button("Download Email", st.session_state.generated_content['email'],
+                         file_name="cold_email.txt", key="email_download")
+        st.markdown('</div>', unsafe_allow_html=True)
+# Cover Letter Tab
+with tab2:
+    job_title = st.text_input("Job Title")
+    company_name = university_name if university_name != "Not Found" else st.text_input("Company/University")
+    key_skills = st.text_input("Key Skills")
+    if st.button("Generate Cover Letter", key="cover_letter_btn"):
+        if job_opening_text and cv_resume_text:
+            with st.spinner("Generating..."):
                 try:
+                    st.session_state.generated_content['cover_letter'] = chains['cover_letter'].run({
                         "job_title": job_title,
                         "company": company_name,
                         "key_skills": key_skills,
                         "resume_text": cv_resume_text
                     })
                 except Exception as e:
+                    st.error(f"Generation error: {e}")
+        else:
+            st.error("Please provide all required inputs")
+    if st.session_state.generated_content['cover_letter']:
+        st.markdown('<div class="output-container">', unsafe_allow_html=True)
+        st.markdown(st.session_state.generated_content['cover_letter'])
+        st.download_button("Download Cover Letter", st.session_state.generated_content['cover_letter'],
+                         file_name="cover_letter.txt", key="cover_letter_download")
+        st.markdown('</div>', unsafe_allow_html=True)
+# Research Statement Tab
+with tab3:
+    research_background = st.text_input("Research Background")
+    key_projects = st.text_input("Key Research Projects")
+    future_goals = st.text_input("Future Research Goals")
+    if st.button("Generate Research Statement", key="research_stmt_btn"):
+        with st.spinner("Generating..."):
             try:
+                st.session_state.generated_content['research_statement'] = chains['research_statement'].run({
                     "research_background": research_background,
                     "key_projects": key_projects,
                     "future_goals": future_goals
                 })
             except Exception as e:
+                st.error(f"Generation error: {e}")
+    if st.session_state.generated_content['research_statement']:
+        st.markdown('<div class="output-container">', unsafe_allow_html=True)
+        st.markdown(st.session_state.generated_content['research_statement'])
+        st.download_button("Download Research Statement", st.session_state.generated_content['research_statement'],
+                         file_name="research_statement.txt", key="research_stmt_download")
+        st.markdown('</div>', unsafe_allow_html=True)
+# SOP Tab
+with tab4:
+    motivation = st.text_input("Motivation for Graduate Studies")
+    academic_background = st.text_input("Academic Background")
+    research_experiences = st.text_input("Research & Projects")
+    career_goals = st.text_input("Career Goals")
+    why_this_program = st.text_input("Why This Program")
+    if st.button("Generate SOP", key="sop_btn"):
+        with st.spinner("Generating..."):
             try:
+                st.session_state.generated_content['sop'] = chains['sop'].run({
                     "motivation": motivation,
                     "academic_background": academic_background,
                     "research_experiences": research_experiences,
                     "career_goals": career_goals,
                     "why_this_program": why_this_program
                 })
             except Exception as e:
+                st.error(f"Generation error: {e}")
+    if st.session_state.generated_content['sop']:
+        st.markdown('<div class="output-container">', unsafe_allow_html=True)
+        st.markdown(st.session_state.generated_content['sop'])
+        st.download_button("Download SOP", st.session_state.generated_content['sop'],
+                         file_name="sop.txt", key="sop_download")
+        st.markdown('</div>', unsafe_allow_html=True)
 # Reset Button
 # Clears every generated document held in session state, then reruns the
 # script so the tabs redraw without their output sections.
 if st.sidebar.button("🔄 Reset All"):
     st.session_state.generated_content = dict.fromkeys(st.session_state.generated_content)
     # st.experimental_rerun is deprecated in favour of st.rerun. Use the
     # newer call when this Streamlit version has it, else fall back.
     rerun = getattr(st, "rerun", None) or st.experimental_rerun
     rerun()