Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -2,51 +2,43 @@ import streamlit as st
|
|
2 |
from langchain.chains import LLMChain
|
3 |
from langchain.prompts import PromptTemplate
|
4 |
from langchain.llms import HuggingFaceHub
|
5 |
-
import fitz
|
6 |
from PIL import Image
|
7 |
import os
|
8 |
import pytesseract
|
9 |
import re
|
10 |
|
11 |
-
# Set Hugging Face API Key
|
12 |
os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets["HF_TOKEN"]
|
13 |
|
14 |
-
#
|
15 |
llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-Instruct-v0.3", model_kwargs={"temperature": 0.5})
|
16 |
|
17 |
-
#
|
18 |
st.set_page_config(page_title="DocuMentorAI", layout="wide", page_icon="📄")
|
19 |
st.title("📄 DocuMentorAI")
|
20 |
-
st.write("Generate professional application documents with ease!")
|
21 |
|
22 |
-
#
|
23 |
st.markdown("""
|
24 |
<style>
|
25 |
-
.
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
</style>
|
31 |
""", unsafe_allow_html=True)
|
32 |
|
33 |
-
#
|
34 |
-
st.subheader("📢 Enter Opening Details")
|
35 |
-
job_opening_text = st.text_area(
|
36 |
-
"Paste the job/research opening details here...",
|
37 |
-
height=150,
|
38 |
-
placeholder="Example: 'We are hiring a Research Assistant at XYZ University. The ideal candidate has experience in machine learning and data analysis...'"
|
39 |
-
)
|
40 |
-
|
41 |
-
# Upload CV/Resume
|
42 |
-
st.subheader("📄 Upload CV/Resume")
|
43 |
-
cv_resume_file = st.file_uploader(
|
44 |
-
"Upload your CV/Resume (PDF or Image)",
|
45 |
-
type=["pdf", "png", "jpg", "jpeg"],
|
46 |
-
help="Upload a PDF or image of your CV/Resume for text extraction."
|
47 |
-
)
|
48 |
-
|
49 |
-
# Function to extract text from PDF
|
50 |
def extract_text_from_pdf(pdf_file):
|
51 |
try:
|
52 |
pdf_bytes = pdf_file.read()
|
@@ -56,7 +48,6 @@ def extract_text_from_pdf(pdf_file):
|
|
56 |
st.error(f"Error extracting text from PDF: {e}")
|
57 |
return ""
|
58 |
|
59 |
-
# Function to extract text from Image using OCR
|
60 |
def extract_text_from_image(image_file):
|
61 |
try:
|
62 |
image = Image.open(image_file)
|
@@ -65,212 +56,190 @@ def extract_text_from_image(image_file):
|
|
65 |
st.error(f"Error extracting text from image: {e}")
|
66 |
return ""
|
67 |
|
68 |
-
# Function to extract text from uploaded files
|
69 |
def extract_text(uploaded_file):
|
70 |
-
if uploaded_file:
|
71 |
-
|
72 |
-
|
73 |
-
return extract_text_from_pdf(uploaded_file)
|
74 |
-
else:
|
75 |
-
return extract_text_from_image(uploaded_file)
|
76 |
-
return ""
|
77 |
-
|
78 |
-
# Extract text from CV/Resume
|
79 |
-
cv_resume_text = extract_text(cv_resume_file)
|
80 |
-
|
81 |
-
# Display Extracted Text
|
82 |
-
if job_opening_text:
|
83 |
-
with st.expander("🔍 View Entered Opening Details"):
|
84 |
-
st.markdown(f"**Job Opening Details:**\n\n{job_opening_text}")
|
85 |
-
|
86 |
-
if cv_resume_text:
|
87 |
-
with st.expander("🔍 View Extracted CV/Resume Details"):
|
88 |
-
st.markdown(f"**CV/Resume Details:**\n\n{cv_resume_text}")
|
89 |
|
90 |
-
# Function to extract professor name, designation, and university
|
91 |
def extract_professor_details(text):
|
92 |
professor_pattern = r"(Dr\.|Professor|Prof\.?)\s+([A-Z][a-z]+\s[A-Z][a-z]+)"
|
93 |
university_pattern = r"(University|Institute|College|School of [A-Za-z]+)"
|
94 |
|
95 |
professor_match = re.search(professor_pattern, text)
|
96 |
university_match = re.search(university_pattern, text)
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
108 |
Write a professional cold email for a research position.
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
- Research Interests: {research_interests}
|
118 |
-
- Why This Lab: {reason}
|
119 |
-
- CV Highlights: {resume_text}
|
120 |
-
### Output:
|
121 |
-
A well-structured, professional cold email.
|
122 |
-
""")
|
123 |
-
|
124 |
-
cover_letter_template = PromptTemplate.from_template("""
|
125 |
Write a compelling job application cover letter.
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
- Company: {company}
|
134 |
-
- Key Skills: {key_skills}
|
135 |
-
- CV Highlights: {resume_text}
|
136 |
-
### Output:
|
137 |
-
A strong, well-formatted cover letter.
|
138 |
-
""")
|
139 |
-
|
140 |
-
research_statement_template = PromptTemplate.from_template("""
|
141 |
Write a research statement for Ph.D. applications.
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
- Key Research Projects: {key_projects}
|
149 |
-
- Future Goals: {future_goals}
|
150 |
-
### Output:
|
151 |
-
A well-structured, professional research statement.
|
152 |
-
""")
|
153 |
-
|
154 |
-
sop_template = PromptTemplate.from_template("""
|
155 |
Write a compelling Statement of Purpose (SOP).
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
- Motivation: {motivation}
|
163 |
-
- Academic Background: {academic_background}
|
164 |
-
- Research & Projects: {research_experiences}
|
165 |
-
- Career Goals: {career_goals}
|
166 |
-
- Why This Program: {why_this_program}
|
167 |
-
### Output:
|
168 |
-
A well-structured SOP.
|
169 |
""")
|
|
|
170 |
|
171 |
-
# LangChain
|
172 |
-
|
173 |
-
cover_letter_chain = LLMChain(llm=llm, prompt=cover_letter_template)
|
174 |
-
research_statement_chain = LLMChain(llm=llm, prompt=research_statement_template)
|
175 |
-
sop_chain = LLMChain(llm=llm, prompt=sop_template)
|
176 |
|
177 |
-
#
|
178 |
-
st.subheader("📩 Generate Application Documents")
|
179 |
tab1, tab2, tab3, tab4 = st.tabs(["Cold Email", "Cover Letter", "Research Statement", "SOP"])
|
180 |
|
181 |
-
# Cold Email
|
182 |
with tab1:
|
183 |
-
|
184 |
-
research_interests = st.
|
185 |
-
reason = st.
|
186 |
|
187 |
-
if st.button("Generate
|
188 |
-
if
|
189 |
-
st.
|
190 |
-
else:
|
191 |
-
with st.spinner("Generating Cold Email..."):
|
192 |
try:
|
193 |
-
email =
|
194 |
"professor_name": professor_name,
|
195 |
"university_name": university_name,
|
196 |
"research_interests": research_interests,
|
197 |
"reason": reason,
|
198 |
"resume_text": cv_resume_text
|
199 |
})
|
200 |
-
st.markdown("**Generated Cold Email:**")
|
201 |
-
st.markdown(email)
|
202 |
-
st.download_button("Download Email", email, file_name="cold_email.txt")
|
203 |
except Exception as e:
|
204 |
-
st.error(f"
|
|
|
|
|
205 |
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
|
|
211 |
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
|
|
|
|
|
|
|
|
217 |
try:
|
218 |
-
cover_letter =
|
219 |
"job_title": job_title,
|
220 |
"company": company_name,
|
221 |
"key_skills": key_skills,
|
222 |
"resume_text": cv_resume_text
|
223 |
})
|
224 |
-
st.markdown("**Generated Cover Letter:**")
|
225 |
-
st.markdown(cover_letter)
|
226 |
-
st.download_button("Download Cover Letter", cover_letter, file_name="cover_letter.txt")
|
227 |
except Exception as e:
|
228 |
-
st.error(f"
|
|
|
|
|
229 |
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
|
|
235 |
|
236 |
-
|
237 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
238 |
try:
|
239 |
-
research_statement =
|
240 |
"research_background": research_background,
|
241 |
"key_projects": key_projects,
|
242 |
"future_goals": future_goals
|
243 |
})
|
244 |
-
st.markdown("**Generated Research Statement:**")
|
245 |
-
st.markdown(research_statement)
|
246 |
-
st.download_button("Download Research Statement", research_statement, file_name="research_statement.txt")
|
247 |
except Exception as e:
|
248 |
-
st.error(f"
|
249 |
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
why_this_program = st.text_area("Why This Program", placeholder="Example: This program aligns with my research interests because...")
|
257 |
|
258 |
-
|
259 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
260 |
try:
|
261 |
-
sop =
|
262 |
"motivation": motivation,
|
263 |
"academic_background": academic_background,
|
264 |
"research_experiences": research_experiences,
|
265 |
"career_goals": career_goals,
|
266 |
"why_this_program": why_this_program
|
267 |
})
|
268 |
-
st.markdown("**Generated SOP:**")
|
269 |
-
st.markdown(sop)
|
270 |
-
st.download_button("Download SOP", sop, file_name="sop.txt")
|
271 |
except Exception as e:
|
272 |
-
st.error(f"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
273 |
|
274 |
# Reset Button
|
275 |
-
if st.button("🔄 Reset All
|
|
|
276 |
st.experimental_rerun()
|
|
|
2 |
import html
import os
import re

import fitz
import pytesseract
from langchain.chains import LLMChain
from langchain.llms import HuggingFaceHub
from langchain.prompts import PromptTemplate
from PIL import Image
|
10 |
|
11 |
+
# Set Hugging Face API Key
|
12 |
# Set Hugging Face API Key: copy the token from Streamlit secrets into the
# process environment.
os.environ.update({"HUGGINGFACEHUB_API_TOKEN": st.secrets["HF_TOKEN"]})

# Initialize LLM
llm = HuggingFaceHub(
    repo_id="mistralai/Mistral-7B-Instruct-v0.3",
    model_kwargs={"temperature": 0.5},
)

# App Configuration
page_settings = {
    "page_title": "DocuMentorAI",
    "layout": "wide",
    "page_icon": "📄",
}
st.set_page_config(**page_settings)
st.title("📄 DocuMentorAI")

# Improved CSS with dedicated output styling
output_css = """
<style>
.output-container {
background-color: #f0f2f6;
padding: 20px;
border-radius: 10px;
margin-top: 20px;
}
.generated-content {
font-size: 16px;
line-height: 1.6;
white-space: pre-wrap;
}
.download-button {
margin-top: 10px;
}
</style>
"""
st.markdown(output_css, unsafe_allow_html=True)
|
40 |
|
41 |
+
# Helper Functions
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
def extract_text_from_pdf(pdf_file):
|
43 |
try:
|
44 |
pdf_bytes = pdf_file.read()
|
|
|
48 |
st.error(f"Error extracting text from PDF: {e}")
|
49 |
return ""
|
50 |
|
|
|
51 |
def extract_text_from_image(image_file):
|
52 |
try:
|
53 |
image = Image.open(image_file)
|
|
|
56 |
st.error(f"Error extracting text from image: {e}")
|
57 |
return ""
|
58 |
|
|
|
59 |
def _is_pdf(uploaded_file):
    """Return True when the upload's MIME type or file name marks it as a PDF."""
    if getattr(uploaded_file, "type", "") == "application/pdf":
        return True
    # The reported MIME type can be missing or non-standard; fall back to the
    # file extension so such a PDF is not handed to the image OCR path.
    file_name = getattr(uploaded_file, "name", "") or ""
    return file_name.lower().endswith(".pdf")


def extract_text(uploaded_file):
    """Extract text from an uploaded CV/resume.

    Args:
        uploaded_file: The object returned by the file uploader, or a falsy
            value when nothing has been uploaded.

    Returns:
        The extracted text, or "" when no file was provided. PDFs are routed
        to ``extract_text_from_pdf``; everything else is treated as an image
        and routed to ``extract_text_from_image``.
    """
    if not uploaded_file:
        return ""
    if _is_pdf(uploaded_file):
        return extract_text_from_pdf(uploaded_file)
    return extract_text_from_image(uploaded_file)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
|
|
|
64 |
def extract_professor_details(text):
    """Find a professor's name and an institution name in free text.

    The search is a case-sensitive regex heuristic:

    * Professor: a title ("Dr.", "Professor", "Prof" or "Prof.") followed by
      two capitalised words, e.g. "Prof. Jane Doe". The title is included in
      the returned string.
    * Institution: "University", "Institute" or "College" (or "School" when
      followed by "of"), together with any capitalised words directly before
      it on the same line and an optional "of ..." tail, e.g.
      "XYZ University" or "University of California Berkeley". Previously only
      the bare keyword ("University") was returned.

    Args:
        text: The job/research opening text. ``None`` is treated as "".

    Returns:
        A ``(professor, university)`` tuple of strings; each element is
        "Not Found" when nothing matched.
    """
    text = text or ""

    professor_pattern = r"(Dr\.|Professor|Prof\.?)\s+([A-Z][a-z]+\s[A-Z][a-z]+)"

    # One capitalised word of an institution name (allows "A&M", "O'Neill").
    name_word = r"[A-Z][A-Za-z&'-]*"
    # Separators are limited to spaces/tabs so a name never spans two lines.
    university_pattern = (
        rf"(?:{name_word}[ \t]+)*"
        r"(?:University|Institute|College|School(?=[ \t]+of\b))"
        rf"(?:[ \t]+of[ \t]+(?:the[ \t]+)?[A-Za-z]+(?:[ \t]+{name_word})*)?"
    )

    professor_match = re.search(professor_pattern, text)
    university_match = re.search(university_pattern, text)

    professor = professor_match.group(0) if professor_match else "Not Found"
    university = university_match.group(0) if university_match else "Not Found"
    return professor, university
|
73 |
+
|
74 |
+
# Sidebar for Input Collection
|
75 |
+
# Sidebar for Input Collection: the opening text and the CV upload live in the
# sidebar. Extraction also runs inside the sidebar context, so any error it
# reports via st.error is rendered there.
accepted_cv_types = ["pdf", "png", "jpg", "jpeg"]
with st.sidebar:
    st.subheader("📝 Input Details")
    job_opening_text = st.text_area(
        "Job/Research Opening Details",
        height=150,
    )
    cv_resume_file = st.file_uploader(
        "Upload CV/Resume",
        type=accepted_cv_types,
    )
    cv_resume_text = extract_text(cv_resume_file)
|
80 |
+
|
81 |
+
# Initialize session state for generated content
|
82 |
+
# Initialize session state for generated content: one slot per document type,
# all starting as None, created only if the key is not already present.
if 'generated_content' not in st.session_state:
    st.session_state.generated_content = dict.fromkeys(
        ('email', 'cover_letter', 'research_statement', 'sop')
    )
|
89 |
+
|
90 |
+
# Template Definitions
|
91 |
+
# Raw prompt text for each document type; the {placeholders} are filled in by
# the matching tab when the chain is run.
prompt_texts = {
    'email': """
Write a professional cold email for a research position.
Output only the email content without any additional text or formatting.
Professor: {professor_name}
University: {university_name}
Research Interests: {research_interests}
Why This Lab: {reason}
CV Highlights: {resume_text}
""",
    'cover_letter': """
Write a compelling job application cover letter.
Output only the letter content without any additional text or formatting.
Job Title: {job_title}
Company: {company}
Key Skills: {key_skills}
CV Highlights: {resume_text}
""",
    'research_statement': """
Write a research statement for Ph.D. applications.
Output only the statement content without any additional text or formatting.
Research Background: {research_background}
Key Research Projects: {key_projects}
Future Goals: {future_goals}
""",
    'sop': """
Write a compelling Statement of Purpose (SOP).
Output only the SOP content without any additional text or formatting.
Motivation: {motivation}
Academic Background: {academic_background}
Research & Projects: {research_experiences}
Career Goals: {career_goals}
Why This Program: {why_this_program}
""",
}

# Template Definitions
templates = {}
for doc_key, prompt_text in prompt_texts.items():
    templates[doc_key] = PromptTemplate.from_template(prompt_text)

# Create LangChain instances: one chain per template, sharing the same LLM.
chains = {}
for doc_key, template in templates.items():
    chains[doc_key] = LLMChain(llm=llm, prompt=template)
|
|
|
|
|
|
|
129 |
|
130 |
+
# Tab Layout
|
|
|
131 |
def _generate(doc_key, inputs):
    """Run the chain for ``doc_key`` and store its output in session state.

    Any exception raised by the chain is reported with st.error instead of
    propagating, so a failed generation does not abort the page.
    """
    with st.spinner("Generating..."):
        try:
            st.session_state.generated_content[doc_key] = chains[doc_key].run(inputs)
        except Exception as e:
            st.error(f"Generation error: {e}")


def _render_generated(doc_key, download_label, file_name, download_key):
    """Show the stored document for ``doc_key`` (if any) with a download button."""
    content = st.session_state.generated_content[doc_key]
    if not content:
        return
    # A <div> opened in one st.markdown call and closed in another does not
    # wrap the elements in between, so the styled box and the text are emitted
    # in a single call. The text is model output rendered with
    # unsafe_allow_html, hence the escaping.
    st.markdown(
        '<div class="output-container"><div class="generated-content">'
        f"{html.escape(content)}</div></div>",
        unsafe_allow_html=True,
    )
    st.download_button(download_label, content, file_name=file_name, key=download_key)


def _has_any_text(inputs):
    """Return True when at least one input value contains non-blank text."""
    return any(value and value.strip() for value in inputs.values())


tab1, tab2, tab3, tab4 = st.tabs(["Cold Email", "Cover Letter", "Research Statement", "SOP"])

# Cold Email Tab
with tab1:
    professor_name, university_name = extract_professor_details(job_opening_text)
    research_interests = st.text_input("Research Interests")
    reason = st.text_input("Why this professor/lab?")

    if st.button("Generate Email", key="email_btn"):
        if job_opening_text and cv_resume_text:
            _generate("email", {
                "professor_name": professor_name,
                "university_name": university_name,
                "research_interests": research_interests,
                "reason": reason,
                "resume_text": cv_resume_text,
            })
        else:
            st.error("Please provide all required inputs")

    _render_generated("email", "Download Email", "cold_email.txt", "email_download")

# Cover Letter Tab
with tab2:
    job_title = st.text_input("Job Title")
    # Ask for the organisation only when none was detected in the opening text.
    company_name = university_name if university_name != "Not Found" else st.text_input("Company/University")
    key_skills = st.text_input("Key Skills")

    if st.button("Generate Cover Letter", key="cover_letter_btn"):
        if job_opening_text and cv_resume_text:
            _generate("cover_letter", {
                "job_title": job_title,
                "company": company_name,
                "key_skills": key_skills,
                "resume_text": cv_resume_text,
            })
        else:
            st.error("Please provide all required inputs")

    _render_generated("cover_letter", "Download Cover Letter", "cover_letter.txt", "cover_letter_download")

# Research Statement Tab
with tab3:
    research_background = st.text_input("Research Background")
    key_projects = st.text_input("Key Research Projects")
    future_goals = st.text_input("Future Research Goals")

    if st.button("Generate Research Statement", key="research_stmt_btn"):
        research_inputs = {
            "research_background": research_background,
            "key_projects": key_projects,
            "future_goals": future_goals,
        }
        # Do not call the model with a prompt in which every field is blank.
        if _has_any_text(research_inputs):
            _generate("research_statement", research_inputs)
        else:
            st.error("Please fill in at least one field")

    _render_generated(
        "research_statement",
        "Download Research Statement",
        "research_statement.txt",
        "research_stmt_download",
    )

# SOP Tab
with tab4:
    motivation = st.text_input("Motivation for Graduate Studies")
    academic_background = st.text_input("Academic Background")
    research_experiences = st.text_input("Research & Projects")
    career_goals = st.text_input("Career Goals")
    why_this_program = st.text_input("Why This Program")

    if st.button("Generate SOP", key="sop_btn"):
        sop_inputs = {
            "motivation": motivation,
            "academic_background": academic_background,
            "research_experiences": research_experiences,
            "career_goals": career_goals,
            "why_this_program": why_this_program,
        }
        # Do not call the model with a prompt in which every field is blank.
        if _has_any_text(sop_inputs):
            _generate("sop", sop_inputs)
        else:
            st.error("Please fill in at least one field")

    _render_generated("sop", "Download SOP", "sop.txt", "sop_download")
|
241 |
|
242 |
# Reset Button
|
243 |
+
if st.sidebar.button("🔄 Reset All"):
    # Clear every generated document while keeping the same set of keys.
    # Only the generated outputs are reset here; the input widgets are not.
    st.session_state.generated_content = dict.fromkeys(st.session_state.generated_content)
    # st.experimental_rerun was deprecated in favour of st.rerun and is absent
    # from newer Streamlit releases; prefer the new API and fall back to the
    # old one on installations that predate it.
    rerun = getattr(st, "rerun", None) or st.experimental_rerun
    rerun()
|