Sobit committed on
Commit
a35fb23
·
verified ·
1 Parent(s): 1ad6ea2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +150 -181
app.py CHANGED
@@ -2,51 +2,43 @@ import streamlit as st
2
  from langchain.chains import LLMChain
3
  from langchain.prompts import PromptTemplate
4
  from langchain.llms import HuggingFaceHub
5
- import fitz # PyMuPDF for PDF extraction
6
  from PIL import Image
7
  import os
8
  import pytesseract
9
  import re
10
 
11
- # Set Hugging Face API Key (Set this in Hugging Face Secrets)
12
  os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets["HF_TOKEN"]
13
 
14
- # Load Free LLM from Hugging Face
15
  llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-Instruct-v0.3", model_kwargs={"temperature": 0.5})
16
 
17
- # Streamlit App Configuration
18
  st.set_page_config(page_title="DocuMentorAI", layout="wide", page_icon="📄")
19
  st.title("📄 DocuMentorAI")
20
- st.write("Generate professional application documents with ease!")
21
 
22
- # Custom CSS for better UI
23
  st.markdown("""
24
  <style>
25
- .stTextArea textarea { font-size: 16px !important; }
26
- .stButton button { width: 100%; background-color: #4CAF50; color: white; }
27
- .stDownloadButton button { width: 100%; background-color: #008CBA; color: white; }
28
- .stMarkdown { font-size: 18px; }
29
- .stSpinner div { margin: auto; }
 
 
 
 
 
 
 
 
 
30
  </style>
31
  """, unsafe_allow_html=True)
32
 
33
- # Text Input for Job Opening Details
34
- st.subheader("📢 Enter Opening Details")
35
- job_opening_text = st.text_area(
36
- "Paste the job/research opening details here...",
37
- height=150,
38
- placeholder="Example: 'We are hiring a Research Assistant at XYZ University. The ideal candidate has experience in machine learning and data analysis...'"
39
- )
40
-
41
- # Upload CV/Resume
42
- st.subheader("📄 Upload CV/Resume")
43
- cv_resume_file = st.file_uploader(
44
- "Upload your CV/Resume (PDF or Image)",
45
- type=["pdf", "png", "jpg", "jpeg"],
46
- help="Upload a PDF or image of your CV/Resume for text extraction."
47
- )
48
-
49
- # Function to extract text from PDF
50
  def extract_text_from_pdf(pdf_file):
51
  try:
52
  pdf_bytes = pdf_file.read()
@@ -56,7 +48,6 @@ def extract_text_from_pdf(pdf_file):
56
  st.error(f"Error extracting text from PDF: {e}")
57
  return ""
58
 
59
- # Function to extract text from Image using OCR
60
  def extract_text_from_image(image_file):
61
  try:
62
  image = Image.open(image_file)
@@ -65,212 +56,190 @@ def extract_text_from_image(image_file):
65
  st.error(f"Error extracting text from image: {e}")
66
  return ""
67
 
68
- # Function to extract text from uploaded files
69
  def extract_text(uploaded_file):
70
- if uploaded_file:
71
- file_type = uploaded_file.type
72
- if file_type == "application/pdf":
73
- return extract_text_from_pdf(uploaded_file)
74
- else:
75
- return extract_text_from_image(uploaded_file)
76
- return ""
77
-
78
- # Extract text from CV/Resume
79
- cv_resume_text = extract_text(cv_resume_file)
80
-
81
- # Display Extracted Text
82
- if job_opening_text:
83
- with st.expander("🔍 View Entered Opening Details"):
84
- st.markdown(f"**Job Opening Details:**\n\n{job_opening_text}")
85
-
86
- if cv_resume_text:
87
- with st.expander("🔍 View Extracted CV/Resume Details"):
88
- st.markdown(f"**CV/Resume Details:**\n\n{cv_resume_text}")
89
 
90
- # Function to extract professor name, designation, and university
91
  def extract_professor_details(text):
92
  professor_pattern = r"(Dr\.|Professor|Prof\.?)\s+([A-Z][a-z]+\s[A-Z][a-z]+)"
93
  university_pattern = r"(University|Institute|College|School of [A-Za-z]+)"
94
 
95
  professor_match = re.search(professor_pattern, text)
96
  university_match = re.search(university_pattern, text)
97
-
98
- professor_name = professor_match.group(0) if professor_match else "Not Found"
99
- university_name = university_match.group(0) if university_match else "Not Found"
100
-
101
- return professor_name, university_name
102
-
103
- # Extract professor details if job opening is uploaded
104
- professor_name, university_name = extract_professor_details(job_opening_text)
105
-
106
- # LLM Prompt Templates
107
- email_template = PromptTemplate.from_template("""
 
 
 
 
 
 
 
 
 
 
 
 
108
  Write a professional cold email for a research position.
109
- - Address the professor formally.
110
- - Introduce yourself and academic background.
111
- - Express interest in their research.
112
- - Highlight key skills from your CV.
113
- - Conclude with a polite request.
114
- ### Input:
115
- - Professor: {professor_name}
116
- - University: {university_name}
117
- - Research Interests: {research_interests}
118
- - Why This Lab: {reason}
119
- - CV Highlights: {resume_text}
120
- ### Output:
121
- A well-structured, professional cold email.
122
- """)
123
-
124
- cover_letter_template = PromptTemplate.from_template("""
125
  Write a compelling job application cover letter.
126
- - Address the employer formally.
127
- - Mention job title and where you found it.
128
- - Highlight key skills and experiences.
129
- - Relate background to the company.
130
- - Conclude with enthusiasm.
131
- ### Input:
132
- - Job Title: {job_title}
133
- - Company: {company}
134
- - Key Skills: {key_skills}
135
- - CV Highlights: {resume_text}
136
- ### Output:
137
- A strong, well-formatted cover letter.
138
- """)
139
-
140
- research_statement_template = PromptTemplate.from_template("""
141
  Write a research statement for Ph.D. applications.
142
- - Discuss research background and motivation.
143
- - Explain key research experiences and findings.
144
- - Outline future research interests and goals.
145
- - Highlight contributions to the field.
146
- ### Input:
147
- - Research Background: {research_background}
148
- - Key Research Projects: {key_projects}
149
- - Future Goals: {future_goals}
150
- ### Output:
151
- A well-structured, professional research statement.
152
- """)
153
-
154
- sop_template = PromptTemplate.from_template("""
155
  Write a compelling Statement of Purpose (SOP).
156
- - Introduce motivation for graduate studies.
157
- - Discuss academic background.
158
- - Explain relevant experiences and research.
159
- - Outline career goals.
160
- - Justify fit for the program.
161
- ### Input:
162
- - Motivation: {motivation}
163
- - Academic Background: {academic_background}
164
- - Research & Projects: {research_experiences}
165
- - Career Goals: {career_goals}
166
- - Why This Program: {why_this_program}
167
- ### Output:
168
- A well-structured SOP.
169
  """)
 
170
 
171
- # LangChain Chains
172
- email_chain = LLMChain(llm=llm, prompt=email_template)
173
- cover_letter_chain = LLMChain(llm=llm, prompt=cover_letter_template)
174
- research_statement_chain = LLMChain(llm=llm, prompt=research_statement_template)
175
- sop_chain = LLMChain(llm=llm, prompt=sop_template)
176
 
177
- # User Inputs
178
- st.subheader("📩 Generate Application Documents")
179
  tab1, tab2, tab3, tab4 = st.tabs(["Cold Email", "Cover Letter", "Research Statement", "SOP"])
180
 
181
- # Cold Email Generation
182
  with tab1:
183
- st.write(f"🧑‍🏫 **Detected Professor:** {professor_name} at {university_name}")
184
- research_interests = st.text_area("Research Interests", placeholder="Example: Machine Learning, Data Analysis, etc.")
185
- reason = st.text_area("Why this professor/lab?", placeholder="Example: I am particularly interested in your work on...")
186
 
187
- if st.button("Generate Cold Email"):
188
- if not job_opening_text or not cv_resume_text:
189
- st.error("Please provide job opening details and upload your CV/Resume.")
190
- else:
191
- with st.spinner("Generating Cold Email..."):
192
  try:
193
- email = email_chain.run({
194
  "professor_name": professor_name,
195
  "university_name": university_name,
196
  "research_interests": research_interests,
197
  "reason": reason,
198
  "resume_text": cv_resume_text
199
  })
200
- st.markdown("**Generated Cold Email:**")
201
- st.markdown(email)
202
- st.download_button("Download Email", email, file_name="cold_email.txt")
203
  except Exception as e:
204
- st.error(f"Error generating cold email: {e}")
 
 
205
 
206
- # Cover Letter Generation
207
- with tab2:
208
- job_title = st.text_input("Job Title", placeholder="Example: Research Assistant")
209
- company_name = university_name if university_name != "Not Found" else st.text_input("Company/University", placeholder="Example: XYZ University")
210
- key_skills = st.text_area("Key Skills", placeholder="Example: Python, Machine Learning, Data Analysis")
 
211
 
212
- if st.button("Generate Cover Letter"):
213
- if not job_opening_text or not cv_resume_text:
214
- st.error("Please provide job opening details and upload your CV/Resume.")
215
- else:
216
- with st.spinner("Generating Cover Letter..."):
 
 
 
 
217
  try:
218
- cover_letter = cover_letter_chain.run({
219
  "job_title": job_title,
220
  "company": company_name,
221
  "key_skills": key_skills,
222
  "resume_text": cv_resume_text
223
  })
224
- st.markdown("**Generated Cover Letter:**")
225
- st.markdown(cover_letter)
226
- st.download_button("Download Cover Letter", cover_letter, file_name="cover_letter.txt")
227
  except Exception as e:
228
- st.error(f"Error generating cover letter: {e}")
 
 
229
 
230
- # Research Statement Generation
231
- with tab3:
232
- research_background = st.text_area("Research Background", placeholder="Example: My research focuses on...")
233
- key_projects = st.text_area("Key Research Projects", placeholder="Example: Developed a machine learning model for...")
234
- future_goals = st.text_area("Future Research Goals", placeholder="Example: I aim to explore...")
 
235
 
236
- if st.button("Generate Research Statement"):
237
- with st.spinner("Generating Research Statement..."):
 
 
 
 
 
 
238
  try:
239
- research_statement = research_statement_chain.run({
240
  "research_background": research_background,
241
  "key_projects": key_projects,
242
  "future_goals": future_goals
243
  })
244
- st.markdown("**Generated Research Statement:**")
245
- st.markdown(research_statement)
246
- st.download_button("Download Research Statement", research_statement, file_name="research_statement.txt")
247
  except Exception as e:
248
- st.error(f"Error generating research statement: {e}")
249
 
250
- # SOP Generation
251
- with tab4:
252
- motivation = st.text_area("Motivation for Graduate Studies", placeholder="Example: I have always been passionate about...")
253
- academic_background = st.text_area("Academic Background", placeholder="Example: I completed my undergraduate degree in...")
254
- research_experiences = st.text_area("Research & Projects", placeholder="Example: During my undergraduate studies, I worked on...")
255
- career_goals = st.text_area("Career Goals", placeholder="Example: My long-term goal is to...")
256
- why_this_program = st.text_area("Why This Program", placeholder="Example: This program aligns with my research interests because...")
257
 
258
- if st.button("Generate SOP"):
259
- with st.spinner("Generating SOP..."):
 
 
 
 
 
 
 
 
260
  try:
261
- sop = sop_chain.run({
262
  "motivation": motivation,
263
  "academic_background": academic_background,
264
  "research_experiences": research_experiences,
265
  "career_goals": career_goals,
266
  "why_this_program": why_this_program
267
  })
268
- st.markdown("**Generated SOP:**")
269
- st.markdown(sop)
270
- st.download_button("Download SOP", sop, file_name="sop.txt")
271
  except Exception as e:
272
- st.error(f"Error generating SOP: {e}")
 
 
 
 
 
 
 
273
 
274
  # Reset Button
275
- if st.button("🔄 Reset All Inputs and Outputs"):
 
276
  st.experimental_rerun()
 
2
  from langchain.chains import LLMChain
3
  from langchain.prompts import PromptTemplate
4
  from langchain.llms import HuggingFaceHub
5
+ import fitz
6
  from PIL import Image
7
  import os
8
  import pytesseract
9
  import re
10
 
11
+ # Set Hugging Face API Key
12
  os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets["HF_TOKEN"]
13
 
14
+ # Initialize LLM
15
  llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-Instruct-v0.3", model_kwargs={"temperature": 0.5})
16
 
17
+ # App Configuration
18
  st.set_page_config(page_title="DocuMentorAI", layout="wide", page_icon="📄")
19
  st.title("📄 DocuMentorAI")
 
20
 
21
+ # Improved CSS with dedicated output styling
22
  st.markdown("""
23
  <style>
24
+ .output-container {
25
+ background-color: #f0f2f6;
26
+ padding: 20px;
27
+ border-radius: 10px;
28
+ margin-top: 20px;
29
+ }
30
+ .generated-content {
31
+ font-size: 16px;
32
+ line-height: 1.6;
33
+ white-space: pre-wrap;
34
+ }
35
+ .download-button {
36
+ margin-top: 10px;
37
+ }
38
  </style>
39
  """, unsafe_allow_html=True)
40
 
41
+ # Helper Functions
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  def extract_text_from_pdf(pdf_file):
43
  try:
44
  pdf_bytes = pdf_file.read()
 
48
  st.error(f"Error extracting text from PDF: {e}")
49
  return ""
50
 
 
51
  def extract_text_from_image(image_file):
52
  try:
53
  image = Image.open(image_file)
 
56
  st.error(f"Error extracting text from image: {e}")
57
  return ""
58
 
 
59
  def extract_text(uploaded_file):
60
+ if not uploaded_file:
61
+ return ""
62
+ return extract_text_from_pdf(uploaded_file) if uploaded_file.type == "application/pdf" else extract_text_from_image(uploaded_file)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
 
64
  def extract_professor_details(text):
65
  professor_pattern = r"(Dr\.|Professor|Prof\.?)\s+([A-Z][a-z]+\s[A-Z][a-z]+)"
66
  university_pattern = r"(University|Institute|College|School of [A-Za-z]+)"
67
 
68
  professor_match = re.search(professor_pattern, text)
69
  university_match = re.search(university_pattern, text)
70
+
71
+ return (professor_match.group(0) if professor_match else "Not Found",
72
+ university_match.group(0) if university_match else "Not Found")
73
+
74
+ # Sidebar for Input Collection
75
+ with st.sidebar:
76
+ st.subheader("📝 Input Details")
77
+ job_opening_text = st.text_area("Job/Research Opening Details", height=150)
78
+ cv_resume_file = st.file_uploader("Upload CV/Resume", type=["pdf", "png", "jpg", "jpeg"])
79
+ cv_resume_text = extract_text(cv_resume_file)
80
+
81
+ # Initialize session state for generated content
82
+ if 'generated_content' not in st.session_state:
83
+ st.session_state.generated_content = {
84
+ 'email': None,
85
+ 'cover_letter': None,
86
+ 'research_statement': None,
87
+ 'sop': None
88
+ }
89
+
90
+ # Template Definitions
91
+ templates = {
92
+ 'email': PromptTemplate.from_template("""
93
  Write a professional cold email for a research position.
94
+ Output only the email content without any additional text or formatting.
95
+ Professor: {professor_name}
96
+ University: {university_name}
97
+ Research Interests: {research_interests}
98
+ Why This Lab: {reason}
99
+ CV Highlights: {resume_text}
100
+ """),
101
+ 'cover_letter': PromptTemplate.from_template("""
 
 
 
 
 
 
 
 
102
  Write a compelling job application cover letter.
103
+ Output only the letter content without any additional text or formatting.
104
+ Job Title: {job_title}
105
+ Company: {company}
106
+ Key Skills: {key_skills}
107
+ CV Highlights: {resume_text}
108
+ """),
109
+ 'research_statement': PromptTemplate.from_template("""
 
 
 
 
 
 
 
 
110
  Write a research statement for Ph.D. applications.
111
+ Output only the statement content without any additional text or formatting.
112
+ Research Background: {research_background}
113
+ Key Research Projects: {key_projects}
114
+ Future Goals: {future_goals}
115
+ """),
116
+ 'sop': PromptTemplate.from_template("""
 
 
 
 
 
 
 
117
  Write a compelling Statement of Purpose (SOP).
118
+ Output only the SOP content without any additional text or formatting.
119
+ Motivation: {motivation}
120
+ Academic Background: {academic_background}
121
+ Research & Projects: {research_experiences}
122
+ Career Goals: {career_goals}
123
+ Why This Program: {why_this_program}
 
 
 
 
 
 
 
124
  """)
125
+ }
126
 
127
+ # Create LangChain instances
128
+ chains = {key: LLMChain(llm=llm, prompt=template) for key, template in templates.items()}
 
 
 
129
 
130
+ # Tab Layout
 
131
  tab1, tab2, tab3, tab4 = st.tabs(["Cold Email", "Cover Letter", "Research Statement", "SOP"])
132
 
133
+ # Cold Email Tab
134
  with tab1:
135
+ professor_name, university_name = extract_professor_details(job_opening_text)
136
+ research_interests = st.text_input("Research Interests")
137
+ reason = st.text_input("Why this professor/lab?")
138
 
139
+ if st.button("Generate Email", key="email_btn"):
140
+ if job_opening_text and cv_resume_text:
141
+ with st.spinner("Generating..."):
 
 
142
  try:
143
+ st.session_state.generated_content['email'] = chains['email'].run({
144
  "professor_name": professor_name,
145
  "university_name": university_name,
146
  "research_interests": research_interests,
147
  "reason": reason,
148
  "resume_text": cv_resume_text
149
  })
 
 
 
150
  except Exception as e:
151
+ st.error(f"Generation error: {e}")
152
+ else:
153
+ st.error("Please provide all required inputs")
154
 
155
+ if st.session_state.generated_content['email']:
156
+ st.markdown('<div class="output-container">', unsafe_allow_html=True)
157
+ st.markdown(st.session_state.generated_content['email'])
158
+ st.download_button("Download Email", st.session_state.generated_content['email'],
159
+ file_name="cold_email.txt", key="email_download")
160
+ st.markdown('</div>', unsafe_allow_html=True)
161
 
162
+ # Cover Letter Tab
163
+ with tab2:
164
+ job_title = st.text_input("Job Title")
165
+ company_name = university_name if university_name != "Not Found" else st.text_input("Company/University")
166
+ key_skills = st.text_input("Key Skills")
167
+
168
+ if st.button("Generate Cover Letter", key="cover_letter_btn"):
169
+ if job_opening_text and cv_resume_text:
170
+ with st.spinner("Generating..."):
171
  try:
172
+ st.session_state.generated_content['cover_letter'] = chains['cover_letter'].run({
173
  "job_title": job_title,
174
  "company": company_name,
175
  "key_skills": key_skills,
176
  "resume_text": cv_resume_text
177
  })
 
 
 
178
  except Exception as e:
179
+ st.error(f"Generation error: {e}")
180
+ else:
181
+ st.error("Please provide all required inputs")
182
 
183
+ if st.session_state.generated_content['cover_letter']:
184
+ st.markdown('<div class="output-container">', unsafe_allow_html=True)
185
+ st.markdown(st.session_state.generated_content['cover_letter'])
186
+ st.download_button("Download Cover Letter", st.session_state.generated_content['cover_letter'],
187
+ file_name="cover_letter.txt", key="cover_letter_download")
188
+ st.markdown('</div>', unsafe_allow_html=True)
189
 
190
+ # Research Statement Tab
191
+ with tab3:
192
+ research_background = st.text_input("Research Background")
193
+ key_projects = st.text_input("Key Research Projects")
194
+ future_goals = st.text_input("Future Research Goals")
195
+
196
+ if st.button("Generate Research Statement", key="research_stmt_btn"):
197
+ with st.spinner("Generating..."):
198
  try:
199
+ st.session_state.generated_content['research_statement'] = chains['research_statement'].run({
200
  "research_background": research_background,
201
  "key_projects": key_projects,
202
  "future_goals": future_goals
203
  })
 
 
 
204
  except Exception as e:
205
+ st.error(f"Generation error: {e}")
206
 
207
+ if st.session_state.generated_content['research_statement']:
208
+ st.markdown('<div class="output-container">', unsafe_allow_html=True)
209
+ st.markdown(st.session_state.generated_content['research_statement'])
210
+ st.download_button("Download Research Statement", st.session_state.generated_content['research_statement'],
211
+ file_name="research_statement.txt", key="research_stmt_download")
212
+ st.markdown('</div>', unsafe_allow_html=True)
 
213
 
214
+ # SOP Tab
215
+ with tab4:
216
+ motivation = st.text_input("Motivation for Graduate Studies")
217
+ academic_background = st.text_input("Academic Background")
218
+ research_experiences = st.text_input("Research & Projects")
219
+ career_goals = st.text_input("Career Goals")
220
+ why_this_program = st.text_input("Why This Program")
221
+
222
+ if st.button("Generate SOP", key="sop_btn"):
223
+ with st.spinner("Generating..."):
224
  try:
225
+ st.session_state.generated_content['sop'] = chains['sop'].run({
226
  "motivation": motivation,
227
  "academic_background": academic_background,
228
  "research_experiences": research_experiences,
229
  "career_goals": career_goals,
230
  "why_this_program": why_this_program
231
  })
 
 
 
232
  except Exception as e:
233
+ st.error(f"Generation error: {e}")
234
+
235
+ if st.session_state.generated_content['sop']:
236
+ st.markdown('<div class="output-container">', unsafe_allow_html=True)
237
+ st.markdown(st.session_state.generated_content['sop'])
238
+ st.download_button("Download SOP", st.session_state.generated_content['sop'],
239
+ file_name="sop.txt", key="sop_download")
240
+ st.markdown('</div>', unsafe_allow_html=True)
241
 
242
  # Reset Button
243
+ if st.sidebar.button("🔄 Reset All"):
244
+ st.session_state.generated_content = {key: None for key in st.session_state.generated_content}
245
  st.experimental_rerun()