Sobit committed on
Commit
34bda2a
·
verified ·
1 Parent(s): a35fb23

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +135 -34
app.py CHANGED
@@ -18,7 +18,7 @@ llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-Instruct-v0.3", model_kwargs=
18
  st.set_page_config(page_title="DocuMentorAI", layout="wide", page_icon="📄")
19
  st.title("📄 DocuMentorAI")
20
 
21
- # Improved CSS with dedicated output styling
22
  st.markdown("""
23
  <style>
24
  .output-container {
@@ -26,14 +26,13 @@ st.markdown("""
26
  padding: 20px;
27
  border-radius: 10px;
28
  margin-top: 20px;
29
- }
30
- .generated-content {
31
- font-size: 16px;
32
- line-height: 1.6;
33
  white-space: pre-wrap;
34
  }
35
- .download-button {
36
- margin-top: 10px;
 
 
 
37
  }
38
  </style>
39
  """, unsafe_allow_html=True)
@@ -63,7 +62,7 @@ def extract_text(uploaded_file):
63
 
64
  def extract_professor_details(text):
65
  professor_pattern = r"(Dr\.|Professor|Prof\.?)\s+([A-Z][a-z]+\s[A-Z][a-z]+)"
66
- university_pattern = r"(University|Institute|College|School of [A-Za-z]+)"
67
 
68
  professor_match = re.search(professor_pattern, text)
69
  university_match = re.search(university_pattern, text)
@@ -71,14 +70,87 @@ def extract_professor_details(text):
71
  return (professor_match.group(0) if professor_match else "Not Found",
72
  university_match.group(0) if university_match else "Not Found")
73
 
74
- # Sidebar for Input Collection
75
- with st.sidebar:
76
- st.subheader("📝 Input Details")
77
- job_opening_text = st.text_area("Job/Research Opening Details", height=150)
78
- cv_resume_file = st.file_uploader("Upload CV/Resume", type=["pdf", "png", "jpg", "jpeg"])
79
- cv_resume_text = extract_text(cv_resume_file)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
 
81
- # Initialize session state for generated content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  if 'generated_content' not in st.session_state:
83
  st.session_state.generated_content = {
84
  'email': None,
@@ -90,43 +162,68 @@ if 'generated_content' not in st.session_state:
90
  # Template Definitions
91
  templates = {
92
  'email': PromptTemplate.from_template("""
93
- Write a professional cold email for a research position.
94
- Output only the email content without any additional text or formatting.
 
 
 
 
95
  Professor: {professor_name}
96
  University: {university_name}
97
  Research Interests: {research_interests}
98
  Why This Lab: {reason}
99
- CV Highlights: {resume_text}
100
  """),
 
101
  'cover_letter': PromptTemplate.from_template("""
102
- Write a compelling job application cover letter.
103
- Output only the letter content without any additional text or formatting.
 
 
 
 
104
  Job Title: {job_title}
105
  Company: {company}
106
  Key Skills: {key_skills}
107
- CV Highlights: {resume_text}
108
  """),
 
109
  'research_statement': PromptTemplate.from_template("""
110
- Write a research statement for Ph.D. applications.
111
- Output only the statement content without any additional text or formatting.
112
- Research Background: {research_background}
113
- Key Research Projects: {key_projects}
 
 
 
 
114
  Future Goals: {future_goals}
115
  """),
 
116
  'sop': PromptTemplate.from_template("""
117
- Write a compelling Statement of Purpose (SOP).
118
- Output only the SOP content without any additional text or formatting.
 
 
 
 
119
  Motivation: {motivation}
120
  Academic Background: {academic_background}
121
- Research & Projects: {research_experiences}
122
  Career Goals: {career_goals}
123
- Why This Program: {why_this_program}
124
  """)
125
  }
126
 
127
  # Create LangChain instances
128
  chains = {key: LLMChain(llm=llm, prompt=template) for key, template in templates.items()}
129
 
 
 
 
 
 
 
 
130
  # Tab Layout
131
  tab1, tab2, tab3, tab4 = st.tabs(["Cold Email", "Cover Letter", "Research Statement", "SOP"])
132
 
@@ -140,13 +237,14 @@ with tab1:
140
  if job_opening_text and cv_resume_text:
141
  with st.spinner("Generating..."):
142
  try:
143
- st.session_state.generated_content['email'] = chains['email'].run({
144
  "professor_name": professor_name,
145
  "university_name": university_name,
146
  "research_interests": research_interests,
147
  "reason": reason,
148
  "resume_text": cv_resume_text
149
  })
 
150
  except Exception as e:
151
  st.error(f"Generation error: {e}")
152
  else:
@@ -155,7 +253,7 @@ with tab1:
155
  if st.session_state.generated_content['email']:
156
  st.markdown('<div class="output-container">', unsafe_allow_html=True)
157
  st.markdown(st.session_state.generated_content['email'])
158
- st.download_button("Download Email", st.session_state.generated_content['email'],
159
  file_name="cold_email.txt", key="email_download")
160
  st.markdown('</div>', unsafe_allow_html=True)
161
 
@@ -169,12 +267,13 @@ with tab2:
169
  if job_opening_text and cv_resume_text:
170
  with st.spinner("Generating..."):
171
  try:
172
- st.session_state.generated_content['cover_letter'] = chains['cover_letter'].run({
173
  "job_title": job_title,
174
  "company": company_name,
175
  "key_skills": key_skills,
176
  "resume_text": cv_resume_text
177
  })
 
178
  except Exception as e:
179
  st.error(f"Generation error: {e}")
180
  else:
@@ -196,11 +295,12 @@ with tab3:
196
  if st.button("Generate Research Statement", key="research_stmt_btn"):
197
  with st.spinner("Generating..."):
198
  try:
199
- st.session_state.generated_content['research_statement'] = chains['research_statement'].run({
200
  "research_background": research_background,
201
  "key_projects": key_projects,
202
  "future_goals": future_goals
203
  })
 
204
  except Exception as e:
205
  st.error(f"Generation error: {e}")
206
 
@@ -222,13 +322,14 @@ with tab4:
222
  if st.button("Generate SOP", key="sop_btn"):
223
  with st.spinner("Generating..."):
224
  try:
225
- st.session_state.generated_content['sop'] = chains['sop'].run({
226
  "motivation": motivation,
227
  "academic_background": academic_background,
228
  "research_experiences": research_experiences,
229
  "career_goals": career_goals,
230
  "why_this_program": why_this_program
231
  })
 
232
  except Exception as e:
233
  st.error(f"Generation error: {e}")
234
 
 
18
  st.set_page_config(page_title="DocuMentorAI", layout="wide", page_icon="📄")
19
  st.title("📄 DocuMentorAI")
20
 
21
+ # Improved CSS
22
  st.markdown("""
23
  <style>
24
  .output-container {
 
26
  padding: 20px;
27
  border-radius: 10px;
28
  margin-top: 20px;
 
 
 
 
29
  white-space: pre-wrap;
30
  }
31
+ .stTextArea textarea {
32
+ font-size: 16px !important;
33
+ }
34
+ .stButton button {
35
+ width: 100%;
36
  }
37
  </style>
38
  """, unsafe_allow_html=True)
 
62
 
63
  def extract_professor_details(text):
64
  professor_pattern = r"(Dr\.|Professor|Prof\.?)\s+([A-Z][a-z]+\s[A-Z][a-z]+)"
65
+ university_pattern = r"(University|Institute|College|School of [A-Z][A-Za-z\s]+)"
66
 
67
  professor_match = re.search(professor_pattern, text)
68
  university_match = re.search(university_pattern, text)
 
70
  return (professor_match.group(0) if professor_match else "Not Found",
71
  university_match.group(0) if university_match else "Not Found")
72
 
73
def clean_email_output(email_text):
    """Trim raw model output down to the email itself.

    The email is taken to start at the first "Dear" (or at the beginning of
    the text when no salutation is present) and to end at the paragraph break
    that follows the first recognised sign-off phrase. Any "Phone:" / "Email:"
    lines found after that point are re-attached below the signature.

    Args:
        email_text: Raw text returned by the email chain.

    Returns:
        The cleaned email, stripped of surrounding whitespace.
    """
    # Fall back to the start of the text when there is no salutation.
    start_idx = max(email_text.find("Dear"), 0)

    end_markers = ["Best regards,", "Sincerely,", "Yours sincerely,", "Kind regards,"]
    end_idx = len(email_text)
    for marker in end_markers:
        # Search only after the salutation: a sign-off phrase in the preamble
        # would otherwise put the end before the start and yield an empty email.
        idx = email_text.find(marker, start_idx)
        if idx != -1:
            paragraph_break = email_text.find("\n\n", idx)
            if paragraph_break != -1:
                end_idx = paragraph_break
            break

    email_content = email_text[start_idx:end_idx].strip()

    # Contact details the model placed after the signature block.
    contact_lines = [
        line.strip()
        for line in email_text[end_idx:].split("\n")
        if "Phone:" in line or "Email:" in line
    ]
    # Append only when something was found, so the result never ends in a
    # dangling blank paragraph.
    if contact_lines:
        email_content += "\n\n" + "\n".join(contact_lines)

    return email_content
97
+
98
def clean_cover_letter_output(letter_text):
    """Trim raw model output down to the cover letter itself.

    The letter starts at the earliest of the known greeting markers and ends
    at the paragraph break that follows the first recognised sign-off phrase.
    When no greeting marker is present the text is kept from its beginning
    rather than being discarded.

    Args:
        letter_text: Raw text returned by the cover-letter chain.

    Returns:
        The cleaned letter, stripped of surrounding whitespace.
    """
    start_markers = ["Dear", "To Whom", "Hiring"]
    found = [
        idx
        for idx in (letter_text.find(marker) for marker in start_markers)
        if idx != -1
    ]
    # Default to 0: with no greeting, the whole output is the letter.
    start_idx = min(found, default=0)

    end_markers = ["Sincerely,", "Best regards,", "Yours truly,", "Regards,"]
    end_idx = len(letter_text)
    for marker in end_markers:
        # Look for the sign-off only after the greeting so the end can never
        # precede the start.
        idx = letter_text.find(marker, start_idx)
        if idx != -1:
            paragraph_break = letter_text.find("\n\n", idx)
            if paragraph_break != -1:
                end_idx = paragraph_break
            break

    return letter_text[start_idx:end_idx].strip()
116
+
117
def clean_research_statement_output(statement_text):
    """Strip a leading title and trailing reference section from a statement.

    A title is removed only when it stands as a heading (followed by a colon,
    a line break, or the end of the text), so a statement that merely opens
    with the words "Research Interests ..." is left intact. A references
    section is cut only when its marker begins a line, so the same word used
    inside a sentence does not truncate the statement.

    Args:
        statement_text: Raw text returned by the research-statement chain.

    Returns:
        The cleaned statement, stripped of surrounding whitespace.
    """
    # Strip first: model output often begins with blank lines, which would
    # otherwise hide the heading from the match below.
    cleaned_text = statement_text.strip()

    # Optional markdown decoration (#, *) is tolerated around the heading.
    header = re.match(
        r"[#* \t]*(?:Research Statement|Statement of Research|Research Interests)"
        r"[* \t]*(?::|\n|\Z)",
        cleaned_text,
    )
    if header:
        cleaned_text = cleaned_text[header.end():]

    # Cut at the first line that starts a references-style section.
    trailer = re.search(
        r"^[#* \t]*(?:References|Bibliography|Citations)\b",
        cleaned_text,
        re.MULTILINE,
    )
    if trailer:
        cleaned_text = cleaned_text[:trailer.start()]

    return cleaned_text.strip()
134
 
135
def clean_sop_output(sop_text):
    """Strip a leading title and trailing closing sections from an SOP.

    A title is removed only when it stands as a heading (followed by a colon,
    a line break, or the end of the text). Trailing material is cut at the
    first line that begins with a closing marker ("Thank you", "References",
    "Additional Information"); the same words appearing mid-sentence do not
    truncate the statement.

    Args:
        sop_text: Raw text returned by the SOP chain.

    Returns:
        The cleaned statement, stripped of surrounding whitespace.
    """
    # Strip first: leading blank lines or spaces would otherwise hide the
    # heading from the match below.
    cleaned_text = sop_text.strip()

    # Optional markdown decoration (#, *) is tolerated around the heading.
    header = re.match(
        r"[#* \t]*(?:Statement of Purpose|Personal Statement|Academic Statement)"
        r"[* \t]*(?::|\n|\Z)",
        cleaned_text,
    )
    if header:
        cleaned_text = cleaned_text[header.end():]

    # Cut at the first line that opens a closing/trailing section.
    trailer = re.search(
        r"^[#* \t]*(?:Thank you|References|Additional Information)\b",
        cleaned_text,
        re.MULTILINE,
    )
    if trailer:
        cleaned_text = cleaned_text[:trailer.start()]

    return cleaned_text.strip()
152
+
153
+ # Initialize session state
154
  if 'generated_content' not in st.session_state:
155
  st.session_state.generated_content = {
156
  'email': None,
 
162
  # Template Definitions
163
  templates = {
164
  'email': PromptTemplate.from_template("""
165
+ Write ONLY a formal cold email for a research position.
166
+ Start with 'Dear Professor' and end with a signature.
167
+ Include only 2-3 most relevant highlights from the CV.
168
+ Do not include any other text or formatting.
169
+
170
+ Details to use:
171
  Professor: {professor_name}
172
  University: {university_name}
173
  Research Interests: {research_interests}
174
  Why This Lab: {reason}
 
175
  """),
176
+
177
  'cover_letter': PromptTemplate.from_template("""
178
+ Write ONLY a professional cover letter.
179
+ Start with a formal greeting and end with a signature.
180
+ Focus on relevant experience and skills.
181
+ Do not include any headers or additional formatting.
182
+
183
+ Details to use:
184
  Job Title: {job_title}
185
  Company: {company}
186
  Key Skills: {key_skills}
187
+ Relevant Experience: {resume_text}
188
  """),
189
+
190
  'research_statement': PromptTemplate.from_template("""
191
+ Write ONLY a research statement.
192
+ Focus on research background, achievements, and future goals.
193
+ Maintain a professional and academic tone.
194
+ Do not include any headers or section titles.
195
+
196
+ Content to include:
197
+ Background: {research_background}
198
+ Key Projects: {key_projects}
199
  Future Goals: {future_goals}
200
  """),
201
+
202
  'sop': PromptTemplate.from_template("""
203
+ Write ONLY a Statement of Purpose.
204
+ Focus on academic journey, research interests, and career goals.
205
+ Maintain a personal yet professional tone.
206
+ Do not include any headers or section titles.
207
+
208
+ Details to use:
209
  Motivation: {motivation}
210
  Academic Background: {academic_background}
211
+ Research Experience: {research_experiences}
212
  Career Goals: {career_goals}
213
+ Program Fit: {why_this_program}
214
  """)
215
  }
216
 
217
  # Create LangChain instances
218
  chains = {key: LLMChain(llm=llm, prompt=template) for key, template in templates.items()}
219
 
220
+ # Sidebar for Input Collection
221
+ with st.sidebar:
222
+ st.subheader("📝 Input Details")
223
+ job_opening_text = st.text_area("Job/Research Opening Details", height=150)
224
+ cv_resume_file = st.file_uploader("Upload CV/Resume", type=["pdf", "png", "jpg", "jpeg"])
225
+ cv_resume_text = extract_text(cv_resume_file)
226
+
227
  # Tab Layout
228
  tab1, tab2, tab3, tab4 = st.tabs(["Cold Email", "Cover Letter", "Research Statement", "SOP"])
229
 
 
237
  if job_opening_text and cv_resume_text:
238
  with st.spinner("Generating..."):
239
  try:
240
+ generated_email = chains['email'].run({
241
  "professor_name": professor_name,
242
  "university_name": university_name,
243
  "research_interests": research_interests,
244
  "reason": reason,
245
  "resume_text": cv_resume_text
246
  })
247
+ st.session_state.generated_content['email'] = clean_email_output(generated_email)
248
  except Exception as e:
249
  st.error(f"Generation error: {e}")
250
  else:
 
253
  if st.session_state.generated_content['email']:
254
  st.markdown('<div class="output-container">', unsafe_allow_html=True)
255
  st.markdown(st.session_state.generated_content['email'])
256
+ st.download_button("Download Email", st.session_state.generated_content['email'],
257
  file_name="cold_email.txt", key="email_download")
258
  st.markdown('</div>', unsafe_allow_html=True)
259
 
 
267
  if job_opening_text and cv_resume_text:
268
  with st.spinner("Generating..."):
269
  try:
270
+ generated_letter = chains['cover_letter'].run({
271
  "job_title": job_title,
272
  "company": company_name,
273
  "key_skills": key_skills,
274
  "resume_text": cv_resume_text
275
  })
276
+ st.session_state.generated_content['cover_letter'] = clean_cover_letter_output(generated_letter)
277
  except Exception as e:
278
  st.error(f"Generation error: {e}")
279
  else:
 
295
  if st.button("Generate Research Statement", key="research_stmt_btn"):
296
  with st.spinner("Generating..."):
297
  try:
298
+ generated_statement = chains['research_statement'].run({
299
  "research_background": research_background,
300
  "key_projects": key_projects,
301
  "future_goals": future_goals
302
  })
303
+ st.session_state.generated_content['research_statement'] = clean_research_statement_output(generated_statement)
304
  except Exception as e:
305
  st.error(f"Generation error: {e}")
306
 
 
322
  if st.button("Generate SOP", key="sop_btn"):
323
  with st.spinner("Generating..."):
324
  try:
325
+ generated_sop = chains['sop'].run({
326
  "motivation": motivation,
327
  "academic_background": academic_background,
328
  "research_experiences": research_experiences,
329
  "career_goals": career_goals,
330
  "why_this_program": why_this_program
331
  })
332
+ st.session_state.generated_content['sop'] = clean_sop_output(generated_sop)
333
  except Exception as e:
334
  st.error(f"Generation error: {e}")
335