Krish30 committed on
Commit
2d23c1c
·
verified ·
1 Parent(s): 004ff02

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +220 -220
app.py CHANGED
@@ -1,220 +1,220 @@
1
- import os
2
- from datetime import datetime
3
- import streamlit as st
4
- import google.generativeai as genai
5
- import PyPDF2 as pdf
6
- from fpdf import FPDF
7
- from dotenv import load_dotenv
8
- from langchain_huggingface import HuggingFaceEmbeddings
9
- from langchain_chroma import Chroma
10
-
11
- # Load environment variables
12
- load_dotenv()
13
-
14
- # Configure Generative AI API
15
- genai.configure(api_key=("AIzaSyDv1VwMMrrUHCnWCU16PkN8idcDpXVFqyY"))
16
-
17
- # Initialize vectorstore
18
- @st.cache_resource
19
- def setup_vectorstore():
20
- embeddings = HuggingFaceEmbeddings()
21
- vectorstore = Chroma(persist_directory="cv_vectordb", embedding_function=embeddings)
22
- return vectorstore
23
-
24
- # Convert PDF to text
25
- def input_pdf_text(uploaded_file):
26
- reader = pdf.PdfReader(uploaded_file)
27
- text = ""
28
- for page in range(len(reader.pages)):
29
- page = reader.pages[page]
30
- text += str(page.extract_text())
31
- return text
32
-
33
- # Retrieve relevant content from vectorstore
34
- def retrieve_from_vectorstore(vectorstore, query):
35
- retriever = vectorstore.as_retriever()
36
- results = retriever.invoke(query)
37
- return "\n".join([doc.page_content for doc in results])
38
-
39
- # Get response from Generative AI
40
- def get_gemini_response(prompt):
41
- model = genai.GenerativeModel('gemini-pro')
42
- response = model.generate_content(prompt)
43
- return response.candidates[0].content.parts[0].text if response else None
44
-
45
- def generate_pdf_report(candidate_name, report_content):
46
- pdf = FPDF()
47
- pdf.add_page()
48
- pdf.set_font("Arial", size=12)
49
- pdf.cell(0, 8, txt=f"Candidate Report: {candidate_name}", ln=True, align="L")
50
- pdf.ln(5) # Add slight spacing after the title
51
-
52
- # Define numbered sections
53
- numbered_sections = {
54
- 1: "Candidate Name and Email",
55
- 2: '"Can Do" list:',
56
- 3: '"Should Do" list',
57
- 4: "Skill Comparison Table:",
58
- 5: "Overall Matching Score:",
59
- 6: "Analysis of Strengths and Weaknesses",
60
- 7: "Recommendations for Improvement",
61
- 8: "Conclusion on Fitment",
62
- }
63
-
64
- # Parse report content
65
- lines = report_content.splitlines()
66
- current_section = None
67
- bullet_point = "\u2022 " # Unicode for a bullet point
68
-
69
- for line in lines:
70
- stripped_line = line.strip().replace("*", "") # Remove all asterisks
71
-
72
- # Check if line matches a section header
73
- if stripped_line in numbered_sections.values():
74
- for number, section in numbered_sections.items():
75
- if stripped_line == section:
76
- current_section = number
77
- pdf.set_font("Arial", style="", size=11)
78
- pdf.cell(0, 6, txt=f"{number}. {section}", ln=True, align="L")
79
- pdf.ln(3) # Reduced spacing after each section header
80
- break
81
- # Check for sub-content that starts with "-"
82
- elif current_section and stripped_line.startswith("- "):
83
- pdf.set_font("Arial", size=10)
84
- pdf.cell(5) # Add slight indentation
85
- pdf.cell(0, 5, txt=f"{bullet_point}{stripped_line[2:]}", ln=True)
86
- # Handle table rows
87
- elif "|" in stripped_line:
88
- cells = [cell.strip() for cell in stripped_line.split("|")[1:-1]]
89
- if len(cells) == 4:
90
- pdf.set_font("Arial", size=9)
91
- pdf.cell(50, 6, cells[0], border=1)
92
- pdf.cell(35, 6, cells[1], border=1, align="C")
93
- pdf.cell(35, 6, cells[2], border=1, align="C")
94
- pdf.cell(35, 6, cells[3], border=1, align="C")
95
- pdf.ln()
96
- # Add regular content as plain text
97
- else:
98
- pdf.set_font("Arial", size=10)
99
- pdf.multi_cell(0, 5, stripped_line)
100
-
101
- # Save the report as a PDF file
102
- timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
103
- file_name = f"{candidate_name}_report_{timestamp}.pdf"
104
- pdf.output(file_name)
105
- return file_name
106
-
107
-
108
-
109
- # Streamlit UI
110
- st.title("AI-Powered Candidate Shortlisting")
111
- st.markdown("Analyze resumes, job descriptions, and match with company culture using RAG.")
112
-
113
- # Setup vectorstore
114
- vectorstore = setup_vectorstore()
115
-
116
- # File upload
117
- uploaded_resumes = st.file_uploader("Upload Resumes (PDFs)", type="pdf", accept_multiple_files=True)
118
- uploaded_job_description = st.file_uploader("Upload Job Description (PDF)", type="pdf")
119
-
120
- if st.button("Generate Fitment Reports"):
121
- if not uploaded_resumes or not uploaded_job_description:
122
- st.error("Please upload resumes and a job description.")
123
- else:
124
- with st.spinner("Processing..."):
125
- try:
126
- # Convert job description to text
127
- job_description_text = input_pdf_text(uploaded_job_description)
128
-
129
- # Retrieve relevant content from vectorstore
130
- company_culture_content = retrieve_from_vectorstore(vectorstore, "company culture match")
131
-
132
- # Process each resume
133
- fitment_results = []
134
- for resume_file in uploaded_resumes:
135
- # Extract candidate name
136
- candidate_name = os.path.splitext(resume_file.name)[0]
137
-
138
- # Convert resume to text
139
- resume_text = input_pdf_text(resume_file)
140
-
141
- # Construct the prompt
142
- input_prompt = f"""
143
- ### Task: Generate a candidate shortlisting report.
144
-
145
- ### Instructions:
146
- You are a highly intelligent and unbiased system designed to shortlist candidates for a job based on:
147
- 1. The candidate's resume.
148
- 2. A provided job description.
149
- 3. Relevant company culture data retrieved from the vector database.
150
-
151
- ### Key Objectives:
152
- - Analyze skills, qualifications, and experiences in the resume.
153
- - Evaluate alignment with the job description.
154
- - Assess cultural fit using company culture data.
155
- - Provide detailed scoring, strengths, weaknesses, and recommendations.
156
-
157
- ### Required Sections in the Report:
158
- - Candidate Name and Email
159
- - Parse the job description and create a 'Should Do' list, categorizing required skills into levels: Beginner, Competent, Intermediate, Expert.
160
- - Parse the candidate's resume and create a 'Can Do' list, categorizing listed skills into the same levels: Beginner, Competent, Intermediate, Expert.
161
- - Matching score: A detailed table showing alignment of skills.
162
- - Analysis of strengths and weaknesses.
163
- - Recommendations for improvement.
164
- - Overall conclusion.
165
-
166
- ### Input Data:
167
- - **Resume**: {resume_text}
168
- - **Job Description**: {job_description_text}
169
- - **Company Culture Data**: {company_culture_content}
170
-
171
- ### Output Format:
172
- 1. Candidate Name and Email
173
- 2."Can Do" list:
174
- 3. "Should Do" list
175
- 4. Skill Comparison Table:
176
- | Skill | "Can Do" Level | "Should Do" Level | Matching Score |
177
- |--------------------------|----------------|--------------------|----------------|
178
- 5. Overall Matching Score: [Percentage]
179
- 6. Analysis of Strengths and Weaknesses
180
- 7. Recommendations for Improvement
181
- 8. Conclusion on Fitment
182
- """
183
-
184
- # Generate the report
185
- report_content = get_gemini_response(input_prompt)
186
-
187
- if report_content:
188
- # Extract the matching score safely
189
- try:
190
- matching_score = float(report_content.split("Overall Matching Score:")[1].split("%")[0].strip())
191
- except (IndexError, ValueError):
192
- matching_score = 0.0
193
- report_content += "\n\n[ERROR: Matching Score could not be parsed]"
194
-
195
- # Generate PDF report
196
- report_file = generate_pdf_report(candidate_name, report_content)
197
-
198
- # Save results
199
- fitment_results.append((candidate_name, matching_score, report_file))
200
-
201
- # Sort results by matching score in descending order
202
- fitment_results.sort(key=lambda x: x[1], reverse=True)
203
-
204
- # Display results in tabular form
205
- st.write("### Fitment Results")
206
- st.write("Below are the shortlisted candidates ranked by their fitment scores.")
207
- for rank, (candidate_name, matching_score, report_file) in enumerate(fitment_results, start=1):
208
- col1, col2, col3, col4 = st.columns([3, 2, 2, 2])
209
- col1.write(candidate_name)
210
- col2.write(f"{matching_score:.2f}%")
211
- col3.write(f"Rank {rank}")
212
- with open(report_file, "rb") as f:
213
- col4.download_button(
214
- label="Download Report",
215
- data=f,
216
- file_name=os.path.basename(report_file),
217
- mime="application/pdf",
218
- )
219
- except Exception as e:
220
- st.error(f"Error generating fitment reports: {e}")
 
1
+ import os
2
+ from datetime import datetime
3
+ import streamlit as st
4
+ import google.generativeai as genai
5
+ import PyPDF2 as pdf
6
+ from fpdf import FPDF
7
+ from dotenv import load_dotenv
8
+ from langchain_huggingface import HuggingFaceEmbeddings
9
+ from langchain_chroma import Chroma
10
+
11
# Load environment variables (python-dotenv reads a local .env file if one exists)
load_dotenv()

# Configure Generative AI API
# The key is taken from the GOOGLE_API_KEY environment variable instead of being
# embedded in the source: a key committed to a repository is readable by anyone
# with access to it and must be treated as compromised (rotate any key that was
# previously hard-coded here).
_api_key = os.getenv("GOOGLE_API_KEY")
if not _api_key:
    # Fail at start-up with an actionable message rather than on the first model call.
    raise RuntimeError(
        "GOOGLE_API_KEY is not set. Define it in the environment or in a .env file."
    )
genai.configure(api_key=_api_key)
16
+
17
# Initialize vectorstore
@st.cache_resource
def setup_vectorstore():
    """Open the Chroma vector store persisted in the ``cv_vectordb`` directory.

    The function is wrapped in ``st.cache_resource`` so Streamlit can reuse the
    returned object instead of rebuilding it.

    Returns:
        A ``Chroma`` instance that uses a default-constructed
        ``HuggingFaceEmbeddings`` object as its embedding function.
    """
    embedding_model = HuggingFaceEmbeddings()
    return Chroma(
        persist_directory="cv_vectordb",
        embedding_function=embedding_model,
    )
23
+
24
# Convert PDF to text
def input_pdf_text(uploaded_file):
    """Extract and concatenate the text of every page of a PDF.

    Args:
        uploaded_file: Whatever ``PdfReader`` accepts as its source; the callers
            in this file pass Streamlit upload objects.

    Returns:
        str: The text of all pages joined in page order with no separator.
        A page that yields no text contributes an empty string.
    """
    reader = pdf.PdfReader(uploaded_file)
    # `or ""` guards against a falsy/None extraction result; the previous
    # str(...) wrapper would have inserted the literal text "None" in that case.
    return "".join(page.extract_text() or "" for page in reader.pages)
32
+
33
# Retrieve relevant content from vectorstore
def retrieve_from_vectorstore(vectorstore, query):
    """Look up ``query`` in the vector store and return the matching text.

    Args:
        vectorstore: Object exposing ``as_retriever()``; the retriever it
            returns must provide ``invoke(query)``.
        query: The search string handed to the retriever.

    Returns:
        str: The ``page_content`` of every returned document, one per line
        (joined with a newline character).
    """
    documents = vectorstore.as_retriever().invoke(query)
    page_texts = (document.page_content for document in documents)
    return "\n".join(page_texts)
38
+
39
# Get response from Generative AI
def get_gemini_response(prompt):
    """Send ``prompt`` to the ``gemini-pro`` model and return the generated text.

    Args:
        prompt: The full prompt string passed to ``generate_content``.

    Returns:
        The text of the first part of the first candidate, or ``None`` when the
        response carries no candidate or no content part.
    """
    model = genai.GenerativeModel('gemini-pro')
    response = model.generate_content(prompt)

    # The previous `... if response else None` guard only ran after the
    # candidates had already been indexed, so an empty candidate list raised
    # IndexError instead of yielding None. Check each level explicitly.
    # NOTE(review): an empty candidate list is presumably what a blocked or
    # filtered prompt produces - confirm against the API documentation.
    candidates = getattr(response, "candidates", None)
    if not candidates:
        return None

    content = getattr(candidates[0], "content", None)
    parts = getattr(content, "parts", None)
    if not parts:
        return None

    return parts[0].text
44
+
45
def generate_pdf_report(candidate_name, report_content):
    """Render a candidate report as a PDF file and return the file name.

    The report text is processed line by line (asterisks are removed first):

    * a line naming one of the eight known sections, with or without a leading
      "N." number and optionally followed by ":" and more text, becomes a
      numbered heading;
    * a "- " or "* " line that appears after the first heading becomes an
      indented bullet;
    * a pipe-delimited row with exactly four cells becomes a bordered table
      row (markdown separator rows such as ``|---|---|`` are skipped);
    * everything else is written as wrapped plain text.

    Args:
        candidate_name: Name used in the title line and in the output file name.
        report_content: The report text to lay out.

    Returns:
        str: ``"<candidate_name>_report_<YYYY-MM-DD_HH-MM-SS>.pdf"``, the name
        passed to ``FPDF.output`` (a path relative to the working directory).
    """

    def to_latin1(text):
        # The core "Arial" font is a single-byte font: characters outside
        # Latin-1 (smart quotes, emoji, the U+2022 bullet, ...) cannot be
        # encoded and would abort PDF generation. Replace them with "?".
        return text.encode("latin-1", "replace").decode("latin-1")

    # Define numbered sections
    numbered_sections = {
        1: "Candidate Name and Email",
        2: '"Can Do" list:',
        3: '"Should Do" list',
        4: "Skill Comparison Table:",
        5: "Overall Matching Score:",
        6: "Analysis of Strengths and Weaknesses",
        7: "Recommendations for Improvement",
        8: "Conclusion on Fitment",
    }

    def match_section(text):
        """Return (section_number, heading_text), or (None, text) if no section matches."""
        # The prompt asks for headings like "5. Overall Matching Score: 80%",
        # so drop a leading "N." before comparing against the known titles.
        head, dot, rest = text.partition(".")
        if dot and head.strip().isdigit():
            text = rest.strip()
        for number, title in numbered_sections.items():
            base = title.rstrip(":")
            if text == base or text.startswith(base + ":"):
                return number, text
        return None, text

    # Named `document` rather than `pdf` so the module-level PyPDF2 alias is not shadowed.
    document = FPDF()
    document.add_page()
    document.set_font("Arial", size=12)
    document.cell(0, 8, txt=to_latin1(f"Candidate Report: {candidate_name}"), ln=True, align="L")
    document.ln(5)  # Add slight spacing after the title

    # Byte 0x95 is the bullet in Windows-1252 and, unlike U+2022, is encodable
    # as Latin-1.
    # NOTE(review): relies on the core fonts being drawn with WinAnsi
    # encoding - confirm the glyph with the installed fpdf version.
    bullet_point = "\x95 "
    separator_chars = set("|-: ")

    current_section = None
    for line in report_content.splitlines():
        raw_line = line.strip()
        stripped_line = to_latin1(raw_line.replace("*", ""))  # Remove all asterisks

        section_number, heading = match_section(stripped_line)

        # Section heading
        if section_number is not None:
            current_section = section_number
            document.set_font("Arial", style="", size=11)
            document.cell(0, 6, txt=f"{section_number}. {heading}", ln=True, align="L")
            document.ln(3)  # Reduced spacing after each section header
        # Bullet items; "* " is detected on the raw line, before asterisks are removed
        elif current_section and raw_line.startswith(("- ", "* ")):
            bullet_text = to_latin1(raw_line[2:].replace("*", "").strip())
            document.set_font("Arial", size=10)
            document.cell(5)  # Add slight indentation
            document.cell(0, 5, txt=f"{bullet_point}{bullet_text}", ln=True)
        # Table rows
        elif "|" in stripped_line:
            if "-" in stripped_line and set(stripped_line) <= separator_chars:
                continue  # markdown header separator row, carries no content
            cells = [cell.strip() for cell in stripped_line.split("|")[1:-1]]
            if len(cells) == 4:
                document.set_font("Arial", size=9)
                document.cell(50, 6, cells[0], border=1)
                document.cell(35, 6, cells[1], border=1, align="C")
                document.cell(35, 6, cells[2], border=1, align="C")
                document.cell(35, 6, cells[3], border=1, align="C")
                document.ln()
            else:
                # Not the expected four-column row: keep the text instead of dropping it.
                document.set_font("Arial", size=10)
                document.multi_cell(0, 5, stripped_line)
        # Regular content as plain text
        else:
            document.set_font("Arial", size=10)
            document.multi_cell(0, 5, stripped_line)

    # Save the report as a PDF file
    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    file_name = f"{candidate_name}_report_{timestamp}.pdf"
    document.output(file_name)
    return file_name
106
+
107
+
108
+
109
# Streamlit UI
st.title("AI-Powered Candidate Shortlisting")
st.markdown("Analyze resumes, job descriptions, and match with company culture using RAG.")

# Setup vectorstore
vectorstore = setup_vectorstore()

# File upload
uploaded_resumes = st.file_uploader("Upload Resumes (PDFs)", type="pdf", accept_multiple_files=True)
uploaded_job_description = st.file_uploader("Upload Job Description (PDF)", type="pdf")


def _build_prompt(resume_text, job_description_text, company_culture_content):
    """Return the shortlisting prompt with the three input texts filled in."""
    return f"""
### Task: Generate a candidate shortlisting report.

### Instructions:
You are a highly intelligent and unbiased system designed to shortlist candidates for a job based on:
1. The candidate's resume.
2. A provided job description.
3. Relevant company culture data retrieved from the vector database.

### Key Objectives:
- Analyze skills, qualifications, and experiences in the resume.
- Evaluate alignment with the job description.
- Assess cultural fit using company culture data.
- Provide detailed scoring, strengths, weaknesses, and recommendations.

### Required Sections in the Report:
- Candidate Name and Email
- Parse the job description and create a 'Should Do' list, categorizing required skills into levels: Beginner, Competent, Intermediate, Expert.
- Parse the candidate's resume and create a 'Can Do' list, categorizing listed skills into the same levels: Beginner, Competent, Intermediate, Expert.
- Matching score: A detailed table showing alignment of skills.
- Analysis of strengths and weaknesses.
- Recommendations for improvement.
- Overall conclusion.

### Input Data:
- **Resume**: {resume_text}
- **Job Description**: {job_description_text}
- **Company Culture Data**: {company_culture_content}

### Output Format:
1. Candidate Name and Email
2."Can Do" list:
3. "Should Do" list
4. Skill Comparison Table:
| Skill | "Can Do" Level | "Should Do" Level | Matching Score |
|--------------------------|----------------|--------------------|----------------|
5. Overall Matching Score: [Percentage]
6. Analysis of Strengths and Weaknesses
7. Recommendations for Improvement
8. Conclusion on Fitment
"""


def _parse_matching_score(report_text):
    """Return the number following "Overall Matching Score:", or None if absent.

    Non-digit characters between the label and the number on the same line are
    skipped, so forms such as "85%", "**85%**", "[85%]" and "85" all parse.
    """
    import re  # local import: this helper is the only user of `re`

    found = re.search(r"Overall Matching Score:[^\n\d]*(\d+(?:\.\d+)?)", report_text)
    return float(found.group(1)) if found else None


if st.button("Generate Fitment Reports"):
    if not uploaded_resumes or not uploaded_job_description:
        st.error("Please upload resumes and a job description.")
    else:
        with st.spinner("Processing..."):
            try:
                # Convert job description to text
                job_description_text = input_pdf_text(uploaded_job_description)

                # Retrieve relevant content from vectorstore
                company_culture_content = retrieve_from_vectorstore(vectorstore, "company culture match")

                # Process each resume
                fitment_results = []
                for resume_file in uploaded_resumes:
                    # The candidate name is the uploaded file name without its extension
                    candidate_name = os.path.splitext(resume_file.name)[0]

                    # Convert resume to text
                    resume_text = input_pdf_text(resume_file)

                    # Generate the report
                    report_content = get_gemini_response(
                        _build_prompt(resume_text, job_description_text, company_culture_content)
                    )
                    if not report_content:
                        # Tell the user instead of silently omitting the candidate.
                        st.warning(f"No report was generated for {candidate_name}; skipping.")
                        continue

                    # Extract the matching score; fall back to 0 and flag it in the report
                    matching_score = _parse_matching_score(report_content)
                    if matching_score is None:
                        matching_score = 0.0
                        report_content += "\n\n[ERROR: Matching Score could not be parsed]"

                    # Generate PDF report and keep its bytes so the download
                    # does not depend on the file still being on disk later.
                    report_file = generate_pdf_report(candidate_name, report_content)
                    with open(report_file, "rb") as f:
                        report_bytes = f.read()

                    # Save results
                    fitment_results.append((candidate_name, matching_score, report_file, report_bytes))

                # Sort results by matching score in descending order
                fitment_results.sort(key=lambda x: x[1], reverse=True)

                # Persist across reruns: clicking a download button reruns the
                # script with st.button(...) False, which would otherwise hide
                # the results after the first download.
                st.session_state["fitment_results"] = fitment_results
            except Exception as e:
                st.error(f"Error generating fitment reports: {e}")

# Display results in tabular form (outside the button branch so they survive reruns)
if "fitment_results" in st.session_state:
    st.write("### Fitment Results")
    st.write("Below are the shortlisted candidates ranked by their fitment scores.")
    for rank, (candidate_name, matching_score, report_file, report_bytes) in enumerate(
        st.session_state["fitment_results"], start=1
    ):
        col1, col2, col3, col4 = st.columns([3, 2, 2, 2])
        col1.write(candidate_name)
        col2.write(f"{matching_score:.2f}%")
        col3.write(f"Rank {rank}")
        col4.download_button(
            label="Download Report",
            data=report_bytes,
            file_name=os.path.basename(report_file),
            mime="application/pdf",
            key=f"download_report_{rank}",  # unique key per row avoids duplicate-widget errors
        )