Upload 3 files
Browse files- cv_app.py +220 -0
- requirements.txt +14 -0
- vectorize_documents.py +45 -0
cv_app.py
ADDED
@@ -0,0 +1,220 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from datetime import datetime
|
3 |
+
import streamlit as st
|
4 |
+
import google.generativeai as genai
|
5 |
+
import PyPDF2 as pdf
|
6 |
+
from fpdf import FPDF
|
7 |
+
from dotenv import load_dotenv
|
8 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
9 |
+
from langchain_chroma import Chroma
|
10 |
+
|
11 |
+
# Load environment variables from a local .env file (e.g. GOOGLE_API_KEY).
load_dotenv()

# Configure the Generative AI client.
# SECURITY FIX: the API key was previously hard-coded as a string literal in
# source control (a leaked credential — it should be revoked). It is now read
# from the environment, which is also why load_dotenv() is called above.
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
|
16 |
+
|
17 |
+
# Initialize vectorstore
@st.cache_resource
def setup_vectorstore():
    """Open the persisted Chroma collection stored in ``cv_vectordb``.

    Decorated with ``st.cache_resource`` so the embedding model and the
    database handle are created once per Streamlit server process and
    shared across reruns.
    """
    return Chroma(
        persist_directory="cv_vectordb",
        embedding_function=HuggingFaceEmbeddings(),
    )
|
23 |
+
|
24 |
+
# Convert PDF to text
def input_pdf_text(uploaded_file):
    """Extract and concatenate the text of every page of a PDF.

    Args:
        uploaded_file: A binary file-like object containing a PDF
            (e.g. a Streamlit ``UploadedFile``).

    Returns:
        str: The concatenated text of all pages. Pages whose extraction
        yields None (e.g. image-only pages) contribute an empty string.
    """
    reader = pdf.PdfReader(uploaded_file)
    # BUG FIX: the original did str(page.extract_text()), which turns a None
    # result into the literal word "None" inside the text fed to the model.
    # It also shadowed the loop index with the page object and used the
    # non-idiomatic range(len(...)) iteration.
    return "".join(page.extract_text() or "" for page in reader.pages)
|
32 |
+
|
33 |
+
# Retrieve relevant content from vectorstore
def retrieve_from_vectorstore(vectorstore, query):
    """Similarity-search the store for *query* and join the hits.

    Args:
        vectorstore: A vector store exposing ``as_retriever()``.
        query: Free-text query used for retrieval.

    Returns:
        str: The ``page_content`` of every retrieved document, joined
        with newlines.
    """
    matching_docs = vectorstore.as_retriever().invoke(query)
    return "\n".join(doc.page_content for doc in matching_docs)
|
38 |
+
|
39 |
+
# Get response from Generative AI
def get_gemini_response(prompt):
    """Send *prompt* to Gemini and return the first candidate's text.

    Args:
        prompt: The full prompt string to send to the model.

    Returns:
        str | None: The generated text, or None when there is no response
        or the response carries no usable candidate (e.g. the prompt was
        blocked by safety filters).
    """
    model = genai.GenerativeModel('gemini-pro')
    response = model.generate_content(prompt)
    try:
        # BUG FIX: the original indexed candidates[0].content.parts[0]
        # unconditionally; the trailing "if response" guard does not protect
        # against an empty candidates list, which raises IndexError for
        # blocked/empty responses. Callers already handle a None return.
        return response.candidates[0].content.parts[0].text if response else None
    except (IndexError, AttributeError):
        return None
|
44 |
+
|
45 |
+
def generate_pdf_report(candidate_name, report_content):
    """Render the model's plain-text report into a PDF file on disk.

    The report text is parsed line by line: known section headers are
    numbered and emphasized, "- " lines become bullet points, 4-column
    "|"-delimited lines become table rows, and everything else is plain
    paragraph text.

    Args:
        candidate_name: Used in the title line and the output file name.
        report_content: The raw text returned by the LLM.

    Returns:
        str: The generated PDF's file name (written to the working dir).
    """
    # NOTE: this local `pdf` shadows the module-level PyPDF2 alias `pdf`
    # within this function only.
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    pdf.cell(0, 8, txt=f"Candidate Report: {candidate_name}", ln=True, align="L")
    pdf.ln(5) # Add slight spacing after the title

    # Define numbered sections — the exact header strings the LLM is asked
    # to emit (see the "Output Format" part of the prompt); matching is by
    # string equality after stripping asterisks.
    numbered_sections = {
        1: "Candidate Name and Email",
        2: '"Can Do" list:',
        3: '"Should Do" list',
        4: "Skill Comparison Table:",
        5: "Overall Matching Score:",
        6: "Analysis of Strengths and Weaknesses",
        7: "Recommendations for Improvement",
        8: "Conclusion on Fitment",
    }

    # Parse report content
    lines = report_content.splitlines()
    current_section = None
    # NOTE(review): U+2022 is outside latin-1; with FPDF's built-in Arial
    # core font this may raise or mis-render depending on the fpdf/fpdf2
    # version installed — confirm against the pinned dependency.
    bullet_point = "\u2022 "  # Unicode for a bullet point

    for line in lines:
        stripped_line = line.strip().replace("*", "")  # Remove all asterisks
                                                       # (strips Markdown bold markers)

        # Check if line matches a section header
        if stripped_line in numbered_sections.values():
            for number, section in numbered_sections.items():
                if stripped_line == section:
                    current_section = number
                    pdf.set_font("Arial", style="", size=11)
                    pdf.cell(0, 6, txt=f"{number}. {section}", ln=True, align="L")
                    pdf.ln(3)  # Reduced spacing after each section header
                    break
        # Check for sub-content that starts with "-"
        elif current_section and stripped_line.startswith("- "):
            pdf.set_font("Arial", size=10)
            pdf.cell(5)  # Add slight indentation
            pdf.cell(0, 5, txt=f"{bullet_point}{stripped_line[2:]}", ln=True)
        # Handle table rows — only well-formed 4-column rows are rendered;
        # header-separator rows like |----|----| fail the len(cells) == 4
        # check only if they don't split into exactly 4 cells.
        elif "|" in stripped_line:
            cells = [cell.strip() for cell in stripped_line.split("|")[1:-1]]
            if len(cells) == 4:
                pdf.set_font("Arial", size=9)
                pdf.cell(50, 6, cells[0], border=1)
                pdf.cell(35, 6, cells[1], border=1, align="C")
                pdf.cell(35, 6, cells[2], border=1, align="C")
                pdf.cell(35, 6, cells[3], border=1, align="C")
                pdf.ln()
        # Add regular content as plain text
        else:
            pdf.set_font("Arial", size=10)
            pdf.multi_cell(0, 5, stripped_line)

    # Save the report as a PDF file; the timestamp keeps repeated runs for
    # the same candidate from overwriting each other.
    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    file_name = f"{candidate_name}_report_{timestamp}.pdf"
    pdf.output(file_name)
    return file_name
|
106 |
+
|
107 |
+
|
108 |
+
|
109 |
+
# Streamlit UI
# Page header and a one-line description of the tool.
st.title("AI-Powered Candidate Shortlisting")
st.markdown("Analyze resumes, job descriptions, and match with company culture using RAG.")

# Setup vectorstore
# Opens the persisted "cv_vectordb" Chroma collection; cached, so this is
# cheap on Streamlit reruns. The name is read by the button handler below.
vectorstore = setup_vectorstore()

# File upload
# Multiple candidate resumes plus exactly one job description, all PDFs.
uploaded_resumes = st.file_uploader("Upload Resumes (PDFs)", type="pdf", accept_multiple_files=True)
uploaded_job_description = st.file_uploader("Upload Job Description (PDF)", type="pdf")
|
119 |
+
|
120 |
+
# Button handler: extract text from the uploads, build one prompt per resume,
# have Gemini produce a fitment report, render it to PDF, and list the
# candidates ranked by their parsed matching score.
if st.button("Generate Fitment Reports"):
    if not uploaded_resumes or not uploaded_job_description:
        st.error("Please upload resumes and a job description.")
    else:
        with st.spinner("Processing..."):
            try:
                # Convert job description to text
                job_description_text = input_pdf_text(uploaded_job_description)

                # Retrieve relevant content from vectorstore
                # (fixed retrieval query — the same culture context is
                # reused for every candidate)
                company_culture_content = retrieve_from_vectorstore(vectorstore, "company culture match")

                # Process each resume
                fitment_results = []
                for resume_file in uploaded_resumes:
                    # Extract candidate name from the file name (sans extension)
                    candidate_name = os.path.splitext(resume_file.name)[0]

                    # Convert resume to text
                    resume_text = input_pdf_text(resume_file)

                    # Construct the prompt. The "Output Format" section headers
                    # below must stay in sync with the numbered_sections dict
                    # in generate_pdf_report, which matches them literally.
                    input_prompt = f"""
### Task: Generate a candidate shortlisting report.

### Instructions:
You are a highly intelligent and unbiased system designed to shortlist candidates for a job based on:
1. The candidate's resume.
2. A provided job description.
3. Relevant company culture data retrieved from the vector database.

### Key Objectives:
- Analyze skills, qualifications, and experiences in the resume.
- Evaluate alignment with the job description.
- Assess cultural fit using company culture data.
- Provide detailed scoring, strengths, weaknesses, and recommendations.

### Required Sections in the Report:
- Candidate Name and Email
- Parse the job description and create a 'Should Do' list, categorizing required skills into levels: Beginner, Competent, Intermediate, Expert.
- Parse the candidate's resume and create a 'Can Do' list, categorizing listed skills into the same levels: Beginner, Competent, Intermediate, Expert.
- Matching score: A detailed table showing alignment of skills.
- Analysis of strengths and weaknesses.
- Recommendations for improvement.
- Overall conclusion.

### Input Data:
- **Resume**: {resume_text}
- **Job Description**: {job_description_text}
- **Company Culture Data**: {company_culture_content}

### Output Format:
1. Candidate Name and Email
2."Can Do" list:
3. "Should Do" list
4. Skill Comparison Table:
| Skill | "Can Do" Level | "Should Do" Level | Matching Score |
|--------------------------|----------------|--------------------|----------------|
5. Overall Matching Score: [Percentage]
6. Analysis of Strengths and Weaknesses
7. Recommendations for Improvement
8. Conclusion on Fitment
"""

                    # Generate the report
                    report_content = get_gemini_response(input_prompt)

                    # Candidates with no usable LLM response are silently
                    # skipped (they simply don't appear in the results).
                    if report_content:
                        # Extract the matching score safely — the score is
                        # parsed back out of the free-text report, so failures
                        # fall back to 0.0 and are flagged in the PDF.
                        try:
                            matching_score = float(report_content.split("Overall Matching Score:")[1].split("%")[0].strip())
                        except (IndexError, ValueError):
                            matching_score = 0.0
                            report_content += "\n\n[ERROR: Matching Score could not be parsed]"

                        # Generate PDF report
                        report_file = generate_pdf_report(candidate_name, report_content)

                        # Save results
                        fitment_results.append((candidate_name, matching_score, report_file))

                # Sort results by matching score in descending order
                fitment_results.sort(key=lambda x: x[1], reverse=True)

                # Display results in tabular form
                st.write("### Fitment Results")
                st.write("Below are the shortlisted candidates ranked by their fitment scores.")
                for rank, (candidate_name, matching_score, report_file) in enumerate(fitment_results, start=1):
                    col1, col2, col3, col4 = st.columns([3, 2, 2, 2])
                    col1.write(candidate_name)
                    col2.write(f"{matching_score:.2f}%")
                    col3.write(f"Rank {rank}")
                    # NOTE(review): multiple download_button widgets share the
                    # same label here — confirm Streamlit does not require an
                    # explicit unique `key` for this version.
                    with open(report_file, "rb") as f:
                        col4.download_button(
                            label="Download Report",
                            data=f,
                            file_name=os.path.basename(report_file),
                            mime="application/pdf",
                        )
            # Broad catch is acceptable at this top-level UI boundary: any
            # failure is surfaced to the user instead of crashing the app.
            except Exception as e:
                st.error(f"Error generating fitment reports: {e}")
|
requirements.txt
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit==1.38.0
|
2 |
+
langchain-community==0.2.16
|
3 |
+
langchain-text-splitters==0.2.4
|
4 |
+
langchain-chroma==0.1.3
|
5 |
+
langchain-huggingface==0.0.3
|
6 |
+
langchain-groq==0.1.9
|
7 |
+
unstructured==0.15.0
|
8 |
+
unstructured[pdf]==0.15.0
|
9 |
+
nltk==3.8.1
|
10 |
+
psycopg2-binary
|
11 |
+
pgvector
|
12 |
+
langchain_postgres
|
13 |
+
docx2txt
|
14 |
+
PyPDF2
google-generativeai
fpdf
python-dotenv
|
vectorize_documents.py
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain_community.document_loaders import UnstructuredFileLoader
|
2 |
+
from langchain_community.document_loaders import DirectoryLoader
|
3 |
+
from langchain_text_splitters import CharacterTextSplitter
|
4 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
5 |
+
from langchain_chroma import Chroma
|
6 |
+
|
7 |
+
|
8 |
+
# Define a function to perform vectorization
def vectorize_documents():
    """Load PDFs from ``cv_data``, chunk them, and persist embeddings.

    Reads every top-level ``*.pdf`` file in the ``cv_data`` directory,
    splits the extracted text into 2000-character chunks with a
    500-character overlap, and stores the embedded chunks in the
    ``cv_vectordb`` Chroma directory (the same directory cv_app.py opens).

    Returns:
        Chroma: The populated vector store. (The function previously
        returned None; returning the store is backward compatible and lets
        callers reuse it without reopening the directory.)
    """
    embeddings = HuggingFaceEmbeddings()

    loader = DirectoryLoader(
        path="cv_data",
        glob="./*.pdf",
        loader_cls=UnstructuredFileLoader,
    )
    documents = loader.load()

    # Splitting the text and creating chunks of these documents so each
    # embedding covers a manageable span of text.
    text_splitter = CharacterTextSplitter(
        chunk_size=2000,
        chunk_overlap=500,
    )
    text_chunks = text_splitter.split_documents(documents)

    # Store in Chroma vector DB (persisted on disk).
    vectordb = Chroma.from_documents(
        documents=text_chunks,
        embedding=embeddings,
        persist_directory="cv_vectordb",
    )

    print("Documents Vectorized and saved in VectorDB")
    return vectordb
|
36 |
+
|
37 |
+
|
38 |
+
|
39 |
+
# Expose embeddings if needed
# NOTE(review): instantiating HuggingFaceEmbeddings at module level loads the
# embedding model as an import-time side effect for every importer, and
# duplicates the instance created inside vectorize_documents(). Kept as-is in
# case other modules do `from vectorize_documents import embeddings`; consider
# making this lazy.
embeddings = HuggingFaceEmbeddings()


# Main guard to prevent execution on import
if __name__ == "__main__":
    vectorize_documents()
|