Krish30 commited on
Commit
bad80ad
·
verified ·
1 Parent(s): 019ddba

Upload 3 files

Browse files
Files changed (3) hide show
  1. cv_app.py +220 -0
  2. requirements.txt +14 -0
  3. vectorize_documents.py +45 -0
cv_app.py ADDED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from datetime import datetime
3
+ import streamlit as st
4
+ import google.generativeai as genai
5
+ import PyPDF2 as pdf
6
+ from fpdf import FPDF
7
+ from dotenv import load_dotenv
8
+ from langchain_huggingface import HuggingFaceEmbeddings
9
+ from langchain_chroma import Chroma
10
+
11
# Load environment variables from a local .env file (expects GOOGLE_API_KEY).
load_dotenv()

# Configure the Generative AI client.
# SECURITY FIX: the API key was hard-coded in source (a leaked credential).
# Read it from the environment instead — and revoke the previously committed key.
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
16
+
17
# Build (and cache) the vectorstore backing the RAG retrieval step.
@st.cache_resource
def setup_vectorstore():
    """Return a Chroma vectorstore loaded from the local 'cv_vectordb' directory.

    Wrapped in st.cache_resource so the embeddings model and the DB handle
    are created once per session instead of on every Streamlit rerun.
    """
    embedding_fn = HuggingFaceEmbeddings()
    return Chroma(persist_directory="cv_vectordb", embedding_function=embedding_fn)
23
+
24
# Convert PDF to text
def input_pdf_text(uploaded_file):
    """Extract and concatenate the text of every page in an uploaded PDF.

    Args:
        uploaded_file: A file-like object accepted by PyPDF2.PdfReader.

    Returns:
        str: All page text concatenated in page order.

    Fixes vs. original: the loop index was shadowed by the page object, and
    str(page.extract_text()) appended the literal string "None" whenever a
    page (e.g. a scanned image) had no extractable text.
    """
    reader = pdf.PdfReader(uploaded_file)
    # `or ""` guards against extract_text() returning None for image-only pages.
    return "".join((page.extract_text() or "") for page in reader.pages)
32
+
33
# Retrieve relevant content from vectorstore
def retrieve_from_vectorstore(vectorstore, query):
    """Run a retrieval query and join the matched chunks into one string.

    Args:
        vectorstore: A LangChain-style vectorstore exposing as_retriever().
        query: Natural-language query text.

    Returns:
        str: The page_content of each retrieved document, newline-separated.
    """
    documents = vectorstore.as_retriever().invoke(query)
    contents = [document.page_content for document in documents]
    return "\n".join(contents)
38
+
39
# Get response from Generative AI
def get_gemini_response(prompt):
    """Send a prompt to the Gemini model and return the generated text.

    Args:
        prompt: The full prompt string.

    Returns:
        str | None: The first candidate's text, or None when no usable
        candidate is present (e.g. the response was safety-blocked).
    """
    model = genai.GenerativeModel('gemini-pro')
    response = model.generate_content(prompt)
    # FIX: `if response else None` did not guard the indexing — a blocked or
    # empty response has no candidates/parts and raised IndexError.
    try:
        return response.candidates[0].content.parts[0].text
    except (AttributeError, IndexError):
        return None
44
+
45
def generate_pdf_report(candidate_name, report_content):
    """Render a candidate's fitment-report text into a PDF file on disk.

    Recognizes numbered section headers, "- " bullet items, and 4-column
    markdown-style table rows, formatting each accordingly; everything else
    is written as plain body text.

    Args:
        candidate_name: Used in the report title and the output file name.
        report_content: Raw model-generated report text.

    Returns:
        str: Name of the PDF file written to the working directory.
    """
    doc = FPDF()
    doc.add_page()
    doc.set_font("Arial", size=12)
    doc.cell(0, 8, txt=f"Candidate Report: {candidate_name}", ln=True, align="L")
    doc.ln(5)  # slight spacing after the title

    # Section titles the report is expected to contain, keyed by number.
    numbered_sections = {
        1: "Candidate Name and Email",
        2: '"Can Do" list:',
        3: '"Should Do" list',
        4: "Skill Comparison Table:",
        5: "Overall Matching Score:",
        6: "Analysis of Strengths and Weaknesses",
        7: "Recommendations for Improvement",
        8: "Conclusion on Fitment",
    }
    # Reverse lookup (title -> number) replaces the original nested scan.
    section_numbers = {title: num for num, title in numbered_sections.items()}

    current_section = None
    bullet = "\u2022 "  # unicode bullet prefix for list items

    for raw_line in report_content.splitlines():
        text = raw_line.strip().replace("*", "")  # strip markdown emphasis

        if text in section_numbers:
            # Section header: print numbered, slightly larger font.
            current_section = section_numbers[text]
            doc.set_font("Arial", style="", size=11)
            doc.cell(0, 6, txt=f"{current_section}. {text}", ln=True, align="L")
            doc.ln(3)
        elif current_section and text.startswith("- "):
            # Bullet item under the current section, lightly indented.
            doc.set_font("Arial", size=10)
            doc.cell(5)
            doc.cell(0, 5, txt=f"{bullet}{text[2:]}", ln=True)
        elif "|" in text:
            # Markdown table row; only well-formed 4-column rows are drawn.
            cells = [cell.strip() for cell in text.split("|")[1:-1]]
            if len(cells) == 4:
                doc.set_font("Arial", size=9)
                doc.cell(50, 6, cells[0], border=1)
                doc.cell(35, 6, cells[1], border=1, align="C")
                doc.cell(35, 6, cells[2], border=1, align="C")
                doc.cell(35, 6, cells[3], border=1, align="C")
                doc.ln()
        else:
            # Anything else is plain body text.
            doc.set_font("Arial", size=10)
            doc.multi_cell(0, 5, text)

    # Timestamped file name avoids clobbering earlier reports for the same candidate.
    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    file_name = f"{candidate_name}_report_{timestamp}.pdf"
    doc.output(file_name)
    return file_name
106
+
107
+
108
+
109
# ---------------------------------------------------------------------------
# Streamlit UI: upload resumes + one job description, generate per-candidate
# fitment reports with Gemini, rank them by score, and offer PDF downloads.
# ---------------------------------------------------------------------------
st.title("AI-Powered Candidate Shortlisting")
st.markdown("Analyze resumes, job descriptions, and match with company culture using RAG.")

# Cached vectorstore holding the pre-ingested company documents
# (built separately by vectorize_documents.py into ./cv_vectordb).
vectorstore = setup_vectorstore()

# File upload widgets: multiple resumes, exactly one job description.
uploaded_resumes = st.file_uploader("Upload Resumes (PDFs)", type="pdf", accept_multiple_files=True)
uploaded_job_description = st.file_uploader("Upload Job Description (PDF)", type="pdf")

if st.button("Generate Fitment Reports"):
    if not uploaded_resumes or not uploaded_job_description:
        st.error("Please upload resumes and a job description.")
    else:
        with st.spinner("Processing..."):
            try:
                # Convert job description to text once; it is shared by all resumes.
                job_description_text = input_pdf_text(uploaded_job_description)

                # Retrieve culture-related chunks from the vectorstore with a
                # fixed query, reused for every candidate.
                company_culture_content = retrieve_from_vectorstore(vectorstore, "company culture match")

                # Process each resume, collecting (name, score, report file) tuples.
                fitment_results = []
                for resume_file in uploaded_resumes:
                    # Candidate name is taken from the uploaded file name (extension dropped).
                    candidate_name = os.path.splitext(resume_file.name)[0]

                    # Convert resume to text.
                    resume_text = input_pdf_text(resume_file)

                    # Construct the prompt. The "Output Format" section headers must
                    # match generate_pdf_report's numbered_sections for the PDF
                    # formatter to recognize them.
                    input_prompt = f"""
                    ### Task: Generate a candidate shortlisting report.

                    ### Instructions:
                    You are a highly intelligent and unbiased system designed to shortlist candidates for a job based on:
                    1. The candidate's resume.
                    2. A provided job description.
                    3. Relevant company culture data retrieved from the vector database.

                    ### Key Objectives:
                    - Analyze skills, qualifications, and experiences in the resume.
                    - Evaluate alignment with the job description.
                    - Assess cultural fit using company culture data.
                    - Provide detailed scoring, strengths, weaknesses, and recommendations.

                    ### Required Sections in the Report:
                    - Candidate Name and Email
                    - Parse the job description and create a 'Should Do' list, categorizing required skills into levels: Beginner, Competent, Intermediate, Expert.
                    - Parse the candidate's resume and create a 'Can Do' list, categorizing listed skills into the same levels: Beginner, Competent, Intermediate, Expert.
                    - Matching score: A detailed table showing alignment of skills.
                    - Analysis of strengths and weaknesses.
                    - Recommendations for improvement.
                    - Overall conclusion.

                    ### Input Data:
                    - **Resume**: {resume_text}
                    - **Job Description**: {job_description_text}
                    - **Company Culture Data**: {company_culture_content}

                    ### Output Format:
                    1. Candidate Name and Email
                    2."Can Do" list:
                    3. "Should Do" list
                    4. Skill Comparison Table:
                    | Skill | "Can Do" Level | "Should Do" Level | Matching Score |
                    |--------------------------|----------------|--------------------|----------------|
                    5. Overall Matching Score: [Percentage]
                    6. Analysis of Strengths and Weaknesses
                    7. Recommendations for Improvement
                    8. Conclusion on Fitment
                    """

                    # Generate the report text with Gemini.
                    report_content = get_gemini_response(input_prompt)

                    # NOTE(review): a None report (blocked/empty response) silently
                    # skips the candidate — they never appear in the results table.
                    if report_content:
                        # Extract the matching score safely: parse the number between
                        # "Overall Matching Score:" and the following "%".
                        try:
                            matching_score = float(report_content.split("Overall Matching Score:")[1].split("%")[0].strip())
                        except (IndexError, ValueError):
                            # Unparseable score -> rank last and flag it in the PDF.
                            matching_score = 0.0
                            report_content += "\n\n[ERROR: Matching Score could not be parsed]"

                        # Render the PDF report to the working directory.
                        report_file = generate_pdf_report(candidate_name, report_content)

                        # Save results for ranking below.
                        fitment_results.append((candidate_name, matching_score, report_file))

                # Sort results by matching score in descending order.
                fitment_results.sort(key=lambda x: x[1], reverse=True)

                # Display results in tabular form, one row of columns per candidate.
                st.write("### Fitment Results")
                st.write("Below are the shortlisted candidates ranked by their fitment scores.")
                for rank, (candidate_name, matching_score, report_file) in enumerate(fitment_results, start=1):
                    col1, col2, col3, col4 = st.columns([3, 2, 2, 2])
                    col1.write(candidate_name)
                    col2.write(f"{matching_score:.2f}%")
                    col3.write(f"Rank {rank}")
                    # NOTE(review): report files accumulate on disk across runs —
                    # consider cleaning them up or using a temp directory.
                    with open(report_file, "rb") as f:
                        col4.download_button(
                            label="Download Report",
                            data=f,
                            file_name=os.path.basename(report_file),
                            mime="application/pdf",
                        )
            except Exception as e:
                # Top-level UI boundary: surface any failure to the user.
                st.error(f"Error generating fitment reports: {e}")
requirements.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
streamlit==1.38.0
langchain-community==0.2.16
langchain-text-splitters==0.2.4
langchain-chroma==0.1.3
langchain-huggingface==0.0.3
langchain-groq==0.1.9
unstructured==0.15.0
unstructured[pdf]==0.15.0
nltk==3.8.1
psycopg2-binary
pgvector
langchain_postgres
docx2txt
PyPDF2
# Imported by cv_app.py but previously missing from this file:
google-generativeai
fpdf
python-dotenv
vectorize_documents.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_community.document_loaders import UnstructuredFileLoader
2
+ from langchain_community.document_loaders import DirectoryLoader
3
+ from langchain_text_splitters import CharacterTextSplitter
4
+ from langchain_huggingface import HuggingFaceEmbeddings
5
+ from langchain_chroma import Chroma
6
+
7
+
8
# Ingest the PDF corpus under ./cv_data into a persistent Chroma store.
def vectorize_documents():
    """Load PDFs from 'cv_data', chunk them, embed them, and persist to 'cv_vectordb'."""
    embedding_model = HuggingFaceEmbeddings()

    # Load every PDF in the data directory via the Unstructured loader.
    pdf_loader = DirectoryLoader(
        path="cv_data",
        glob="./*.pdf",
        loader_cls=UnstructuredFileLoader,
    )
    docs = pdf_loader.load()

    # Chunk the documents (2000 chars, 500 overlap) for retrieval granularity.
    splitter = CharacterTextSplitter(
        chunk_size=2000,
        chunk_overlap=500,
    )
    chunks = splitter.split_documents(docs)

    # Embed the chunks and persist them in the local Chroma DB.
    Chroma.from_documents(
        documents=chunks,
        embedding=embedding_model,
        persist_directory="cv_vectordb",
    )

    print("Documents Vectorized and saved in VectorDB")
36
+
37
+
38
# Expose embeddings if needed
# NOTE(review): this instantiates a second HuggingFaceEmbeddings at import
# time — a heavyweight side effect (model download/load). Confirm any importer
# actually uses this module-level attribute before keeping it.
embeddings = HuggingFaceEmbeddings()


# Main guard to prevent execution on import
if __name__ == "__main__":
    vectorize_documents()