Spaces:

Krish30
/

final-yr-cv-shortlisting-rag-ai

Sleeping

App Files Files Community

Krish30 committed on Feb 27

Commit

2d23c1c

verified ·

1 Parent(s): 004ff02

Update app.py

Browse files

Files changed (1) hide show

app.py +220 -220

app.py CHANGED Viewed

@@ -1,220 +1,220 @@
-import os
-from datetime import datetime
-import streamlit as st
-import google.generativeai as genai
-import PyPDF2 as pdf
-from fpdf import FPDF
-from dotenv import load_dotenv
-from langchain_huggingface import HuggingFaceEmbeddings
-from langchain_chroma import Chroma
-# Load environment variables
-load_dotenv()
-# Configure Generative AI API
-genai.configure(api_key=("AIzaSyDv1VwMMrrUHCnWCU16PkN8idcDpXVFqyY"))
-# Initialize vectorstore
-@st.cache_resource
-def setup_vectorstore():
-    embeddings = HuggingFaceEmbeddings()
-    vectorstore = Chroma(persist_directory="cv_vectordb", embedding_function=embeddings)
-    return vectorstore
-# Convert PDF to text
-def input_pdf_text(uploaded_file):
-    reader = pdf.PdfReader(uploaded_file)
-    text = ""
-    for page in range(len(reader.pages)):
-        page = reader.pages[page]
-        text += str(page.extract_text())
-    return text
-# Retrieve relevant content from vectorstore
-def retrieve_from_vectorstore(vectorstore, query):
-    retriever = vectorstore.as_retriever()
-    results = retriever.invoke(query)
-    return "\n".join([doc.page_content for doc in results])
-# Get response from Generative AI
-def get_gemini_response(prompt):
-    model = genai.GenerativeModel('gemini-pro')
-    response = model.generate_content(prompt)
-    return response.candidates[0].content.parts[0].text if response else None
-def generate_pdf_report(candidate_name, report_content):
-    pdf = FPDF()
-    pdf.add_page()
-    pdf.set_font("Arial", size=12)
-    pdf.cell(0, 8, txt=f"Candidate Report: {candidate_name}", ln=True, align="L")
-    pdf.ln(5)  # Add slight spacing after the title
-    # Define numbered sections
-    numbered_sections = {
-        1: "Candidate Name and Email",
-        2: '"Can Do" list:',
-        3: '"Should Do" list',
-        4: "Skill Comparison Table:",
-        5: "Overall Matching Score:",
-        6: "Analysis of Strengths and Weaknesses",
-        7: "Recommendations for Improvement",
-        8: "Conclusion on Fitment",
-    }
-    # Parse report content
-    lines = report_content.splitlines()
-    current_section = None
-    bullet_point = "\u2022 "  # Unicode for a bullet point
-    for line in lines:
-        stripped_line = line.strip().replace("*", "")  # Remove all asterisks
-        # Check if line matches a section header
-        if stripped_line in numbered_sections.values():
-            for number, section in numbered_sections.items():
-                if stripped_line == section:
-                    current_section = number
-                    pdf.set_font("Arial", style="", size=11)
-                    pdf.cell(0, 6, txt=f"{number}. {section}", ln=True, align="L")
-                    pdf.ln(3)  # Reduced spacing after each section header
-                    break
-        # Check for sub-content that starts with "-"
-        elif current_section and stripped_line.startswith("- "):
-            pdf.set_font("Arial", size=10)
-            pdf.cell(5)  # Add slight indentation
-            pdf.cell(0, 5, txt=f"{bullet_point}{stripped_line[2:]}", ln=True)
-        # Handle table rows
-        elif "|" in stripped_line:
-            cells = [cell.strip() for cell in stripped_line.split("|")[1:-1]]
-            if len(cells) == 4:
-                pdf.set_font("Arial", size=9)
-                pdf.cell(50, 6, cells[0], border=1)
-                pdf.cell(35, 6, cells[1], border=1, align="C")
-                pdf.cell(35, 6, cells[2], border=1, align="C")
-                pdf.cell(35, 6, cells[3], border=1, align="C")
-                pdf.ln()
-        # Add regular content as plain text
-        else:
-            pdf.set_font("Arial", size=10)
-            pdf.multi_cell(0, 5, stripped_line)
-    # Save the report as a PDF file
-    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
-    file_name = f"{candidate_name}_report_{timestamp}.pdf"
-    pdf.output(file_name)
-    return file_name
-# Streamlit UI
-st.title("AI-Powered Candidate Shortlisting")
-st.markdown("Analyze resumes, job descriptions, and match with company culture using RAG.")
-# Setup vectorstore
-vectorstore = setup_vectorstore()
-# File upload
-uploaded_resumes = st.file_uploader("Upload Resumes (PDFs)", type="pdf", accept_multiple_files=True)
-uploaded_job_description = st.file_uploader("Upload Job Description (PDF)", type="pdf")
-if st.button("Generate Fitment Reports"):
-    if not uploaded_resumes or not uploaded_job_description:
-        st.error("Please upload resumes and a job description.")
-    else:
-        with st.spinner("Processing..."):
-            try:
-                # Convert job description to text
-                job_description_text = input_pdf_text(uploaded_job_description)
-                # Retrieve relevant content from vectorstore
-                company_culture_content = retrieve_from_vectorstore(vectorstore, "company culture match")
-                # Process each resume
-                fitment_results = []
-                for resume_file in uploaded_resumes:
-                    # Extract candidate name
-                    candidate_name = os.path.splitext(resume_file.name)[0]
-                    # Convert resume to text
-                    resume_text = input_pdf_text(resume_file)
-                    # Construct the prompt
-                    input_prompt = f"""
-### Task: Generate a candidate shortlisting report.
-### Instructions:
-You are a highly intelligent and unbiased system designed to shortlist candidates for a job based on:
-1. The candidate's resume.
-2. A provided job description.
-3. Relevant company culture data retrieved from the vector database.
-### Key Objectives:
-- Analyze skills, qualifications, and experiences in the resume.
-- Evaluate alignment with the job description.
-- Assess cultural fit using company culture data.
-- Provide detailed scoring, strengths, weaknesses, and recommendations.
-### Required Sections in the Report:
-- Candidate Name and Email
-- Parse the job description and create a 'Should Do' list, categorizing required skills into levels: Beginner, Competent, Intermediate, Expert.
-- Parse the candidate's resume and create a 'Can Do' list, categorizing listed skills into the same levels: Beginner, Competent, Intermediate, Expert.
-- Matching score: A detailed table showing alignment of skills.
-- Analysis of strengths and weaknesses.
-- Recommendations for improvement.
-- Overall conclusion.
-### Input Data:
-- **Resume**: {resume_text}
-- **Job Description**: {job_description_text}
-- **Company Culture Data**: {company_culture_content}
-### Output Format:
-1. Candidate Name and Email
-2."Can Do" list:
-3. "Should Do" list
-4. Skill Comparison Table:
-   | Skill                   | "Can Do" Level  | "Should Do" Level  | Matching Score |
-   |--------------------------|----------------|--------------------|----------------|
-5. Overall Matching Score: [Percentage]
-6. Analysis of Strengths and Weaknesses
-7. Recommendations for Improvement
-8. Conclusion on Fitment
-                    """
-                    # Generate the report
-                    report_content = get_gemini_response(input_prompt)
-                    if report_content:
-                        # Extract the matching score safely
-                        try:
-                            matching_score = float(report_content.split("Overall Matching Score:")[1].split("%")[0].strip())
-                        except (IndexError, ValueError):
-                            matching_score = 0.0
-                            report_content += "\n\n[ERROR: Matching Score could not be parsed]"
-                        # Generate PDF report
-                        report_file = generate_pdf_report(candidate_name, report_content)
-                        # Save results
-                        fitment_results.append((candidate_name, matching_score, report_file))
-                # Sort results by matching score in descending order
-                fitment_results.sort(key=lambda x: x[1], reverse=True)
-                # Display results in tabular form
-                st.write("### Fitment Results")
-                st.write("Below are the shortlisted candidates ranked by their fitment scores.")
-                for rank, (candidate_name, matching_score, report_file) in enumerate(fitment_results, start=1):
-                    col1, col2, col3, col4 = st.columns([3, 2, 2, 2])
-                    col1.write(candidate_name)
-                    col2.write(f"{matching_score:.2f}%")
-                    col3.write(f"Rank {rank}")
-                    with open(report_file, "rb") as f:
-                        col4.download_button(
-                            label="Download Report",
-                            data=f,
-                            file_name=os.path.basename(report_file),
-                            mime="application/pdf",
-                        )
-            except Exception as e:
-                st.error(f"Error generating fitment reports: {e}")

+import os
+import re
+from datetime import datetime
+
+import google.generativeai as genai
+import PyPDF2 as pdf
+import streamlit as st
+from dotenv import load_dotenv
+from fpdf import FPDF
+from langchain_chroma import Chroma
+from langchain_huggingface import HuggingFaceEmbeddings
+# Load environment variables (including values from a local .env file, if present)
+load_dotenv()
+# Configure Generative AI API.
+# The key is read from the GOOGLE_API_KEY environment variable so that no
+# credential is stored in the source file.
+genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
+# Vector store setup (wrapped in Streamlit's resource cache)
+@st.cache_resource
+def setup_vectorstore():
+    """Return a Chroma store persisted in "cv_vectordb" that embeds with HuggingFaceEmbeddings."""
+    return Chroma(
+        persist_directory="cv_vectordb",
+        embedding_function=HuggingFaceEmbeddings(),
+    )
+# Convert PDF to text
+def input_pdf_text(uploaded_file):
+    """Return the text of every page of a PDF, concatenated in page order.
+
+    Args:
+        uploaded_file: The PDF source handed to ``PdfReader`` (the callers in
+            this file pass Streamlit uploads).
+
+    Returns:
+        The extracted text as a single string. A page that yields no text
+        contributes an empty string.
+    """
+    reader = pdf.PdfReader(uploaded_file)
+    # `or ""` keeps a missing extraction result from being stringified as the
+    # literal text "None" inside the prompt.
+    return "".join(page.extract_text() or "" for page in reader.pages)
+# Fetch supporting context for a query from the vector store
+def retrieve_from_vectorstore(vectorstore, query):
+    """Return the page contents of the documents retrieved for ``query``, joined by newlines."""
+    matches = vectorstore.as_retriever().invoke(query)
+    contents = []
+    for match in matches:
+        contents.append(match.page_content)
+    return "\n".join(contents)
+# Get response from Generative AI
+def get_gemini_response(prompt):
+    """Send ``prompt`` to the 'gemini-pro' model and return the generated text.
+
+    Args:
+        prompt: The full prompt string.
+
+    Returns:
+        The text of the first part of the first candidate, or ``None`` when
+        the response carries no candidates or no content parts, so callers
+        can skip it with a simple truthiness check.
+    """
+    model = genai.GenerativeModel('gemini-pro')
+    response = model.generate_content(prompt)
+    # An empty candidate list would otherwise surface as an IndexError.
+    candidates = getattr(response, "candidates", None)
+    if not candidates:
+        return None
+    parts = candidates[0].content.parts
+    return parts[0].text if parts else None
+def generate_pdf_report(candidate_name, report_content):
+    """Render a generated fitment report as a PDF and return the file name.
+
+    The report text is processed line by line after asterisks are removed:
+    known section titles (optionally preceded by "N. " numbering) become
+    numbered headings; "- " items below a heading become bullets; "|" rows
+    with exactly four columns become table rows (markdown separator rows and
+    rows with any other column count are skipped); every other line is
+    written as plain text.
+
+    Args:
+        candidate_name: Name shown in the title and used in the file name.
+        report_content: Report text to render.
+
+    Returns:
+        The name of the PDF that was written, in the form
+        "<candidate>_report_<timestamp>.pdf" (a relative path).
+    """
+    def latin1_safe(text):
+        # The built-in "Arial" font cannot encode characters outside Latin-1
+        # (curly quotes, U+2022 bullets, ...); replace them with "?" so the
+        # document can still be produced.
+        return text.encode("latin-1", "replace").decode("latin-1")
+    report = FPDF()
+    report.add_page()
+    report.set_font("Arial", size=12)
+    report.cell(0, 8, txt=latin1_safe(f"Candidate Report: {candidate_name}"), ln=True, align="L")
+    report.ln(5)  # Add slight spacing after the title
+    # Known section titles and the number printed in front of each
+    numbered_sections = {
+        1: "Candidate Name and Email",
+        2: '"Can Do" list:',
+        3: '"Should Do" list',
+        4: "Skill Comparison Table:",
+        5: "Overall Matching Score:",
+        6: "Analysis of Strengths and Weaknesses",
+        7: "Recommendations for Improvement",
+        8: "Conclusion on Fitment",
+    }
+    section_numbers = {title: number for number, title in numbered_sections.items()}
+    numbering_prefix = re.compile(r"^\d+\.\s*")  # e.g. the "3. " in '3. "Should Do" list'
+    separator_cell = re.compile(r"^:?-+:?$")  # a cell of a markdown "|---|---|" row
+    current_section = None
+    bullet_point = "\u00b7 "  # Middle dot: a bullet-like glyph that exists in Latin-1
+    for line in report_content.splitlines():
+        stripped_line = latin1_safe(line.replace("*", "").strip())  # Remove all asterisks
+        # A title matches with or without its leading "N. " numbering
+        section_number = section_numbers.get(numbering_prefix.sub("", stripped_line))
+        if section_number is not None:
+            current_section = section_number
+            report.set_font("Arial", style="", size=11)
+            report.cell(0, 6, txt=f"{section_number}. {numbered_sections[section_number]}", ln=True, align="L")
+            report.ln(3)  # Reduced spacing after each section header
+        # Sub-content that starts with "-" (only once a section has been seen)
+        elif current_section and stripped_line.startswith("- "):
+            report.set_font("Arial", size=10)
+            report.cell(5)  # Add slight indentation
+            report.cell(0, 5, txt=f"{bullet_point}{stripped_line[2:]}", ln=True)
+        # Table rows
+        elif "|" in stripped_line:
+            cells = [cell.strip() for cell in stripped_line.split("|")[1:-1]]
+            is_separator = all(separator_cell.match(cell) for cell in cells)
+            if len(cells) == 4 and not is_separator:
+                report.set_font("Arial", size=9)
+                report.cell(50, 6, cells[0], border=1)
+                report.cell(35, 6, cells[1], border=1, align="C")
+                report.cell(35, 6, cells[2], border=1, align="C")
+                report.cell(35, 6, cells[3], border=1, align="C")
+                report.ln()
+        # Regular content as plain text
+        else:
+            report.set_font("Arial", size=10)
+            report.multi_cell(0, 5, stripped_line)
+    # Save the report as a PDF file. The candidate name comes from an uploaded
+    # file name, so characters that are invalid or meaningful in paths are
+    # replaced before it is used on disk.
+    safe_name = re.sub(r'[\\/:*?"<>|]+', "_", candidate_name).strip() or "candidate"
+    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
+    file_name = f"{safe_name}_report_{timestamp}.pdf"
+    report.output(file_name)
+    return file_name
+# Page header
+st.title("AI-Powered Candidate Shortlisting")
+st.markdown(
+    "Analyze resumes, job descriptions, and match with company culture using RAG."
+)
+# Vector store used for retrieval further down the script
+vectorstore = setup_vectorstore()
+# Inputs: any number of resume PDFs plus a single job-description PDF
+uploaded_resumes = st.file_uploader(
+    "Upload Resumes (PDFs)",
+    type="pdf",
+    accept_multiple_files=True,
+)
+uploaded_job_description = st.file_uploader(
+    "Upload Job Description (PDF)",
+    type="pdf",
+)
+def _parse_matching_score(report_text):
+    """Return the percentage that follows "Overall Matching Score", or None if there is none.
+
+    Colons, whitespace, markdown asterisks and an opening bracket between the
+    label and the number are tolerated, e.g. "**Overall Matching Score:** 85%".
+    """
+    match = re.search(r"Overall Matching Score[\s:*\[]*(\d+(?:\.\d+)?)\s*%", report_text)
+    return float(match.group(1)) if match else None
+# Generate, rank and offer one fitment report per uploaded resume
+if st.button("Generate Fitment Reports"):
+    if not uploaded_resumes or not uploaded_job_description:
+        st.error("Please upload resumes and a job description.")
+    else:
+        with st.spinner("Processing..."):
+            try:
+                # The job description and the retrieved culture context are shared by every resume
+                job_description_text = input_pdf_text(uploaded_job_description)
+                company_culture_content = retrieve_from_vectorstore(vectorstore, "company culture match")
+                # Process each resume
+                fitment_results = []
+                for resume_file in uploaded_resumes:
+                    # The uploaded file name (without extension) stands in for the candidate name
+                    candidate_name = os.path.splitext(resume_file.name)[0]
+                    # Convert resume to text
+                    resume_text = input_pdf_text(resume_file)
+                    # Construct the prompt
+                    input_prompt = f"""
+### Task: Generate a candidate shortlisting report.
+### Instructions:
+You are a highly intelligent and unbiased system designed to shortlist candidates for a job based on:
+1. The candidate's resume.
+2. A provided job description.
+3. Relevant company culture data retrieved from the vector database.
+### Key Objectives:
+- Analyze skills, qualifications, and experiences in the resume.
+- Evaluate alignment with the job description.
+- Assess cultural fit using company culture data.
+- Provide detailed scoring, strengths, weaknesses, and recommendations.
+### Required Sections in the Report:
+- Candidate Name and Email
+- Parse the job description and create a 'Should Do' list, categorizing required skills into levels: Beginner, Competent, Intermediate, Expert.
+- Parse the candidate's resume and create a 'Can Do' list, categorizing listed skills into the same levels: Beginner, Competent, Intermediate, Expert.
+- Matching score: A detailed table showing alignment of skills.
+- Analysis of strengths and weaknesses.
+- Recommendations for improvement.
+- Overall conclusion.
+### Input Data:
+- **Resume**: {resume_text}
+- **Job Description**: {job_description_text}
+- **Company Culture Data**: {company_culture_content}
+### Output Format:
+1. Candidate Name and Email
+2."Can Do" list:
+3. "Should Do" list
+4. Skill Comparison Table:
+   | Skill                   | "Can Do" Level  | "Should Do" Level  | Matching Score |
+   |--------------------------|----------------|--------------------|----------------|
+5. Overall Matching Score: [Percentage]
+6. Analysis of Strengths and Weaknesses
+7. Recommendations for Improvement
+8. Conclusion on Fitment
+                    """
+                    # Generate the report
+                    report_content = get_gemini_response(input_prompt)
+                    if not report_content:
+                        # Tell the user instead of silently leaving the candidate out of the ranking
+                        st.warning(f"No report could be generated for {candidate_name}.")
+                        continue
+                    # Extract the matching score; an unparseable score ranks last and is flagged in the report
+                    matching_score = _parse_matching_score(report_content)
+                    if matching_score is None:
+                        matching_score = 0.0
+                        report_content += "\n\n[ERROR: Matching Score could not be parsed]"
+                    # Generate PDF report
+                    report_file = generate_pdf_report(candidate_name, report_content)
+                    # Save results
+                    fitment_results.append((candidate_name, matching_score, report_file))
+                # Sort results by matching score in descending order
+                fitment_results.sort(key=lambda x: x[1], reverse=True)
+                # Display results in tabular form
+                st.write("### Fitment Results")
+                st.write("Below are the shortlisted candidates ranked by their fitment scores.")
+                for rank, (candidate_name, matching_score, report_file) in enumerate(fitment_results, start=1):
+                    col1, col2, col3, col4 = st.columns([3, 2, 2, 2])
+                    col1.write(candidate_name)
+                    col2.write(f"{matching_score:.2f}%")
+                    col3.write(f"Rank {rank}")
+                    with open(report_file, "rb") as f:
+                        col4.download_button(
+                            label="Download Report",
+                            data=f,
+                            file_name=os.path.basename(report_file),
+                            mime="application/pdf",
+                        )
+            except Exception as e:
+                # Top-level boundary of the UI action: report the failure to the user
+                st.error(f"Error generating fitment reports: {e}")