Spaces:

nonstopiodemo
/

TalentEdge-2

Sleeping

App Files Files Community

TalentEdge-2 / app.py

Nikhil-Murade

Update app.py

0d1cf3d verified 6 months ago

raw

history blame

3.12 kB

	import streamlit as st
	import joblib
	import numpy as np
	import logging
	from processing import JobTitlePreprocessor # Import your preprocessor class

	# Configure logging so preprocessing details and errors reach the app logs
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)


	@st.cache_resource
	def load_models():
	    """Load the fitted vectorizer and KMeans model from disk.

	    Streamlit re-executes this script from top to bottom on every widget
	    interaction. Wrapping the loads in ``st.cache_resource`` means the
	    pickles are deserialized only on the first run; later reruns reuse the
	    same in-memory objects instead of hitting the disk again.

	    Returns:
	        A ``(vectorizer, kmeans_model)`` tuple, in that order.
	    """
	    return (
	        joblib.load('vectorizer_model.pkl'),
	        joblib.load('kmeans_model-1.pkl'),
	    )


	# Load the pre-trained models (cached across reruns)
	vectorizer, kmeans_model = load_models()

	# Initialize the preprocessor
	preprocessor = JobTitlePreprocessor()

	# Page heading shown at the top of the app
	st.title("Job Title Clustering App")

	# Sidebar summary. These figures are hard-coded strings; they are not
	# computed from the loaded model at runtime.
	st.sidebar.header("Insights")
	for insight_line in (
	    "Silhouette Score: 0.5840",
	    "number of unique title: 6000",
	    "number of cluster: 40",
	):
	    st.sidebar.write(insight_line)

	# One free-text box per job title to compare (rendered in this order)
	job_title_1, job_title_2 = (
	    st.text_input("Enter the first job title:"),
	    st.text_input("Enter the second job title:"),
	)

	def get_top_words(cluster, vectorizer, kmeans_model, n_words=5):
	    """Return the highest-weighted vocabulary terms of one cluster centroid.

	    Args:
	        cluster: Index of the cluster in ``kmeans_model.cluster_centers_``.
	        vectorizer: Fitted vectorizer exposing ``get_feature_names_out()``.
	        kmeans_model: Fitted clustering model exposing ``cluster_centers_``.
	        n_words: How many terms to return (defaults to 5).

	    Returns:
	        A list of up to ``n_words`` feature names, ordered from the largest
	        centroid weight to the smallest.
	    """
	    feature_names = vectorizer.get_feature_names_out()
	    centroid = kmeans_model.cluster_centers_[cluster]
	    # argsort is ascending, so reverse it before taking the leading entries.
	    top_word_indices = np.argsort(centroid)[::-1][:n_words]
	    return [feature_names[i] for i in top_word_indices]


	# Button to process the inputs
	if st.button("Submit"):
	    # Treat whitespace-only input the same as an empty field.
	    if not job_title_1.strip() or not job_title_2.strip():
	        st.error("Please enter both job titles.")
	    else:
	        try:
	            # Preprocess the input job titles
	            clean_title_1 = preprocessor.preprocess(job_title_1)
	            clean_title_2 = preprocessor.preprocess(job_title_2)

	            # Log the preprocessed titles
	            logger.info("Preprocessed Title 1: %s", clean_title_1)
	            logger.info("Preprocessed Title 2: %s", clean_title_2)

	            # Vectorize both titles and predict their clusters in one batch
	            title_vectors = vectorizer.transform([clean_title_1, clean_title_2])
	            cluster_1, cluster_2 = kmeans_model.predict(title_vectors)

	            # Display results
	            st.write(f"Cluster for '{job_title_1}': {cluster_1}")
	            st.write(f"Cluster for '{job_title_2}': {cluster_2}")

	            if cluster_1 == cluster_2:
	                st.success(f"The job titles '{job_title_1}' and '{job_title_2}' belong to the same cluster!")
	            else:
	                st.warning(f"The job titles '{job_title_1}' and '{job_title_2}' do not belong to the same cluster.")

	            # Display top words for the predicted clusters
	            top_words_1 = get_top_words(cluster_1, vectorizer, kmeans_model)
	            top_words_2 = get_top_words(cluster_2, vectorizer, kmeans_model)

	            st.write(f"Top words in Cluster {cluster_1}: {', '.join(top_words_1)}")
	            st.write(f"Top words in Cluster {cluster_2}: {', '.join(top_words_2)}")

	        except Exception as e:
	            # Top-level UI boundary: record the traceback in the logs and
	            # show the user a short message instead of crashing the page.
	            logger.exception("Error occurred: %s", e)
	            st.error(f"An error occurred: {e}")