import streamlit as st
import difflib
import pandas as pd
import numpy as np
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Download NLTK stopwords if not already done
nltk.download('stopwords')

# Read the data
lpi_df = pd.read_csv('Learning Pathway Index.csv')

# Rename columns to identifier-friendly names used throughout the script
lpi_df.rename(columns={
    "Course / Learning material": "Course_Learning_Material",
    "Course Level": "Course_Level",
    "Type (Free or Paid)": "Type",
    "Module / Sub-module \nDifficulty level": "Difficulty_Level",
    "Keywords / Tags / Skills / Interests / Categories": "Keywords"
}, inplace=True)

# Combine features into one text field per row.
# Plain `+` concatenation turns the whole row into NaN as soon as a single
# cell is missing (and raises on non-string cells), so the text is built from
# a blank-filled, string-typed copy of the feature columns instead. lpi_df's
# own columns are left untouched; only 'combined_features' is added.
feature_columns = [
    'Course_Learning_Material',
    'Source',
    'Course_Level',
    'Type',
    'Module',
    'Difficulty_Level',
    'Keywords',
]
lpi_df['combined_features'] = (
    lpi_df[feature_columns].fillna('').astype(str).agg(' '.join, axis=1)
)

# Text preprocessing
combined_features = lpi_df['combined_features']
porter_stemmer = PorterStemmer()

def stemming(content):
    """Return *content* reduced to lower-case, Porter-stemmed, non-stopword tokens.

    Every character outside ``a-z``/``A-Z`` is replaced by a space, the text
    is lower-cased and split on whitespace, English stopwords are dropped and
    the remaining words are stemmed with the module-level ``porter_stemmer``.

    Args:
        content: Text to normalise. A non-string value (for example the float
            NaN pandas uses for a missing cell) is treated as empty text.

    Returns:
        The stemmed tokens joined by single spaces; ``''`` when nothing is left.
    """
    if not isinstance(content, str):
        # re.sub would raise TypeError on NaN/None; missing text has no tokens.
        return ''

    # Build the stopword set once per call: looking the list up inside the
    # comprehension re-evaluated stopwords.words() for every single word.
    english_stop_words = set(stopwords.words('english'))

    words = re.sub('[^a-zA-Z]', ' ', content).lower().split()
    return ' '.join(
        porter_stemmer.stem(word)
        for word in words
        if word not in english_stop_words
    )

# Normalise every row's combined text with the stemming helper
stemmed_features = combined_features.apply(stemming)

# TF-IDF and similarity: learn the vocabulary and vectorise in one step, then
# compare every row against every other row
vectorizer = TfidfVectorizer()
combined_features = vectorizer.fit_transform(stemmed_features)
similarity = cosine_similarity(combined_features)

# Streamlit app: take a free-text query, fuzzy-match it against the module
# names, and list the modules most similar to the best match.
st.title('Learning Pathway Index Course Recommendation')
user_input = st.text_input('Enter What You Want to Learn : ')

if user_input:
    # Keep only real strings: a missing module name is a float NaN, which
    # difflib cannot compare against the query.
    list_of_all_titles = [
        title for title in lpi_df['Module'].tolist() if isinstance(title, str)
    ]
    find_close_match = difflib.get_close_matches(user_input, list_of_all_titles)

    if find_close_match:
        close_match = find_close_match[0]
        # `similarity` was computed from lpi_df's rows in order, so it is
        # addressed by row position. Look the match up by position (not by
        # index label) so this stays correct even without a default 0..n-1 index.
        index_of_the_course = np.flatnonzero(
            (lpi_df['Module'] == close_match).to_numpy()
        )[0]
        similarity_score = list(enumerate(similarity[index_of_the_course]))
        # Highest similarity first; the matched module itself normally leads.
        sorted_similar_course = sorted(similarity_score, key=lambda x: x[1], reverse=True)

        st.subheader('Courses suggested for you:')
        for i, course in enumerate(sorted_similar_course[:30], start=1):
            index = course[0]
            # Positional lookup, matching how the similarity rows are numbered.
            title_from_index = lpi_df['Module'].iloc[index]
            st.write(f"{i}. {title_from_index}")
    else:
        st.write('No close matches found.')