Spaces:

AzizTh
/

Hotel-Semantic-Search

Sleeping

File size: 3,670 Bytes

d4f5493
 
 
 
 
da29c4d
 
 
 
5aa6ce6
da29c4d
5aa6ce6
 
da29c4d
5aa6ce6
 
d4f5493
 
 
 
 
 
 
 
 
da29c4d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d4f5493
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
da29c4d
d4f5493
 
 
 
 
da29c4d
d4f5493
da29c4d
d4f5493
 
da29c4d
d4f5493

import subprocess
import sys

import gradio as gr
import pandas as pd
import spacy
from sentence_transformers import SentenceTransformer

# Load the spaCy transformer pipeline used to pull place names (GPE entities)
# out of user queries. The model is downloaded only when it is not already
# installed, so a restart does not trigger a fresh download every time.
try:
    nlp = spacy.load("en_core_web_trf")
except OSError:
    # spacy.load raises OSError when the model package cannot be found.
    # sys.executable makes the download target the interpreter that is running
    # this script, and check=True reports a failed download right away instead
    # of letting the following load fail with a less specific error.
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", "en_core_web_trf"],
        check=True,
    )
    nlp = spacy.load("en_core_web_trf")

# Sentence-embedding model used to encode the user's query.
# NOTE(review): trust_remote_code=True allows code shipped with the model
# repository to run locally — consider pinning a revision.
model = SentenceTransformer("nomic-ai/nomic-embed-text-v1", trust_remote_code=True)

# Hotel table, read once at import time and shared by the functions below.
df_new = pd.read_csv('last_df.csv')

# Rewrite country cells equal to 'Türkiye' as 'Turkey'
# (presumably so a single English spelling is shown — confirm).
df_new['country'] = df_new['country'].replace('Türkiye', 'Turkey')

def get_city_name(query):
    """Return the first GPE entity mentioned in *query*, lower-cased.

    The query is parsed with the module-level spaCy pipeline ``nlp``. Its
    entities are scanned in order and the text of the first one whose label
    is ``"GPE"`` is returned in lower case. ``None`` is returned when the
    query contains no such entity.
    """
    gpe_mentions = (
        entity.text.lower()
        for entity in nlp(query).ents
        if entity.label_ == "GPE"
    )
    # next() with a default stops at the first match, or yields None if none.
    return next(gpe_mentions, None)

def filter_by_loc(query):
    """Narrow the hotel table to the locality named in *query*, if any.

    The place name extracted by ``get_city_name`` is compared, in lower case,
    against the ``locality`` column of ``df_new``. When it matches at least
    one row, only the matching rows are returned. Otherwise (no place name
    found, or no locality with that name) the full ``df_new`` is returned.
    """
    place = get_city_name(query)
    localities = df_new['locality'].str.lower()

    # Unknown or missing place name: fall back to the whole table.
    if place not in localities.unique():
        return df_new

    return df_new[localities == place.lower()]



import torch.nn as nn
import torch
import ast



def get_similarity_score(row, query_embedding):
    """Return the summed cosine similarity between a hotel row and the query.

    Args:
        row: Mapping-like record whose ``rating_value_embedding``,
            ``hotel_combined_embedding`` and ``review_embedding`` entries are
            string representations of Python lists of numbers.
        query_embedding: 1-D tensor to compare each stored embedding against.

    Returns:
        A Python float: the sum of the three cosine similarities.
    """
    cosine = nn.CosineSimilarity(dim=0)  # dim=0 because the vectors are 1-D

    embedding_columns = (
        'rating_value_embedding',
        'hotel_combined_embedding',
        'review_embedding',
    )

    # literal_eval parses the stored list text without executing code.
    vectors = [
        torch.tensor(ast.literal_eval(row[column]))
        for column in embedding_columns
    ]

    rating_sim, hotel_sim, review_sim = (
        cosine(vector, query_embedding).item() for vector in vectors
    )
    return rating_sim + hotel_sim + review_sim

def process_query(query):
    """Find the hotel that best matches a free-text query and describe it.

    The candidate hotels are first narrowed by ``filter_by_loc``. The query is
    then embedded with the module-level ``model`` and every candidate row is
    scored with ``get_similarity_score``. The highest-scoring row is formatted
    as text.

    Args:
        query: Free-text search string entered by the user.

    Returns:
        A multi-line summary (name, city, country, star rating, price range)
        of the best-matching hotel, or a short message when the query is
        blank or there are no hotels to rank.
    """
    # A blank query carries nothing to match on; answer without running models.
    if not query or not query.strip():
        return "Please enter a query."

    # Filter DataFrame by location
    candidates = filter_by_loc(query)
    if candidates.empty:
        # Nothing to rank; indexing the top row below would raise IndexError.
        return "No matching hotel found."

    # model.encode returns an array-like; the scoring function expects a tensor.
    query_embedding_tensor = torch.tensor(model.encode(query))

    scores = candidates.apply(
        lambda row: get_similarity_score(row, query_embedding_tensor),
        axis=1,
    )

    # assign() builds a new frame, so neither the shared hotel table nor a
    # slice of it is modified by this request.
    ranked = candidates.assign(similarity_score=scores)
    top_similar = ranked.sort_values('similarity_score', ascending=False).head(1)

    # (label shown to the user, source column) in display order.
    fields = (
        ("Hotel Name", 'hotel_name'),
        ("City", 'locality'),
        ("Country", 'country'),
        ("Star Rating", 'rate'),
        ("Price Range", 'price_range'),
    )

    # Format the output
    lines = ["Here's the most similar hotel we found:", "-" * 30]
    lines.extend(
        f"{label}: {top_similar[column].values[0]}" for label, column in fields
    )
    return "\n".join(lines) + "\n"





# Gradio front end: one text box in, plain text out, served by process_query.
ui = gr.Interface(
    title="Hotel Similarity Finder",
    description="Enter a query to find similar hotels.",
    fn=process_query,
    inputs=gr.Textbox(placeholder="Enter your query", label="Query"),
    outputs="text",
)

# Launch the interface.
ui.launch()