Spaces:

morfriden
/

TripAdvisor

Sleeping

App Files Files Community

morfriden committed 24 days ago

Commit

124d1f9

verified ·

1 Parent(s): eb45fc4

Upload 4 files

Browse files

Files changed (5) hide show

.gitattributes +1 -0
README.md +6 -8
app.py +196 -0
requirements.txt +8 -0
trip_index.faiss +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+trip_index.faiss filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -1,13 +1,11 @@
 ---
-title: TripAdvisor
-emoji: 🏆
-colorFrom: yellow
-colorTo: indigo
 sdk: gradio
-sdk_version: 5.42.0
 app_file: app.py
 pinned: false
 license: mit
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: TripPlanner
+emoji: ✈️
+colorFrom: blue
+colorTo: green
 sdk: gradio
 app_file: app.py
 pinned: false
 license: mit
+short_description: 'Your personal AI trip planner! ✈️'
+---

app.py ADDED Viewed

	@@ -0,0 +1,196 @@

+import gradio as gr
+from datasets import load_dataset
+from sentence_transformers import SentenceTransformer, util
+import faiss
+import numpy as np
+from transformers import pipeline
+import time
+import ast
+import re
# --- 1. DATA LOADING AND INITIALIZATION ---
print("===== Application Startup =====")
start_time = time.time()
# Load the "test" configuration of the TravelPlanner dataset. No row limit is
# applied here; the rest of the file reads rows via dataset['test'][...] using
# positions returned by the FAISS index, so both must describe the same rows.
print("Loading TravelPlanner dataset...")
dataset = load_dataset("osunlp/TravelPlanner", "test")
print("Dataset ready.")
# --- 2. EMBEDDING AND RECOMMENDATION ENGINE ---
print("Loading embedding model...")
model_name = "all-mpnet-base-v2"
embedding_model = SentenceTransformer(f"sentence-transformers/{model_name}")
index_file = "trip_index.faiss"
print(f"Loading FAISS index from {index_file}...")
try:
    index = faiss.read_index(index_file)
except RuntimeError as err:
    # Report the underlying reason instead of assuming "file not found": the
    # same exception type may also signal an unreadable or corrupt index.
    print(f"Error: could not load FAISS index file '{index_file}': {err}")
    print("Please run the `build_index.py` script first to create the index.")
    # Exit with a non-zero status so the failure is visible to the host;
    # SystemExit also works when the `site`-provided exit() is unavailable.
    raise SystemExit(1) from err
else:
    print(f"Index is ready. Total vectors in index: {index.ntotal}")
# --- 3. PLAN FORMATTING AND SYNTHETIC GENERATION ---
def format_plan_details(plan_string):
    """Format a raw plan string from the dataset as readable Markdown.

    The plan is expected to be the string representation of a list of
    dictionaries, each with a 'Description' and a 'Content' entry.

    Args:
        plan_string: Raw plan text. Anything that is not a string starting
            with '[' (including None and the empty string) is returned
            unchanged.

    Returns:
        A Markdown string with one '####' heading per section, or the
        original value when it cannot be parsed into at least one
        dictionary section.
    """
    # Not in the expected "[{...}, ...]" shape: hand the value back untouched.
    if not isinstance(plan_string, str) or not plan_string.strip().startswith('['):
        return plan_string
    try:
        # literal_eval only evaluates Python literals, never arbitrary code.
        plan_list = ast.literal_eval(plan_string)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        # Malformed input: fall back to the raw text instead of crashing.
        return plan_string

    # Ignore entries that are not dictionaries; they have no fields to format.
    sections = [entry for entry in plan_list if isinstance(entry, dict)]
    if not sections:
        return plan_string

    listing_keywords = ('Attractions', 'Restaurants', 'Accommodations', 'Flight')
    formatted_sections = []
    for section in sections:
        # Coerce to str so missing/None/non-string fields cannot raise below.
        description = str(section.get('Description') or 'Details')
        content = str(section.get('Content') or '').strip()

        formatted_sections.append(f"#### {description}")

        if any(keyword in description for keyword in listing_keywords):
            # Table-like content: first line is the header, the rest are rows.
            header, *rows = content.split('\n')
            header = header.strip()
            if header:
                formatted_sections.append(f"**{header}**")
            for row in rows:
                clean_row = ' '.join(row.split())  # collapse runs of whitespace
                if clean_row:
                    formatted_sections.append(f"- {clean_row}")
        elif 'Self-driving' in description or 'Taxi' in description:
            mode_emoji = "🚗" if 'Self-driving' in description else "🚕"
            formatted_sections.append(f"- {mode_emoji} {content}")
        else:
            # Default formatting for any other type of content.
            formatted_sections.append(content)

        # Blank line between sections.
        formatted_sections.append("")
    return "\n".join(formatted_sections)
# Lazily created GPT-2 pipeline, shared by all requests (see _get_text_generator).
_text_generator = None


def _get_text_generator():
    """Return the GPT-2 text-generation pipeline, creating it on first use."""
    global _text_generator
    if _text_generator is None:
        print("Loading generative model...")
        _text_generator = pipeline('text-generation', model='gpt2')
    return _text_generator


def get_recommendations_and_generate(query_text, k=3):
    """Retrieve similar trips from the dataset and generate one new trip idea.

    Args:
        query_text: Free-text description of the desired trip.
        k: Number of nearest neighbours requested from the FAISS index. Only
            the first three usable results are returned.

    Returns:
        A 4-tuple ``(rec1, rec2, rec3, generated_text)``. Each ``rec`` is a
        dict with 'dest', 'days' and 'reference_information' keys (a
        placeholder dict when fewer than three trips were found);
        ``generated_text`` is the generated candidate most similar to the
        query.
    """
    # 1. Get recommendations from existing data.
    query_vector = np.asarray(embedding_model.encode([query_text]), dtype=np.float32)
    _, indices = index.search(query_vector, k)

    trips = dataset['test']
    results = []
    for raw_idx in indices[0]:
        idx = int(raw_idx)
        # FAISS fills missing neighbours with -1; without this check a
        # negative id would silently select a row from the end of the data.
        if not 0 <= idx < len(trips):
            continue
        row = trips[idx]  # one row lookup instead of loading three columns
        results.append({
            "dest": row['dest'],
            "days": row['days'],
            "reference_information": row['reference_information'],
        })
    while len(results) < 3:
        results.append({"dest": "No trip plan found", "days": "", "reference_information": ""})

    # 2. Create a prompt for the generative model.
    prompt = f"Write a complete travel plan that includes a title and a day-by-day itinerary. The trip must be about: {query_text}."
    generator = _get_text_generator()

    # 3. Generate 10 new, creative trip ideas.
    print("Generating 10 synthetic trip ideas...")
    generated_outputs = generator(
        prompt,
        max_new_tokens=250,
        num_return_sequences=10,
        # Several return sequences need sampling (or beam search); request
        # it explicitly rather than relying on the model's default config.
        do_sample=True,
        # GPT-2 defines no pad token; 50256 is its end-of-text token id.
        pad_token_id=50256,
    )

    # 4. Find the best trip out of the 10 generated.
    print("Finding the most relevant generated trip...")
    generated_texts = [
        output['generated_text'].replace(prompt, "").strip()
        for output in generated_outputs
    ]
    generated_embeddings = embedding_model.encode(generated_texts)
    # Cosine similarity between the query and every generated candidate.
    similarities = util.cos_sim(query_vector, generated_embeddings)
    best_trip_index = int(similarities.argmax())
    best_generated_trip = generated_texts[best_trip_index]
    return results[0], results[1], results[2], best_generated_trip
# --- 4. GRADIO USER INTERFACE ---
def format_trip_plan(trip):
    """Render one recommended trip as Markdown.

    Args:
        trip: Dict with 'dest', 'days' and 'reference_information' keys, or
            None.

    Returns:
        A Markdown string. When ``trip`` is empty or carries no reference
        information (as the placeholder entries used for missing results
        do), a "not found" heading is returned instead of a plan.
    """
    # Placeholders contain the key with an empty value, so test the value
    # rather than mere key presence.
    if not trip or not trip.get('reference_information'):
        return "### No similar trip plan found."
    formatted_plan = format_plan_details(trip['reference_information'])
    # str() guards against a non-string destination before upper-casing.
    destination = str(trip.get('dest', '')).upper()
    return f"### {trip.get('days', '')}-days trip to {destination}\n**Suggested Plan:**\n{formatted_plan}"
def format_generated_trip(trip_text):
    """Return the generated trip text exactly as received.

    This is a pass-through: no formatting is applied to the text.
    """
    generated_plan = trip_text
    return generated_plan
def trip_planner_wizard(destination, days):
    """Build the query from the UI inputs and return the four output texts.

    Args:
        destination: Destination text entered by the user.
        days: Trip length entered by the user; may be None when the number
            field is empty.

    Returns:
        A 4-tuple of strings: three formatted dataset recommendations and
        the generated trip text. For invalid input the first element is a
        short Markdown message and the remaining three are empty strings.
    """
    destination = str(destination or "").strip()
    if not destination:
        return "### Please enter a destination.", "", "", ""

    invalid_days = ("### Please enter a valid number of days.", "", "", "")
    try:
        # An empty field arrives as None; NaN/inf cannot be converted either.
        days = int(days)
    except (TypeError, ValueError, OverflowError):
        return invalid_days
    if days < 1:
        return invalid_days

    # Combine user inputs into a single query for processing.
    query_text = f"a {days}-day trip to {destination}"
    rec1, rec2, rec3, gen_rec_text = get_recommendations_and_generate(query_text)
    return (
        format_trip_plan(rec1),
        format_trip_plan(rec2),
        format_trip_plan(rec3),
        format_generated_trip(gen_rec_text),
    )
# Report how long the module-level start-up work above took.
end_time = time.time()
print(f"Models and data loaded in {end_time - start_time:.2f} seconds.")

# Gradio Interface: inputs on top, a submit button, then the three dataset
# recommendations next to the generated idea.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# ✈️ TripPlanner AI")
    gr.Markdown("Enter your destination and desired trip length, and get plan recommendations plus a new AI-generated idea!")

    with gr.Row():
        destination_input = gr.Textbox(label="Destination", placeholder="e.g., Paris")
        days_input = gr.Number(label="Number of Days", value=3)
    wizard_inputs = [destination_input, days_input]

    with gr.Row():
        submit_btn = gr.Button("Get Trip Plans", variant="primary")

    with gr.Row():
        with gr.Column(scale=2):
            gr.Markdown("### Recommended Trip Plans from Dataset")
            output_rec1 = gr.Markdown()
            output_rec2 = gr.Markdown()
            output_rec3 = gr.Markdown()
        with gr.Column(scale=1):
            gr.Markdown("### ✨ New AI-Generated Idea")
            output_gen = gr.Textbox(label="AI Generated Trip Plan", lines=20, interactive=False)
    wizard_outputs = [output_rec1, output_rec2, output_rec3, output_gen]

    # Run the planner when the button is pressed.
    submit_btn.click(fn=trip_planner_wizard, inputs=wizard_inputs, outputs=wizard_outputs)

    # Example rows fill the two input fields.
    gr.Examples(
        examples=[
            ["Paris", 3],
            ["Orlando", 7],
            ["Tokyo", 5],
            ["the Greek Islands", 10],
        ],
        inputs=wizard_inputs,
    )

# NOTE(review): `ssr_mode` looks like a Gradio 5 launch() option, while
# requirements.txt pins gradio==4.31.5 — confirm which version actually runs.
demo.launch(ssr_mode=False)

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+gradio==4.31.5
+datasets==2.19.1
+sentence-transformers==2.7.0
+faiss-cpu==1.8.0
+transformers==4.41.2
+torch==2.3.1
+pyarrow==16.1.0
+huggingface-hub==0.23.3

trip_index.faiss ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2c8d2a2eb720f78a9be02358e0d87e287766a257179553b784831ba7b207c875
+size 3072045