morfriden committed on
Commit
124d1f9
·
verified ·
1 Parent(s): eb45fc4

Upload 4 files

Browse files
Files changed (5) hide show
  1. .gitattributes +1 -0
  2. README.md +6 -8
  3. app.py +196 -0
  4. requirements.txt +8 -0
  5. trip_index.faiss +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ trip_index.faiss filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,13 +1,11 @@
1
  ---
2
- title: TripAdvisor
3
- emoji: 🏆
4
- colorFrom: yellow
5
- colorTo: indigo
6
  sdk: gradio
7
- sdk_version: 5.42.0
8
  app_file: app.py
9
  pinned: false
10
  license: mit
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: TripPlanner
3
+ emoji: ✈️
4
+ colorFrom: blue
5
+ colorTo: green
6
  sdk: gradio
 
7
  app_file: app.py
8
  pinned: false
9
  license: mit
10
+ short_description: 'Your personal AI trip planner! ✈️'
11
+ ---
 
app.py ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from datasets import load_dataset
3
+ from sentence_transformers import SentenceTransformer, util
4
+ import faiss
5
+ import numpy as np
6
+ from transformers import pipeline
7
+ import time
8
+ import ast
9
+ import re
10
+
11
# --- 1. DATA LOADING AND INITIALIZATION ---
print("===== Application Startup =====")
start_time = time.time()

# Load the "test" configuration of the TravelPlanner dataset. No row limit is
# applied here; the recommendation code later uses FAISS result ids directly
# as row positions in dataset['test'], so the index file must have been built
# over these same rows in the same order.
print("Loading TravelPlanner dataset...")
dataset = load_dataset("osunlp/TravelPlanner", "test")
print("Dataset ready.")

# --- 2. EMBEDDING AND RECOMMENDATION ENGINE ---
print("Loading embedding model...")
model_name = "all-mpnet-base-v2"
embedding_model = SentenceTransformer(f"sentence-transformers/{model_name}")

index_file = "trip_index.faiss"

print(f"Loading FAISS index from {index_file}...")

try:
    index = faiss.read_index(index_file)
except RuntimeError as err:
    # RuntimeError covers every read failure (missing file, truncated file,
    # un-fetched Git LFS pointer, ...), so report the underlying reason too.
    print(f"Error: FAISS index file '{index_file}' could not be loaded: {err}")
    print("Please run the `build_index.py` script first to create the index.")
    # Exit with a non-zero status so the host sees the startup as failed;
    # SystemExit is used directly because the `exit()` helper is only
    # installed by the `site` module and exits with status 0 by default.
    raise SystemExit(1)
else:
    print(f"Index is ready. Total vectors in index: {index.ntotal}")
36
+
37
+
38
+ # --- 3. PLAN FORMATTING AND SYNTHETIC GENERATION ---
39
def format_plan_details(plan_string):
    """
    Parse the raw plan string from the dataset and format it as readable Markdown.

    The expected input is the string form of a Python list of dictionaries,
    each with a 'Description' and a 'Content' entry. Every dictionary becomes
    a "#### <Description>" heading followed by its formatted content and a
    blank line.

    Args:
        plan_string: Raw plan text. Anything that is not a string starting
            with '[' (ignoring surrounding whitespace), or that cannot be
            parsed as a Python literal, is returned unchanged.

    Returns:
        The Markdown text, or the original ``plan_string`` when it is not in
        the expected list format.
    """
    # Pass through anything that is not a string holding a list literal.
    if not isinstance(plan_string, str):
        return plan_string
    stripped = plan_string.strip()
    if not stripped.startswith('['):
        return plan_string

    try:
        # literal_eval only accepts Python literals, so no code is executed.
        # The stripped text is parsed because leading whitespace is rejected
        # by older Python versions.
        plan_list = ast.literal_eval(stripped)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # If parsing fails, return the original string to avoid crashing.
        return plan_string

    listing_keywords = ('Attractions', 'Restaurants', 'Accommodations', 'Flight')

    formatted_sections = []
    for section in plan_list:
        if not isinstance(section, dict):
            # Entries that are not dictionaries carry no description/content
            # pair to render, so they are skipped instead of raising.
            continue

        # `or` also covers keys that are present but hold None / ''.
        description = str(section.get('Description') or 'Details')
        content = str(section.get('Content') or '').strip()

        # Add a heading for each section.
        formatted_sections.append(f"#### {description}")

        if any(keyword in description for keyword in listing_keywords):
            # Tabular listings: first line is the column header, the rest are rows.
            header, *rows = content.split('\n')
            header = header.strip()
            if header:
                # An empty header would otherwise render as a stray "****".
                formatted_sections.append(f"**{header}**")
            for row in rows:
                clean_item = ' '.join(row.split())  # Collapse extra whitespace
                if clean_item:
                    formatted_sections.append(f"- {clean_item}")

        elif 'Self-driving' in description or 'Taxi' in description:
            # Make simple travel descriptions more readable.
            mode_emoji = "🚗" if 'Self-driving' in description else "🚕"
            formatted_sections.append(f"- {mode_emoji} {content}")

        else:
            # Default formatting for any other type of content.
            formatted_sections.append(content)

        # Add an empty entry so sections are separated by a blank line.
        formatted_sections.append("")

    return "\n".join(formatted_sections)
87
+
88
# Holds the text-generation pipeline so the model is loaded once per process
# instead of once per request.
_TEXT_GENERATOR_CACHE = {}


def _get_text_generator():
    """Return the GPT-2 text-generation pipeline, building it on first use only."""
    if 'gpt2' not in _TEXT_GENERATOR_CACHE:
        print("Loading generative model...")
        _TEXT_GENERATOR_CACHE['gpt2'] = pipeline('text-generation', model='gpt2')
    return _TEXT_GENERATOR_CACHE['gpt2']


def get_recommendations_and_generate(query_text, k=3):
    """
    Retrieve similar trips from the dataset and generate one new trip idea.

    Args:
        query_text: Free-text description of the desired trip.
        k: Number of nearest neighbours requested from the FAISS index. Only
            the first three hits are returned; missing ones are padded with a
            placeholder entry.

    Returns:
        A 4-tuple ``(rec1, rec2, rec3, generated_text)`` where each ``rec`` is
        a dict with the keys 'dest', 'days' and 'reference_information', and
        ``generated_text`` is the generated candidate most similar to the query.
    """
    # 1. Get recommendations from existing data
    query_vector = np.asarray(embedding_model.encode([query_text]), dtype=np.float32)
    _, indices = index.search(query_vector, k)

    rows = dataset['test']
    results = []
    for raw_idx in indices[0]:
        idx = int(raw_idx)
        # FAISS fills missing neighbours with -1, which would otherwise be
        # read as "last row" by Python indexing; ids beyond the dataset mean
        # the index and the dataset are out of sync. Skip both.
        if not 0 <= idx < len(rows):
            continue
        # Fetch the single row rather than materialising whole columns.
        row = rows[idx]
        results.append({
            "dest": row['dest'],
            "days": row['days'],
            "reference_information": row['reference_information'],
        })

    # The caller always receives exactly three recommendation slots.
    while len(results) < 3:
        results.append({"dest": "No trip plan found", "days":"", "reference_information": ""})

    # 2. Create a prompt for the generative model
    prompt = f"Write a complete travel plan that includes a title and a day-by-day itinerary. The trip must be about: {query_text}."
    generator = _get_text_generator()

    # 3. Generate 10 new, creative trip ideas
    print("Generating 10 synthetic trip ideas...")
    generated_outputs = generator(
        prompt,
        max_new_tokens=250,  # Room for a multi-day plan
        num_return_sequences=10,
        do_sample=True,  # Several distinct sequences require sampling
        pad_token_id=generator.tokenizer.eos_token_id,  # GPT-2 has no pad token
    )

    # 4. Find the best trip out of the 10 generated
    print("Finding the most relevant generated trip...")
    generated_texts = []
    for output in generated_outputs:
        text = output['generated_text']
        # The pipeline echoes the prompt at the start of each sequence.
        if text.startswith(prompt):
            text = text[len(prompt):]
        generated_texts.append(text.strip())

    # Embed all generated texts and compare each one with the user's query.
    generated_embeddings = embedding_model.encode(generated_texts)
    similarities = util.cos_sim(query_vector, generated_embeddings)

    # Pick the candidate with the highest cosine similarity to the query.
    best_index = int(similarities.argmax())
    best_generated_trip = generated_texts[best_index]

    return results[0], results[1], results[2], best_generated_trip
136
+
137
+ # --- 4. GRADIO USER INTERFACE ---
138
def format_trip_plan(trip):
    """
    Format one recommended trip as Markdown.

    Args:
        trip: Dict with the keys 'dest', 'days' and 'reference_information',
            or a falsy value when there is no recommendation.

    Returns:
        A Markdown string with a heading and the formatted plan, or a
        "not found" heading when there is nothing to show.
    """
    # The padding entry used for missing recommendations carries an empty
    # 'reference_information', so test the value rather than the key;
    # otherwise it renders as "-days trip to NO TRIP PLAN FOUND".
    if not trip or not trip.get('reference_information'):
        return "### No similar trip plan found."
    formatted_plan = format_plan_details(trip['reference_information'])
    destination = str(trip.get('dest', '')).upper()
    days = trip.get('days', '')
    return f"### {days}-days trip to {destination}\n**Suggested Plan:**\n{formatted_plan}"
144
+
145
def format_generated_trip(trip_text):
    """Return the generated trip text exactly as received (no formatting is applied)."""
    formatted_text = trip_text
    return formatted_text
147
+
148
def trip_planner_wizard(destination, days):
    """
    Gradio click handler: build a query from the form inputs and format the results.

    Args:
        destination: Text entered in the destination box.
        days: Value of the number-of-days field (may be None when left empty).

    Returns:
        A 4-tuple: three Markdown strings for the dataset recommendations and
        the text of the generated trip.

    Raises:
        gr.Error: If the destination is blank or the number of days is not a
            positive whole number; Gradio shows the message to the user.
    """
    destination = (destination or "").strip()
    if not destination:
        raise gr.Error("Please enter a destination.")

    try:
        # An empty number field arrives as None; NaN/inf cannot be converted.
        days = int(days)
    except (TypeError, ValueError, OverflowError):
        raise gr.Error("Please enter the number of days as a whole number.") from None
    if days < 1:
        raise gr.Error("The number of days must be at least 1.")

    # Combine user inputs into a single query for processing
    query_text = f"a {days}-day trip to {destination}"
    rec1, rec2, rec3, gen_rec_text = get_recommendations_and_generate(query_text)
    return (
        format_trip_plan(rec1),
        format_trip_plan(rec2),
        format_trip_plan(rec3),
        format_generated_trip(gen_rec_text),
    )
154
+
155
end_time = time.time()
print(f"Models and data loaded in {end_time - start_time:.2f} seconds.")

# Gradio Interface: two inputs, one button, three recommendation panels and
# one panel for the generated plan.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# ✈️ TripPlanner AI")
    gr.Markdown("Enter your destination and desired trip length, and get plan recommendations plus a new AI-generated idea!")

    with gr.Row():
        destination_input = gr.Textbox(label="Destination", placeholder="e.g., Paris")
        # precision=0 makes the field deliver whole numbers; minimum=1 rules
        # out zero or negative trip lengths.
        days_input = gr.Number(label="Number of Days", value=3, precision=0, minimum=1)

    with gr.Row():
        submit_btn = gr.Button("Get Trip Plans", variant="primary")

    with gr.Row():
        with gr.Column(scale=2):
            gr.Markdown("### Recommended Trip Plans from Dataset")
            output_rec1 = gr.Markdown()
            output_rec2 = gr.Markdown()
            output_rec3 = gr.Markdown()
        with gr.Column(scale=1):
            gr.Markdown("### ✨ New AI-Generated Idea")
            output_gen = gr.Textbox(label="AI Generated Trip Plan", lines=20, interactive=False)

    submit_btn.click(
        fn=trip_planner_wizard,
        inputs=[destination_input, days_input],
        outputs=[output_rec1, output_rec2, output_rec3, output_gen],
    )

    gr.Examples(
        examples=[
            ["Paris", 3],
            ["Orlando", 7],
            ["Tokyo", 5],
            ["the Greek Islands", 10],
        ],
        inputs=[destination_input, days_input],
    )

# `ssr_mode` is not accepted by every Gradio release (the pinned
# requirements list a 4.x version), so only pass it when this installation's
# launch() supports it; otherwise launch() would fail with a TypeError.
import inspect

launch_options = {}
if "ssr_mode" in inspect.signature(demo.launch).parameters:
    launch_options["ssr_mode"] = False

demo.launch(**launch_options)
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ gradio==4.31.5
2
+ datasets==2.19.1
3
+ sentence-transformers==2.7.0
4
+ faiss-cpu==1.8.0
5
+ transformers==4.41.2
6
+ torch==2.3.1
7
+ pyarrow==16.1.0
8
+ huggingface-hub==0.23.3
trip_index.faiss ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c8d2a2eb720f78a9be02358e0d87e287766a257179553b784831ba7b207c875
3
+ size 3072045