taspol committed on
Commit
1e81c91
·
1 Parent(s): 9027ae3

feat: init repo

Browse files
Files changed (7) hide show
  1. .gitignore +203 -0
  2. app/app.py +69 -0
  3. data_importer.py +99 -0
  4. interface.py +67 -0
  5. plan_mock.json +129 -0
  6. utils/llm_caller.py +196 -0
  7. utils/youtube_extractor.py +32 -0
.gitignore ADDED
@@ -0,0 +1,203 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[codz]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py.cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+ #poetry.toml
110
+
111
+ # pdm
112
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
114
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
115
+ #pdm.lock
116
+ #pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # pixi
121
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
122
+ #pixi.lock
123
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
124
+ # in the .venv directory. It is recommended not to include this directory in version control.
125
+ .pixi
126
+
127
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
128
+ __pypackages__/
129
+
130
+ # Celery stuff
131
+ celerybeat-schedule
132
+ celerybeat.pid
133
+
134
+ # SageMath parsed files
135
+ *.sage.py
136
+
137
+ # Environments
138
+ .env
139
+ .envrc
140
+ .venv
141
+ env/
142
+ venv/
143
+ ENV/
144
+ env.bak/
145
+ venv.bak/
146
+
147
+ # Spyder project settings
148
+ .spyderproject
149
+ .spyproject
150
+
151
+ # Rope project settings
152
+ .ropeproject
153
+
154
+ # mkdocs documentation
155
+ /site
156
+
157
+ # mypy
158
+ .mypy_cache/
159
+ .dmypy.json
160
+ dmypy.json
161
+
162
+ # Pyre type checker
163
+ .pyre/
164
+
165
+ # pytype static type analyzer
166
+ .pytype/
167
+
168
+ # Cython debug symbols
169
+ cython_debug/
170
+
171
+ # PyCharm
172
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
173
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
174
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
175
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
176
+ #.idea/
177
+
178
+ # Abstra
179
+ # Abstra is an AI-powered process automation framework.
180
+ # Ignore directories containing user credentials, local state, and settings.
181
+ # Learn more at https://abstra.io/docs
182
+ .abstra/
183
+
184
+ # Visual Studio Code
185
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
186
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
187
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
188
+ # you could uncomment the following to ignore the entire vscode folder
189
+ # .vscode/
190
+
191
+ # Ruff stuff:
192
+ .ruff_cache/
193
+
194
+ # PyPI configuration file
195
+ .pypirc
196
+
197
+ # Marimo
198
+ marimo/_static/
199
+ marimo/_lsp/
200
+ __marimo__/
201
+
202
+ # Streamlit
203
+ .streamlit/secrets.toml
app/app.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+
3
+ from interface import PlanRequest, PlanResponse, PlanStep, TransportInfo, TripPlan , YoutubeLinkRequest, YoutubeLinkResponse
4
+ from data_importer import DataImporter
5
+ import os
6
+ import json
7
+
8
+ app = FastAPI()
9
+ data_importer = DataImporter()
10
+
11
def load_mock_data(path: str = "plan_mock.json") -> dict:
    """Load mock trip-plan data from a JSON file.

    Args:
        path: File name or path of the JSON document. A relative path is
            looked up first next to this module and then in the module's
            parent directory; an absolute path is used as given.

    Returns:
        The parsed JSON content, or a one-key ``{"error": ...}`` dict when
        the file cannot be found or does not contain valid JSON.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    # The mock file may live beside this module or one level up (repo root).
    candidates = (
        os.path.join(module_dir, path),
        os.path.join(os.path.dirname(module_dir), path),
    )
    for candidate in candidates:
        try:
            with open(candidate, "r", encoding="utf-8") as file:
                return json.load(file)
        except FileNotFoundError:
            continue
        except json.JSONDecodeError as exc:
            print(f"Mock data file is not valid JSON: {exc}")
            return {"error": "Invalid JSON format"}

    print(f"Mock data file not found: {path}")
    return {"error": "Mock data file not found"}
21
+
22
+
23
@app.get("/v1")
def greet_json():
    """Return a fixed greeting payload for ``GET /v1``."""
    greeting = {"Hello": "World!"}
    return greeting
26
+
27
@app.post("/v1/generateTripPlan", response_model=PlanResponse)
def generate_trip_plan(request: PlanRequest):
    """Return a hard-coded sample trip plan that echoes the incoming request.

    The mock file is loaded and printed, but its content is not used in the
    response; every value below is a fixed sample.
    """
    mock_trip_plan = load_mock_data()
    print(mock_trip_plan)

    # Build the response bottom-up: transport -> step -> plan -> response.
    inbound_flight = TransportInfo(
        mode="Plane",
        departure="Your hometown airport",
        arrival="JFK Airport",
        duration_minutes=300,
        price=300.0,
        details="Non-stop flight",
    )
    arrival_step = PlanStep(
        day=1,
        title="Arrival in New York",
        description="Arrive at JFK Airport and check-in at the hotel.",
        transport=inbound_flight,
        map_coordinates={"lat": 40.6413, "lon": -73.7781},
        images=["https://example.com/images/jfk_airport.jpg"],
        tips=["Bring a valid ID", "Confirm your hotel reservation"],
    )
    sample_plan = TripPlan(
        overview="Sample trip overview",
        total_estimated_cost=1000.0,
        steps=[arrival_step],
    )
    return PlanResponse(
        tripOverview="Sample trip overview.",
        query_params=request,
        retrieved_data=[],
        trip_plan=sample_plan,
        meta={"status": "success"},
    )
56
+
57
+ # @app.post("/v1/addYoutubeLink", response_model=YoutubeLinkResponse)
58
+ # def add_youtube_link(request: YoutubeLinkRequest):
59
+ # try:
60
+ # data_importer.insert_from_youtube(request.video_id)
61
+ # except Exception as e:
62
+ # return YoutubeLinkResponse(
63
+ # message="Failed to add YouTube link",
64
+ # video_url=None
65
+ # )
66
+ # return YoutubeLinkResponse(
67
+ # message="add successfully",
68
+ # video_url=f"https://www.youtube.com/watch?v={request.video_id}"
69
+ # )
data_importer.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from utils.youtube_extractor import YoutubeExtractor
2
+ from sentence_transformers import SentenceTransformer
3
+ from qdrant_client import QdrantClient
4
+ from qdrant_client.models import Distance, VectorParams, PointStruct
5
+ from typing import List, Dict, Optional, Union
6
+ import uuid
7
+
8
class DataImporter:
    """Embed texts with the ``BAAI/bge-m3`` model and store/search them in Qdrant."""

    def __init__(self, qdrant_url: str = "http://localhost:6333", collection_name: str = "demo_bge_m3"):
        """Load the embedding model, connect to Qdrant and ensure the collection exists.

        Args:
            qdrant_url: URL of the Qdrant server.
            collection_name: Name of the collection used for all operations.
        """
        self.model = SentenceTransformer("BAAI/bge-m3")
        self.client = QdrantClient(url=qdrant_url)
        self.collection_name = collection_name
        self.youtube_extractor = YoutubeExtractor()

        # Create collection if it doesn't exist
        self._create_collection()

    def _create_collection(self):
        """Create the collection only when it is missing.

        ``recreate_collection`` would drop previously inserted points on every
        start-up, so existence is checked first. Errors are printed, not raised.
        """
        try:
            if self.client.collection_exists(collection_name=self.collection_name):
                print(f"Collection '{self.collection_name}' already exists")
                return
            self.client.create_collection(
                collection_name=self.collection_name,
                vectors_config=VectorParams(size=1024, distance=Distance.COSINE),
            )
            print(f"Collection '{self.collection_name}' created successfully")
        except Exception as e:
            print(f"Error creating collection: {e}")

    def encode_text(self, texts: Union[str, List[str]]) -> List[List[float]]:
        """Return one normalized embedding per input text.

        A single string is treated as a one-element list.
        """
        if isinstance(texts, str):
            texts = [texts]

        embeddings = self.model.encode(texts, normalize_embeddings=True)
        return embeddings.tolist()

    def insert_text(self, text: str, metadata: Optional[Dict] = None, custom_id: Optional[str] = None) -> str:
        """Embed ``text`` and upsert it as a single point.

        Args:
            text: Text to embed; stored in the payload under ``"text"``.
            metadata: Extra payload entries merged on top of ``{"text": text}``.
            custom_id: Point id to use; a random UUID string when omitted.

        Returns:
            The id of the upserted point.
        """
        point_id = custom_id or str(uuid.uuid4())
        embedding = self.encode_text(text)[0]

        payload = {"text": text}
        if metadata:
            payload.update(metadata)

        self.client.upsert(
            collection_name=self.collection_name,
            points=[PointStruct(id=point_id, vector=embedding, payload=payload)],
        )

        print(f"Inserted text with ID: {point_id}")
        return point_id

    def insert_texts(self, texts: List[str], metadata_list: Optional[List[Dict]] = None) -> List[str]:
        """Embed and upsert several texts in one request.

        Args:
            texts: Texts to embed.
            metadata_list: Optional per-text payload extras, matched by index;
                texts beyond its length get no extra metadata.

        Returns:
            The generated point ids, in the order of ``texts``. An empty input
            returns ``[]`` without calling the model or Qdrant.
        """
        if not texts:
            return []

        embeddings = self.encode_text(texts)
        point_ids = [str(uuid.uuid4()) for _ in texts]

        points = []
        for i, (text, embedding, point_id) in enumerate(zip(texts, embeddings, point_ids)):
            payload = {"text": text}
            if metadata_list and i < len(metadata_list):
                payload.update(metadata_list[i])

            points.append(PointStruct(id=point_id, vector=embedding, payload=payload))

        self.client.upsert(collection_name=self.collection_name, points=points)
        print(f"Inserted {len(texts)} texts")
        return point_ids

    def insert_from_youtube(self, video_id: str, metadata: Optional[Dict] = None) -> Optional[str]:
        """Fetch a video's transcript text and insert it as one point.

        Args:
            video_id: YouTube video id.
            metadata: Extra payload entries merged over the default
                ``source`` / ``video_id`` entries.

        Returns:
            The inserted point id, or ``None`` when no transcript text is
            available or any step fails (the error is printed).
        """
        try:
            # Embed the joined transcript text, not the raw transcript entries.
            text = self.youtube_extractor.get_full_text(video_id)
            if not text:
                return None

            video_metadata = {"source": "youtube", "video_id": video_id}
            if metadata:
                video_metadata.update(metadata)

            return self.insert_text(text, video_metadata)
        except Exception as e:
            print(f"Error extracting from YouTube: {e}")
            return None

    def search_similar(self, query: str, limit: int = 5) -> List[Dict]:
        """Return the stored points most similar to ``query``.

        Each result is a dict with ``id``, ``score``, ``text`` and ``metadata``
        (the payload without its ``"text"`` entry).
        """
        query_embedding = self.encode_text(query)[0]

        results = self.client.search(
            collection_name=self.collection_name,
            query_vector=query_embedding,
            limit=limit,
        )

        hits = []
        for result in results:
            payload = result.payload or {}  # a point may carry no payload
            hits.append(
                {
                    "id": result.id,
                    "score": result.score,
                    "text": payload.get("text", ""),
                    "metadata": {k: v for k, v in payload.items() if k != "text"},
                }
            )
        return hits
interface.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, Field
2
+ from typing import List, Optional, Any, Dict
3
+
4
class TripPlanRequest(BaseModel):
    """Pydantic model holding the parameters of a trip-plan request."""
    destination: str
    duration: int  # NOTE(review): unit is not stated here (presumably days) - confirm
    budget: float
    preferences: list[str] = []  # defaults to an empty list
9
+
10
class TripPlanResponse(BaseModel):
    """Pydantic model pairing a status message with a free-form plan dict."""
    message: str
    plan: dict
13
+
14
class YoutubeLinkRequest(BaseModel):
    """Pydantic model carrying the id of a YouTube video."""
    video_id: str
16
+
17
class YoutubeLinkResponse(BaseModel):
    """Pydantic model with a status message and the URL of the video."""
    message: str
    # NOTE(review): declared as a required, non-null str; a caller that wants
    # to report a failure with video_url=None would fail validation - confirm.
    video_url: str
20
+
21
+
22
class PlanRequest(BaseModel):
    """Pydantic model with the inputs for generating a trip plan.

    Only ``start_place`` and ``destination_place`` have no default.
    """
    start_place: str
    destination_place: str
    trip_price: Optional[float] = Field(None, description="Total budget in local currency")
    trip_context: Optional[str] = Field(None, description="e.g. adventure, rest, date")
    trip_duration_days: Optional[int] = 1
    group_size: Optional[int] = 1
    preferences: Optional[List[str]] = None
    top_k: Optional[int] = 3  # NOTE(review): presumably the number of items to retrieve - confirm
31
+
32
+
33
class RetrievedItem(BaseModel):
    """Pydantic model for one retrieved place: id, name, description, score, metadata."""
    place_id: str
    place_name: str
    # NOTE(review): Optional without a default - nullable but still required
    # under pydantic v2; confirm which pydantic major version is targeted.
    description: Optional[str]
    score: float
    metadata: Optional[Dict[str, Any]] = None
39
+
40
class TransportInfo(BaseModel):
    """Pydantic model describing one transport leg of a trip step.

    NOTE(review): every field is ``Optional`` without a default, which makes
    it nullable but still required under pydantic v2 - confirm intent.
    """
    mode: Optional[str]
    departure: Optional[str]
    arrival: Optional[str]
    duration_minutes: Optional[int]
    price: Optional[float]
    details: Optional[str]
47
+
48
class PlanStep(BaseModel):
    """Pydantic model for a single step of a trip plan.

    NOTE(review): every field is ``Optional`` without a default, which makes
    it nullable but still required under pydantic v2 - confirm intent.
    """
    day: Optional[int]
    title: Optional[str]
    description: Optional[str]
    transport: Optional[TransportInfo]
    map_coordinates: Optional[Dict[str, float]]  # string keys mapped to float values
    images: Optional[List[str]]
    tips: Optional[List[str]]
56
+
57
class TripPlan(BaseModel):
    """Pydantic model for a full plan: overview text, estimated cost, list of steps."""
    overview: str
    total_estimated_cost: Optional[float]  # NOTE(review): currency is not stated here - confirm
    steps: List[PlanStep]
61
+
62
class PlanResponse(BaseModel):
    """Pydantic model for a plan response.

    Carries the overview text, the originating request, the retrieved items,
    the generated plan and a free-form ``meta`` dict.
    """
    tripOverview: str
    query_params: PlanRequest
    retrieved_data: List[RetrievedItem]
    trip_plan: TripPlan
    meta: Dict[str, Any]
plan_mock.json ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "tripOverview": {
3
+ "summary": "Adventure trip from Bangkok to Chiang Mai with cultural exploration",
4
+ "regions": ["Thailand", "Chiang Mai"],
5
+ "destination": "Chiang Mai, Thailand",
6
+ "EstimatedCost": "Approximately 10,000 THB per person",
7
+ "durationDays": 5,
8
+ "purpose": "Adventure, cultural experience"
9
+ },
10
+ "milestones": [
11
+ "Bangkok Suvarnabhumi Airport",
12
+ "Chiang Mai International Airport",
13
+ "Doi Suthep-Pui National Park",
14
+ "Chiang Mai Old City",
15
+ "Elephant Nature Park",
16
+ "Chiang Rai Night Bazaar"
17
+ ],
18
+ "transportation": [
19
+ {
20
+ "mode": "Plane",
21
+ "from": "Bangkok Suvarnabhumi Airport",
22
+ "to": "Chiang Mai International Airport",
23
+ "schedule": "2025-09-15T08:00:00Z",
24
+ "price": "1500 THB per person"
25
+ },
26
+ {
27
+ "mode": "Songthaew (Red Truck)",
28
+ "from": "Chiang Mai International Airport",
29
+ "to": "Chiang Mai Old City",
30
+ "schedule": "2025-09-15T11:00:00Z",
31
+ "price": "40 THB per person"
32
+ }
33
+ ],
34
+ "accommodation": [
35
+ {
36
+ "type": "Guesthouse",
37
+ "location": "Chiang Mai Old City",
38
+ "contact": "053-211-111",
39
+ "notes": "Centrally located with air conditioning"
40
+ }
41
+ ],
42
+ "tripRoute": [
43
+ {
44
+ "day": 1,
45
+ "activities": [
46
+ "Arrival at Chiang Mai International Airport",
47
+ "Check-in at guesthouse",
48
+ "Explore Chiang Mai Old City",
49
+ "Visit Wat Phra That Doi Suthep"
50
+ ],
51
+ "walkingRoute": "Old City Moat",
52
+ "signage": "Follow city maps",
53
+ "suggestions": ["Wear comfortable shoes", "Try local street food"],
54
+ "precautions": ["Beware of traffic"]
55
+ },
56
+ {
57
+ "day": 2,
58
+ "activities": [
59
+ "Visit Elephant Nature Park",
60
+ "Participate in elephant conservation activities"
61
+ ],
62
+ "transport": {
63
+ "mode": "Van",
64
+ "from": "Chiang Mai Old City",
65
+ "to": "Elephant Nature Park",
66
+ "price": "800 THB per person",
67
+ "duration_minutes": 60
68
+ },
69
+ "suggestions": ["Wear long sleeves", "Bring sunscreen"],
70
+ "precautions": ["Follow park guidelines"]
71
+ },
72
+ {
73
+ "day": 3,
74
+ "activities": [
75
+ "Trekking in Doi Suthep-Pui National Park",
76
+ "Visit waterfalls and scenic viewpoints"
77
+ ],
78
+ "transport": {
79
+ "mode": "Songthaew (Red Truck)",
80
+ "from": "Chiang Mai Old City",
81
+ "to": "Doi Suthep-Pui National Park",
82
+ "price": "50 THB per person",
83
+ "duration_minutes": 45
84
+ },
85
+ "suggestions": ["Bring water and snacks", "Wear hiking boots"],
86
+ "precautions": ["Beware of slippery trails"]
87
+ },
88
+ {
89
+ "day": 4,
90
+ "activities": [
91
+ "Visit Chiang Rai Night Bazaar",
92
+ "Explore local markets and try street food"
93
+ ],
94
+ "transport": {
95
+ "mode": "Bus",
96
+ "from": "Chiang Mai Arcade Bus Station",
97
+ "to": "Chiang Rai Night Bazaar",
98
+ "price": "200 THB per person",
99
+ "duration_minutes": 180
100
+ },
101
+ "suggestions": ["Bring cash", "Try local delicacies"],
102
+ "precautions": ["Beware of pickpockets"]
103
+ },
104
+ {
105
+ "day": 5,
106
+ "activities": [
107
+ "Return to Bangkok",
108
+ "Departure from Chiang Mai International Airport"
109
+ ],
110
+ "transport": {
111
+ "mode": "Plane",
112
+ "from": "Chiang Mai International Airport",
113
+ "to": "Bangkok Suvarnabhumi Airport",
114
+ "price": "1500 THB per person",
115
+ "duration_minutes": 75
116
+ }
117
+ }
118
+ ],
119
+ "emergencyContacts": {
120
+ "localRangers": "053-211-111",
121
+ "hospital": "Chiang Mai Ram Hospital: 053-211-111",
122
+ "embassy": "Thai Embassy: 02-281-0141"
123
+ },
124
+ "tips": [
125
+ "Always carry a map or use a GPS app",
126
+ "Respect local customs and traditions",
127
+ "Stay hydrated and wear sunscreen"
128
+ ]
129
+ }
utils/llm_caller.py ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import asyncio
3
+ import httpx
4
+ from typing import List, Optional, Dict, Any
5
+ from dataclasses import dataclass
6
+ from qdrant_client import QdrantClient
7
+
8
# Default system prompt for travel questions answered from retrieved context.
SYSTEM_PROMPT = """You are a helpful travel assistant. Use the provided context to answer the user's question about travel destinations and places.
If the context doesn't contain relevant information, say so politely and provide general advice if possible."""
12
@dataclass
class RetrievedItem:
    """One retrieved place: id, name, optional description, score and metadata dict."""
    place_id: str
    place_name: str
    description: Optional[str]
    score: float
    metadata: Dict[str, Any]
19
+
20
class LLMCaller:
    """Retrieve context from Qdrant and query a chat-completions style LLM API."""

    def __init__(self):
        """Read configuration from environment variables and connect to Qdrant."""
        # Environment variables
        self.qdrant_host = os.getenv("QDRANT_HOST", "localhost")
        self.qdrant_api_key = os.getenv("QDRANT_API_KEY", None)
        self.qdrant_collection = os.getenv("QDRANT_COLLECTION", "trip_places")
        self.embedding_dim = int(os.getenv("EMBEDDING_DIM", "1024"))
        self.top_k = int(os.getenv("TOP_K", "6"))

        # LLM configuration
        self.llm_api_url = os.getenv("LLM_API_URL", "https://api.openai.com/v1/chat/completions")
        self.llm_api_key = os.getenv("LLM_API_KEY", "sk-REPLACE_ME")

        # Initialize Qdrant client
        self.qdrant = QdrantClient(
            host=self.qdrant_host,
            api_key=self.qdrant_api_key,
        )

    async def call_llm(self, system_prompt: str, user_prompt: str, max_tokens: int = 512, model: str = "sea-lion-7b-instruct") -> str:
        """
        Call LLM with system and user prompts

        Args:
            system_prompt (str): System message for the LLM
            user_prompt (str): User message/question
            max_tokens (int): Maximum tokens to generate
            model (str): Model to use

        Returns:
            str: LLM response text. On any failure the error is printed and a
            string starting with "Error: Unable to get LLM response" is
            returned instead of raising.
        """
        headers = {
            "Authorization": f"Bearer {self.llm_api_key}",
            "Content-Type": "application/json",
        }
        payload = {
            "model": model,
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            "max_tokens": max_tokens,
            "temperature": 0.7,
        }

        try:
            async with httpx.AsyncClient(timeout=30) as client:
                resp = await client.post(self.llm_api_url, json=payload, headers=headers)
                resp.raise_for_status()
                data = resp.json()

            # Handle OpenAI-like response format
            choices = data.get("choices")
            if choices:
                # "content" can be null in this format; keep the str contract.
                return choices[0]["message"]["content"] or ""

            # Fallback for other formats
            return data.get("text", "")

        except Exception as e:
            print(f"Error calling LLM: {e}")
            return f"Error: Unable to get LLM response - {str(e)}"

    async def query_qdrant(self, query_embedding: List[float], top_k: Optional[int] = None, collection_name: Optional[str] = None) -> List[RetrievedItem]:
        """
        Query Qdrant vector database

        Args:
            query_embedding (List[float]): Query vector embedding
            top_k (int, optional): Number of results to return; falls back to
                ``self.top_k`` when omitted
            collection_name (str, optional): Collection name to query; falls
                back to ``self.qdrant_collection`` when omitted

        Returns:
            List[RetrievedItem]: Retrieved items from Qdrant, or an empty list
            when the search fails (the error is printed)
        """
        top_k = top_k or self.top_k
        collection_name = collection_name or self.qdrant_collection

        def _search():
            try:
                hits = self.qdrant.search(
                    collection_name=collection_name,
                    query_vector=query_embedding,
                    limit=top_k,
                    with_payload=True,
                )

                items: List[RetrievedItem] = []
                for h in hits:
                    payload = h.payload or {}
                    items.append(RetrievedItem(
                        place_id=str(h.id),
                        place_name=payload.get("name") or payload.get("title") or "",
                        description=payload.get("description") or payload.get("summary") or None,
                        score=float(h.score) if h.score is not None else 0.0,
                        metadata=payload,
                    ))
                return items
            except Exception as e:
                print(f"Error querying Qdrant: {e}")
                return []

        # The Qdrant client call is synchronous; run it in a worker thread.
        return await asyncio.to_thread(_search)

    async def rag_query(self, query: str, query_embedding: List[float], system_prompt: Optional[str] = None) -> Dict[str, Any]:
        """
        Answer a question using context retrieved from Qdrant

        Args:
            query (str): User question
            query_embedding (List[float]): Embedding of the question
            system_prompt (str, optional): System message; the module-level
                ``SYSTEM_PROMPT`` is used when omitted or empty

        Returns:
            Dict[str, Any]: ``answer``, ``retrieved_items``, ``context`` and ``query``
        """
        # Retrieve relevant items from Qdrant
        retrieved_items = await self.query_qdrant(query_embedding)

        # Build context from retrieved items
        context_parts = [
            f"- {item.place_name}: {item.description or 'No description available'}"
            for item in retrieved_items
        ]
        context = "\n".join(context_parts) if context_parts else "No relevant information found."

        # Default system prompt if none provided
        if not system_prompt:
            system_prompt = SYSTEM_PROMPT

        # Create user prompt with context
        user_prompt = (
            f"Context:\n{context}\n\n"
            f"Question: {query}\n\n"
            "Please provide a helpful response based on the context above."
        )

        # Get LLM response
        llm_response = await self.call_llm(system_prompt, user_prompt)

        return {
            "answer": llm_response,
            "retrieved_items": retrieved_items,
            "context": context,
            "query": query,
        }

    def update_config(self, **kwargs):
        """
        Update configuration parameters

        Only attributes already set on the instance can be updated; any other
        name (including method names) is reported with a warning. Changing
        ``qdrant_host`` or ``qdrant_api_key`` rebuilds the Qdrant client so the
        new connection settings take effect.

        Args:
            **kwargs: Configuration parameters to update
        """
        connection_changed = False
        for key, value in kwargs.items():
            if key in vars(self):
                setattr(self, key, value)
                if key in ("qdrant_host", "qdrant_api_key"):
                    connection_changed = True
            else:
                print(f"Warning: Unknown configuration parameter: {key}")

        # Respect an explicitly supplied client; otherwise reconnect.
        if connection_changed and "qdrant" not in kwargs:
            self.qdrant = QdrantClient(
                host=self.qdrant_host,
                api_key=self.qdrant_api_key,
            )
170
+
171
+ # Example usage
172
if __name__ == "__main__":
    async def main():
        """Run one RAG query and one direct LLM call, printing both results."""
        caller = LLMCaller()

        # Placeholder vector; a real run needs an actual query embedding.
        dummy_vector = [0.1] * 1024

        rag_result = await caller.rag_query(
            query="What are the best places to visit in Thailand?",
            query_embedding=dummy_vector,
        )
        print("Answer:", rag_result["answer"])
        print(f"Found {len(rag_result['retrieved_items'])} relevant items")

        # Same caller, without retrieval.
        direct_reply = await caller.call_llm(
            system_prompt="You are a helpful assistant.",
            user_prompt="What is the capital of Thailand?",
        )
        print("Direct LLM Response:", direct_reply)

    asyncio.run(main())
utils/youtube_extractor.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from youtube_transcript_api import YouTubeTranscriptApi
2
+ from typing import List, Dict, Optional
3
+
4
class YoutubeExtractor:
    """Fetch YouTube transcripts through ``YouTubeTranscriptApi``."""

    def __init__(self):
        self.ytt_api = YouTubeTranscriptApi()

    def extract_transcript(self, video_id: str) -> Optional[List[Dict]]:
        """Return the transcript of ``video_id`` as a list of dicts.

        The object returned by ``fetch`` is converted with ``to_raw_data()``
        when that method exists, so entries can be read as ``entry['text']``;
        otherwise the fetched entries are returned as a plain list.

        Returns:
            The transcript entries, or ``None`` when fetching or converting
            fails (the error is printed).
        """
        try:
            fetched = self.ytt_api.fetch(video_id)
            to_raw_data = getattr(fetched, "to_raw_data", None)
            if callable(to_raw_data):
                return to_raw_data()
            return list(fetched)
        except Exception as e:
            print(f"An error occurred: {e}")
            return None

    def get_text_only(self, video_id: str) -> Optional[List[str]]:
        """Return only the text of each transcript entry, or ``None`` if unavailable."""
        transcript = self.extract_transcript(video_id)
        if transcript:
            return [entry['text'] for entry in transcript]
        return None

    def get_full_text(self, video_id: str) -> Optional[str]:
        """Return the whole transcript joined by single spaces, or ``None`` if unavailable."""
        text_segments = self.get_text_only(video_id)
        if text_segments:
            return ' '.join(text_segments)
        return None

    def print_transcript(self, video_id: str) -> None:
        """Print every transcript entry's text under a header; print nothing if unavailable."""
        transcript = self.extract_transcript(video_id)
        if transcript:
            print("--- Full Transcript ---")
            for entry in transcript:
                print(entry['text'])