Illia56 commited on
Commit
b70539a
·
verified ·
1 Parent(s): 4d29a9c

Upload 8 files

Browse files
Files changed (9) hide show
  1. .gitattributes +1 -0
  2. Dockerfile +15 -0
  3. config.json +307 -0
  4. docker-compose.yml +19 -0
  5. faiss_index/index.faiss +3 -0
  6. faiss_index/index.pkl +3 -0
  7. main.py +85 -0
  8. rag.py +443 -0
  9. requirements.txt +14 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ faiss_index/index.faiss filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Slim Python 3.11 image that serves the FastAPI app (main:app) with uvicorn.
FROM python:3.11-slim

WORKDIR /app

# Install dependencies before copying the source so Docker's layer cache
# skips the pip install when only application code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

# Create faiss_index directory and ensure proper permissions
# NOTE(review): chmod 777 makes the index world-writable — presumably so a
# non-root runtime user can rebuild the FAISS index; confirm and tighten.
RUN mkdir -p /app/faiss_index && chmod 777 /app/faiss_index

EXPOSE 8000

CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
config.json ADDED
@@ -0,0 +1,307 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cubix_docs": [
3
+ "https://cubixworld.net/autumn-update-2024",
4
+ "https://cubixworld.net/articles/avtokraft-shakhterskogo-lazera-i-almaznogo-bura"
5
+ ],
6
+ "mob_mappings": {
7
+ "зомби": "EntityZombie",
8
+ "скелет": "EntitySkeleton",
9
+ "паук": "EntitySpider",
10
+ "крипер": "EntityCreeper",
11
+ "свинья": "EntityPig",
12
+ "корова": "EntityCow",
13
+ "овца": "EntitySheep",
14
+ "курица": "EntityChicken",
15
+ "волк": "EntityWolf",
16
+ "кошка": "EntityOcelot",
17
+ "лошадь": "EntityHorse",
18
+ "мобы": "Mobs"
19
+ },
20
+ "command_specs": {
21
+ "follow_me": {
22
+ "description": "Команда для следования за игроком или перемещения к игроку",
23
+ "required_fields": ["type", "message", "repeat"],
24
+ "examples": [
25
+ {
26
+ "input": "Следуй за мной",
27
+ "output": {"type": "follow_me", "message": "Хорошо, я буду следовать за вами", "repeat": true}
28
+ },
29
+ {
30
+ "input": "Приди ко мне",
31
+ "output": {"type": "follow_me", "message": "Сейчас подойду к вам", "repeat": false}
32
+ },
33
+ {
34
+ "input": "Следуй за мной и убивай встречных зомби",
35
+ "output": {"type": "follow_me", "message": "Хорошо, я буду следовать за вами и убивать встречных зомби", "repeat": true, "secondary_action": {"type": "kill_mob", "mob": "EntityZombie", "repeat": true}}
36
+ },
37
+ {
38
+ "input": "Иди за мной и добывай камень",
39
+ "output": {"type": "follow_me", "message": "Хорошо, я буду следовать за вами и добывать камень", "repeat": true, "secondary_action": {"type": "break_block", "blocks": ["minecraft:stone"], "repeat": true}}
40
+ }
41
+ ]
42
+ },
43
+ "message": {
44
+ "description": "Обычное сообщение в чате или запоминание обращения",
45
+ "required_fields": ["type", "message"],
46
+ "examples": [
47
+ {
48
+ "input": "Привет, как дела?",
49
+ "output": {"type": "message", "message": "Привет! У меня всё хорошо, готов помочь вам в игре!"}
50
+ },
51
+ {
52
+ "input": "Называй меня лучший друг",
53
+ "output": {"type": "message", "message": "Хорошо, буду называть вас лучший друг!"}
54
+ }
55
+ ]
56
+ },
57
+ "kill_mob": {
58
+ "description": "Команда для убийства мобов или животных. Поле mob должно содержать entity ID моба из списка доступных.",
59
+ "required_fields": ["type", "message", "repeat", "mob"],
60
+ "examples": [
61
+ {
62
+ "input": "Убей зомби",
63
+ "output": {"type": "kill_mob", "message": "Хорошо, я иду убивать зомби", "repeat": false, "mob": "EntityZombie"}
64
+ },
65
+ {
66
+ "input": "Убивай мобов",
67
+ "output": {"type": "kill_mob", "message": "Хорошо, я буду убивать мобов", "repeat": true, "mob": "Mobs"}
68
+ },
69
+ {
70
+ "input": "Убивай скелетов",
71
+ "output": {"type": "kill_mob", "message": "Хорошо, я буду убивать скелетов", "repeat": true, "mob": "EntitySkeleton"}
72
+ },
73
+ {
74
+ "input": "Убей всех свиней",
75
+ "output": {"type": "kill_mob", "message": "Хорошо, я убью всех свиней", "repeat": false, "mob": "EntityPig"}
76
+ }
77
+ ]
78
+ },
79
+ "break_tree": {
80
+ "description": "Команда для ломания деревьев",
81
+ "required_fields": ["type", "message", "repeat", "quantityTrees", "quantity", "blocks"],
82
+ "examples": [
83
+ {
84
+ "input": "Сломай дерево",
85
+ "output": {"type": "break_tree", "message": "Хорошо, я сломаю дерево", "repeat": false, "quantityTrees": 1, "quantity": 0, "blocks": []}
86
+ },
87
+ {
88
+ "input": "Добудь дерева",
89
+ "output": {"type": "break_tree", "message": "Хорошо, я буду добывать дерево", "repeat": true, "quantityTrees": 0, "quantity": 0, "blocks": []}
90
+ },
91
+ {
92
+ "input": "Добудь 10 деревьев",
93
+ "output": {"type": "break_tree", "message": "Хорошо, я добуду 10 деревьев", "repeat": false, "quantityTrees": 10, "quantity": 0, "blocks": []}
94
+ },
95
+ {
96
+ "input": "Добудь 50 блоков дуба",
97
+ "output": {"type": "break_tree", "message": "Хорошо, я добуду 50 блоков дуба", "repeat": false, "quantityTrees": 0, "quantity": 50, "blocks": ["minecraft:oak_log"]}
98
+ }
99
+ ]
100
+ },
101
+ "harvest_crops": {
102
+ "description": "Команда для сбора урожая",
103
+ "required_fields": ["type", "message", "repeat", "harvest"],
104
+ "examples": [
105
+ {
106
+ "input": "Собери урожай",
107
+ "output": {"type": "harvest_crops", "message": "Хорошо, я соберу урожай", "repeat": false, "harvest": []}
108
+ },
109
+ {
110
+ "input": "Собирай пшеницу",
111
+ "output": {"type": "harvest_crops", "message": "Хорошо, я буду собирать пшеницу", "repeat": true, "harvest": ["пшеница"]}
112
+ },
113
+ {
114
+ "input": "Собирай картофель и морковь",
115
+ "output": {"type": "harvest_crops", "message": "Хорошо, я буду собирать картофель и морковь", "repeat": true, "harvest": ["картофель", "морковь"]}
116
+ }
117
+ ]
118
+ },
119
+ "break_block": {
120
+ "description": "Команда для ломания блоков",
121
+ "required_fields": ["type", "message", "repeat", "blocks", "quantity"],
122
+ "examples": [
123
+ {
124
+ "input": "Сломай землю",
125
+ "output": {"type": "break_block", "message": "Хорошо, я сломаю землю", "repeat": false, "blocks": ["minecraft:dirt"], "quantity": 1, "location": ""}
126
+ },
127
+ {
128
+ "input": "Сломай 20 земли",
129
+ "output": {"type": "break_block", "message": "Хорошо, я сломаю 20 блоков земли", "repeat": false, "quantity": 20, "blocks": ["minecraft:dirt"], "location": ""}
130
+ },
131
+ {
132
+ "input": "Копай камень",
133
+ "output": {"type": "break_block", "message": "Хорошо, я буду копать камень", "repeat": true, "blocks": ["minecraft:stone"], "quantity": 0, "location": ""}
134
+ },
135
+ {
136
+ "input": "Копай вниз",
137
+ "output": {"type": "break_block", "message": "Хорошо, я буду копать вниз", "repeat": true, "blocks": [], "quantity": 0, "location": "вниз"}
138
+ },
139
+ {
140
+ "input": "Добудь руду на севере",
141
+ "output": {"type": "break_block", "message": "Хорошо, я добуду руду на севере", "repeat": false, "blocks": [], "quantity": 0, "location": "на севере"}
142
+ }
143
+ ]
144
+ },
145
+ "cycle_break_block": {
146
+ "description": "Команда для цикличного ломания блока на координатах игрока",
147
+ "required_fields": ["type", "message"],
148
+ "examples": [
149
+ {
150
+ "input": "Копай вот здесь постоянно",
151
+ "output": {"type": "cycle_break_block", "message": "Буду копать на этом месте"}
152
+ },
153
+ {
154
+ "input": "Ломай блок под собой",
155
+ "output": {"type": "cycle_break_block", "message": "Буду ломать блок в этом месте"}
156
+ }
157
+ ]
158
+ },
159
+ "fishing": {
160
+ "description": "Команда для ловли рыбы",
161
+ "required_fields": ["type", "repeat", "quantity", "message"],
162
+ "examples": [
163
+ {
164
+ "input": "Слови рыбу",
165
+ "output": {"type": "fishing", "repeat": false, "quantity": 1, "message": "Хорошо, я поймаю рыбу"}
166
+ },
167
+ {
168
+ "input": "Лови рыбу",
169
+ "output": {"type": "fishing", "repeat": true, "quantity": 0, "message": "Хорошо, я буду ловить рыбу"}
170
+ },
171
+ {
172
+ "input": "Слови 10 рыб",
173
+ "output": {"type": "fishing", "repeat": false, "quantity": 10, "message": "Хорошо, я поймаю 10 рыб"}
174
+ }
175
+ ]
176
+ },
177
+ "shear_wool": {
178
+ "description": "Команда для стрижки овец",
179
+ "required_fields": ["type", "repeat", "quantity", "message"],
180
+ "examples": [
181
+ {
182
+ "input": "Постриги овец",
183
+ "output": {"type": "shear_wool", "repeat": false, "quantity": 0, "message": "Хорошо, я постригу овец"}
184
+ },
185
+ {
186
+ "input": "Стриги овец",
187
+ "output": {"type": "shear_wool", "repeat": true, "quantity": 0, "message": "Хорошо, я буду стричь овец"}
188
+ },
189
+ {
190
+ "input": "Постриги 10 овец",
191
+ "output": {"type": "shear_wool", "repeat": false, "quantity": 10, "message": "Хорошо, я постригу 10 овец"}
192
+ }
193
+ ]
194
+ },
195
+ "lighting": {
196
+ "description": "Команда для освещения или размещения источников света",
197
+ "required_fields": ["type", "message", "quantity"],
198
+ "examples": [
199
+ {
200
+ "input": "Подсвети мне",
201
+ "output": {"type": "lighting", "message": "Хорошо, я буду освещать путь", "quantity": 0}
202
+ },
203
+ {
204
+ "input": "Освещай",
205
+ "output": {"type": "lighting", "message": "Включаю освещение", "quantity": 0}
206
+ },
207
+ {
208
+ "input": "Поставь 10 факелов",
209
+ "output": {"type": "lighting", "message": "Хорошо, я поставлю 10 факелов", "quantity": 10}
210
+ },
211
+ {
212
+ "input": "Расставь факелы вокруг дома",
213
+ "output": {"type": "lighting", "message": "Хорошо, я расставлю факелы вокруг дома", "quantity": 0}
214
+ }
215
+ ]
216
+ },
217
+ "stop": {
218
+ "description": "Команда для остановки действий бота",
219
+ "required_fields": ["type", "message"],
220
+ "examples": [
221
+ {
222
+ "input": "Остановись",
223
+ "output": {"type": "stop", "message": "Я остановился"}
224
+ },
225
+ {
226
+ "input": "Прекрати убивать зомби",
227
+ "output": {"type": "stop", "message": "Я прекратил убивать зомби"}
228
+ },
229
+ {
230
+ "input": "Хватит копать",
231
+ "output": {"type": "stop", "message": "Я прекратил копать"}
232
+ }
233
+ ]
234
+ }
235
+ },
236
+ "system_prompt_template": {
237
+ "intro": "You are an AI assistant for the Minecraft game server called CubixWorld. Your task is to understand player requests in Russian and convert them into structured JSON commands with high precision and reliability.",
238
+ "role_section": [
239
+ "Analyze player messages carefully to determine their exact intent and required actions",
240
+ "Convert natural language requests into precise JSON commands following the exact specifications",
241
+ "Ensure all command parameters are validated and properly formatted",
242
+ "Respond with valid JSON only, maintaining strict schema compliance",
243
+ "When uncertain about intent, prefer message responses over incorrect commands",
244
+ "For conversational messages like greetings, thanks, or jokes, respond in a friendly and engaging manner"
245
+ ],
246
+ "command_detection_guidelines": [
247
+ "Carefully analyze verb forms and temporal indicators to determine command type:",
248
+ "- Continuous actions: \"продолжай\", \"постоянно\", verbs with \"-ай\" suffix",
249
+ "- One-time commands: \"сделай\", \"сломай\", \"приди\"",
250
+ "For compound actions, identify primary and secondary components clearly",
251
+ "Extract and validate all numeric quantities before including in commands",
252
+ "Process spatial and directional information with high precision",
253
+ "For conversational inputs (greetings, thanks, jokes, etc.), use the 'message' type response"
254
+ ],
255
+ "contextual_information_processing": [
256
+ "Prioritize exact matches from documentation over general knowledge",
257
+ "Cross-reference game mechanics with server-specific features",
258
+ "Use precise entity IDs as specified in mob mappings",
259
+ "When documentation provides specific command formats, follow them exactly",
260
+ "Consider server limitations and restrictions when generating commands"
261
+ ],
262
+ "response_format": [
263
+ "Ensure all JSON responses are properly nested and formatted",
264
+ "Include all required fields as specified in command schemas",
265
+ "Validate numeric values are within acceptable ranges",
266
+ "Use consistent casing and formatting for entity IDs and commands",
267
+ "Include relevant metadata and context in responses when available"
268
+ ],
269
+ "conversation_handling": [
270
+ "For greetings (привет, здравствуй, etc.), respond with a friendly greeting and offer to help",
271
+ "For questions about your status (как дела, как ты, etc.), respond positively and ask how you can assist",
272
+ "For expressions of gratitude (спасибо, благодарю, etc.), acknowledge with a friendly response",
273
+ "For requests for jokes or fun content, respond with a Minecraft-themed joke or fun fact",
274
+ "Always maintain a helpful, friendly tone appropriate for a game assistant",
275
+ "Support both Russian and Ukrainian language inputs with appropriate responses"
276
+ ],
277
+ "rag_processing_instructions": [
278
+ "When contextual information is provided, carefully analyze and extract specific mechanics, rules, or features unique to CubixWorld",
279
+ "Prioritize recently retrieved information over older knowledge when they conflict",
280
+ "When working with information from multiple sources, synthesize a coherent understanding rather than just concatenating facts",
281
+ "For highly technical queries, extract and include specific numbers, formulas, or technical details from the retrieved content",
282
+ "Match the terminology used in the retrieved documentation when formulating responses",
283
+ "If contextual information appears incomplete or contradictory, acknowledge the limitations in your response",
284
+ "When processing game update information, clearly distinguish between new features and pre-existing ones",
285
+ "Apply different weights to different sources based on relevance score - prioritize higher scoring sources",
286
+ "For related items mentioned in context (like tools, weapons, or materials), include their relationships in your understanding",
287
+ "Always cite specific game mechanics exactly as described in the documentation rather than making assumptions"
288
+ ],
289
+ "ambiguity_handling": [
290
+ "For ambiguous commands, identify the specific ambiguity and propose the most likely interpretation",
291
+ "When a player uses terminology not found in documentation, map to the closest documented concept",
292
+ "For requests that could map to multiple command types, analyze verb usage and context to determine the most appropriate one",
293
+ "If a request contains conflicting parameters, prioritize the most recently stated ones",
294
+ "For vague spatial references, default to the player's current location or field of view",
295
+ "When uncertain about quantities, default to 1 for singular references and continuous action for plural ones",
296
+ "For unprecedented or novel requests, combine existing command structures in logical ways rather than rejecting outright"
297
+ ],
298
+ "token_optimization": [
299
+ "Focus on the most relevant contextual information, ignoring tangential details",
300
+ "Maintain brevity in responses while ensuring all required information is included",
301
+ "When processing documentation, prioritize sections that directly address the current query",
302
+ "For complex multi-part requests, break down processing into logical components",
303
+ "Use concise language in responses while maintaining clarity and friendliness",
304
+ "When multiple similar contextual examples exist, focus on the closest matching ones"
305
+ ]
306
+ }
307
+ }
docker-compose.yml ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: '3.8'
2
+
3
+ services:
4
+ app:
5
+ build: .
6
+ ports:
7
+ - "8000:8000"
8
+ volumes:
9
+ - .:/app
10
+ command: uvicorn main:app --host 0.0.0.0 --port 8000 --reload
11
+ environment:
12
+ - ENVIRONMENT=development
13
+ - OPENAI_API_KEY=${OPENAI_API_KEY}
14
+ env_file:
15
+ - .env
16
+
17
+ # NOTE: the named volume below is declared but never mounted by the service above (which bind-mounts .:/app) — wire it into the service or remove it
18
+ volumes:
19
+ faiss_data:
faiss_index/index.faiss ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65ffa3256b20d95302c95fd3c367fbe3bf95f2e4a83f32513d59bb6e4a098cf0
3
+ size 620589
faiss_index/index.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b2d64ba81316d3d7e0bf2fb83fc120ecac92c2ddae5ce12c8be1fd37803e97d
3
+ size 78654
main.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException
2
+ from pydantic import BaseModel
3
+ from datetime import datetime
4
+ import os
5
+ from dotenv import load_dotenv
6
+ from typing import List, Optional
7
+ from uuid import uuid4
8
+
9
+ from rag import RAGSystem, load_config
10
+
11
# Load environment variables from a local .env file (if present)
load_dotenv()

# Load configuration shared with the RAG module (config.json)
CONFIG = load_config()
MOB_MAPPINGS = CONFIG['mob_mappings']

# Get API key — fail fast at startup rather than on the first request
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    raise ValueError("OPENAI_API_KEY environment variable is not set")

# Initialize FastAPI app
app = FastAPI(
    title="CubixAI API",
    description="API for CubixAI - Minecraft bot with AI capabilities",
    version="0.1.0",
)

# Initialize RAG system (loads or rebuilds the FAISS index at import time,
# so startup may be slow on first run)
rag_system = RAGSystem(openai_api_key=OPENAI_API_KEY)
32
+
33
# Status response model returned by GET /status
class StatusResponse(BaseModel):
    status: str     # e.g. "online"
    version: str    # mirrors app.version
    timestamp: str  # ISO-8601 server time
38
+
39
# Message request model accepted by POST /process_message
class MessageRequest(BaseModel):
    message: str                   # player's chat message (Russian, per config examples)
    user_id: Optional[str] = None  # omit to start a fresh conversation (a UUID is generated)
43
+
44
# Document URL model
# NOTE(review): not referenced by any endpoint visible in this file —
# presumably intended for a future document-ingestion route; confirm before removing.
class DocumentURLs(BaseModel):
    urls: List[str]  # web pages to ingest into the knowledge base
47
+
48
@app.get("/")
def read_root():
    """Root endpooint — trivial liveness response.

    BUG FIX: the original wrote ``app.get("/")`` without the leading ``@``,
    which merely *called* the decorator factory and discarded the result, so
    the route was never registered and ``read_root`` was left undecorated.
    """
    return {"Hello": "World"}
51
+
52
@app.get("/status", response_model=StatusResponse)
async def get_status():
    """Report that the API is up, with its version and the current server time."""
    payload = {
        "status": "online",
        "version": app.version,
        "timestamp": datetime.now().isoformat(),
    }
    return StatusResponse(**payload)
60
+
61
@app.post("/process_message")
async def process_message(request: MessageRequest):
    """Process a message from a player and generate a structured JSON response."""
    # Fall back to a fresh UUID when the caller did not supply a user id.
    uid = request.user_id if request.user_id else str(uuid4())

    result = rag_system.generate_response(uid, request.message)

    # Attach the user id (and nothing else) so the client can continue
    # the same conversation on subsequent calls.
    if isinstance(result, dict):
        result["user_id"] = uid

    return result
72
+
73
@app.get("/mob_mappings")
async def get_mob_mappings():
    """Get the mapping of Russian mob names to Minecraft entity IDs (from config.json)."""
    return MOB_MAPPINGS
77
+
78
@app.get("/command_specs")
async def get_command_specs():
    """Get the command specifications (schemas plus examples) loaded from config.json."""
    return CONFIG['command_specs']
82
+
83
# Dev entry point: `python main.py` runs uvicorn with auto-reload;
# the Docker image starts uvicorn via its CMD instead.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True)
rag.py ADDED
@@ -0,0 +1,443 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_openai import OpenAIEmbeddings
2
+ from langchain_openai import AzureOpenAIEmbeddings
3
+ from langchain_community.vectorstores import FAISS
4
+ from langchain_community.document_loaders import WebBaseLoader
5
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
6
+ from langchain_openai import ChatOpenAI
7
+ from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
8
+ from langchain_community.callbacks import get_openai_callback
9
+ from typing import List, Dict, Any
10
+ import json
11
+ import os
12
+ import re
13
+ import shutil
14
+
15
+ # Load configuration from JSON file
16
def load_config():
    """Read config.json (located next to this module) and return it as a dict."""
    here = os.path.dirname(__file__)
    with open(os.path.join(here, 'config.json'), 'r', encoding='utf-8') as fh:
        return json.load(fh)
20
+
21
# Load configuration once at import time and expose the sections used
# throughout this module.
CONFIG = load_config()
CUBIX_DOCS = CONFIG['cubix_docs']            # URLs seeding the knowledge base
MOB_MAPPINGS = CONFIG['mob_mappings']        # Russian mob name -> Minecraft entity ID
COMMAND_SPECS = CONFIG['command_specs']      # JSON command schemas + few-shot examples
SYSTEM_PROMPT_TEMPLATE = CONFIG['system_prompt_template']  # prompt building blocks
27
+
28
+ class RAGSystem:
29
    def __init__(self, openai_api_key: str):
        """Set up embedding and chat clients plus per-user state, then build or load the index.

        Args:
            openai_api_key: OpenAI API key used for both embeddings and chat.
        """
        self.openai_api_key = openai_api_key
        self.embeddings = OpenAIEmbeddings(
            openai_api_key=openai_api_key,
            model="text-embedding-3-large",
            dimensions=1536, # Explicitly setting dimensions for consistency
            show_progress_bar=True
        )
        # FAISS vector store; populated by initialize_knowledge_base() below.
        self.document_store = None
        # user_id -> list of LangChain message objects (conversation history).
        self.user_conversations = {}
        self.model = ChatOpenAI(
            openai_api_key=openai_api_key,
            model_name="gpt-4o-mini",
            # Low temperature: responses are expected to be strict JSON commands.
            temperature=0.1
        )
        self.documents_loaded = False # Add this line to track document loading
        self.initialize_knowledge_base()
46
+
47
    def initialize_knowledge_base(self):
        """Load the on-disk FAISS index if present, otherwise build a new one.

        Idempotent: returns immediately if documents were already loaded.
        Any load failure falls back to rebuilding from the configured URLs.
        """
        if self.documents_loaded: # Check if documents are already loaded
            print("Documents have already been loaded.")
            return

        try:
            # First check if index exists
            if not os.path.exists("faiss_index"):
                print("No existing knowledge base found. Creating new one...")
                self.create_new_knowledge_base()
                self.documents_loaded = True # Set flag to True after loading
                return

            try:
                # Try to load the existing index.
                # allow_dangerous_deserialization: the index .pkl is unpickled,
                # so this trusts the local faiss_index directory contents.
                self.document_store = FAISS.load_local(
                    "faiss_index",
                    self.embeddings,
                    allow_dangerous_deserialization=True
                )

                # Verify by running a test query
                self.document_store.similarity_search_with_score("test query", k=1)
                print("Loaded existing knowledge base.")
            except AssertionError:
                # Dimension mismatch detected — presumably FAISS asserts when the
                # stored index dimension differs from the embedding dimension;
                # rebuild rather than serve broken search results.
                print("Embedding dimensions mismatch detected. Rebuilding knowledge base...")
                self.create_new_knowledge_base(backup_old=True)
            except Exception as e:
                # Other loading errors (corrupt files, version skew, ...)
                print(f"Error loading knowledge base: {e}")
                self.create_new_knowledge_base(backup_old=True)

            self.documents_loaded = True # Set flag to True after loading

        except Exception as e:
            # Last-resort fallback; NOTE(review): documents_loaded is not set
            # on this path, so a later call will retry initialization.
            print(f"Error during knowledge base initialization: {e}")
            self.create_new_knowledge_base()
86
+
87
+ def create_new_knowledge_base(self, backup_old=False):
88
+ """Create a new knowledge base from scratch"""
89
+ # Backup old index if needed
90
+ if backup_old and os.path.exists("faiss_index"):
91
+ backup_dir = f"faiss_index"
92
+ shutil.move("faiss_index", backup_dir)
93
+ print(f"Old index backed up")
94
+
95
+ # Create directory if needed
96
+ os.makedirs("faiss_index", exist_ok=True)
97
+
98
+ # Load documents from web
99
+ docs = []
100
+ for url in CUBIX_DOCS:
101
+ try:
102
+ loader = WebBaseLoader(url)
103
+ docs.extend(loader.load())
104
+ print(f"Loaded document from {url}")
105
+ except Exception as e:
106
+ print(f"Error loading document from {url}: {e}")
107
+
108
+ if not docs:
109
+ print("No documents could be loaded.")
110
+ # Initialize empty vector store
111
+ self.document_store = FAISS.from_texts(["placeholder"], self.embeddings)
112
+ return
113
+
114
+ # Split documents
115
+ text_splitter = RecursiveCharacterTextSplitter(
116
+ chunk_size=500, # Increased from 150 for better context
117
+ chunk_overlap=50, # Added overlap to prevent breaking important context
118
+ separators=["\n\n", "\n", " ", ""], # More intelligent splitting
119
+ length_function=len
120
+ )
121
+ split_docs = text_splitter.split_documents(docs)
122
+
123
+ # Create new vector store from scratch
124
+ self.document_store = FAISS.from_documents(split_docs, self.embeddings)
125
+
126
+ try:
127
+ # Save the new index
128
+ self.document_store.save_local("faiss_index")
129
+ print("Successfully saved new knowledge base to faiss_index directory")
130
+ except Exception as e:
131
+ print(f"Error saving knowledge base: {e}")
132
+
133
+ def add_web_documents(self, urls: List[str]):
134
+ """Add web documents to the knowledge base"""
135
+ if not urls:
136
+ return False
137
+
138
+ # Load documents from web
139
+ docs = []
140
+ for url in urls:
141
+ try:
142
+ print(f"Loading document from {url}")
143
+ loader = WebBaseLoader(url)
144
+ web_docs = loader.load()
145
+
146
+ # Clean and preprocess documents
147
+ for doc in web_docs:
148
+ # Clean HTML artifacts and normalize whitespace
149
+ content = doc.page_content
150
+ content = re.sub(r'\s+', ' ', content) # Normalize whitespace
151
+ content = re.sub(r'<[^>]+>', ' ', content) # Remove HTML tags
152
+ content = re.sub(r'\[.*?\]', '', content) # Remove square brackets content
153
+
154
+ # Update document content
155
+ doc.page_content = content.strip()
156
+
157
+ # Add source URL to metadata if not present
158
+ if 'source' not in doc.metadata:
159
+ doc.metadata['source'] = url
160
+
161
+ docs.extend(web_docs)
162
+ print(f"Successfully loaded document from {url}")
163
+ except Exception as e:
164
+ print(f"Error loading document from {url}: {e}")
165
+
166
+ if not docs:
167
+ print("No documents could be loaded.")
168
+ return False
169
+
170
+ # Split documents
171
+ text_splitter = RecursiveCharacterTextSplitter(
172
+ chunk_size=500, # Increased from 150 for better context
173
+ chunk_overlap=50, # Added overlap to prevent breaking important context
174
+ separators=["\n\n", "\n", " ", ""], # More intelligent splitting
175
+ length_function=len
176
+ )
177
+ split_docs = text_splitter.split_documents(docs)
178
+
179
+ # Create or update vector store
180
+ if self.document_store is None:
181
+ self.document_store = FAISS.from_documents(split_docs, self.embeddings)
182
+ else:
183
+ # Add documents to existing store
184
+ self.document_store.add_documents(split_docs)
185
+
186
+ try:
187
+ # Save the updated index
188
+ self.document_store.save_local("faiss_index")
189
+ print(f"Added {len(split_docs)} document chunks to the knowledge base")
190
+ return True
191
+ except Exception as e:
192
+ print(f"Error saving index: {e}")
193
+ return False
194
+
195
+ def generate_response(self, user_id: str, message: str) -> Dict[str, Any]:
196
+ """Generate a response for a user message."""
197
+ if user_id not in self.user_conversations:
198
+ self.user_conversations[user_id] = []
199
+
200
+ # Clean and normalize the input message
201
+ message = message.strip()
202
+
203
+ # Add the new message using proper LangChain message object
204
+ self.user_conversations[user_id].append(HumanMessage(content=message))
205
+
206
+ # Extract relevant context with improved search parameters
207
+ relevant_context = ""
208
+ context_docs = []
209
+ if self.document_store:
210
+ try:
211
+ # Enhanced retrieval strategy - hybrid search approach
212
+ # First try with higher relevance score threshold for more accurate results
213
+ results = self.document_store.similarity_search_with_score(
214
+ message,
215
+ k=4, # Increased from 3 for better coverage
216
+ score_threshold=0.75 # Slightly relaxed from 0.8 for better recall
217
+ )
218
+
219
+ if results:
220
+ # Sort by score and take top results
221
+ results.sort(key=lambda x: x[1], reverse=True)
222
+
223
+ # Format with scores and metadata for better context
224
+ formatted_results = []
225
+ for doc, score in results:
226
+ if score > 0.75: # Only include relevance context
227
+ # Extract source for better attribution
228
+ source = doc.metadata.get('source', 'Unknown source')
229
+ if isinstance(source, str) and source.startswith('http'):
230
+ source = source.split('/')[-1] if '/' in source else source
231
+
232
+ # Apply intelligent content trimming - keep key information while reducing token usage
233
+ content = doc.page_content
234
+
235
+ # Advanced cleaning: remove redundant whitespace and normalize
236
+ content = re.sub(r'\s+', ' ', content).strip()
237
+
238
+ # Limit content length based on relevance score - higher relevance gets more tokens
239
+ max_length = int(min(800 + (score * 400), 1200)) # Dynamic length based on relevance
240
+ if len(content) > max_length:
241
+ content = content[:max_length] + "..."
242
+
243
+ context = {
244
+ 'content': content,
245
+ 'score': score,
246
+ 'source': source,
247
+ 'metadata': doc.metadata if hasattr(doc, 'metadata') else {}
248
+ }
249
+ context_docs.append(context)
250
+ formatted_results.append(f"SOURCE: {source}\nRELEVANCE: {score:.2f}\n{content}")
251
+
252
+ if formatted_results:
253
+ relevant_context = "\n\n---\n\n".join(formatted_results)
254
+
255
+ # If no high-relevance results found, try broader search
256
+ if not relevant_context:
257
+ results = self.document_store.similarity_search(
258
+ message,
259
+ k=3 # Increased from 2 for better coverage while keeping focus
260
+ )
261
+ if results:
262
+ formatted_results = []
263
+ for doc in results:
264
+ # Extract source for better attribution
265
+ source = doc.metadata.get('source', 'Unknown source')
266
+ if isinstance(source, str) and source.startswith('http'):
267
+ source = source.split('/')[-1] if '/' in source else source
268
+
269
+ # Truncate content if too long (token optimization)
270
+ content = doc.page_content
271
+ if len(content) > 800: # Shorter for fallback results
272
+ content = content[:800] + "..."
273
+
274
+ formatted_results.append(f"SOURCE: {source}\n{content}")
275
+
276
+ relevant_context = "\n\n---\n\n".join(formatted_results)
277
+ except Exception as e:
278
+ print(f"Error during context retrieval: {e}")
279
+
280
+ # Build command specifications for the system prompt
281
+ command_specs_text = "# Available Commands and Required Fields\n\n"
282
+
283
+ for cmd_name, cmd_spec in COMMAND_SPECS.items():
284
+ command_specs_text += f"## {cmd_name}\n"
285
+ command_specs_text += f"Description: {cmd_spec['description']}\n"
286
+ command_specs_text += "Required fields:\n"
287
+
288
+ for field in cmd_spec['required_fields']:
289
+ command_specs_text += f"- {field}\n"
290
+
291
+ command_specs_text += "Examples:\n"
292
+ for example in cmd_spec['examples']:
293
+ command_specs_text += f"Query: \"{example['input']}\"\n"
294
+ command_specs_text += f"Response: {json.dumps(example['output'], ensure_ascii=False)}\n\n"
295
+
296
+ # Add mob mappings information
297
+ mob_mappings_text = "# Entity IDs for Minecraft Mobs\n\nWhen processing kill_mob commands, use these entity IDs in the 'mob' field:\n\n"
298
+ for mob_name, entity_id in MOB_MAPPINGS.items():
299
+ mob_mappings_text += f"- {mob_name}: {entity_id}\n"
300
+ mob_mappings_text += "\nWhen a player mentions a mob, always map it to the corresponding entity ID in your response.\n\n"
301
+
302
+ # Build system prompt from template
303
+ system_prompt = SYSTEM_PROMPT_TEMPLATE['intro'] + "\n\n"
304
+
305
+ # Add role section
306
+ system_prompt += "## Your Role\n"
307
+ for item in SYSTEM_PROMPT_TEMPLATE['role_section']:
308
+ system_prompt += f"- {item}\n"
309
+
310
+ # Add command specs
311
+ system_prompt += f"\n{command_specs_text}\n"
312
+
313
+ # Add mob mappings
314
+ system_prompt += f"\n{mob_mappings_text}\n"
315
+
316
+ # Add command detection guidelines
317
+ system_prompt += "\n## Command Detection Guidelines\n"
318
+ for item in SYSTEM_PROMPT_TEMPLATE['command_detection_guidelines']:
319
+ system_prompt += f"- {item}\n"
320
+
321
+ # Add contextual information processing
322
+ system_prompt += "\n## Contextual Information Processing\n"
323
+ for item in SYSTEM_PROMPT_TEMPLATE['contextual_information_processing']:
324
+ system_prompt += f"- {item}\n"
325
+
326
+ # Add response format
327
+ system_prompt += "\n## Response Format\n"
328
+ for item in SYSTEM_PROMPT_TEMPLATE['response_format']:
329
+ system_prompt += f"- {item}\n"
330
+
331
+ # Add conversation handling instructions
332
+ if 'conversation_handling' in SYSTEM_PROMPT_TEMPLATE:
333
+ system_prompt += "\n## Conversation Handling\n"
334
+ for item in SYSTEM_PROMPT_TEMPLATE['conversation_handling']:
335
+ system_prompt += f"- {item}\n"
336
+
337
+ # Add improved RAG processing instructions
338
+ if 'rag_processing_instructions' in SYSTEM_PROMPT_TEMPLATE:
339
+ system_prompt += "\n## RAG Processing Instructions\n"
340
+ for item in SYSTEM_PROMPT_TEMPLATE['rag_processing_instructions']:
341
+ system_prompt += f"- {item}\n"
342
+
343
+ # Add ambiguity handling instructions if they exist
344
+ if 'ambiguity_handling' in SYSTEM_PROMPT_TEMPLATE:
345
+ system_prompt += "\n## Ambiguity Handling\n"
346
+ for item in SYSTEM_PROMPT_TEMPLATE['ambiguity_handling']:
347
+ system_prompt += f"- {item}\n"
348
+
349
+ # Add token optimization instructions if they exist
350
+ if 'token_optimization' in SYSTEM_PROMPT_TEMPLATE:
351
+ system_prompt += "\n## Token Optimization\n"
352
+ for item in SYSTEM_PROMPT_TEMPLATE['token_optimization']:
353
+ system_prompt += f"- {item}\n"
354
+
355
+ if relevant_context:
356
+ system_prompt += f"""
357
+
358
+ ## Relevant Information from CubixWorld Documentation
359
+ Use this information to guide your response. Pay special attention to specific game mechanics and features:
360
+
361
+ {relevant_context}
362
+ """
363
+
364
+ # Create messages list with proper LangChain message objects
365
+ messages = [SystemMessage(content=system_prompt)]
366
+
367
+ # Get the last 5 conversation messages (reduced from 10 for more focused context)
368
+ history = self.user_conversations[user_id][-5:] if len(self.user_conversations[user_id]) > 0 else []
369
+
370
+ # Add conversation history to the messages
371
+ messages.extend(history)
372
+
373
+ # Add context summary to help model
374
+ if context_docs:
375
+ # Enhanced context summary with confidence signals
376
+ context_summary = "\nContext relevance summary (sorted by relevance):\n"
377
+
378
+ # Sort context by relevance score
379
+ sorted_contexts = sorted(context_docs, key=lambda x: x['score'], reverse=True)
380
+
381
+ for i, ctx in enumerate(sorted_contexts):
382
+ # Add confidence classification
383
+ confidence = "High" if ctx['score'] > 0.9 else "Medium" if ctx['score'] > 0.8 else "Moderate"
384
+ # Add a brief preview of content with source
385
+ source = ctx['source'].split('/')[-1] if '/' in ctx['source'] else ctx['source']
386
+ preview = ctx['content'][:80].replace('\n', ' ') + "..."
387
+ context_summary += f"{i+1}. [{confidence} confidence, score {ctx['score']:.2f}] From {source}: {preview}\n"
388
+
389
+ messages.append(SystemMessage(content=context_summary))
390
+
391
+ # Initialize token usage variables
392
+ total_tokens = 0
393
+ prompt_tokens = 0
394
+ completion_tokens = 0
395
+ total_cost = 0.0
396
+
397
+ # Get response from the model with token tracking
398
+ with get_openai_callback() as cb:
399
+ response = self.model.predict_messages(messages)
400
+ response_content = response.content
401
+
402
+ # Store token usage metrics
403
+ total_tokens = cb.total_tokens
404
+ prompt_tokens = cb.prompt_tokens
405
+ completion_tokens = cb.completion_tokens
406
+ total_cost = cb.total_cost
407
+
408
+ try:
409
+ # Simple JSON extraction
410
+ if "```json" in response_content:
411
+ json_start = response_content.find("```json") + 7
412
+ json_end = response_content.find("```", json_start)
413
+ response_content = response_content[json_start:json_end].strip()
414
+ elif "```" in response_content:
415
+ json_start = response_content.find("```") + 3
416
+ json_end = response_content.find("```", json_start)
417
+ response_content = response_content[json_start:json_end].strip()
418
+
419
+ json_response = {'response': json.loads(response_content)}
420
+
421
+ # Add token usage information to the response
422
+ json_response["token_usage"] = {
423
+ "total_tokens": total_tokens,
424
+ "prompt_tokens": prompt_tokens,
425
+ "completion_tokens": completion_tokens,
426
+ "total_cost_usd": total_cost
427
+ }
428
+
429
+ # Store the bot's response in conversation history as proper AIMessage
430
+ self.user_conversations[user_id].append(AIMessage(content=json.dumps(json_response)))
431
+
432
+ return json_response
433
+
434
+ except json.JSONDecodeError:
435
+ # Simple fallback if JSON parsing fails
436
+ fallback_response = {
437
+ 'type': 'message',
438
+ 'message': response_content
439
+ }
440
+
441
+ self.user_conversations[user_id].append(AIMessage(content=json.dumps(fallback_response)))
442
+
443
+ return fallback_response
requirements.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ pydantic
4
+ python-dotenv
5
+ langchain
6
+ langchain-community
7
+ langchain-core
8
+ langchain-openai
9
+ faiss-cpu
10
+ openai
11
+ beautifulsoup4
12
+ requests
13
+ aiohttp
14
+ gradio