Spaces:
Runtime error
Runtime error
Upload 3 files
Browse files- config.json +96 -87
- rag.py +86 -71
config.json
CHANGED
@@ -32,11 +32,11 @@
|
|
32 |
},
|
33 |
{
|
34 |
"input": "Следуй за мной и убивай встречных зомби",
|
35 |
-
"output": {"type": "follow_me", "message": "Хорошо, я буду следовать за вами и убивать встречных зомби", "repeat": true
|
36 |
},
|
37 |
{
|
38 |
"input": "Иди за мной и добывай камень",
|
39 |
-
"output": {"type": "follow_me", "message": "Хорошо, я буду следовать за вами и добывать камень", "repeat": true
|
40 |
}
|
41 |
]
|
42 |
},
|
@@ -117,28 +117,28 @@
|
|
117 |
]
|
118 |
},
|
119 |
"break_block": {
|
120 |
-
"description": "
|
121 |
"required_fields": ["type", "message", "repeat", "blocks", "quantity"],
|
122 |
"examples": [
|
123 |
{
|
124 |
"input": "Сломай землю",
|
125 |
-
"output": {"type": "break_block", "message": "Хорошо, я сломаю землю", "repeat": false, "blocks": ["minecraft:dirt"], "quantity": 1
|
126 |
},
|
127 |
{
|
128 |
"input": "Сломай 20 земли",
|
129 |
-
"output": {"type": "break_block", "message": "Хорошо, я сломаю 20 блоков земли", "repeat": false, "quantity": 20, "blocks": ["minecraft:dirt"]
|
130 |
},
|
131 |
{
|
132 |
"input": "Копай камень",
|
133 |
-
"output": {"type": "break_block", "message": "Хорошо, я буду копать камень", "repeat": true, "blocks": ["minecraft:stone"], "quantity": 0
|
134 |
},
|
135 |
{
|
136 |
"input": "Копай вниз",
|
137 |
-
"output": {"type": "break_block", "message": "Хорошо, я буду копать вниз", "repeat": true, "blocks": [], "quantity": 0
|
138 |
},
|
139 |
{
|
140 |
"input": "Добудь руду на севере",
|
141 |
-
"output": {"type": "break_block", "message": "Хорошо, я добуду руду на севере", "repeat": false, "blocks": [], "quantity": 0
|
142 |
}
|
143 |
]
|
144 |
},
|
@@ -157,60 +157,60 @@
|
|
157 |
]
|
158 |
},
|
159 |
"fishing": {
|
160 |
-
"description": "
|
161 |
-
"required_fields": ["type", "
|
162 |
"examples": [
|
163 |
{
|
164 |
"input": "Слови рыбу",
|
165 |
-
"output": {"type": "fishing", "
|
166 |
},
|
167 |
{
|
168 |
"input": "Лови рыбу",
|
169 |
-
"output": {"type": "fishing", "
|
170 |
},
|
171 |
{
|
172 |
"input": "Слови 10 рыб",
|
173 |
-
"output": {"type": "fishing", "
|
174 |
}
|
175 |
]
|
176 |
},
|
177 |
"shear_wool": {
|
178 |
-
"description": "
|
179 |
-
"required_fields": ["type", "
|
180 |
"examples": [
|
181 |
{
|
182 |
"input": "Постриги овец",
|
183 |
-
"output": {"type": "shear_wool", "
|
184 |
},
|
185 |
{
|
186 |
"input": "Стриги овец",
|
187 |
-
"output": {"type": "shear_wool", "
|
188 |
},
|
189 |
{
|
190 |
"input": "Постриги 10 овец",
|
191 |
-
"output": {"type": "shear_wool", "
|
192 |
}
|
193 |
]
|
194 |
},
|
195 |
"lighting": {
|
196 |
"description": "Команда для освещения или размещения источников света",
|
197 |
-
"required_fields": ["type", "message"
|
198 |
"examples": [
|
199 |
{
|
200 |
"input": "Подсвети мне",
|
201 |
-
"output": {"type": "lighting", "message": "Хорошо, я буду освещать путь"
|
202 |
},
|
203 |
{
|
204 |
"input": "Освещай",
|
205 |
-
"output": {"type": "lighting", "message": "Включаю освещение"
|
206 |
},
|
207 |
{
|
208 |
"input": "Поставь 10 факелов",
|
209 |
-
"output": {"type": "lighting", "message": "Хорошо, я поставлю 10 факелов"
|
210 |
},
|
211 |
{
|
212 |
"input": "Расставь факелы вокруг дома",
|
213 |
-
"output": {"type": "lighting", "message": "Хорошо, я расставлю факелы вокруг дома"
|
214 |
}
|
215 |
]
|
216 |
},
|
@@ -234,74 +234,83 @@
|
|
234 |
}
|
235 |
},
|
236 |
"system_prompt_template": {
|
237 |
-
"intro": "
|
238 |
-
|
239 |
-
|
240 |
-
"
|
241 |
-
"
|
242 |
-
"
|
243 |
-
"
|
244 |
-
"
|
245 |
-
|
246 |
-
|
247 |
-
"
|
248 |
-
"
|
249 |
-
"
|
250 |
-
"
|
251 |
-
"
|
252 |
-
"
|
253 |
-
"
|
254 |
-
|
255 |
-
|
256 |
-
"
|
257 |
-
"
|
258 |
-
"
|
259 |
-
"When documentation provides specific command formats, follow them exactly",
|
260 |
-
"Consider server limitations and restrictions when generating commands"
|
261 |
-
],
|
262 |
-
"response_format": [
|
263 |
-
"Ensure all JSON responses are properly nested and formatted",
|
264 |
-
"Include all required fields as specified in command schemas",
|
265 |
-
"Validate numeric values are within acceptable ranges",
|
266 |
-
"Use consistent casing and formatting for entity IDs and commands",
|
267 |
-
"Include relevant metadata and context in responses when available"
|
268 |
],
|
269 |
-
|
270 |
-
|
271 |
-
"
|
272 |
-
"
|
273 |
-
"
|
274 |
-
"
|
275 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
276 |
],
|
277 |
-
|
278 |
-
|
279 |
-
"
|
280 |
-
"
|
281 |
-
"
|
282 |
-
"
|
283 |
-
"
|
284 |
-
"
|
285 |
-
"
|
286 |
-
"
|
287 |
-
"
|
|
|
|
|
288 |
],
|
289 |
-
|
290 |
-
|
291 |
-
"
|
292 |
-
"
|
293 |
-
"
|
294 |
-
"
|
295 |
-
"
|
296 |
-
"
|
297 |
],
|
298 |
-
|
299 |
-
|
300 |
-
"
|
301 |
-
"
|
302 |
-
"
|
303 |
-
"
|
304 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
305 |
]
|
306 |
}
|
307 |
}
|
|
|
32 |
},
|
33 |
{
|
34 |
"input": "Следуй за мной и убивай встречных зомби",
|
35 |
+
"output": {"type": "follow_me", "message": "Хорошо, я буду следовать за вами и убивать встречных зомби", "repeat": true}
|
36 |
},
|
37 |
{
|
38 |
"input": "Иди за мной и добывай камень",
|
39 |
+
"output": {"type": "follow_me", "message": "Хорошо, я буду следовать за вами и добывать камень", "repeat": true}
|
40 |
}
|
41 |
]
|
42 |
},
|
|
|
117 |
]
|
118 |
},
|
119 |
"break_block": {
|
120 |
+
"description": "Действие для ломания блока",
|
121 |
"required_fields": ["type", "message", "repeat", "blocks", "quantity"],
|
122 |
"examples": [
|
123 |
{
|
124 |
"input": "Сломай землю",
|
125 |
+
"output": {"type": "break_block", "message": "Хорошо, я сломаю землю", "repeat": false, "blocks": ["minecraft:dirt"], "quantity": 1}
|
126 |
},
|
127 |
{
|
128 |
"input": "Сломай 20 земли",
|
129 |
+
"output": {"type": "break_block", "message": "Хорошо, я сломаю 20 блоков земли", "repeat": false, "quantity": 20, "blocks": ["minecraft:dirt"]}
|
130 |
},
|
131 |
{
|
132 |
"input": "Копай камень",
|
133 |
+
"output": {"type": "break_block", "message": "Хорошо, я буду копать камень", "repeat": true, "blocks": ["minecraft:stone"], "quantity": 0}
|
134 |
},
|
135 |
{
|
136 |
"input": "Копай вниз",
|
137 |
+
"output": {"type": "break_block", "message": "Хорошо, я буду копать вниз", "repeat": true, "blocks": ["minecraft:stone"], "quantity": 0}
|
138 |
},
|
139 |
{
|
140 |
"input": "Добудь руду на севере",
|
141 |
+
"output": {"type": "break_block", "message": "Хорошо, я добуду руду на севере", "repeat": false, "blocks": ["minecraft:iron_ore"], "quantity": 0}
|
142 |
}
|
143 |
]
|
144 |
},
|
|
|
157 |
]
|
158 |
},
|
159 |
"fishing": {
|
160 |
+
"description": "Ловить рыбу.",
|
161 |
+
"required_fields": ["type", "message", "repeat", "quantity"],
|
162 |
"examples": [
|
163 |
{
|
164 |
"input": "Слови рыбу",
|
165 |
+
"output": {"type": "fishing", "message": "Хорошо, я поймаю рыбу", "repeat": false, "quantity": 1}
|
166 |
},
|
167 |
{
|
168 |
"input": "Лови рыбу",
|
169 |
+
"output": {"type": "fishing", "message": "Хорошо, я буду ловить рыбу", "repeat": true, "quantity": 0}
|
170 |
},
|
171 |
{
|
172 |
"input": "Слови 10 рыб",
|
173 |
+
"output": {"type": "fishing", "message": "Хорошо, я поймаю 10 рыб", "repeat": false, "quantity": 10}
|
174 |
}
|
175 |
]
|
176 |
},
|
177 |
"shear_wool": {
|
178 |
+
"description": "Стричь овец",
|
179 |
+
"required_fields": ["type", "message", "repeat", "quantity"],
|
180 |
"examples": [
|
181 |
{
|
182 |
"input": "Постриги овец",
|
183 |
+
"output": {"type": "shear_wool", "message": "Хорошо, я постригу овец", "repeat": false, "quantity": 0}
|
184 |
},
|
185 |
{
|
186 |
"input": "Стриги овец",
|
187 |
+
"output": {"type": "shear_wool", "message": "Хорошо, я буду стричь овец", "repeat": true, "quantity": 0}
|
188 |
},
|
189 |
{
|
190 |
"input": "Постриги 10 овец",
|
191 |
+
"output": {"type": "shear_wool", "message": "Хорошо, я постригу 10 овец", "repeat": false, "quantity": 10}
|
192 |
}
|
193 |
]
|
194 |
},
|
195 |
"lighting": {
|
196 |
"description": "Команда для освещения или размещения источников света",
|
197 |
+
"required_fields": ["type", "message"],
|
198 |
"examples": [
|
199 |
{
|
200 |
"input": "Подсвети мне",
|
201 |
+
"output": {"type": "lighting", "message": "Хорошо, я буду освещать путь"}
|
202 |
},
|
203 |
{
|
204 |
"input": "Освещай",
|
205 |
+
"output": {"type": "lighting", "message": "Включаю освещение"}
|
206 |
},
|
207 |
{
|
208 |
"input": "Поставь 10 факелов",
|
209 |
+
"output": {"type": "lighting", "message": "Хорошо, я поставлю 10 факелов"}
|
210 |
},
|
211 |
{
|
212 |
"input": "Расставь факелы вокруг дома",
|
213 |
+
"output": {"type": "lighting", "message": "Хорошо, я расставлю факелы вокруг дома"}
|
214 |
}
|
215 |
]
|
216 |
},
|
|
|
234 |
}
|
235 |
},
|
236 |
"system_prompt_template": {
|
237 |
+
"intro": "Ты CubixAssistant для сервера Minecraft CubixWorld. Анализируй запросы игроков и конвертируй их в JSON-команды. ОБРАБАТЫВАЙ ТОЛЬКО ОДНУ КОМАНДУ за раз. При нескольких командах отвечай: {\"type\": \"message\", \"message\": \"Я не могу выполнять несколько задач сразу\"}",
|
238 |
+
|
239 |
+
"command_rules": [
|
240 |
+
"[ФОРМАТЫ КОМАНД]",
|
241 |
+
"• СТРОГО придерживайся схемы для каждой команды - не добавляй лишних полей",
|
242 |
+
"• Все команды имеют обязательные поля: type, message",
|
243 |
+
"• Определяй repeat: false для одноразовых команд (\"сделай\", \"принеси\") и true для повторяющихся (\"делай\", \"-ай\" окончания)",
|
244 |
+
"• При запросах с нереально большим количеством (>1000) используй type:message",
|
245 |
+
"• Проверяй каждый ответ на соответствие схеме команды",
|
246 |
+
"",
|
247 |
+
"[СХЕМЫ КОМАНД]",
|
248 |
+
"• follow_me: {type, message, repeat}",
|
249 |
+
"• message: {type, message}",
|
250 |
+
"• kill_mob: {type, message, repeat, mob}",
|
251 |
+
"• break_tree: {type, message, repeat, quantityTrees, quantity, blocks}",
|
252 |
+
"• harvest_crops: {type, message, repeat, harvest}",
|
253 |
+
"• break_block: {type, message, repeat, blocks, quantity} - НЕ ДОБАВЛЯТЬ location",
|
254 |
+
"• cycle_break_block: {type, message}",
|
255 |
+
"• fishing: {type, message, repeat, quantity}",
|
256 |
+
"• shear_wool: {type, message, repeat, quantity}",
|
257 |
+
"• lighting: {type, message} - НЕ ДОБАВЛЯТЬ quantity",
|
258 |
+
"• stop: {type, message}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
259 |
],
|
260 |
+
|
261 |
+
"validation_checklist": [
|
262 |
+
"[КРИТИЧЕСКИЕ ПРОВЕРКИ]",
|
263 |
+
"1. ТОЛЬКО ОДНА команда в ответе, никаких secondary_actions",
|
264 |
+
"2. ВСЕ обязательные поля присутствуют",
|
265 |
+
"3. Массив blocks НИКОГДА не пустой, для break_block минимум [\"minecraft:stone\"]",
|
266 |
+
"4. Правильные ID мобов из справочника (EntityZombie вместо \"зомби\")",
|
267 |
+
"5. При числах >100000 используй {\"type\": \"message\", \"message\": \"не могу ... такое количество\"}",
|
268 |
+
"6. Запросы \"наруби досок\" = break_tree, а не break_block",
|
269 |
+
"7. НИКОГДА не добавляй поля, которых нет в схеме команды",
|
270 |
+
"8. Для команды lighting НЕ добавляй поле quantity, даже если указано число факелов",
|
271 |
+
"9. Используй ТОЛЬКО существующие блоки Minecraft:",
|
272 |
+
" • Дерево: minecraft:oak_log, minecraft:spruce_log, minecraft:birch_log, minecraft:acacia_log, minecraft:dark_oak_log",
|
273 |
+
" • Доски: minecraft:oak_planks, minecraft:spruce_planks, minecraft:birch_planks, minecraft:acacia_planks",
|
274 |
+
" • Камень: minecraft:stone, minecraft:cobblestone",
|
275 |
+
" • При неуверенности используй стандартные (oak_log, stone)"
|
276 |
],
|
277 |
+
|
278 |
+
"examples": [
|
279 |
+
"[ПРИМЕРЫ СТРОГО ПО СХЕМЕ]",
|
280 |
+
"Убей зомби → {\"type\": \"kill_mob\", \"message\": \"Хорошо, я иду убивать зомби\", \"repeat\": false, \"mob\": \"EntityZombie\"}",
|
281 |
+
"Копай шахту → {\"type\": \"break_block\", \"message\": \"Буду копать шахту\", \"repeat\": true, \"blocks\": [\"minecraft:stone\"], \"quantity\": 0}",
|
282 |
+
"Наруби 10 досок дуба → {\"type\": \"break_tree\", \"message\": \"Добуду 10 досок дуба\", \"repeat\": false, \"quantityTrees\": 0, \"quantity\": 10, \"blocks\": [\"minecraft:oak_planks\"]}",
|
283 |
+
"Наруби 9999999 досок → {\"type\": \"message\", \"message\": \"К сожалению, я не могу нарубить такое большое количество досок\"}",
|
284 |
+
"Наруби досок каменного дерева → {\"type\": \"message\", \"message\": \"Извините, каменное дерево не существует в Minecraft\"}",
|
285 |
+
"Наруби дерева и убей зомби → {\"type\": \"message\", \"message\": \"Я не могу выполнять несколько задач сразу\"}",
|
286 |
+
"Включи 10 света → {\"type\": \"lighting\", \"message\": \"Хорошо, я поставлю 10 факелов\"}",
|
287 |
+
"Лови рыбу → {\"type\": \"fishing\", \"message\": \"Буду ловить рыбу\", \"repeat\": true, \"quantity\": 0}",
|
288 |
+
"Постриги 5 овец → {\"type\": \"shear_wool\", \"message\": \"Постригу 5 овец\", \"repeat\": false, \"quantity\": 5}",
|
289 |
+
"Копай вот здесь → {\"type\": \"cycle_break_block\", \"message\": \"Буду копать на этом месте\"}"
|
290 |
],
|
291 |
+
|
292 |
+
"error_handlers": [
|
293 |
+
"[ОШИБКИ И ОСОБЫЕ СЛУЧАИ]",
|
294 |
+
"• Несуществующие блоки → {\"type\": \"message\", \"message\": \"Извините, [блок] не существует в Minecraft\"}",
|
295 |
+
"• Огромные числа → {\"type\": \"message\", \"message\": \"К сожалению, я не могу [действие] такое количество\"}",
|
296 |
+
"• Несколько команд → {\"type\": \"message\", \"message\": \"Я не могу выполнять несколько задач сразу\"}",
|
297 |
+
"• Приветствия, вопросы → {\"type\": \"message\", \"message\": \"[дружелюбный ответ]\"}",
|
298 |
+
"• При неоднозначности предпочитай message вместо неверной команды"
|
299 |
],
|
300 |
+
|
301 |
+
"type_detection": [
|
302 |
+
"[ОПРЕДЕЛЕНИЕ ТИПА КОМАНДЫ]",
|
303 |
+
"• follow_me: \"следуй\", \"приди\", \"иди за\"",
|
304 |
+
"• message: общение, вопросы, неизвестные команды",
|
305 |
+
"• kill_mob: \"убей\", \"атакуй\" (мобов)",
|
306 |
+
"• break_tree: \"руби\", \"наруби\" (дерево/доски)",
|
307 |
+
"• harvest_crops: \"собери\", \"собирай\" (урожай)",
|
308 |
+
"• break_block: \"копай\", \"сломай\" (камень/блоки, но не деревья)",
|
309 |
+
"• cycle_break_block: \"копай здесь\", \"ломай под собой\"",
|
310 |
+
"• fishing: \"лови\", \"налови\" (рыбу)",
|
311 |
+
"• shear_wool: \"стриги\" (овец)",
|
312 |
+
"• lighting: \"освещай\", \"поставь факелы\", \"включи свет\"",
|
313 |
+
"• stop: \"стой\", \"хватит\", \"прекрати\""
|
314 |
]
|
315 |
}
|
316 |
}
|
rag.py
CHANGED
@@ -88,9 +88,9 @@ class RAGSystem:
|
|
88 |
"""Create a new knowledge base from scratch"""
|
89 |
# Backup old index if needed
|
90 |
if backup_old and os.path.exists("faiss_index"):
|
91 |
-
backup_dir = f"
|
92 |
shutil.move("faiss_index", backup_dir)
|
93 |
-
print(f"Old index backed up")
|
94 |
|
95 |
# Create directory if needed
|
96 |
os.makedirs("faiss_index", exist_ok=True)
|
@@ -100,7 +100,24 @@ class RAGSystem:
|
|
100 |
for url in CUBIX_DOCS:
|
101 |
try:
|
102 |
loader = WebBaseLoader(url)
|
103 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
104 |
print(f"Loaded document from {url}")
|
105 |
except Exception as e:
|
106 |
print(f"Error loading document from {url}: {e}")
|
@@ -113,9 +130,9 @@ class RAGSystem:
|
|
113 |
|
114 |
# Split documents
|
115 |
text_splitter = RecursiveCharacterTextSplitter(
|
116 |
-
chunk_size=
|
117 |
-
chunk_overlap=
|
118 |
-
separators=["\n\n", "\n", " ", ""], # More intelligent splitting
|
119 |
length_function=len
|
120 |
)
|
121 |
split_docs = text_splitter.split_documents(docs)
|
@@ -126,7 +143,7 @@ class RAGSystem:
|
|
126 |
try:
|
127 |
# Save the new index
|
128 |
self.document_store.save_local("faiss_index")
|
129 |
-
print("Successfully saved new knowledge base to faiss_index directory")
|
130 |
except Exception as e:
|
131 |
print(f"Error saving knowledge base: {e}")
|
132 |
|
@@ -192,6 +209,18 @@ class RAGSystem:
|
|
192 |
print(f"Error saving index: {e}")
|
193 |
return False
|
194 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
195 |
def generate_response(self, user_id: str, message: str) -> Dict[str, Any]:
|
196 |
"""Generate a response for a user message."""
|
197 |
if user_id not in self.user_conversations:
|
@@ -212,18 +241,18 @@ class RAGSystem:
|
|
212 |
# First try with higher relevance score threshold for more accurate results
|
213 |
results = self.document_store.similarity_search_with_score(
|
214 |
message,
|
215 |
-
k=
|
216 |
-
score_threshold=0.
|
217 |
)
|
218 |
|
219 |
if results:
|
220 |
# Sort by score and take top results
|
221 |
-
results.sort(key=lambda x: x[1]
|
222 |
|
223 |
# Format with scores and metadata for better context
|
224 |
formatted_results = []
|
225 |
for doc, score in results:
|
226 |
-
if score
|
227 |
# Extract source for better attribution
|
228 |
source = doc.metadata.get('source', 'Unknown source')
|
229 |
if isinstance(source, str) and source.startswith('http'):
|
@@ -236,7 +265,7 @@ class RAGSystem:
|
|
236 |
content = re.sub(r'\s+', ' ', content).strip()
|
237 |
|
238 |
# Limit content length based on relevance score - higher relevance gets more tokens
|
239 |
-
max_length = int(min(
|
240 |
if len(content) > max_length:
|
241 |
content = content[:max_length] + "..."
|
242 |
|
@@ -256,7 +285,7 @@ class RAGSystem:
|
|
256 |
if not relevant_context:
|
257 |
results = self.document_store.similarity_search(
|
258 |
message,
|
259 |
-
k=
|
260 |
)
|
261 |
if results:
|
262 |
formatted_results = []
|
@@ -268,14 +297,15 @@ class RAGSystem:
|
|
268 |
|
269 |
# Truncate content if too long (token optimization)
|
270 |
content = doc.page_content
|
271 |
-
if len(content) >
|
272 |
-
content = content[:
|
273 |
|
274 |
formatted_results.append(f"SOURCE: {source}\n{content}")
|
275 |
|
276 |
relevant_context = "\n\n---\n\n".join(formatted_results)
|
277 |
except Exception as e:
|
278 |
print(f"Error during context retrieval: {e}")
|
|
|
279 |
|
280 |
# Build command specifications for the system prompt
|
281 |
command_specs_text = "# Available Commands and Required Fields\n\n"
|
@@ -299,66 +329,40 @@ class RAGSystem:
|
|
299 |
mob_mappings_text += f"- {mob_name}: {entity_id}\n"
|
300 |
mob_mappings_text += "\nWhen a player mentions a mob, always map it to the corresponding entity ID in your response.\n\n"
|
301 |
|
302 |
-
# Build system prompt
|
303 |
-
system_prompt =
|
304 |
|
305 |
-
# Add
|
306 |
-
|
307 |
-
|
308 |
-
system_prompt += f"- {item}\n"
|
309 |
|
310 |
-
# Add
|
311 |
-
|
|
|
|
|
|
|
|
|
|
|
312 |
|
313 |
-
|
314 |
-
|
|
|
|
|
|
|
315 |
|
316 |
-
# Add command
|
317 |
-
system_prompt +=
|
318 |
-
|
319 |
-
system_prompt += f"- {item}\n"
|
320 |
-
|
321 |
-
# Add contextual information processing
|
322 |
-
system_prompt += "\n## Contextual Information Processing\n"
|
323 |
-
for item in SYSTEM_PROMPT_TEMPLATE['contextual_information_processing']:
|
324 |
-
system_prompt += f"- {item}\n"
|
325 |
-
|
326 |
-
# Add response format
|
327 |
-
system_prompt += "\n## Response Format\n"
|
328 |
-
for item in SYSTEM_PROMPT_TEMPLATE['response_format']:
|
329 |
-
system_prompt += f"- {item}\n"
|
330 |
-
|
331 |
-
# Add conversation handling instructions
|
332 |
-
if 'conversation_handling' in SYSTEM_PROMPT_TEMPLATE:
|
333 |
-
system_prompt += "\n## Conversation Handling\n"
|
334 |
-
for item in SYSTEM_PROMPT_TEMPLATE['conversation_handling']:
|
335 |
-
system_prompt += f"- {item}\n"
|
336 |
-
|
337 |
-
# Add improved RAG processing instructions
|
338 |
-
if 'rag_processing_instructions' in SYSTEM_PROMPT_TEMPLATE:
|
339 |
-
system_prompt += "\n## RAG Processing Instructions\n"
|
340 |
-
for item in SYSTEM_PROMPT_TEMPLATE['rag_processing_instructions']:
|
341 |
-
system_prompt += f"- {item}\n"
|
342 |
-
|
343 |
-
# Add ambiguity handling instructions if they exist
|
344 |
-
if 'ambiguity_handling' in SYSTEM_PROMPT_TEMPLATE:
|
345 |
-
system_prompt += "\n## Ambiguity Handling\n"
|
346 |
-
for item in SYSTEM_PROMPT_TEMPLATE['ambiguity_handling']:
|
347 |
-
system_prompt += f"- {item}\n"
|
348 |
-
|
349 |
-
# Add token optimization instructions if they exist
|
350 |
-
if 'token_optimization' in SYSTEM_PROMPT_TEMPLATE:
|
351 |
-
system_prompt += "\n## Token Optimization\n"
|
352 |
-
for item in SYSTEM_PROMPT_TEMPLATE['token_optimization']:
|
353 |
-
system_prompt += f"- {item}\n"
|
354 |
|
|
|
355 |
if relevant_context:
|
356 |
system_prompt += f"""
|
|
|
|
|
357 |
|
358 |
-
|
359 |
-
Use this information to guide your response. Pay special attention to specific game mechanics and features:
|
360 |
|
361 |
-
|
362 |
"""
|
363 |
|
364 |
# Create messages list with proper LangChain message objects
|
@@ -376,18 +380,27 @@ class RAGSystem:
|
|
376 |
context_summary = "\nContext relevance summary (sorted by relevance):\n"
|
377 |
|
378 |
# Sort context by relevance score
|
379 |
-
sorted_contexts = sorted(context_docs, key=lambda x: x['score']
|
380 |
|
381 |
for i, ctx in enumerate(sorted_contexts):
|
382 |
-
# Add confidence classification
|
383 |
-
confidence = "High" if ctx['score']
|
384 |
# Add a brief preview of content with source
|
385 |
source = ctx['source'].split('/')[-1] if '/' in ctx['source'] else ctx['source']
|
386 |
-
preview = ctx['content'][:
|
387 |
context_summary += f"{i+1}. [{confidence} confidence, score {ctx['score']:.2f}] From {source}: {preview}\n"
|
388 |
|
389 |
messages.append(SystemMessage(content=context_summary))
|
390 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
391 |
# Initialize token usage variables
|
392 |
total_tokens = 0
|
393 |
prompt_tokens = 0
|
@@ -416,7 +429,9 @@ class RAGSystem:
|
|
416 |
json_end = response_content.find("```", json_start)
|
417 |
response_content = response_content[json_start:json_end].strip()
|
418 |
|
419 |
-
|
|
|
|
|
420 |
|
421 |
# Add token usage information to the response
|
422 |
json_response["token_usage"] = {
|
|
|
88 |
"""Create a new knowledge base from scratch"""
|
89 |
# Backup old index if needed
|
90 |
if backup_old and os.path.exists("faiss_index"):
|
91 |
+
backup_dir = f"faiss_index_backup"
|
92 |
shutil.move("faiss_index", backup_dir)
|
93 |
+
print(f"Old index backed up to {backup_dir}")
|
94 |
|
95 |
# Create directory if needed
|
96 |
os.makedirs("faiss_index", exist_ok=True)
|
|
|
100 |
for url in CUBIX_DOCS:
|
101 |
try:
|
102 |
loader = WebBaseLoader(url)
|
103 |
+
web_docs = loader.load()
|
104 |
+
|
105 |
+
# Clean and preprocess documents
|
106 |
+
for doc in web_docs:
|
107 |
+
# Clean HTML artifacts and normalize whitespace
|
108 |
+
content = doc.page_content
|
109 |
+
content = re.sub(r'\s+', ' ', content) # Normalize whitespace
|
110 |
+
content = re.sub(r'<[^>]+>', ' ', content) # Remove HTML tags
|
111 |
+
content = re.sub(r'\[.*?\]', '', content) # Remove square brackets content
|
112 |
+
|
113 |
+
# Update document content
|
114 |
+
doc.page_content = content.strip()
|
115 |
+
|
116 |
+
# Add source URL to metadata if not present
|
117 |
+
if 'source' not in doc.metadata:
|
118 |
+
doc.metadata['source'] = url
|
119 |
+
|
120 |
+
docs.extend(web_docs)
|
121 |
print(f"Loaded document from {url}")
|
122 |
except Exception as e:
|
123 |
print(f"Error loading document from {url}: {e}")
|
|
|
130 |
|
131 |
# Split documents
|
132 |
text_splitter = RecursiveCharacterTextSplitter(
|
133 |
+
chunk_size=800, # Increased from 500 for better context
|
134 |
+
chunk_overlap=100, # Increased overlap to prevent breaking important context
|
135 |
+
separators=["\n\n", "\n", ". ", " ", ""], # More intelligent splitting
|
136 |
length_function=len
|
137 |
)
|
138 |
split_docs = text_splitter.split_documents(docs)
|
|
|
143 |
try:
|
144 |
# Save the new index
|
145 |
self.document_store.save_local("faiss_index")
|
146 |
+
print(f"Successfully saved new knowledge base to faiss_index directory with {len(split_docs)} chunks")
|
147 |
except Exception as e:
|
148 |
print(f"Error saving knowledge base: {e}")
|
149 |
|
|
|
209 |
print(f"Error saving index: {e}")
|
210 |
return False
|
211 |
|
212 |
+
def _filter_response_fields(self, response_dict: dict) -> dict:
|
213 |
+
"""Filter out fields that are not in required_fields for the command type."""
|
214 |
+
if not isinstance(response_dict, dict) or 'type' not in response_dict:
|
215 |
+
return response_dict
|
216 |
+
|
217 |
+
command_type = response_dict['type']
|
218 |
+
if command_type not in COMMAND_SPECS:
|
219 |
+
return response_dict
|
220 |
+
|
221 |
+
required_fields = COMMAND_SPECS[command_type]['required_fields']
|
222 |
+
return {k: v for k, v in response_dict.items() if k in required_fields}
|
223 |
+
|
224 |
def generate_response(self, user_id: str, message: str) -> Dict[str, Any]:
|
225 |
"""Generate a response for a user message."""
|
226 |
if user_id not in self.user_conversations:
|
|
|
241 |
# First try with higher relevance score threshold for more accurate results
|
242 |
results = self.document_store.similarity_search_with_score(
|
243 |
message,
|
244 |
+
k=5, # Increased from 4 for better coverage
|
245 |
+
score_threshold=0.6 # Relaxed from 0.75 for better recall with Russian queries
|
246 |
)
|
247 |
|
248 |
if results:
|
249 |
# Sort by score and take top results
|
250 |
+
results.sort(key=lambda x: x[1])
|
251 |
|
252 |
# Format with scores and metadata for better context
|
253 |
formatted_results = []
|
254 |
for doc, score in results:
|
255 |
+
if score < 1.5: # Only include relevance context (lower score is better in FAISS)
|
256 |
# Extract source for better attribution
|
257 |
source = doc.metadata.get('source', 'Unknown source')
|
258 |
if isinstance(source, str) and source.startswith('http'):
|
|
|
265 |
content = re.sub(r'\s+', ' ', content).strip()
|
266 |
|
267 |
# Limit content length based on relevance score - higher relevance gets more tokens
|
268 |
+
max_length = int(min(1000, 1500)) # Increased length for better context
|
269 |
if len(content) > max_length:
|
270 |
content = content[:max_length] + "..."
|
271 |
|
|
|
285 |
if not relevant_context:
|
286 |
results = self.document_store.similarity_search(
|
287 |
message,
|
288 |
+
k=4 # Increased from 3 for better coverage
|
289 |
)
|
290 |
if results:
|
291 |
formatted_results = []
|
|
|
297 |
|
298 |
# Truncate content if too long (token optimization)
|
299 |
content = doc.page_content
|
300 |
+
if len(content) > 1000: # Increased from 800 for better context
|
301 |
+
content = content[:1000] + "..."
|
302 |
|
303 |
formatted_results.append(f"SOURCE: {source}\n{content}")
|
304 |
|
305 |
relevant_context = "\n\n---\n\n".join(formatted_results)
|
306 |
except Exception as e:
|
307 |
print(f"Error during context retrieval: {e}")
|
308 |
+
relevant_context = f"Error retrieving context: {str(e)}"
|
309 |
|
310 |
# Build command specifications for the system prompt
|
311 |
command_specs_text = "# Available Commands and Required Fields\n\n"
|
|
|
329 |
mob_mappings_text += f"- {mob_name}: {entity_id}\n"
|
330 |
mob_mappings_text += "\nWhen a player mentions a mob, always map it to the corresponding entity ID in your response.\n\n"
|
331 |
|
332 |
+
# Build system prompt with new structure
|
333 |
+
system_prompt = ""
|
334 |
|
335 |
+
# Add intro
|
336 |
+
if 'intro' in SYSTEM_PROMPT_TEMPLATE:
|
337 |
+
system_prompt += SYSTEM_PROMPT_TEMPLATE['intro'] + "\n\n"
|
|
|
338 |
|
339 |
+
# Add all sections from the template in order
|
340 |
+
sections = [
|
341 |
+
'command_rules',
|
342 |
+
'validation_checklist',
|
343 |
+
'examples',
|
344 |
+
'error_handlers'
|
345 |
+
]
|
346 |
|
347 |
+
for section in sections:
|
348 |
+
if section in SYSTEM_PROMPT_TEMPLATE:
|
349 |
+
for item in SYSTEM_PROMPT_TEMPLATE[section]:
|
350 |
+
system_prompt += item + "\n"
|
351 |
+
system_prompt += "\n"
|
352 |
|
353 |
+
# Add command specs and mob mappings from our generated text
|
354 |
+
system_prompt += command_specs_text + "\n"
|
355 |
+
system_prompt += mob_mappings_text + "\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
356 |
|
357 |
+
# Add relevant context if available
|
358 |
if relevant_context:
|
359 |
system_prompt += f"""
|
360 |
+
## Relevant Information from CubixWorld Documentation
|
361 |
+
Use this information to guide your response. If the user is asking about game updates, features, or information that appears in this documentation, provide that information in your response:
|
362 |
|
363 |
+
{relevant_context}
|
|
|
364 |
|
365 |
+
IMPORTANT: If the user is asking about information contained in the documentation above, you SHOULD provide that information in your response. Do not say you cannot answer questions about game updates or features if the information is available in the documentation.
|
366 |
"""
|
367 |
|
368 |
# Create messages list with proper LangChain message objects
|
|
|
380 |
context_summary = "\nContext relevance summary (sorted by relevance):\n"
|
381 |
|
382 |
# Sort context by relevance score
|
383 |
+
sorted_contexts = sorted(context_docs, key=lambda x: x['score'])
|
384 |
|
385 |
for i, ctx in enumerate(sorted_contexts):
|
386 |
+
# Add confidence classification (lower score is better in FAISS)
|
387 |
+
confidence = "High" if ctx['score'] < 0.3 else "Medium" if ctx['score'] < 0.6 else "Moderate"
|
388 |
# Add a brief preview of content with source
|
389 |
source = ctx['source'].split('/')[-1] if '/' in ctx['source'] else ctx['source']
|
390 |
+
preview = ctx['content'][:100].replace('\n', ' ') + "..."
|
391 |
context_summary += f"{i+1}. [{confidence} confidence, score {ctx['score']:.2f}] From {source}: {preview}\n"
|
392 |
|
393 |
messages.append(SystemMessage(content=context_summary))
|
394 |
|
395 |
+
# Add special instruction for information questions
|
396 |
+
if any(keyword in message.lower() for keyword in ["что", "какие", "когда", "обновление", "новое", "осеннее", "autumn"]):
|
397 |
+
info_instruction = """
|
398 |
+
IMPORTANT INSTRUCTION: The user is asking about game information or updates. If you have relevant information in the context provided,
|
399 |
+
you MUST share that information in your response using the message command type. Do not refuse to answer questions about game updates
|
400 |
+
or features if the information is available in the context.
|
401 |
+
"""
|
402 |
+
messages.append(SystemMessage(content=info_instruction))
|
403 |
+
|
404 |
# Initialize token usage variables
|
405 |
total_tokens = 0
|
406 |
prompt_tokens = 0
|
|
|
429 |
json_end = response_content.find("```", json_start)
|
430 |
response_content = response_content[json_start:json_end].strip()
|
431 |
|
432 |
+
parsed_response = json.loads(response_content)
|
433 |
+
filtered_response = self._filter_response_fields(parsed_response)
|
434 |
+
json_response = {'response': filtered_response}
|
435 |
|
436 |
# Add token usage information to the response
|
437 |
json_response["token_usage"] = {
|