Illia56 committed on
Commit
125c7fd
·
verified ·
1 Parent(s): b70539a

Upload 3 files

Browse files
Files changed (2) hide show
  1. config.json +96 -87
  2. rag.py +86 -71
config.json CHANGED
@@ -32,11 +32,11 @@
32
  },
33
  {
34
  "input": "Следуй за мной и убивай встречных зомби",
35
- "output": {"type": "follow_me", "message": "Хорошо, я буду следовать за вами и убивать встречных зомби", "repeat": true, "secondary_action": {"type": "kill_mob", "mob": "EntityZombie", "repeat": true}}
36
  },
37
  {
38
  "input": "Иди за мной и добывай камень",
39
- "output": {"type": "follow_me", "message": "Хорошо, я буду следовать за вами и добывать камень", "repeat": true, "secondary_action": {"type": "break_block", "blocks": ["minecraft:stone"], "repeat": true}}
40
  }
41
  ]
42
  },
@@ -117,28 +117,28 @@
117
  ]
118
  },
119
  "break_block": {
120
- "description": "Команда для ломания блоков",
121
  "required_fields": ["type", "message", "repeat", "blocks", "quantity"],
122
  "examples": [
123
  {
124
  "input": "Сломай землю",
125
- "output": {"type": "break_block", "message": "Хорошо, я сломаю землю", "repeat": false, "blocks": ["minecraft:dirt"], "quantity": 1, "location": ""}
126
  },
127
  {
128
  "input": "Сломай 20 земли",
129
- "output": {"type": "break_block", "message": "Хорошо, я сломаю 20 блоков земли", "repeat": false, "quantity": 20, "blocks": ["minecraft:dirt"], "location": ""}
130
  },
131
  {
132
  "input": "Копай камень",
133
- "output": {"type": "break_block", "message": "Хорошо, я буду копать камень", "repeat": true, "blocks": ["minecraft:stone"], "quantity": 0, "location": ""}
134
  },
135
  {
136
  "input": "Копай вниз",
137
- "output": {"type": "break_block", "message": "Хорошо, я буду копать вниз", "repeat": true, "blocks": [], "quantity": 0, "location": "вниз"}
138
  },
139
  {
140
  "input": "Добудь руду на севере",
141
- "output": {"type": "break_block", "message": "Хорошо, я добуду руду на севере", "repeat": false, "blocks": [], "quantity": 0, "location": "на севере"}
142
  }
143
  ]
144
  },
@@ -157,60 +157,60 @@
157
  ]
158
  },
159
  "fishing": {
160
- "description": "Команда для ловли рыбы",
161
- "required_fields": ["type", "repeat", "quantity", "message"],
162
  "examples": [
163
  {
164
  "input": "Слови рыбу",
165
- "output": {"type": "fishing", "repeat": false, "quantity": 1, "message": "Хорошо, я поймаю рыбу"}
166
  },
167
  {
168
  "input": "Лови рыбу",
169
- "output": {"type": "fishing", "repeat": true, "quantity": 0, "message": "Хорошо, я буду ловить рыбу"}
170
  },
171
  {
172
  "input": "Слови 10 рыб",
173
- "output": {"type": "fishing", "repeat": false, "quantity": 10, "message": "Хорошо, я поймаю 10 рыб"}
174
  }
175
  ]
176
  },
177
  "shear_wool": {
178
- "description": "Команда для стрижки овец",
179
- "required_fields": ["type", "repeat", "quantity", "message"],
180
  "examples": [
181
  {
182
  "input": "Постриги овец",
183
- "output": {"type": "shear_wool", "repeat": false, "quantity": 0, "message": "Хорошо, я постригу овец"}
184
  },
185
  {
186
  "input": "Стриги овец",
187
- "output": {"type": "shear_wool", "repeat": true, "quantity": 0, "message": "Хорошо, я буду стричь овец"}
188
  },
189
  {
190
  "input": "Постриги 10 овец",
191
- "output": {"type": "shear_wool", "repeat": false, "quantity": 10, "message": "Хорошо, я постригу 10 овец"}
192
  }
193
  ]
194
  },
195
  "lighting": {
196
  "description": "Команда для освещения или размещения источников света",
197
- "required_fields": ["type", "message", "quantity"],
198
  "examples": [
199
  {
200
  "input": "Подсвети мне",
201
- "output": {"type": "lighting", "message": "Хорошо, я буду освещать путь", "quantity": 0}
202
  },
203
  {
204
  "input": "Освещай",
205
- "output": {"type": "lighting", "message": "Включаю освещение", "quantity": 0}
206
  },
207
  {
208
  "input": "Поставь 10 факелов",
209
- "output": {"type": "lighting", "message": "Хорошо, я поставлю 10 факелов", "quantity": 10}
210
  },
211
  {
212
  "input": "Расставь факелы вокруг дома",
213
- "output": {"type": "lighting", "message": "Хорошо, я расставлю факелы вокруг дома", "quantity": 0}
214
  }
215
  ]
216
  },
@@ -234,74 +234,83 @@
234
  }
235
  },
236
  "system_prompt_template": {
237
- "intro": "You are an AI assistant for the Minecraft game server called CubixWorld. Your task is to understand player requests in Russian and convert them into structured JSON commands with high precision and reliability.",
238
- "role_section": [
239
- "Analyze player messages carefully to determine their exact intent and required actions",
240
- "Convert natural language requests into precise JSON commands following the exact specifications",
241
- "Ensure all command parameters are validated and properly formatted",
242
- "Respond with valid JSON only, maintaining strict schema compliance",
243
- "When uncertain about intent, prefer message responses over incorrect commands",
244
- "For conversational messages like greetings, thanks, or jokes, respond in a friendly and engaging manner"
245
- ],
246
- "command_detection_guidelines": [
247
- "Carefully analyze verb forms and temporal indicators to determine command type:",
248
- "- Continuous actions: \"продолжай\", \"постоянно\", verbs with \"-ай\" suffix",
249
- "- One-time commands: \"сделай\", \"сломай\", \"приди\"",
250
- "For compound actions, identify primary and secondary components clearly",
251
- "Extract and validate all numeric quantities before including in commands",
252
- "Process spatial and directional information with high precision",
253
- "For conversational inputs (greetings, thanks, jokes, etc.), use the 'message' type response"
254
- ],
255
- "contextual_information_processing": [
256
- "Prioritize exact matches from documentation over general knowledge",
257
- "Cross-reference game mechanics with server-specific features",
258
- "Use precise entity IDs as specified in mob mappings",
259
- "When documentation provides specific command formats, follow them exactly",
260
- "Consider server limitations and restrictions when generating commands"
261
- ],
262
- "response_format": [
263
- "Ensure all JSON responses are properly nested and formatted",
264
- "Include all required fields as specified in command schemas",
265
- "Validate numeric values are within acceptable ranges",
266
- "Use consistent casing and formatting for entity IDs and commands",
267
- "Include relevant metadata and context in responses when available"
268
  ],
269
- "conversation_handling": [
270
- "For greetings (привет, здравствуй, etc.), respond with a friendly greeting and offer to help",
271
- "For questions about your status (как дела, как ты, etc.), respond positively and ask how you can assist",
272
- "For expressions of gratitude (спасибо, благодарю, etc.), acknowledge with a friendly response",
273
- "For requests for jokes or fun content, respond with a Minecraft-themed joke or fun fact",
274
- "Always maintain a helpful, friendly tone appropriate for a game assistant",
275
- "Support both Russian and Ukrainian language inputs with appropriate responses"
 
 
 
 
 
 
 
 
 
276
  ],
277
- "rag_processing_instructions": [
278
- "When contextual information is provided, carefully analyze and extract specific mechanics, rules, or features unique to CubixWorld",
279
- "Prioritize recently retrieved information over older knowledge when they conflict",
280
- "When working with information from multiple sources, synthesize a coherent understanding rather than just concatenating facts",
281
- "For highly technical queries, extract and include specific numbers, formulas, or technical details from the retrieved content",
282
- "Match the terminology used in the retrieved documentation when formulating responses",
283
- "If contextual information appears incomplete or contradictory, acknowledge the limitations in your response",
284
- "When processing game update information, clearly distinguish between new features and pre-existing ones",
285
- "Apply different weights to different sources based on relevance score - prioritize higher scoring sources",
286
- "For related items mentioned in context (like tools, weapons, or materials), include their relationships in your understanding",
287
- "Always cite specific game mechanics exactly as described in the documentation rather than making assumptions"
 
 
288
  ],
289
- "ambiguity_handling": [
290
- "For ambiguous commands, identify the specific ambiguity and propose the most likely interpretation",
291
- "When a player uses terminology not found in documentation, map to the closest documented concept",
292
- "For requests that could map to multiple command types, analyze verb usage and context to determine the most appropriate one",
293
- "If a request contains conflicting parameters, prioritize the most recently stated ones",
294
- "For vague spatial references, default to the player's current location or field of view",
295
- "When uncertain about quantities, default to 1 for singular references and continuous action for plural ones",
296
- "For unprecedented or novel requests, combine existing command structures in logical ways rather than rejecting outright"
297
  ],
298
- "token_optimization": [
299
- "Focus on the most relevant contextual information, ignoring tangential details",
300
- "Maintain brevity in responses while ensuring all required information is included",
301
- "When processing documentation, prioritize sections that directly address the current query",
302
- "For complex multi-part requests, break down processing into logical components",
303
- "Use concise language in responses while maintaining clarity and friendliness",
304
- "When multiple similar contextual examples exist, focus on the closest matching ones"
 
 
 
 
 
 
 
305
  ]
306
  }
307
  }
 
32
  },
33
  {
34
  "input": "Следуй за мной и убивай встречных зомби",
35
+ "output": {"type": "follow_me", "message": "Хорошо, я буду следовать за вами и убивать встречных зомби", "repeat": true}
36
  },
37
  {
38
  "input": "Иди за мной и добывай камень",
39
+ "output": {"type": "follow_me", "message": "Хорошо, я буду следовать за вами и добывать камень", "repeat": true}
40
  }
41
  ]
42
  },
 
117
  ]
118
  },
119
  "break_block": {
120
+ "description": "Действие для ломания блока",
121
  "required_fields": ["type", "message", "repeat", "blocks", "quantity"],
122
  "examples": [
123
  {
124
  "input": "Сломай землю",
125
+ "output": {"type": "break_block", "message": "Хорошо, я сломаю землю", "repeat": false, "blocks": ["minecraft:dirt"], "quantity": 1}
126
  },
127
  {
128
  "input": "Сломай 20 земли",
129
+ "output": {"type": "break_block", "message": "Хорошо, я сломаю 20 блоков земли", "repeat": false, "quantity": 20, "blocks": ["minecraft:dirt"]}
130
  },
131
  {
132
  "input": "Копай камень",
133
+ "output": {"type": "break_block", "message": "Хорошо, я буду копать камень", "repeat": true, "blocks": ["minecraft:stone"], "quantity": 0}
134
  },
135
  {
136
  "input": "Копай вниз",
137
+ "output": {"type": "break_block", "message": "Хорошо, я буду копать вниз", "repeat": true, "blocks": ["minecraft:stone"], "quantity": 0}
138
  },
139
  {
140
  "input": "Добудь руду на севере",
141
+ "output": {"type": "break_block", "message": "Хорошо, я добуду руду на севере", "repeat": false, "blocks": ["minecraft:iron_ore"], "quantity": 0}
142
  }
143
  ]
144
  },
 
157
  ]
158
  },
159
  "fishing": {
160
+ "description": "Ловить рыбу.",
161
+ "required_fields": ["type", "message", "repeat", "quantity"],
162
  "examples": [
163
  {
164
  "input": "Слови рыбу",
165
+ "output": {"type": "fishing", "message": "Хорошо, я поймаю рыбу", "repeat": false, "quantity": 1}
166
  },
167
  {
168
  "input": "Лови рыбу",
169
+ "output": {"type": "fishing", "message": "Хорошо, я буду ловить рыбу", "repeat": true, "quantity": 0}
170
  },
171
  {
172
  "input": "Слови 10 рыб",
173
+ "output": {"type": "fishing", "message": "Хорошо, я поймаю 10 рыб", "repeat": false, "quantity": 10}
174
  }
175
  ]
176
  },
177
  "shear_wool": {
178
+ "description": "Стричь овец",
179
+ "required_fields": ["type", "message", "repeat", "quantity"],
180
  "examples": [
181
  {
182
  "input": "Постриги овец",
183
+ "output": {"type": "shear_wool", "message": "Хорошо, я постригу овец", "repeat": false, "quantity": 0}
184
  },
185
  {
186
  "input": "Стриги овец",
187
+ "output": {"type": "shear_wool", "message": "Хорошо, я буду стричь овец", "repeat": true, "quantity": 0}
188
  },
189
  {
190
  "input": "Постриги 10 овец",
191
+ "output": {"type": "shear_wool", "message": "Хорошо, я постригу 10 овец", "repeat": false, "quantity": 10}
192
  }
193
  ]
194
  },
195
  "lighting": {
196
  "description": "Команда для освещения или размещения источников света",
197
+ "required_fields": ["type", "message"],
198
  "examples": [
199
  {
200
  "input": "Подсвети мне",
201
+ "output": {"type": "lighting", "message": "Хорошо, я буду освещать путь"}
202
  },
203
  {
204
  "input": "Освещай",
205
+ "output": {"type": "lighting", "message": "Включаю освещение"}
206
  },
207
  {
208
  "input": "Поставь 10 факелов",
209
+ "output": {"type": "lighting", "message": "Хорошо, я поставлю 10 факелов"}
210
  },
211
  {
212
  "input": "Расставь факелы вокруг дома",
213
+ "output": {"type": "lighting", "message": "Хорошо, я расставлю факелы вокруг дома"}
214
  }
215
  ]
216
  },
 
234
  }
235
  },
236
  "system_prompt_template": {
237
+ "intro": "Ты CubixAssistant для сервера Minecraft CubixWorld. Анализируй запросы игроков и конвертируй их в JSON-команды. ОБРАБАТЫВАЙ ТОЛЬКО ОДНУ КОМАНДУ за раз. При нескольких командах отвечай: {\"type\": \"message\", \"message\": \"Я не могу выполнять несколько задач сразу\"}",
238
+
239
+ "command_rules": [
240
+ "[ФОРМАТЫ КОМАНД]",
241
+ " СТРОГО придерживайся схемы для каждой команды - не добавляй лишних полей",
242
+ " Все команды имеют обязательные поля: type, message",
243
+ " Определяй repeat: false для одноразовых команд (\"сделай\", \"принеси\") и true для повторяющихся (\"делай\", \"-ай\" окончания)",
244
+ " При запросах с нереально большим количеством (>1000) используй type:message",
245
+ "• Проверяй каждый ответ на соответствие схеме команды",
246
+ "",
247
+ "[СХЕМЫ КОМАНД]",
248
+ " follow_me: {type, message, repeat}",
249
+ " message: {type, message}",
250
+ " kill_mob: {type, message, repeat, mob}",
251
+ " break_tree: {type, message, repeat, quantityTrees, quantity, blocks}",
252
+ " harvest_crops: {type, message, repeat, harvest}",
253
+ " break_block: {type, message, repeat, blocks, quantity} - НЕ ДОБАВЛЯТЬ location",
254
+ "• cycle_break_block: {type, message}",
255
+ "• fishing: {type, message, repeat, quantity}",
256
+ " shear_wool: {type, message, repeat, quantity}",
257
+ " lighting: {type, message} - НЕ ДОБАВЛЯТЬ quantity",
258
+ " stop: {type, message}"
 
 
 
 
 
 
 
 
 
259
  ],
260
+
261
+ "validation_checklist": [
262
+ "[КРИТИЧЕСКИЕ ПРОВЕРКИ]",
263
+ "1. ТОЛЬКО ОДНА команда в ответе, никаких secondary_actions",
264
+ "2. ВСЕ обязательные поля присутствуют",
265
+ "3. Массив blocks НИКОГДА не пустой, для break_block минимум [\"minecraft:stone\"]",
266
+ "4. Правильные ID мобов из справочника (EntityZombie вместо \"зомби\")",
267
+ "5. При числах >100000 используй {\"type\": \"message\", \"message\": \"не могу ... такое количество\"}",
268
+ "6. Запросы \"наруби досок\" = break_tree, а не break_block",
269
+ "7. НИКОГДА не добавляй поля, которых нет в схеме команды",
270
+ "8. Для команды lighting НЕ добавляй поле quantity, даже если указано число факелов",
271
+ "9. Используй ТОЛЬКО существующие блоки Minecraft:",
272
+ " • Дерево: minecraft:oak_log, minecraft:spruce_log, minecraft:birch_log, minecraft:acacia_log, minecraft:dark_oak_log",
273
+ " • Доски: minecraft:oak_planks, minecraft:spruce_planks, minecraft:birch_planks, minecraft:acacia_planks",
274
+ " • Камень: minecraft:stone, minecraft:cobblestone",
275
+ " • При неуверенности используй стандартные (oak_log, stone)"
276
  ],
277
+
278
+ "examples": [
279
+ "[ПРИМЕРЫ СТРОГО ПО СХЕМЕ]",
280
+ "Убей зомби {\"type\": \"kill_mob\", \"message\": \"Хорошо, я иду убивать зомби\", \"repeat\": false, \"mob\": \"EntityZombie\"}",
281
+ "Копай шахту {\"type\": \"break_block\", \"message\": \"Буду копать шахту\", \"repeat\": true, \"blocks\": [\"minecraft:stone\"], \"quantity\": 0}",
282
+ "Наруби 10 досок дуба {\"type\": \"break_tree\", \"message\": \"Добуду 10 досок дуба\", \"repeat\": false, \"quantityTrees\": 0, \"quantity\": 10, \"blocks\": [\"minecraft:oak_planks\"]}",
283
+ "Наруби 9999999 досок {\"type\": \"message\", \"message\": \"К сожалению, я не могу нарубить такое большое количество досок\"}",
284
+ "Наруби досок каменного дерева → {\"type\": \"message\", \"message\": \"Извините, каменное дерево не существует в Minecraft\"}",
285
+ "Наруби дерева и убей зомби {\"type\": \"message\", \"message\": \"Я не могу выполнять несколько задач сразу\"}",
286
+ "Включи 10 света {\"type\": \"lighting\", \"message\": \"Хорошо, я поставлю 10 факелов\"}",
287
+ "Лови рыбу {\"type\": \"fishing\", \"message\": \"Буду ловить рыбу\", \"repeat\": true, \"quantity\": 0}",
288
+ "Постриги 5 овец → {\"type\": \"shear_wool\", \"message\": \"Постригу 5 овец\", \"repeat\": false, \"quantity\": 5}",
289
+ "Копай вот здесь → {\"type\": \"cycle_break_block\", \"message\": \"Буду копать на этом месте\"}"
290
  ],
291
+
292
+ "error_handlers": [
293
+ "[ОШИБКИ И ОСОБЫЕ СЛУЧАИ]",
294
+ " Несуществующие блоки {\"type\": \"message\", \"message\": \"Извините, [блок] не существует в Minecraft\"}",
295
+ " Огромные числа {\"type\": \"message\", \"message\": \"К сожалению, я не могу [действие] такое количество\"}",
296
+ " Несколько команд {\"type\": \"message\", \"message\": \"Я не могу выполнять несколько задач сразу\"}",
297
+ " Приветствия, вопросы {\"type\": \"message\", \"message\": \"[дружелюбный ответ]\"}",
298
+ " При неоднозначности предпочитай message вместо неверной команды"
299
  ],
300
+
301
+ "type_detection": [
302
+ "[ОПРЕДЕЛЕНИЕ ТИПА КОМАНДЫ]",
303
+ " follow_me: \"следуй\", \"приди\", \"иди за\"",
304
+ " message: общение, вопросы, неизвестные команды",
305
+ " kill_mob: \"убей\", \"атакуй\" (мобов)",
306
+ " break_tree: \"руби\", \"наруби\" (дерево/доски)",
307
+ "• harvest_crops: \"собери\", \"собирай\" (урожай)",
308
+ "• break_block: \"копай\", \"сломай\" (камень/блоки, но не деревья)",
309
+ "• cycle_break_block: \"копай здесь\", \"ломай под собой\"",
310
+ "• fishing: \"лови\", \"налови\" (рыбу)",
311
+ "• shear_wool: \"стриги\" (овец)",
312
+ "• lighting: \"освещай\", \"поставь факелы\", \"включи свет\"",
313
+ "• stop: \"стой\", \"хватит\", \"прекрати\""
314
  ]
315
  }
316
  }
rag.py CHANGED
@@ -88,9 +88,9 @@ class RAGSystem:
88
  """Create a new knowledge base from scratch"""
89
  # Backup old index if needed
90
  if backup_old and os.path.exists("faiss_index"):
91
- backup_dir = f"faiss_index"
92
  shutil.move("faiss_index", backup_dir)
93
- print(f"Old index backed up")
94
 
95
  # Create directory if needed
96
  os.makedirs("faiss_index", exist_ok=True)
@@ -100,7 +100,24 @@ class RAGSystem:
100
  for url in CUBIX_DOCS:
101
  try:
102
  loader = WebBaseLoader(url)
103
- docs.extend(loader.load())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  print(f"Loaded document from {url}")
105
  except Exception as e:
106
  print(f"Error loading document from {url}: {e}")
@@ -113,9 +130,9 @@ class RAGSystem:
113
 
114
  # Split documents
115
  text_splitter = RecursiveCharacterTextSplitter(
116
- chunk_size=500, # Increased from 150 for better context
117
- chunk_overlap=50, # Added overlap to prevent breaking important context
118
- separators=["\n\n", "\n", " ", ""], # More intelligent splitting
119
  length_function=len
120
  )
121
  split_docs = text_splitter.split_documents(docs)
@@ -126,7 +143,7 @@ class RAGSystem:
126
  try:
127
  # Save the new index
128
  self.document_store.save_local("faiss_index")
129
- print("Successfully saved new knowledge base to faiss_index directory")
130
  except Exception as e:
131
  print(f"Error saving knowledge base: {e}")
132
 
@@ -192,6 +209,18 @@ class RAGSystem:
192
  print(f"Error saving index: {e}")
193
  return False
194
 
 
 
 
 
 
 
 
 
 
 
 
 
195
  def generate_response(self, user_id: str, message: str) -> Dict[str, Any]:
196
  """Generate a response for a user message."""
197
  if user_id not in self.user_conversations:
@@ -212,18 +241,18 @@ class RAGSystem:
212
  # First try with higher relevance score threshold for more accurate results
213
  results = self.document_store.similarity_search_with_score(
214
  message,
215
- k=4, # Increased from 3 for better coverage
216
- score_threshold=0.75 # Slightly relaxed from 0.8 for better recall
217
  )
218
 
219
  if results:
220
  # Sort by score and take top results
221
- results.sort(key=lambda x: x[1], reverse=True)
222
 
223
  # Format with scores and metadata for better context
224
  formatted_results = []
225
  for doc, score in results:
226
- if score > 0.75: # Only include relevance context
227
  # Extract source for better attribution
228
  source = doc.metadata.get('source', 'Unknown source')
229
  if isinstance(source, str) and source.startswith('http'):
@@ -236,7 +265,7 @@ class RAGSystem:
236
  content = re.sub(r'\s+', ' ', content).strip()
237
 
238
  # Limit content length based on relevance score - higher relevance gets more tokens
239
- max_length = int(min(800 + (score * 400), 1200)) # Dynamic length based on relevance
240
  if len(content) > max_length:
241
  content = content[:max_length] + "..."
242
 
@@ -256,7 +285,7 @@ class RAGSystem:
256
  if not relevant_context:
257
  results = self.document_store.similarity_search(
258
  message,
259
- k=3 # Increased from 2 for better coverage while keeping focus
260
  )
261
  if results:
262
  formatted_results = []
@@ -268,14 +297,15 @@ class RAGSystem:
268
 
269
  # Truncate content if too long (token optimization)
270
  content = doc.page_content
271
- if len(content) > 800: # Shorter for fallback results
272
- content = content[:800] + "..."
273
 
274
  formatted_results.append(f"SOURCE: {source}\n{content}")
275
 
276
  relevant_context = "\n\n---\n\n".join(formatted_results)
277
  except Exception as e:
278
  print(f"Error during context retrieval: {e}")
 
279
 
280
  # Build command specifications for the system prompt
281
  command_specs_text = "# Available Commands and Required Fields\n\n"
@@ -299,66 +329,40 @@ class RAGSystem:
299
  mob_mappings_text += f"- {mob_name}: {entity_id}\n"
300
  mob_mappings_text += "\nWhen a player mentions a mob, always map it to the corresponding entity ID in your response.\n\n"
301
 
302
- # Build system prompt from template
303
- system_prompt = SYSTEM_PROMPT_TEMPLATE['intro'] + "\n\n"
304
 
305
- # Add role section
306
- system_prompt += "## Your Role\n"
307
- for item in SYSTEM_PROMPT_TEMPLATE['role_section']:
308
- system_prompt += f"- {item}\n"
309
 
310
- # Add command specs
311
- system_prompt += f"\n{command_specs_text}\n"
 
 
 
 
 
312
 
313
- # Add mob mappings
314
- system_prompt += f"\n{mob_mappings_text}\n"
 
 
 
315
 
316
- # Add command detection guidelines
317
- system_prompt += "\n## Command Detection Guidelines\n"
318
- for item in SYSTEM_PROMPT_TEMPLATE['command_detection_guidelines']:
319
- system_prompt += f"- {item}\n"
320
-
321
- # Add contextual information processing
322
- system_prompt += "\n## Contextual Information Processing\n"
323
- for item in SYSTEM_PROMPT_TEMPLATE['contextual_information_processing']:
324
- system_prompt += f"- {item}\n"
325
-
326
- # Add response format
327
- system_prompt += "\n## Response Format\n"
328
- for item in SYSTEM_PROMPT_TEMPLATE['response_format']:
329
- system_prompt += f"- {item}\n"
330
-
331
- # Add conversation handling instructions
332
- if 'conversation_handling' in SYSTEM_PROMPT_TEMPLATE:
333
- system_prompt += "\n## Conversation Handling\n"
334
- for item in SYSTEM_PROMPT_TEMPLATE['conversation_handling']:
335
- system_prompt += f"- {item}\n"
336
-
337
- # Add improved RAG processing instructions
338
- if 'rag_processing_instructions' in SYSTEM_PROMPT_TEMPLATE:
339
- system_prompt += "\n## RAG Processing Instructions\n"
340
- for item in SYSTEM_PROMPT_TEMPLATE['rag_processing_instructions']:
341
- system_prompt += f"- {item}\n"
342
-
343
- # Add ambiguity handling instructions if they exist
344
- if 'ambiguity_handling' in SYSTEM_PROMPT_TEMPLATE:
345
- system_prompt += "\n## Ambiguity Handling\n"
346
- for item in SYSTEM_PROMPT_TEMPLATE['ambiguity_handling']:
347
- system_prompt += f"- {item}\n"
348
-
349
- # Add token optimization instructions if they exist
350
- if 'token_optimization' in SYSTEM_PROMPT_TEMPLATE:
351
- system_prompt += "\n## Token Optimization\n"
352
- for item in SYSTEM_PROMPT_TEMPLATE['token_optimization']:
353
- system_prompt += f"- {item}\n"
354
 
 
355
  if relevant_context:
356
  system_prompt += f"""
 
 
357
 
358
- ## Relevant Information from CubixWorld Documentation
359
- Use this information to guide your response. Pay special attention to specific game mechanics and features:
360
 
361
- {relevant_context}
362
  """
363
 
364
  # Create messages list with proper LangChain message objects
@@ -376,18 +380,27 @@ class RAGSystem:
376
  context_summary = "\nContext relevance summary (sorted by relevance):\n"
377
 
378
  # Sort context by relevance score
379
- sorted_contexts = sorted(context_docs, key=lambda x: x['score'], reverse=True)
380
 
381
  for i, ctx in enumerate(sorted_contexts):
382
- # Add confidence classification
383
- confidence = "High" if ctx['score'] > 0.9 else "Medium" if ctx['score'] > 0.8 else "Moderate"
384
  # Add a brief preview of content with source
385
  source = ctx['source'].split('/')[-1] if '/' in ctx['source'] else ctx['source']
386
- preview = ctx['content'][:80].replace('\n', ' ') + "..."
387
  context_summary += f"{i+1}. [{confidence} confidence, score {ctx['score']:.2f}] From {source}: {preview}\n"
388
 
389
  messages.append(SystemMessage(content=context_summary))
390
 
 
 
 
 
 
 
 
 
 
391
  # Initialize token usage variables
392
  total_tokens = 0
393
  prompt_tokens = 0
@@ -416,7 +429,9 @@ class RAGSystem:
416
  json_end = response_content.find("```", json_start)
417
  response_content = response_content[json_start:json_end].strip()
418
 
419
- json_response = {'response': json.loads(response_content)}
 
 
420
 
421
  # Add token usage information to the response
422
  json_response["token_usage"] = {
 
88
  """Create a new knowledge base from scratch"""
89
  # Backup old index if needed
90
  if backup_old and os.path.exists("faiss_index"):
91
+ backup_dir = f"faiss_index_backup"
92
  shutil.move("faiss_index", backup_dir)
93
+ print(f"Old index backed up to {backup_dir}")
94
 
95
  # Create directory if needed
96
  os.makedirs("faiss_index", exist_ok=True)
 
100
  for url in CUBIX_DOCS:
101
  try:
102
  loader = WebBaseLoader(url)
103
+ web_docs = loader.load()
104
+
105
+ # Clean and preprocess documents
106
+ for doc in web_docs:
107
+ # Clean HTML artifacts and normalize whitespace
108
+ content = doc.page_content
109
+ content = re.sub(r'\s+', ' ', content) # Normalize whitespace
110
+ content = re.sub(r'<[^>]+>', ' ', content) # Remove HTML tags
111
+ content = re.sub(r'\[.*?\]', '', content) # Remove square brackets content
112
+
113
+ # Update document content
114
+ doc.page_content = content.strip()
115
+
116
+ # Add source URL to metadata if not present
117
+ if 'source' not in doc.metadata:
118
+ doc.metadata['source'] = url
119
+
120
+ docs.extend(web_docs)
121
  print(f"Loaded document from {url}")
122
  except Exception as e:
123
  print(f"Error loading document from {url}: {e}")
 
130
 
131
  # Split documents
132
  text_splitter = RecursiveCharacterTextSplitter(
133
+ chunk_size=800, # Increased from 500 for better context
134
+ chunk_overlap=100, # Increased overlap to prevent breaking important context
135
+ separators=["\n\n", "\n", ". ", " ", ""], # More intelligent splitting
136
  length_function=len
137
  )
138
  split_docs = text_splitter.split_documents(docs)
 
143
  try:
144
  # Save the new index
145
  self.document_store.save_local("faiss_index")
146
+ print(f"Successfully saved new knowledge base to faiss_index directory with {len(split_docs)} chunks")
147
  except Exception as e:
148
  print(f"Error saving knowledge base: {e}")
149
 
 
209
  print(f"Error saving index: {e}")
210
  return False
211
 
212
+ def _filter_response_fields(self, response_dict: dict) -> dict:
213
+ """Filter out fields that are not in required_fields for the command type."""
214
+ if not isinstance(response_dict, dict) or 'type' not in response_dict:
215
+ return response_dict
216
+
217
+ command_type = response_dict['type']
218
+ if command_type not in COMMAND_SPECS:
219
+ return response_dict
220
+
221
+ required_fields = COMMAND_SPECS[command_type]['required_fields']
222
+ return {k: v for k, v in response_dict.items() if k in required_fields}
223
+
224
  def generate_response(self, user_id: str, message: str) -> Dict[str, Any]:
225
  """Generate a response for a user message."""
226
  if user_id not in self.user_conversations:
 
241
  # First try with higher relevance score threshold for more accurate results
242
  results = self.document_store.similarity_search_with_score(
243
  message,
244
+ k=5, # Increased from 4 for better coverage
245
+ score_threshold=0.6 # Relaxed from 0.75 for better recall with Russian queries
246
  )
247
 
248
  if results:
249
  # Sort by score and take top results
250
+ results.sort(key=lambda x: x[1])
251
 
252
  # Format with scores and metadata for better context
253
  formatted_results = []
254
  for doc, score in results:
255
+ if score < 1.5: # Only include relevance context (lower score is better in FAISS)
256
  # Extract source for better attribution
257
  source = doc.metadata.get('source', 'Unknown source')
258
  if isinstance(source, str) and source.startswith('http'):
 
265
  content = re.sub(r'\s+', ' ', content).strip()
266
 
267
  # Limit content length based on relevance score - higher relevance gets more tokens
268
+ max_length = int(min(1000, 1500)) # Increased length for better context
269
  if len(content) > max_length:
270
  content = content[:max_length] + "..."
271
 
 
285
  if not relevant_context:
286
  results = self.document_store.similarity_search(
287
  message,
288
+ k=4 # Increased from 3 for better coverage
289
  )
290
  if results:
291
  formatted_results = []
 
297
 
298
  # Truncate content if too long (token optimization)
299
  content = doc.page_content
300
+ if len(content) > 1000: # Increased from 800 for better context
301
+ content = content[:1000] + "..."
302
 
303
  formatted_results.append(f"SOURCE: {source}\n{content}")
304
 
305
  relevant_context = "\n\n---\n\n".join(formatted_results)
306
  except Exception as e:
307
  print(f"Error during context retrieval: {e}")
308
+ relevant_context = f"Error retrieving context: {str(e)}"
309
 
310
  # Build command specifications for the system prompt
311
  command_specs_text = "# Available Commands and Required Fields\n\n"
 
329
  mob_mappings_text += f"- {mob_name}: {entity_id}\n"
330
  mob_mappings_text += "\nWhen a player mentions a mob, always map it to the corresponding entity ID in your response.\n\n"
331
 
332
+ # Build system prompt with new structure
333
+ system_prompt = ""
334
 
335
+ # Add intro
336
+ if 'intro' in SYSTEM_PROMPT_TEMPLATE:
337
+ system_prompt += SYSTEM_PROMPT_TEMPLATE['intro'] + "\n\n"
 
338
 
339
+ # Add all sections from the template in order
340
+ sections = [
341
+ 'command_rules',
342
+ 'validation_checklist',
343
+ 'examples',
344
+ 'error_handlers'
345
+ ]
346
 
347
+ for section in sections:
348
+ if section in SYSTEM_PROMPT_TEMPLATE:
349
+ for item in SYSTEM_PROMPT_TEMPLATE[section]:
350
+ system_prompt += item + "\n"
351
+ system_prompt += "\n"
352
 
353
+ # Add command specs and mob mappings from our generated text
354
+ system_prompt += command_specs_text + "\n"
355
+ system_prompt += mob_mappings_text + "\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
356
 
357
+ # Add relevant context if available
358
  if relevant_context:
359
  system_prompt += f"""
360
+ ## Relevant Information from CubixWorld Documentation
361
+ Use this information to guide your response. If the user is asking about game updates, features, or information that appears in this documentation, provide that information in your response:
362
 
363
+ {relevant_context}
 
364
 
365
+ IMPORTANT: If the user is asking about information contained in the documentation above, you SHOULD provide that information in your response. Do not say you cannot answer questions about game updates or features if the information is available in the documentation.
366
  """
367
 
368
  # Create messages list with proper LangChain message objects
 
380
  context_summary = "\nContext relevance summary (sorted by relevance):\n"
381
 
382
  # Sort context by relevance score
383
+ sorted_contexts = sorted(context_docs, key=lambda x: x['score'])
384
 
385
  for i, ctx in enumerate(sorted_contexts):
386
+ # Add confidence classification (lower score is better in FAISS)
387
+ confidence = "High" if ctx['score'] < 0.3 else "Medium" if ctx['score'] < 0.6 else "Moderate"
388
  # Add a brief preview of content with source
389
  source = ctx['source'].split('/')[-1] if '/' in ctx['source'] else ctx['source']
390
+ preview = ctx['content'][:100].replace('\n', ' ') + "..."
391
  context_summary += f"{i+1}. [{confidence} confidence, score {ctx['score']:.2f}] From {source}: {preview}\n"
392
 
393
  messages.append(SystemMessage(content=context_summary))
394
 
395
+ # Add special instruction for information questions
396
+ if any(keyword in message.lower() for keyword in ["что", "какие", "когда", "обновление", "новое", "осеннее", "autumn"]):
397
+ info_instruction = """
398
+ IMPORTANT INSTRUCTION: The user is asking about game information or updates. If you have relevant information in the context provided,
399
+ you MUST share that information in your response using the message command type. Do not refuse to answer questions about game updates
400
+ or features if the information is available in the context.
401
+ """
402
+ messages.append(SystemMessage(content=info_instruction))
403
+
404
  # Initialize token usage variables
405
  total_tokens = 0
406
  prompt_tokens = 0
 
429
  json_end = response_content.find("```", json_start)
430
  response_content = response_content[json_start:json_end].strip()
431
 
432
+ parsed_response = json.loads(response_content)
433
+ filtered_response = self._filter_response_fields(parsed_response)
434
+ json_response = {'response': filtered_response}
435
 
436
  # Add token usage information to the response
437
  json_response["token_usage"] = {