Illia56 commited on
Commit
b70539a
·
verified ·
1 Parent(s): 4d29a9c

Upload 8 files

Browse files
Files changed (9) hide show
  1. .gitattributes +1 -0
  2. Dockerfile +15 -0
  3. config.json +307 -0
  4. docker-compose.yml +19 -0
  5. faiss_index/index.faiss +3 -0
  6. faiss_index/index.pkl +3 -0
  7. main.py +85 -0
  8. rag.py +443 -0
  9. requirements.txt +14 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ faiss_index/index.faiss filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Slim Python 3.11 image that serves the FastAPI app (main:app) with uvicorn.
FROM python:3.11-slim

WORKDIR /app

# Install dependencies before copying the source so Docker's layer cache
# skips the pip install when only application code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

# Create faiss_index directory and ensure proper permissions
# NOTE(review): chmod 777 makes the index world-writable — presumably so a
# non-root runtime user can rebuild the FAISS index; confirm and tighten.
RUN mkdir -p /app/faiss_index && chmod 777 /app/faiss_index

EXPOSE 8000

CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
config.json ADDED
@@ -0,0 +1,307 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cubix_docs": [
3
+ "https://cubixworld.net/autumn-update-2024",
4
+ "https://cubixworld.net/articles/avtokraft-shakhterskogo-lazera-i-almaznogo-bura"
5
+ ],
6
+ "mob_mappings": {
7
+ "зомби": "EntityZombie",
8
+ "скелет": "EntitySkeleton",
9
+ "паук": "EntitySpider",
10
+ "крипер": "EntityCreeper",
11
+ "свинья": "EntityPig",
12
+ "корова": "EntityCow",
13
+ "овца": "EntitySheep",
14
+ "курица": "EntityChicken",
15
+ "волк": "EntityWolf",
16
+ "кошка": "EntityOcelot",
17
+ "лошадь": "EntityHorse",
18
+ "мобы": "Mobs"
19
+ },
20
+ "command_specs": {
21
+ "follow_me": {
22
+ "description": "Команда для следования за игроком или перемещения к игроку",
23
+ "required_fields": ["type", "message", "repeat"],
24
+ "examples": [
25
+ {
26
+ "input": "Следуй за мной",
27
+ "output": {"type": "follow_me", "message": "Хорошо, я буду следовать за вами", "repeat": true}
28
+ },
29
+ {
30
+ "input": "Приди ко мне",
31
+ "output": {"type": "follow_me", "message": "Сейчас подойду к вам", "repeat": false}
32
+ },
33
+ {
34
+ "input": "Следуй за мной и убивай встречных зомби",
35
+ "output": {"type": "follow_me", "message": "Хорошо, я буду следовать за вами и убивать встречных зомби", "repeat": true, "secondary_action": {"type": "kill_mob", "mob": "EntityZombie", "repeat": true}}
36
+ },
37
+ {
38
+ "input": "Иди за мной и добывай камень",
39
+ "output": {"type": "follow_me", "message": "Хорошо, я буду следовать за вами и добывать камень", "repeat": true, "secondary_action": {"type": "break_block", "blocks": ["minecraft:stone"], "repeat": true}}
40
+ }
41
+ ]
42
+ },
43
+ "message": {
44
+ "description": "Обычное сообщение в чате или запоминание обращения",
45
+ "required_fields": ["type", "message"],
46
+ "examples": [
47
+ {
48
+ "input": "Привет, как дела?",
49
+ "output": {"type": "message", "message": "Привет! У меня всё хорошо, готов помочь вам в игре!"}
50
+ },
51
+ {
52
+ "input": "Называй меня лучший друг",
53
+ "output": {"type": "message", "message": "Хорошо, буду называть вас лучший друг!"}
54
+ }
55
+ ]
56
+ },
57
+ "kill_mob": {
58
+ "description": "Команда для убийства мобов или животных. Поле mob должно содержать entity ID моба из списка доступных.",
59
+ "required_fields": ["type", "message", "repeat", "mob"],
60
+ "examples": [
61
+ {
62
+ "input": "Убей зомби",
63
+ "output": {"type": "kill_mob", "message": "Хорошо, я иду убивать зомби", "repeat": false, "mob": "EntityZombie"}
64
+ },
65
+ {
66
+ "input": "Убивай мобов",
67
+ "output": {"type": "kill_mob", "message": "Хорошо, я буду убивать мобов", "repeat": true, "mob": "Mobs"}
68
+ },
69
+ {
70
+ "input": "Убивай скелетов",
71
+ "output": {"type": "kill_mob", "message": "Хорошо, я буду убивать скелетов", "repeat": true, "mob": "EntitySkeleton"}
72
+ },
73
+ {
74
+ "input": "Убей всех свиней",
75
+ "output": {"type": "kill_mob", "message": "Хорошо, я убью всех свиней", "repeat": false, "mob": "EntityPig"}
76
+ }
77
+ ]
78
+ },
79
+ "break_tree": {
80
+ "description": "Команда для ломания деревьев",
81
+ "required_fields": ["type", "message", "repeat", "quantityTrees", "quantity", "blocks"],
82
+ "examples": [
83
+ {
84
+ "input": "Сломай дерево",
85
+ "output": {"type": "break_tree", "message": "Хорошо, я сломаю дерево", "repeat": false, "quantityTrees": 1, "quantity": 0, "blocks": []}
86
+ },
87
+ {
88
+ "input": "Добудь дерева",
89
+ "output": {"type": "break_tree", "message": "Хорошо, я буду добывать дерево", "repeat": true, "quantityTrees": 0, "quantity": 0, "blocks": []}
90
+ },
91
+ {
92
+ "input": "Добудь 10 деревьев",
93
+ "output": {"type": "break_tree", "message": "Хорошо, я добуду 10 деревьев", "repeat": false, "quantityTrees": 10, "quantity": 0, "blocks": []}
94
+ },
95
+ {
96
+ "input": "Добудь 50 блоков дуба",
97
+ "output": {"type": "break_tree", "message": "Хорошо, я добуду 50 блоков дуба", "repeat": false, "quantityTrees": 0, "quantity": 50, "blocks": ["minecraft:oak_log"]}
98
+ }
99
+ ]
100
+ },
101
+ "harvest_crops": {
102
+ "description": "Команда для сбора урожая",
103
+ "required_fields": ["type", "message", "repeat", "harvest"],
104
+ "examples": [
105
+ {
106
+ "input": "Собери урожай",
107
+ "output": {"type": "harvest_crops", "message": "Хорошо, я соберу урожай", "repeat": false, "harvest": []}
108
+ },
109
+ {
110
+ "input": "Собирай пшеницу",
111
+ "output": {"type": "harvest_crops", "message": "Хорошо, я буду собирать пшеницу", "repeat": true, "harvest": ["пшеница"]}
112
+ },
113
+ {
114
+ "input": "Собирай картофель и морковь",
115
+ "output": {"type": "harvest_crops", "message": "Хорошо, я буду собирать картофель и морковь", "repeat": true, "harvest": ["картофель", "морковь"]}
116
+ }
117
+ ]
118
+ },
119
+ "break_block": {
120
+ "description": "Команда для ломания блоков",
121
+ "required_fields": ["type", "message", "repeat", "blocks", "quantity"],
122
+ "examples": [
123
+ {
124
+ "input": "Сломай землю",
125
+ "output": {"type": "break_block", "message": "Хорошо, я сломаю землю", "repeat": false, "blocks": ["minecraft:dirt"], "quantity": 1, "location": ""}
126
+ },
127
+ {
128
+ "input": "Сломай 20 земли",
129
+ "output": {"type": "break_block", "message": "Хорошо, я сломаю 20 блоков земли", "repeat": false, "quantity": 20, "blocks": ["minecraft:dirt"], "location": ""}
130
+ },
131
+ {
132
+ "input": "Копай камень",
133
+ "output": {"type": "break_block", "message": "Хорошо, я буду копать камень", "repeat": true, "blocks": ["minecraft:stone"], "quantity": 0, "location": ""}
134
+ },
135
+ {
136
+ "input": "Копай вниз",
137
+ "output": {"type": "break_block", "message": "Хорошо, я буду копать вниз", "repeat": true, "blocks": [], "quantity": 0, "location": "вниз"}
138
+ },
139
+ {
140
+ "input": "Добудь руду на севере",
141
+ "output": {"type": "break_block", "message": "Хорошо, я добуду руду на севере", "repeat": false, "blocks": [], "quantity": 0, "location": "на севере"}
142
+ }
143
+ ]
144
+ },
145
+ "cycle_break_block": {
146
+ "description": "Команда для цикличного ломания блока на координатах игрока",
147
+ "required_fields": ["type", "message"],
148
+ "examples": [
149
+ {
150
+ "input": "Копай вот здесь постоянно",
151
+ "output": {"type": "cycle_break_block", "message": "Буду копать на этом месте"}
152
+ },
153
+ {
154
+ "input": "Ломай блок под собой",
155
+ "output": {"type": "cycle_break_block", "message": "Буду ломать блок в этом месте"}
156
+ }
157
+ ]
158
+ },
159
+ "fishing": {
160
+ "description": "Команда для ловли рыбы",
161
+ "required_fields": ["type", "repeat", "quantity", "message"],
162
+ "examples": [
163
+ {
164
+ "input": "Слови рыбу",
165
+ "output": {"type": "fishing", "repeat": false, "quantity": 1, "message": "Хорошо, я поймаю рыбу"}
166
+ },
167
+ {
168
+ "input": "Лови рыбу",
169
+ "output": {"type": "fishing", "repeat": true, "quantity": 0, "message": "Хорошо, я буду ловить рыбу"}
170
+ },
171
+ {
172
+ "input": "Слови 10 рыб",
173
+ "output": {"type": "fishing", "repeat": false, "quantity": 10, "message": "Хорошо, я поймаю 10 рыб"}
174
+ }
175
+ ]
176
+ },
177
+ "shear_wool": {
178
+ "description": "Команда для стрижки овец",
179
+ "required_fields": ["type", "repeat", "quantity", "message"],
180
+ "examples": [
181
+ {
182
+ "input": "Постриги овец",
183
+ "output": {"type": "shear_wool", "repeat": false, "quantity": 0, "message": "Хорошо, я постригу овец"}
184
+ },
185
+ {
186
+ "input": "Стриги овец",
187
+ "output": {"type": "shear_wool", "repeat": true, "quantity": 0, "message": "Хорошо, я буду стричь овец"}
188
+ },
189
+ {
190
+ "input": "Постриги 10 овец",
191
+ "output": {"type": "shear_wool", "repeat": false, "quantity": 10, "message": "Хорошо, я постригу 10 овец"}
192
+ }
193
+ ]
194
+ },
195
+ "lighting": {
196
+ "description": "Команда для освещения или размещения источников света",
197
+ "required_fields": ["type", "message", "quantity"],
198
+ "examples": [
199
+ {
200
+ "input": "Подсвети мне",
201
+ "output": {"type": "lighting", "message": "Хорошо, я буду освещать путь", "quantity": 0}
202
+ },
203
+ {
204
+ "input": "Освещай",
205
+ "output": {"type": "lighting", "message": "Включаю освещение", "quantity": 0}
206
+ },
207
+ {
208
+ "input": "Поставь 10 факелов",
209
+ "output": {"type": "lighting", "message": "Хорошо, я поставлю 10 факелов", "quantity": 10}
210
+ },
211
+ {
212
+ "input": "Расставь факелы вокруг дома",
213
+ "output": {"type": "lighting", "message": "Хорошо, я расставлю факелы вокруг дома", "quantity": 0}
214
+ }
215
+ ]
216
+ },
217
+ "stop": {
218
+ "description": "Команда для остановки действий бота",
219
+ "required_fields": ["type", "message"],
220
+ "examples": [
221
+ {
222
+ "input": "Остановись",
223
+ "output": {"type": "stop", "message": "Я остановился"}
224
+ },
225
+ {
226
+ "input": "Прекрати убивать зомби",
227
+ "output": {"type": "stop", "message": "Я прекратил убивать зомби"}
228
+ },
229
+ {
230
+ "input": "Хватит копать",
231
+ "output": {"type": "stop", "message": "Я прекратил копать"}
232
+ }
233
+ ]
234
+ }
235
+ },
236
+ "system_prompt_template": {
237
+ "intro": "You are an AI assistant for the Minecraft game server called CubixWorld. Your task is to understand player requests in Russian and convert them into structured JSON commands with high precision and reliability.",
238
+ "role_section": [
239
+ "Analyze player messages carefully to determine their exact intent and required actions",
240
+ "Convert natural language requests into precise JSON commands following the exact specifications",
241
+ "Ensure all command parameters are validated and properly formatted",
242
+ "Respond with valid JSON only, maintaining strict schema compliance",
243
+ "When uncertain about intent, prefer message responses over incorrect commands",
244
+ "For conversational messages like greetings, thanks, or jokes, respond in a friendly and engaging manner"
245
+ ],
246
+ "command_detection_guidelines": [
247
+ "Carefully analyze verb forms and temporal indicators to determine command type:",
248
+ "- Continuous actions: \"продолжай\", \"постоянно\", verbs with \"-ай\" suffix",
249
+ "- One-time commands: \"сделай\", \"сломай\", \"приди\"",
250
+ "For compound actions, identify primary and secondary components clearly",
251
+ "Extract and validate all numeric quantities before including in commands",
252
+ "Process spatial and directional information with high precision",
253
+ "For conversational inputs (greetings, thanks, jokes, etc.), use the 'message' type response"
254
+ ],
255
+ "contextual_information_processing": [
256
+ "Prioritize exact matches from documentation over general knowledge",
257
+ "Cross-reference game mechanics with server-specific features",
258
+ "Use precise entity IDs as specified in mob mappings",
259
+ "When documentation provides specific command formats, follow them exactly",
260
+ "Consider server limitations and restrictions when generating commands"
261
+ ],
262
+ "response_format": [
263
+ "Ensure all JSON responses are properly nested and formatted",
264
+ "Include all required fields as specified in command schemas",
265
+ "Validate numeric values are within acceptable ranges",
266
+ "Use consistent casing and formatting for entity IDs and commands",
267
+ "Include relevant metadata and context in responses when available"
268
+ ],
269
+ "conversation_handling": [
270
+ "For greetings (привет, здравствуй, etc.), respond with a friendly greeting and offer to help",
271
+ "For questions about your status (как дела, как ты, etc.), respond positively and ask how you can assist",
272
+ "For expressions of gratitude (спасибо, благодарю, etc.), acknowledge with a friendly response",
273
+ "For requests for jokes or fun content, respond with a Minecraft-themed joke or fun fact",
274
+ "Always maintain a helpful, friendly tone appropriate for a game assistant",
275
+ "Support both Russian and Ukrainian language inputs with appropriate responses"
276
+ ],
277
+ "rag_processing_instructions": [
278
+ "When contextual information is provided, carefully analyze and extract specific mechanics, rules, or features unique to CubixWorld",
279
+ "Prioritize recently retrieved information over older knowledge when they conflict",
280
+ "When working with information from multiple sources, synthesize a coherent understanding rather than just concatenating facts",
281
+ "For highly technical queries, extract and include specific numbers, formulas, or technical details from the retrieved content",
282
+ "Match the terminology used in the retrieved documentation when formulating responses",
283
+ "If contextual information appears incomplete or contradictory, acknowledge the limitations in your response",
284
+ "When processing game update information, clearly distinguish between new features and pre-existing ones",
285
+ "Apply different weights to different sources based on relevance score - prioritize higher scoring sources",
286
+ "For related items mentioned in context (like tools, weapons, or materials), include their relationships in your understanding",
287
+ "Always cite specific game mechanics exactly as described in the documentation rather than making assumptions"
288
+ ],
289
+ "ambiguity_handling": [
290
+ "For ambiguous commands, identify the specific ambiguity and propose the most likely interpretation",
291
+ "When a player uses terminology not found in documentation, map to the closest documented concept",
292
+ "For requests that could map to multiple command types, analyze verb usage and context to determine the most appropriate one",
293
+ "If a request contains conflicting parameters, prioritize the most recently stated ones",
294
+ "For vague spatial references, default to the player's current location or field of view",
295
+ "When uncertain about quantities, default to 1 for singular references and continuous action for plural ones",
296
+ "For unprecedented or novel requests, combine existing command structures in logical ways rather than rejecting outright"
297
+ ],
298
+ "token_optimization": [
299
+ "Focus on the most relevant contextual information, ignoring tangential details",
300
+ "Maintain brevity in responses while ensuring all required information is included",
301
+ "When processing documentation, prioritize sections that directly address the current query",
302
+ "For complex multi-part requests, break down processing into logical components",
303
+ "Use concise language in responses while maintaining clarity and friendliness",
304
+ "When multiple similar contextual examples exist, focus on the closest matching ones"
305
+ ]
306
+ }
307
+ }
docker-compose.yml ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: '3.8'
2
+
3
+ services:
4
+ app:
5
+ build: .
6
+ ports:
7
+ - "8000:8000"
8
+ volumes:
9
+ - .:/app
10
+ command: uvicorn main:app --host 0.0.0.0 --port 8000 --reload
11
+ environment:
12
+ - ENVIRONMENT=development
13
+ - OPENAI_API_KEY=${OPENAI_API_KEY}
14
+ env_file:
15
+ - .env
16
+
17
+ # NOTE: the named volume below is declared but never mounted by the service above (which bind-mounts .:/app) — wire it into the service or remove it
18
+ volumes:
19
+ faiss_data:
faiss_index/index.faiss ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65ffa3256b20d95302c95fd3c367fbe3bf95f2e4a83f32513d59bb6e4a098cf0
3
+ size 620589
faiss_index/index.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b2d64ba81316d3d7e0bf2fb83fc120ecac92c2ddae5ce12c8be1fd37803e97d
3
+ size 78654
main.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException
2
+ from pydantic import BaseModel
3
+ from datetime import datetime
4
+ import os
5
+ from dotenv import load_dotenv
6
+ from typing import List, Optional
7
+ from uuid import uuid4
8
+
9
+ from rag import RAGSystem, load_config
10
+
11
# Load environment variables from a local .env file (if present)
load_dotenv()

# Load configuration shared with the RAG module (config.json)
CONFIG = load_config()
MOB_MAPPINGS = CONFIG['mob_mappings']

# Get API key — fail fast at startup rather than on the first request
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    raise ValueError("OPENAI_API_KEY environment variable is not set")

# Initialize FastAPI app
app = FastAPI(
    title="CubixAI API",
    description="API for CubixAI - Minecraft bot with AI capabilities",
    version="0.1.0",
)

# Initialize RAG system (loads or rebuilds the FAISS index at import time,
# so startup may be slow on first run)
rag_system = RAGSystem(openai_api_key=OPENAI_API_KEY)
32
+
33
# Status response model returned by GET /status
class StatusResponse(BaseModel):
    status: str     # e.g. "online"
    version: str    # mirrors app.version
    timestamp: str  # ISO-8601 server time
38
+
39
# Message request model accepted by POST /process_message
class MessageRequest(BaseModel):
    message: str                   # player's chat message (Russian, per config examples)
    user_id: Optional[str] = None  # omit to start a fresh conversation (a UUID is generated)
43
+
44
# Document URL model
# NOTE(review): not referenced by any endpoint visible in this file —
# presumably intended for a future document-ingestion route; confirm before removing.
class DocumentURLs(BaseModel):
    urls: List[str]  # web pages to ingest into the knowledge base
47
+
48
@app.get("/")
def read_root():
    """Root endpooint — trivial liveness response.

    BUG FIX: the original wrote ``app.get("/")`` without the leading ``@``,
    which merely *called* the decorator factory and discarded the result, so
    the route was never registered and ``read_root`` was left undecorated.
    """
    return {"Hello": "World"}
51
+
52
@app.get("/status", response_model=StatusResponse)
async def get_status():
    """Report that the API is up, with its version and the current server time."""
    payload = {
        "status": "online",
        "version": app.version,
        "timestamp": datetime.now().isoformat(),
    }
    return StatusResponse(**payload)
60
+
61
@app.post("/process_message")
async def process_message(request: MessageRequest):
    """Process a message from a player and generate a structured JSON response."""
    # Fall back to a fresh UUID when the caller did not supply a user id.
    uid = request.user_id if request.user_id else str(uuid4())

    result = rag_system.generate_response(uid, request.message)

    # Attach the user id (and nothing else) so the client can continue
    # the same conversation on subsequent calls.
    if isinstance(result, dict):
        result["user_id"] = uid

    return result
72
+
73
@app.get("/mob_mappings")
async def get_mob_mappings():
    """Get the mapping of Russian mob names to Minecraft entity IDs (from config.json)."""
    return MOB_MAPPINGS
77
+
78
@app.get("/command_specs")
async def get_command_specs():
    """Get the command specifications (schemas plus examples) loaded from config.json."""
    return CONFIG['command_specs']
82
+
83
# Dev entry point: `python main.py` runs uvicorn with auto-reload;
# the Docker image starts uvicorn via its CMD instead.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True)
rag.py ADDED
@@ -0,0 +1,443 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_openai import OpenAIEmbeddings
2
+ from langchain_openai import AzureOpenAIEmbeddings
3
+ from langchain_community.vectorstores import FAISS
4
+ from langchain_community.document_loaders import WebBaseLoader
5
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
6
+ from langchain_openai import ChatOpenAI
7
+ from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
8
+ from langchain_community.callbacks import get_openai_callback
9
+ from typing import List, Dict, Any
10
+ import json
11
+ import os
12
+ import re
13
+ import shutil
14
+
15
+ # Load configuration from JSON file
16
def load_config():
    """Read config.json (located next to this module) and return it as a dict."""
    here = os.path.dirname(__file__)
    with open(os.path.join(here, 'config.json'), 'r', encoding='utf-8') as fh:
        return json.load(fh)
20
+
21
# Load configuration once at import time and expose the sections used
# throughout this module.
CONFIG = load_config()
CUBIX_DOCS = CONFIG['cubix_docs']            # URLs seeding the knowledge base
MOB_MAPPINGS = CONFIG['mob_mappings']        # Russian mob name -> Minecraft entity ID
COMMAND_SPECS = CONFIG['command_specs']      # JSON command schemas + few-shot examples
SYSTEM_PROMPT_TEMPLATE = CONFIG['system_prompt_template']  # prompt building blocks
27
+
28
+ class RAGSystem:
29
    def __init__(self, openai_api_key: str):
        """Set up embedding and chat clients plus per-user state, then build or load the index.

        Args:
            openai_api_key: OpenAI API key used for both embeddings and chat.
        """
        self.openai_api_key = openai_api_key
        self.embeddings = OpenAIEmbeddings(
            openai_api_key=openai_api_key,
            model="text-embedding-3-large",
            dimensions=1536, # Explicitly setting dimensions for consistency
            show_progress_bar=True
        )
        # FAISS vector store; populated by initialize_knowledge_base() below.
        self.document_store = None
        # user_id -> list of LangChain message objects (conversation history).
        self.user_conversations = {}
        self.model = ChatOpenAI(
            openai_api_key=openai_api_key,
            model_name="gpt-4o-mini",
            # Low temperature: responses are expected to be strict JSON commands.
            temperature=0.1
        )
        self.documents_loaded = False # Add this line to track document loading
        self.initialize_knowledge_base()
46
+
47
    def initialize_knowledge_base(self):
        """Load the on-disk FAISS index if present, otherwise build a new one.

        Idempotent: returns immediately if documents were already loaded.
        Any load failure falls back to rebuilding from the configured URLs.
        """
        if self.documents_loaded: # Check if documents are already loaded
            print("Documents have already been loaded.")
            return

        try:
            # First check if index exists
            if not os.path.exists("faiss_index"):
                print("No existing knowledge base found. Creating new one...")
                self.create_new_knowledge_base()
                self.documents_loaded = True # Set flag to True after loading
                return

            try:
                # Try to load the existing index.
                # allow_dangerous_deserialization: the index .pkl is unpickled,
                # so this trusts the local faiss_index directory contents.
                self.document_store = FAISS.load_local(
                    "faiss_index",
                    self.embeddings,
                    allow_dangerous_deserialization=True
                )

                # Verify by running a test query
                self.document_store.similarity_search_with_score("test query", k=1)
                print("Loaded existing knowledge base.")
            except AssertionError:
                # Dimension mismatch detected — presumably FAISS asserts when the
                # stored index dimension differs from the embedding dimension;
                # rebuild rather than serve broken search results.
                print("Embedding dimensions mismatch detected. Rebuilding knowledge base...")
                self.create_new_knowledge_base(backup_old=True)
            except Exception as e:
                # Other loading errors (corrupt files, version skew, ...)
                print(f"Error loading knowledge base: {e}")
                self.create_new_knowledge_base(backup_old=True)

            self.documents_loaded = True # Set flag to True after loading

        except Exception as e:
            # Last-resort fallback; NOTE(review): documents_loaded is not set
            # on this path, so a later call will retry initialization.
            print(f"Error during knowledge base initialization: {e}")
            self.create_new_knowledge_base()
86
+
87
+ def create_new_knowledge_base(self, backup_old=False):
88
+ """Create a new knowledge base from scratch"""
89
+ # Backup old index if needed
90
+ if backup_old and os.path.exists("faiss_index"):
91
+ backup_dir = f"faiss_index"
92
+ shutil.move("faiss_index", backup_dir)
93
+ print(f"Old index backed up")
94
+
95
+ # Create directory if needed
96
+ os.makedirs("faiss_index", exist_ok=True)
97
+
98
+ # Load documents from web
99
+ docs = []
100
+ for url in CUBIX_DOCS:
101
+ try:
102
+ loader = WebBaseLoader(url)
103
+ docs.extend(loader.load())
104
+ print(f"Loaded document from {url}")
105
+ except Exception as e:
106
+ print(f"Error loading document from {url}: {e}")
107
+
108
+ if not docs:
109
+ print("No documents could be loaded.")
110
+ # Initialize empty vector store
111
+ self.document_store = FAISS.from_texts(["placeholder"], self.embeddings)
112
+ return
113
+
114
+ # Split documents
115
+ text_splitter = RecursiveCharacterTextSplitter(
116
+ chunk_size=500, # Increased from 150 for better context
117
+ chunk_overlap=50, # Added overlap to prevent breaking important context
118
+ separators=["\n\n", "\n", " ", ""], # More intelligent splitting
119
+ length_function=len
120
+ )
121
+ split_docs = text_splitter.split_documents(docs)
122
+
123
+ # Create new vector store from scratch
124
+ self.document_store = FAISS.from_documents(split_docs, self.embeddings)
125
+
126
+ try:
127
+ # Save the new index
128
+ self.document_store.save_local("faiss_index")
129
+ print("Successfully saved new knowledge base to faiss_index directory")
130
+ except Exception as e:
131
+ print(f"Error saving knowledge base: {e}")
132
+
133
+ def add_web_documents(self, urls: List[str]):
134
+ """Add web documents to the knowledge base"""
135
+ if not urls:
136
+ return False
137
+
138
+ # Load documents from web
139
+ docs = []
140
+ for url in urls:
141
+ try:
142
+ print(f"Loading document from {url}")
143
+ loader = WebBaseLoader(url)
144
+ web_docs = loader.load()
145
+
146
+ # Clean and preprocess documents
147
+ for doc in web_docs:
148
+ # Clean HTML artifacts and normalize whitespace
149
+ content = doc.page_content
150
+ content = re.sub(r'\s+', ' ', content) # Normalize whitespace
151
+ content = re.sub(r'<[^>]+>', ' ', content) # Remove HTML tags
152
+ content = re.sub(r'\[.*?\]', '', content) # Remove square brackets content
153
+
154
+ # Update document content
155
+ doc.page_content = content.strip()
156
+
157
+ # Add source URL to metadata if not present
158
+ if 'source' not in doc.metadata:
159
+ doc.metadata['source'] = url
160
+
161
+ docs.extend(web_docs)
162
+ print(f"Successfully loaded document from {url}")
163
+ except Exception as e:
164
+ print(f"Error loading document from {url}: {e}")
165
+
166
+ if not docs:
167
+ print("No documents could be loaded.")
168
+ return False
169
+
170
+ # Split documents
171
+ text_splitter = RecursiveCharacterTextSplitter(
172
+ chunk_size=500, # Increased from 150 for better context
173
+ chunk_overlap=50, # Added overlap to prevent breaking important context
174
+ separators=["\n\n", "\n", " ", ""], # More intelligent splitting
175
+ length_function=len
176
+ )
177
+ split_docs = text_splitter.split_documents(docs)
178
+
179
+ # Create or update vector store
180
+ if self.document_store is None:
181
+ self.document_store = FAISS.from_documents(split_docs, self.embeddings)
182
+ else:
183
+ # Add documents to existing store
184
+ self.document_store.add_documents(split_docs)
185
+
186
+ try:
187
+ # Save the updated index
188
+ self.document_store.save_local("faiss_index")
189
+ print(f"Added {len(split_docs)} document chunks to the knowledge base")
190
+ return True
191
+ except Exception as e:
192
+ print(f"Error saving index: {e}")
193
+ return False
194
+
195
+ def generate_response(self, user_id: str, message: str) -> Dict[str, Any]:
196
+ """Generate a response for a user message."""
197
+ if user_id not in self.user_conversations:
198
+ self.user_conversations[user_id] = []
199
+
200
+ # Clean and normalize the input message
201
+ message = message.strip()
202
+
203
+ # Add the new message using proper LangChain message object
204
+ self.user_conversations[user_id].append(HumanMessage(content=message))
205
+
206
+ # Extract relevant context with improved search parameters
207
+ relevant_context = ""
208
+ context_docs = []
209
+ if self.document_store:
210
+ try:
211
+ # Enhanced retrieval strategy - hybrid search approach
212
+ # First try with higher relevance score threshold for more accurate results
213
+ results = self.document_store.similarity_search_with_score(
214
+ message,
215
+ k=4, # Increased from 3 for better coverage
216
+ score_threshold=0.75 # Slightly relaxed from 0.8 for better recall
217
+ )
218
+
219
+ if results:
220
+ # Sort by score and take top results
221
+ results.sort(key=lambda x: x[1], reverse=True)
222
+
223
+ # Format with scores and metadata for better context
224
+ formatted_results = []
225
+ for doc, score in results:
226
+ if score > 0.75: # Only include relevance context
227
+ # Extract source for better attribution
228
+ source = doc.metadata.get('source', 'Unknown source')
229
+ if isinstance(source, str) and source.startswith('http'):
230
+ source = source.split('/')[-1] if '/' in source else source
231
+
232
+ # Apply intelligent content trimming - keep key information while reducing token usage
233
+ content = doc.page_content
234
+
235
+ # Advanced cleaning: remove redundant whitespace and normalize
236
+ content = re.sub(r'\s+', ' ', content).strip()
237
+
238
+ # Limit content length based on relevance score - higher relevance gets more tokens
239
+ max_length = int(min(800 + (score * 400), 1200)) # Dynamic length based on relevance
240
+ if len(content) > max_length:
241
+ content = content[:max_length] + "..."
242
+
243
+ context = {
244
+ 'content': content,
245
+ 'score': score,
246
+ 'source': source,
247
+ 'metadata': doc.metadata if hasattr(doc, 'metadata') else {}
248
+ }
249
+ context_docs.append(context)
250
+ formatted_results.append(f"SOURCE: {source}\nRELEVANCE: {score:.2f}\n{content}")
251
+
252
+ if formatted_results:
253
+ relevant_context = "\n\n---\n\n".join(formatted_results)
254
+
255
+ # If no high-relevance results found, try broader search
256
+ if not relevant_context:
257
+ results = self.document_store.similarity_search(
258
+ message,
259
+ k=3 # Increased from 2 for better coverage while keeping focus
260
+ )
261
+ if results:
262
+ formatted_results = []
263
+ for doc in results:
264
+ # Extract source for better attribution
265
+ source = doc.metadata.get('source', 'Unknown source')
266
+ if isinstance(source, str) and source.startswith('http'):
267
+ source = source.split('/')[-1] if '/' in source else source
268
+
269
+ # Truncate content if too long (token optimization)
270
+ content = doc.page_content
271
+ if len(content) > 800: # Shorter for fallback results
272
+ content = content[:800] + "..."
273
+
274
+ formatted_results.append(f"SOURCE: {source}\n{content}")
275
+
276
+ relevant_context = "\n\n---\n\n".join(formatted_results)
277
+ except Exception as e:
278
+ print(f"Error during context retrieval: {e}")
279
+
280
+ # Build command specifications for the system prompt
281
+ command_specs_text = "# Available Commands and Required Fields\n\n"
282
+
283
+ for cmd_name, cmd_spec in COMMAND_SPECS.items():
284
+ command_specs_text += f"## {cmd_name}\n"
285
+ command_specs_text += f"Description: {cmd_spec['description']}\n"
286
+ command_specs_text += "Required fields:\n"
287
+
288
+ for field in cmd_spec['required_fields']:
289
+ command_specs_text += f"- {field}\n"
290
+
291
+ command_specs_text += "Examples:\n"
292
+ for example in cmd_spec['examples']:
293
+ command_specs_text += f"Query: \"{example['input']}\"\n"
294
+ command_specs_text += f"Response: {json.dumps(example['output'], ensure_ascii=False)}\n\n"
295
+
296
+ # Add mob mappings information
297
+ mob_mappings_text = "# Entity IDs for Minecraft Mobs\n\nWhen processing kill_mob commands, use these entity IDs in the 'mob' field:\n\n"
298
+ for mob_name, entity_id in MOB_MAPPINGS.items():
299
+ mob_mappings_text += f"- {mob_name}: {entity_id}\n"
300
+ mob_mappings_text += "\nWhen a player mentions a mob, always map it to the corresponding entity ID in your response.\n\n"
301
+
302
+ # Build system prompt from template
303
+ system_prompt = SYSTEM_PROMPT_TEMPLATE['intro'] + "\n\n"
304
+
305
+ # Add role section
306
+ system_prompt += "## Your Role\n"
307
+ for item in SYSTEM_PROMPT_TEMPLATE['role_section']:
308
+ system_prompt += f"- {item}\n"
309
+
310
+ # Add command specs
311
+ system_prompt += f"\n{command_specs_text}\n"
312
+
313
+ # Add mob mappings
314
+ system_prompt += f"\n{mob_mappings_text}\n"
315
+
316
+ # Add command detection guidelines
317
+ system_prompt += "\n## Command Detection Guidelines\n"
318
+ for item in SYSTEM_PROMPT_TEMPLATE['command_detection_guidelines']:
319
+ system_prompt += f"- {item}\n"
320
+
321
+ # Add contextual information processing
322
+ system_prompt += "\n## Contextual Information Processing\n"
323
+ for item in SYSTEM_PROMPT_TEMPLATE['contextual_information_processing']:
324
+ system_prompt += f"- {item}\n"
325
+
326
+ # Add response format
327
+ system_prompt += "\n## Response Format\n"
328
+ for item in SYSTEM_PROMPT_TEMPLATE['response_format']:
329
+ system_prompt += f"- {item}\n"
330
+
331
+ # Add conversation handling instructions
332
+ if 'conversation_handling' in SYSTEM_PROMPT_TEMPLATE:
333
+ system_prompt += "\n## Conversation Handling\n"
334
+ for item in SYSTEM_PROMPT_TEMPLATE['conversation_handling']:
335
+ system_prompt += f"- {item}\n"
336
+
337
+ # Add improved RAG processing instructions
338
+ if 'rag_processing_instructions' in SYSTEM_PROMPT_TEMPLATE:
339
+ system_prompt += "\n## RAG Processing Instructions\n"
340
+ for item in SYSTEM_PROMPT_TEMPLATE['rag_processing_instructions']:
341
+ system_prompt += f"- {item}\n"
342
+
343
+ # Add ambiguity handling instructions if they exist
344
+ if 'ambiguity_handling' in SYSTEM_PROMPT_TEMPLATE:
345
+ system_prompt += "\n## Ambiguity Handling\n"
346
+ for item in SYSTEM_PROMPT_TEMPLATE['ambiguity_handling']:
347
+ system_prompt += f"- {item}\n"
348
+
349
+ # Add token optimization instructions if they exist
350
+ if 'token_optimization' in SYSTEM_PROMPT_TEMPLATE:
351
+ system_prompt += "\n## Token Optimization\n"
352
+ for item in SYSTEM_PROMPT_TEMPLATE['token_optimization']:
353
+ system_prompt += f"- {item}\n"
354
+
355
+ if relevant_context:
356
+ system_prompt += f"""
357
+
358
+ ## Relevant Information from CubixWorld Documentation
359
+ Use this information to guide your response. Pay special attention to specific game mechanics and features:
360
+
361
+ {relevant_context}
362
+ """
363
+
364
+ # Create messages list with proper LangChain message objects
365
+ messages = [SystemMessage(content=system_prompt)]
366
+
367
+ # Get the last 5 conversation messages (reduced from 10 for more focused context)
368
+ history = self.user_conversations[user_id][-5:] if len(self.user_conversations[user_id]) > 0 else []
369
+
370
+ # Add conversation history to the messages
371
+ messages.extend(history)
372
+
373
+ # Add context summary to help model
374
+ if context_docs:
375
+ # Enhanced context summary with confidence signals
376
+ context_summary = "\nContext relevance summary (sorted by relevance):\n"
377
+
378
+ # Sort context by relevance score
379
+ sorted_contexts = sorted(context_docs, key=lambda x: x['score'], reverse=True)
380
+
381
+ for i, ctx in enumerate(sorted_contexts):
382
+ # Add confidence classification
383
+ confidence = "High" if ctx['score'] > 0.9 else "Medium" if ctx['score'] > 0.8 else "Moderate"
384
+ # Add a brief preview of content with source
385
+ source = ctx['source'].split('/')[-1] if '/' in ctx['source'] else ctx['source']
386
+ preview = ctx['content'][:80].replace('\n', ' ') + "..."
387
+ context_summary += f"{i+1}. [{confidence} confidence, score {ctx['score']:.2f}] From {source}: {preview}\n"
388
+
389
+ messages.append(SystemMessage(content=context_summary))
390
+
391
+ # Initialize token usage variables
392
+ total_tokens = 0
393
+ prompt_tokens = 0
394
+ completion_tokens = 0
395
+ total_cost = 0.0
396
+
397
+ # Get response from the model with token tracking
398
+ with get_openai_callback() as cb:
399
+ response = self.model.predict_messages(messages)
400
+ response_content = response.content
401
+
402
+ # Store token usage metrics
403
+ total_tokens = cb.total_tokens
404
+ prompt_tokens = cb.prompt_tokens
405
+ completion_tokens = cb.completion_tokens
406
+ total_cost = cb.total_cost
407
+
408
+ try:
409
+ # Simple JSON extraction
410
+ if "```json" in response_content:
411
+ json_start = response_content.find("```json") + 7
412
+ json_end = response_content.find("```", json_start)
413
+ response_content = response_content[json_start:json_end].strip()
414
+ elif "```" in response_content:
415
+ json_start = response_content.find("```") + 3
416
+ json_end = response_content.find("```", json_start)
417
+ response_content = response_content[json_start:json_end].strip()
418
+
419
+ json_response = {'response': json.loads(response_content)}
420
+
421
+ # Add token usage information to the response
422
+ json_response["token_usage"] = {
423
+ "total_tokens": total_tokens,
424
+ "prompt_tokens": prompt_tokens,
425
+ "completion_tokens": completion_tokens,
426
+ "total_cost_usd": total_cost
427
+ }
428
+
429
+ # Store the bot's response in conversation history as proper AIMessage
430
+ self.user_conversations[user_id].append(AIMessage(content=json.dumps(json_response)))
431
+
432
+ return json_response
433
+
434
+ except json.JSONDecodeError:
435
+ # Simple fallback if JSON parsing fails
436
+ fallback_response = {
437
+ 'type': 'message',
438
+ 'message': response_content
439
+ }
440
+
441
+ self.user_conversations[user_id].append(AIMessage(content=json.dumps(fallback_response)))
442
+
443
+ return fallback_response
requirements.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ pydantic
4
+ python-dotenv
5
+ langchain
6
+ langchain-community
7
+ langchain-core
8
+ langchain-openai
9
+ faiss-cpu
10
+ openai
11
+ beautifulsoup4
12
+ requests
13
+ aiohttp
14
+ gradio