Yongdong committed
Commit a4f228c · 1 Parent(s): b4ebf95

Implement GGUF model support with DAG visualization

Files changed (3):
  1. app.py +116 -102
  2. dag_visualizer.py +334 -0
  3. requirements.txt +5 -4
app.py CHANGED
@@ -3,6 +3,7 @@ import spaces  # Import spaces module for ZeroGPU
 from huggingface_hub import login
 import os
 from json_processor import JsonProcessor
+from dag_visualizer import DAGVisualizer
 import json

 # 1) Read Secrets
@@ -12,8 +13,9 @@ if not hf_token:
 # 2) Login to ensure all subsequent from_pretrained calls have proper permissions
 login(hf_token)

-import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM
+from transformers import AutoTokenizer
+from huggingface_hub import hf_hub_download
+from llama_cpp import Llama
 import warnings
 import os
 warnings.filterwarnings("ignore")
@@ -22,28 +24,37 @@ warnings.filterwarnings("ignore")
 MODEL_CONFIGS = {
     "1B": {
         "name": "Dart-llm-model-1B",
-        "gguf_model": "YongdongWang/llama-3.2-1b-lora-qlora-dart-llm-gguf"
+        "base_model": "meta-llama/Llama-3.2-1B",  # For tokenizer
+        "gguf_model": "YongdongWang/llama-3.2-1b-lora-qlora-dart-llm-gguf",
+        "gguf_file": "llama_3.2_1b-lora-qlora-dart-llm_q5_k_m.gguf"
     },
     "3B": {
         "name": "Dart-llm-model-3B",
-        "gguf_model": "YongdongWang/llama-3.2-3b-lora-qlora-dart-llm-gguf"
+        "base_model": "meta-llama/Llama-3.2-3B",  # For tokenizer
+        "gguf_model": "YongdongWang/llama-3.2-3b-lora-qlora-dart-llm-gguf",
+        "gguf_file": "llama_3.2_3b-lora-qlora-dart-llm_q4_k_m.gguf"
     },
     "8B": {
         "name": "Dart-llm-model-8B",
-        "gguf_model": "YongdongWang/llama-3.1-8b-lora-qlora-dart-llm-gguf"
+        "base_model": "meta-llama/Llama-3.1-8B",  # For tokenizer
+        "gguf_model": "YongdongWang/llama-3.1-8b-lora-qlora-dart-llm-gguf",
+        "gguf_file": "llama3.1-8b-lora-qlora-dart-llm_q4_k_m_fp16.gguf"
     }
 }

 DEFAULT_MODEL = "1B"  # Set 1B as default

 # Global variables to store model and tokenizer
-model = None
+llm_model = None
 tokenizer = None
 current_model_config = None
 model_loaded = False

+# Initialize DAG visualizer
+dag_visualizer = DAGVisualizer()
+
 def load_model_and_tokenizer(selected_model=DEFAULT_MODEL):
-    """Load tokenizer from GGUF model - executed on CPU"""
+    """Load tokenizer - executed on CPU"""
     global tokenizer, model_loaded, current_model_config

     if model_loaded and current_model_config == selected_model:
@@ -51,10 +62,10 @@ def load_model_and_tokenizer(selected_model=DEFAULT_MODEL):

     print(f"🔄 Loading tokenizer for {MODEL_CONFIGS[selected_model]['name']}...")

-    # Load tokenizer from GGUF model repository
-    gguf_model = MODEL_CONFIGS[selected_model]["gguf_model"]
+    # Load tokenizer from base model
+    base_model = MODEL_CONFIGS[selected_model]["base_model"]
     tokenizer = AutoTokenizer.from_pretrained(
-        gguf_model,
+        base_model,
         use_fast=False,
         trust_remote_code=True
     )
@@ -66,44 +77,51 @@ def load_model_and_tokenizer(selected_model=DEFAULT_MODEL):
     print("✅ Tokenizer loaded successfully!")

 @spaces.GPU(duration=60)  # Request GPU for loading model at startup
-def load_model_on_gpu(selected_model=DEFAULT_MODEL):
-    """Load GGUF model on GPU"""
-    global model
+def load_gguf_model_on_gpu(selected_model=DEFAULT_MODEL):
+    """Load GGUF model using llama-cpp-python"""
+    global llm_model

     # If model is already loaded and it's the same model, return it
-    if model is not None and current_model_config == selected_model:
-        return model
+    if llm_model is not None and current_model_config == selected_model:
+        return llm_model

     # Clear existing model if switching
-    if model is not None:
+    if llm_model is not None:
         print("🗑️ Clearing existing model from GPU...")
-        del model
-        torch.cuda.empty_cache()
-        model = None
+        del llm_model
+        llm_model = None

     model_config = MODEL_CONFIGS[selected_model]
-    print(f"🔄 Loading {model_config['name']} GGUF model on GPU...")
+    print(f"🔄 Loading {model_config['name']} GGUF model...")

     try:
-        # Load GGUF model directly (already quantized and merged)
-        model = AutoModelForCausalLM.from_pretrained(
-            model_config["gguf_model"],
-            device_map="auto",
-            torch_dtype=torch.float16,
-            trust_remote_code=True,
-            low_cpu_mem_usage=True
+        # Download GGUF model file from HuggingFace Hub
+        model_file = hf_hub_download(
+            repo_id=model_config["gguf_model"],
+            filename=model_config["gguf_file"],
+            cache_dir="./gguf_cache"
+        )
+        print(f"📦 Downloaded GGUF file: {model_file}")
+
+        # Load GGUF model with llama-cpp-python
+        llm_model = Llama(
+            model_path=model_file,
+            n_ctx=2048,  # Context length
+            n_gpu_layers=-1,  # Use all GPU layers if available
+            verbose=False
         )
-        model.eval()

-        print(f"✅ {model_config['name']} GGUF model loaded on GPU successfully!")
-        return model
+        print(f"✅ {model_config['name']} GGUF model loaded successfully!")
+        return llm_model

     except Exception as load_error:
         print(f"❌ GGUF Model loading failed: {load_error}")
         raise load_error

 def process_json_in_response(response):
-    """Process and format JSON content in the response"""
+    """Process and format JSON content in the response, and generate DAG visualization"""
+    dag_image_path = None
+
     try:
         # Check if response contains JSON-like content
         if '{' in response and '}' in response:
@@ -115,6 +133,17 @@ def process_json_in_response(response):
             if processed_json:
                 # Format the JSON nicely
                 formatted_json = json.dumps(processed_json, indent=2, ensure_ascii=False)
+
+                # Generate DAG visualization if the JSON contains tasks
+                if "tasks" in processed_json and processed_json["tasks"]:
+                    try:
+                        dag_image_path = dag_visualizer.create_dag_visualization(
+                            processed_json,
+                            title="Robot Task Dependency Graph"
+                        )
+                    except Exception as e:
+                        print(f"DAG visualization failed: {e}")
+
                 # Replace the JSON part in the response
                 import re
                 json_pattern = r'\{.*\}'
@@ -123,26 +152,22 @@ def process_json_in_response(response):
                     # Replace the matched JSON with the formatted version
                     response = response.replace(match.group(), formatted_json)

-        return response
+        return response, dag_image_path
     except Exception:
         # If processing fails, return original response
-        return response
+        return response, None

 @spaces.GPU(duration=60)  # GPU inference
 def generate_response_gpu(prompt, max_tokens=512, selected_model=DEFAULT_MODEL):
-    """Generate response - executed on GPU"""
-    global model
-
-    # Ensure tokenizer is loaded
-    if tokenizer is None or current_model_config != selected_model:
-        load_model_and_tokenizer(selected_model)
+    """Generate response using GGUF model - executed on GPU"""
+    global llm_model

     # Ensure model is loaded on GPU
-    if model is None or current_model_config != selected_model:
-        model = load_model_on_gpu(selected_model)
+    if llm_model is None or current_model_config != selected_model:
+        llm_model = load_gguf_model_on_gpu(selected_model)

-    if model is None:
-        return "❌ Model failed to load. Please check the Space logs."
+    if llm_model is None:
+        return ("❌ GGUF Model failed to load. Please check the Space logs.", None)

     try:
         formatted_prompt = (
@@ -151,67 +176,44 @@ def generate_response_gpu(prompt, max_tokens=512, selected_model=DEFAULT_MODEL):
             "### Response:\n"
         )

-        # Encode input
-        inputs = tokenizer(
-            formatted_prompt,
-            return_tensors="pt",
-            truncation=True,
-            max_length=2048
-        ).to(model.device)
-
-        # Generate response
-        with torch.no_grad():
-            outputs = model.generate(
-                **inputs,
-                max_new_tokens=max_tokens,
-                do_sample=False,
-                temperature=None,
-                top_p=None,
-                pad_token_id=tokenizer.pad_token_id,
-                eos_token_id=tokenizer.eos_token_id,
-                repetition_penalty=1.1,
-                early_stopping=True,
-                no_repeat_ngram_size=3
-            )
-
-        # Decode output
-        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        # Generate response using llama-cpp-python
+        output = llm_model(
+            formatted_prompt,
+            max_tokens=max_tokens,
+            stop=["### Instruction:", "###"],
+            echo=False,
+            temperature=0.1,
+            top_p=0.9,
+            repeat_penalty=1.1
+        )

-        # Extract generated part
-        if "### Response:" in response:
-            response = response.split("### Response:")[-1].strip()
-        elif len(response) > len(formatted_prompt):
-            response = response[len(formatted_prompt):].strip()
+        # Extract the generated text
+        response = output['choices'][0]['text'].strip()

-        # Process JSON if present in response
-        response = process_json_in_response(response)
+        # Process JSON if present in response and generate DAG
+        response, dag_image_path = process_json_in_response(response)

-        return response if response else "❌ No response generated. Please try again with a different prompt."
+        return (response if response else "❌ No response generated. Please try again with a different prompt.", dag_image_path)

     except Exception as generation_error:
-        return f"❌ Generation Error: {str(generation_error)}"
+        return (f"❌ Generation Error: {str(generation_error)}", None)

 def chat_interface(message, history, max_tokens, selected_model):
     """Chat interface - runs on CPU, calls GPU functions"""
     if not message.strip():
-        return history, ""
-
-    # Initialize tokenizer (if needed)
-    if tokenizer is None or current_model_config != selected_model:
-        load_model_and_tokenizer(selected_model)
+        return history, "", None

     try:
         # Call GPU function to generate response
-        response = generate_response_gpu(message, max_tokens, selected_model)
+        response, dag_image_path = generate_response_gpu(message, max_tokens, selected_model)
         history.append((message, response))
-        return history, ""
+        return history, "", dag_image_path
     except Exception as chat_error:
         error_msg = f"❌ Chat Error: {str(chat_error)}"
         history.append((message, error_msg))
-        return history, ""
+        return history, "", None

-# Load tokenizer at startup with default model
-load_model_and_tokenizer(DEFAULT_MODEL)
+# GGUF models include tokenizer, no separate loading needed

 # Create Gradio application
 with gr.Blocks(
@@ -229,27 +231,30 @@ with gr.Blocks(

     Choose from **three GGUF quantized models** specialized for **robot task planning** using QLoRA fine-tuning:

-    - **🚀 Dart-llm-model-1B** (Default): Fastest inference, optimized for speed
-    - **⚖️ Dart-llm-model-3B**: Balanced performance and quality
-    - **🎯 Dart-llm-model-8B**: Best quality output, higher latency
+    - **🚀 Dart-llm-model-1B** (Default): Fastest inference, Q5_K_M quantization
+    - **⚖️ Dart-llm-model-3B**: Balanced performance, Q4_K_M quantization
+    - **🎯 Dart-llm-model-8B**: Best quality output, Q4_K_M quantization

-    **GGUF Format**: These models are pre-quantized and optimized for efficient deployment, combining the base model and LoRA adaptations.
+    **GGUF Implementation**: Uses native GGUF format with llama-cpp-python for optimal memory efficiency and GPU acceleration.

-    **Capabilities**: Convert natural language robot commands into structured task sequences for excavators, dump trucks, and other construction robots.
+    **Capabilities**:
+    - Convert natural language robot commands into structured task sequences
+    - **NEW: Automatic DAG Visualization** - Generates visual dependency graphs for robot task sequences
+    - Support for excavators, dump trucks, and other construction robots

-    **Models**:
-    - [YongdongWang/llama-3.2-1b-lora-qlora-dart-llm-gguf](https://huggingface.co/YongdongWang/llama-3.2-1b-lora-qlora-dart-llm-gguf) (Default)
-    - [YongdongWang/llama-3.2-3b-lora-qlora-dart-llm-gguf](https://huggingface.co/YongdongWang/llama-3.2-3b-lora-qlora-dart-llm-gguf)
-    - [YongdongWang/llama-3.1-8b-lora-qlora-dart-llm-gguf](https://huggingface.co/YongdongWang/llama-3.1-8b-lora-qlora-dart-llm-gguf)
+    **GGUF Models**:
+    - [YongdongWang/llama-3.2-1b-lora-qlora-dart-llm-gguf](https://huggingface.co/YongdongWang/llama-3.2-1b-lora-qlora-dart-llm-gguf) (Default - Q5_K_M)
+    - [YongdongWang/llama-3.2-3b-lora-qlora-dart-llm-gguf](https://huggingface.co/YongdongWang/llama-3.2-3b-lora-qlora-dart-llm-gguf) (Q4_K_M)
+    - [YongdongWang/llama-3.1-8b-lora-qlora-dart-llm-gguf](https://huggingface.co/YongdongWang/llama-3.1-8b-lora-qlora-dart-llm-gguf) (Q4_K_M)

     ⚡ **Using ZeroGPU**: This Space uses dynamic GPU allocation (Nvidia H200). First generation might take a bit longer.
     """)

     with gr.Row():
-        with gr.Column(scale=3):
+        with gr.Column(scale=2):
             chatbot = gr.Chatbot(
                 label="Task Planning Results",
-                height=500,
+                height=400,
                 show_label=True,
                 container=True,
                 bubble_full_width=False,
@@ -269,6 +274,15 @@ with gr.Blocks(
             send_btn = gr.Button("🚀 Generate Tasks", variant="primary", size="sm")
             clear_btn = gr.Button("🗑️ Clear", variant="secondary", size="sm")

+        with gr.Column(scale=2):
+            dag_image = gr.Image(
+                label="Task Dependency Graph (DAG)",
+                show_label=True,
+                container=True,
+                height=400,
+                interactive=False
+            )
+
         with gr.Column(scale=1):
             gr.Markdown("### ⚙️ Generation Settings")

@@ -317,18 +331,18 @@ with gr.Blocks(
     msg.submit(
         chat_interface,
         inputs=[msg, chatbot, max_tokens, model_selector],
-        outputs=[chatbot, msg]
+        outputs=[chatbot, msg, dag_image]
     )

     send_btn.click(
         chat_interface,
         inputs=[msg, chatbot, max_tokens, model_selector],
-        outputs=[chatbot, msg]
+        outputs=[chatbot, msg, dag_image]
    )

     clear_btn.click(
-        lambda: ([], ""),
-        outputs=[chatbot, msg]
+        lambda: ([], "", None),
+        outputs=[chatbot, msg, dag_image]
     )

 if __name__ == "__main__":
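For reviewers who want to exercise the new inference path outside the Space, the following is a minimal sketch distilled from the diff above. The repo id, filename, and sampling parameters are copied from MODEL_CONFIGS and generate_response_gpu; the "### Instruction:" preamble and the sample command are assumptions, since the full formatted_prompt template is outside the shown hunks.

```python
# Minimal sketch of the new GGUF inference path (assumes llama-cpp-python and huggingface_hub installed).
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Repo and filename taken from MODEL_CONFIGS["1B"] in the diff above.
model_file = hf_hub_download(
    repo_id="YongdongWang/llama-3.2-1b-lora-qlora-dart-llm-gguf",
    filename="llama_3.2_1b-lora-qlora-dart-llm_q5_k_m.gguf",
    cache_dir="./gguf_cache",
)

# n_gpu_layers=-1 offloads all layers when a CUDA build of llama-cpp-python is available;
# on a CPU-only build the same call silently runs on CPU.
llm = Llama(model_path=model_file, n_ctx=2048, n_gpu_layers=-1, verbose=False)

# The instruction framing below is an assumed reconstruction of formatted_prompt;
# the excavator command is a made-up example input.
prompt = (
    "### Instruction:\nDeploy Excavator 1 to the soil area for excavation.\n\n"
    "### Response:\n"
)
output = llm(prompt, max_tokens=512, stop=["### Instruction:", "###"], echo=False,
             temperature=0.1, top_p=0.9, repeat_penalty=1.1)
print(output["choices"][0]["text"].strip())
```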
dag_visualizer.py ADDED
@@ -0,0 +1,334 @@
import matplotlib.pyplot as plt
import matplotlib
matplotlib.use('Agg')  # Use non-interactive backend for server environments
import networkx as nx
import json
import numpy as np
from loguru import logger
import os
import tempfile
from datetime import datetime

class DAGVisualizer:
    def __init__(self):
        # Configure Matplotlib to use IEEE-style parameters
        plt.rcParams.update({
            'font.family': 'DejaVu Sans',  # Use available font instead of Times New Roman
            'font.size': 10,
            'axes.linewidth': 1.2,
            'axes.labelsize': 12,
            'xtick.labelsize': 10,
            'ytick.labelsize': 10,
            'legend.fontsize': 10,
            'figure.titlesize': 14
        })

    def create_dag_from_tasks(self, task_data):
        """
        Create a directed graph from task data.

        Args:
            task_data: Dictionary containing tasks with structure like:
                {
                    "tasks": [
                        {
                            "task": "task_name",
                            "instruction_function": {
                                "name": "function_name",
                                "robot_ids": ["robot1", "robot2"],
                                "dependencies": ["dependency_task"],
                                "object_keywords": ["object1", "object2"]
                            }
                        }
                    ]
                }

        Returns:
            NetworkX DiGraph object
        """
        if not task_data or "tasks" not in task_data:
            logger.warning("Invalid task data structure")
            return None

        # Create a directed graph
        G = nx.DiGraph()

        # Add nodes and store mapping from task name to ID
        task_mapping = {}
        for i, task in enumerate(task_data["tasks"]):
            task_id = i + 1
            task_name = task["task"]
            task_mapping[task_name] = task_id

            # Add node with attributes
            G.add_node(task_id,
                       name=task_name,
                       function=task["instruction_function"]["name"],
                       robots=task["instruction_function"].get("robot_ids", []),
                       objects=task["instruction_function"].get("object_keywords", []))

        # Add dependency edges
        for i, task in enumerate(task_data["tasks"]):
            task_id = i + 1
            dependencies = task["instruction_function"]["dependencies"]
            for dep in dependencies:
                if dep in task_mapping:
                    dep_id = task_mapping[dep]
                    G.add_edge(dep_id, task_id)

        return G

    def calculate_layout(self, G):
        """
        Calculate hierarchical layout for the graph based on dependencies.
        """
        if not G:
            return {}

        # Calculate layers based on dependencies
        layers = {}

        def get_layer(node_id, visited=None):
            if visited is None:
                visited = set()
            if node_id in visited:
                return 0
            visited.add(node_id)

            predecessors = list(G.predecessors(node_id))
            if not predecessors:
                return 0
            return max(get_layer(pred, visited.copy()) for pred in predecessors) + 1

        for node in G.nodes():
            layer = get_layer(node)
            layers.setdefault(layer, []).append(node)

        # Calculate positions by layer
        pos = {}
        layer_height = 3.0
        node_width = 4.0

        for layer_idx, nodes in layers.items():
            y = layer_height * (len(layers) - 1 - layer_idx)
            start_x = -(len(nodes) - 1) * node_width / 2
            for i, node in enumerate(sorted(nodes)):
                pos[node] = (start_x + i * node_width, y)

        return pos

    def create_dag_visualization(self, task_data, title="Robot Task Dependency Graph"):
        """
        Create a DAG visualization from task data and return the image path.

        Args:
            task_data: Task data dictionary
            title: Title for the graph

        Returns:
            str: Path to the generated image file
        """
        try:
            # Create graph
            G = self.create_dag_from_tasks(task_data)
            if not G or len(G.nodes()) == 0:
                logger.warning("No tasks found or invalid graph structure")
                return None

            # Calculate layout
            pos = self.calculate_layout(G)

            # Create figure
            fig, ax = plt.subplots(1, 1, figsize=(max(12, len(G.nodes()) * 2), 8))

            # Draw edges with arrows
            nx.draw_networkx_edges(G, pos,
                                   edge_color='#2E86AB',
                                   arrows=True,
                                   arrowsize=20,
                                   arrowstyle='->',
                                   width=2,
                                   alpha=0.8,
                                   connectionstyle="arc3,rad=0.1")

            # Color nodes based on their position in the graph
            node_colors = []
            for node in G.nodes():
                if G.in_degree(node) == 0:  # Start nodes
                    node_colors.append('#F24236')
                elif G.out_degree(node) == 0:  # End nodes
                    node_colors.append('#A23B72')
                else:  # Intermediate nodes
                    node_colors.append('#F18F01')

            # Draw nodes
            nx.draw_networkx_nodes(G, pos,
                                   node_color=node_colors,
                                   node_size=3500,
                                   alpha=0.9,
                                   edgecolors='black',
                                   linewidths=2)

            # Label nodes with task IDs
            node_labels = {node: f"T{node}" for node in G.nodes()}
            nx.draw_networkx_labels(G, pos, node_labels,
                                    font_size=18,
                                    font_weight='bold',
                                    font_color='white')

            # Add detailed info text boxes for each task
            for i, node in enumerate(G.nodes()):
                x, y = pos[node]
                function_name = G.nodes[node]['function']
                robots = G.nodes[node]['robots']
                objects = G.nodes[node]['objects']

                # Create info text content
                info_text = f"Task {node}: {function_name.replace('_', ' ').title()}\n"
                if robots:
                    robot_text = ", ".join([r.replace('robot_', '').replace('_', ' ').title() for r in robots])
                    info_text += f"Robots: {robot_text}\n"
                if objects:
                    object_text = ", ".join(objects)
                    info_text += f"Objects: {object_text}"

                # Calculate offset based on node position to avoid overlaps
                offset_x = 2.2 if i % 2 == 0 else -2.2
                offset_y = 0.5 if i % 4 < 2 else -0.5

                # Choose alignment based on offset direction
                h_align = 'left' if offset_x > 0 else 'right'

                # Draw text box
                bbox_props = dict(boxstyle="round,pad=0.4",
                                  facecolor='white',
                                  edgecolor='gray',
                                  alpha=0.95,
                                  linewidth=1)

                ax.text(x + offset_x, y + offset_y, info_text,
                        bbox=bbox_props,
                        fontsize=12,
                        verticalalignment='center',
                        horizontalalignment=h_align,
                        weight='bold')

                # Draw dashed connector line from node to text box
                ax.plot([x, x + offset_x], [y, y + offset_y],
                        linestyle='--', color='gray', alpha=0.6, linewidth=1)

            # Expand axis limits to fit everything
            x_vals = [coord[0] for coord in pos.values()]
            y_vals = [coord[1] for coord in pos.values()]
            ax.set_xlim(min(x_vals) - 4.0, max(x_vals) + 4.0)
            ax.set_ylim(min(y_vals) - 2.0, max(y_vals) + 2.0)

            # Set overall figure properties
            ax.set_title(title, fontsize=16, fontweight='bold', pad=20)
            ax.set_aspect('equal')
            ax.margins(0.2)
            ax.axis('off')

            # Add legend for node types - Hidden to avoid covering content
            # legend_elements = [
            #     plt.Line2D([0], [0], marker='o', color='w', markerfacecolor='#F24236',
            #                markersize=10, label='Start Tasks', markeredgecolor='black'),
            #     plt.Line2D([0], [0], marker='o', color='w', markerfacecolor='#A23B72',
            #                markersize=10, label='End Tasks', markeredgecolor='black'),
            #     plt.Line2D([0], [0], marker='o', color='w', markerfacecolor='#F18F01',
            #                markersize=10, label='Intermediate Tasks', markeredgecolor='black'),
            #     plt.Line2D([0], [0], color='#2E86AB', linewidth=2, label='Dependencies')
            # ]
            # ax.legend(handles=legend_elements, loc='upper left', bbox_to_anchor=(1.05, 1.05))

            # Adjust layout and save
            plt.tight_layout()

            # Create temporary file for saving the image
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            temp_dir = tempfile.gettempdir()
            image_path = os.path.join(temp_dir, f'dag_visualization_{timestamp}.png')

            plt.savefig(image_path, dpi=400, bbox_inches='tight',
                        pad_inches=0.1, facecolor='white', edgecolor='none')
            plt.close(fig)  # Close figure to free memory

            logger.info(f"DAG visualization saved to: {image_path}")
            return image_path

        except Exception as e:
            logger.error(f"Error creating DAG visualization: {e}")
            return None

    def create_simplified_dag_visualization(self, task_data, title="Robot Task Graph"):
        """
        Create a simplified DAG visualization suitable for smaller displays.

        Args:
            task_data: Task data dictionary
            title: Title for the graph

        Returns:
            str: Path to the generated image file
        """
        try:
            # Create graph
            G = self.create_dag_from_tasks(task_data)
            if not G or len(G.nodes()) == 0:
                logger.warning("No tasks found or invalid graph structure")
                return None

            # Calculate layout
            pos = self.calculate_layout(G)

            # Create figure for simplified graph
            fig, ax = plt.subplots(1, 1, figsize=(10, 6))

            # Draw edges
            nx.draw_networkx_edges(G, pos,
                                   edge_color='black',
                                   arrows=True,
                                   arrowsize=15,
                                   arrowstyle='->',
                                   width=1.5)

            # Draw nodes
            nx.draw_networkx_nodes(G, pos,
                                   node_color='lightblue',
                                   node_size=3000,
                                   edgecolors='black',
                                   linewidths=1.5)

            # Add node labels with simplified names
            labels = {}
            for node in G.nodes():
                function_name = G.nodes[node]['function']
                simplified_name = function_name.replace('_', ' ').title()
                if len(simplified_name) > 15:
                    simplified_name = simplified_name[:12] + "..."
                labels[node] = f"T{node}\n{simplified_name}"

            nx.draw_networkx_labels(G, pos, labels,
                                    font_size=11,
                                    font_weight='bold')

            ax.set_title(title, fontsize=14, fontweight='bold')
            ax.axis('off')

            # Adjust layout and save
            plt.tight_layout()

            # Create temporary file for saving the image
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            temp_dir = tempfile.gettempdir()
            image_path = os.path.join(temp_dir, f'simple_dag_{timestamp}.png')

            plt.savefig(image_path, dpi=400, bbox_inches='tight')
            plt.close(fig)  # Close figure to free memory

            logger.info(f"Simplified DAG visualization saved to: {image_path}")
            return image_path

        except Exception as e:
            logger.error(f"Error creating simplified DAG visualization: {e}")
            return None
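To see what the new visualizer produces without running the full app, the snippet below follows the schema documented in the create_dag_from_tasks docstring; the task names, function names, and robot ids are made-up illustrations, not values from the repository.

```python
# Hypothetical usage of DAGVisualizer; the payload mirrors the docstring schema above.
from dag_visualizer import DAGVisualizer

task_data = {
    "tasks": [
        {
            "task": "avoid_area_soil",
            "instruction_function": {
                "name": "avoid_areas_for_all_robots",   # illustrative name
                "robot_ids": ["robot_dump_truck_01", "robot_excavator_01"],
                "dependencies": [],
                "object_keywords": ["soil_area"],
            },
        },
        {
            "task": "excavate_soil",
            "instruction_function": {
                "name": "target_area_for_specific_robots",  # illustrative name
                "robot_ids": ["robot_excavator_01"],
                "dependencies": ["avoid_area_soil"],  # draws an edge from the first task
                "object_keywords": ["soil_area"],
            },
        },
    ]
}

visualizer = DAGVisualizer()
image_path = visualizer.create_dag_visualization(task_data)
print(image_path)  # e.g. a PNG under the system temp directory
```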
requirements.txt CHANGED
@@ -1,12 +1,13 @@
 pydantic
 gradio
 transformers
-torch
-peft
-bitsandbytes
 accelerate
 scipy
 sentencepiece
 protobuf
-spaces
 loguru
+matplotlib
+networkx
+numpy
+llama-cpp-python
+huggingface_hub
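The dropped torch/peft/bitsandbytes entries follow from serving pre-merged GGUF weights through llama-cpp-python instead of transformers. A quick import check (a sketch; none of these packages is pinned here) confirms the new stack resolves in the environment:

```python
# Sanity check that the updated requirements resolve.
from importlib.metadata import version

for pkg in ["matplotlib", "networkx", "numpy", "llama-cpp-python", "huggingface_hub"]:
    print(pkg, version(pkg))

import matplotlib
# dag_visualizer.py switches the backend to 'Agg' at import time for headless rendering.
print("current matplotlib backend:", matplotlib.get_backend())
```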