import os
from dotenv import load_dotenv
from huggingface_hub import InferenceClient
from smolagents import CodeAgent, Model, ChatMessage
import tools.tools as tls # Your tool definitions
load_dotenv()
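
# Assumes a .env file next to this script providing the Hugging Face token, e.g.:
#   HF_TOKEN=hf_xxxxxxxxxxxxxxxx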
"""
enforce_strict_role_alternation()
Ensures that messages follow the required pattern:
'user/assistant/user/assistant/...', starting with an optional 'system' message.
This is necessary because many chat-based models (e.g., ChatCompletion APIs)
expect the conversation format to alternate strictly between user and assistant roles,
possibly preceded by a single system message.
Parameters:
-----------
messages : list of dict
The message history. Each message is expected to be a dictionary with a 'role' key
('user', 'assistant', or 'system') and a 'content' key.
Returns:
--------
cleaned : list of dict
A sanitized version of the messages list that follows the correct role alternation rules.
"""
def enforce_strict_role_alternation(messages):
    cleaned = []      # List to store the cleaned message sequence
    last_role = None  # Tracks the last valid role added, to ensure alternation
    for msg in messages:
        role = msg.get("role")  # .get() so messages without a 'role' are skipped, not fatal
        # Skip any message that doesn't have a valid role
        if role not in ("user", "assistant", "system"):
            continue
        if role == "system":
            # Allow a single 'system' message, and only at the very beginning;
            # drop any later 'system' messages so they can't break alternation
            if not cleaned:
                cleaned.append(msg)
            continue
        # Skip messages with the same role as the previous one (they would break alternation)
        if role == last_role:
            continue
        # Add the valid message to the cleaned list
        cleaned.append(msg)
        last_role = role  # Update the last role for the next iteration
    return cleaned
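
# A quick illustration (not part of the agent flow): duplicate-role messages are
# dropped so the sequence alternates. For example,
#   enforce_strict_role_alternation([
#       {"role": "system", "content": "You are helpful."},
#       {"role": "user", "content": "Hi"},
#       {"role": "user", "content": "Hi again"},   # dropped: same role twice in a row
#       {"role": "assistant", "content": "Hello!"},
#   ])
# keeps the system message, the first 'user' message, and the 'assistant' reply.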

# Define a custom model class that wraps Hugging Face's InferenceClient for chat-based models
class HuggingFaceChatModel(Model):
    def __init__(self):
        super().__init__()  # Initialize the smolagents Model base class
        # Set the model ID for the specific Hugging Face model to use
        model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
        # Create an InferenceClient with the model ID and the Hugging Face token from the environment
        self.client = InferenceClient(model=model_id, token=os.getenv("HF_TOKEN"))

    def generate(self, messages, stop_sequences=None):
        """
        Generate a response from the chat model based on the input message history.

        Parameters
        ----------
        messages : list of dict
            A list of message dicts in OpenAI-style format, e.g.:
            [{"role": "user", "content": "Hello"}, {"role": "assistant", "content": "Hi!"}]
        stop_sequences : list of str, optional
            Strings that stop generation when encountered. Default is ["Task"].

        Returns
        -------
        ChatMessage
            A formatted response object with role='assistant' and the model-generated content.
        """
        # Set default stop sequences if none provided
        if stop_sequences is None:
            stop_sequences = ["Task"]
        # 💡 Preprocess: enforce valid alternation of user/assistant messages
        cleaned_messages = enforce_strict_role_alternation(messages)
        # 🧠 Call the Hugging Face chat API with the cleaned messages
        response = self.client.chat_completion(
            messages=cleaned_messages,
            stop=stop_sequences,
            max_tokens=1024,  # Limit the number of tokens generated in the reply
        )
        # 📦 Extract the content from the model response and wrap it in a ChatMessage
        content = response.choices[0].message.content
        return ChatMessage(role="assistant", content=content)
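
# A minimal standalone sketch (an assumption: HF_TOKEN is set and the model is
# reachable through the Inference API; inside the agent, smolagents calls
# generate() for you, so this is only for manual testing):
#
#   model = HuggingFaceChatModel()
#   reply = model.generate([{"role": "user", "content": "Say hello in one word."}])
#   print(reply.content)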

# ✅ Basic agent built with smolagents
class BasicAgent:
    def __init__(self):
        # Informative log to indicate that the agent is being initialized
        print("✅ BasicAgent initialized with Hugging Face chat model.")
        # Instantiate the custom model that wraps the Hugging Face InferenceClient
        self.model = HuggingFaceChatModel()
        # Create the CodeAgent, which combines the tools with the chat model
        self.agent = CodeAgent(
            tools=[tls.search_tool, tls.calculate_cargo_travel_time],  # Your list of tools
            model=self.model,  # The model that generates tool-using responses
            additional_authorized_imports=["pandas"],  # Optional: allow pandas in generated code
            max_steps=20,  # Limit the number of planning steps (tool calls + reasoning)
        )

    def __call__(self, messages) -> str:
        """
        Handle a call to the agent with either a single question or a message history.

        Parameters
        ----------
        messages : Union[str, List[Dict[str, str]]]
            The input from the chat interface, either:
            - a plain string (just one message)
            - a list of dicts, like [{"role": "user", "content": "What's the weather?"}]

        Returns
        -------
        str
            The assistant's response as a string.
        """
        # If the input is a chat history (list of messages), take the most recent message
        if isinstance(messages, list):
            question = messages[-1]["content"]  # Extract the last message's content
        else:
            question = messages  # If it's just a string, use it directly
        # Log the input for debugging
        print(f"📥 Received question: {question[:60]}...")
        # Run the CodeAgent to get a response (may include tool use)
        response = str(self.agent.run(question))  # Coerce to str: run() may return a non-string result
        # Log the response for debugging
        print(f"📤 Response generated: {response[:60]}...")
        return response  # Return the final result
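
# Minimal smoke test: a sketch assuming HF_TOKEN is set in .env and that
# tools/tools.py defines search_tool and calculate_cargo_travel_time.
# The question below is purely illustrative.
if __name__ == "__main__":
    agent = BasicAgent()
    answer = agent([{"role": "user", "content": "How long would a cargo plane take to fly from Paris to New York?"}])
    print(answer)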