import os

from dotenv import load_dotenv
from huggingface_hub import InferenceClient
from smolagents import CodeAgent, Model, ChatMessage

import tools.tools as tls  # Your tool definitions

load_dotenv()
""" | |
enforce_strict_role_alternation() | |
Ensures that messages follow the required pattern: | |
'user/assistant/user/assistant/...', starting with an optional 'system' message. | |
This is necessary because many chat-based models (e.g., ChatCompletion APIs) | |
expect the conversation format to alternate strictly between user and assistant roles, | |
possibly preceded by a single system message. | |
Parameters: | |
----------- | |
messages : list of dict | |
The message history. Each message is expected to be a dictionary with a 'role' key | |
('user', 'assistant', or 'system') and a 'content' key. | |
Returns: | |
-------- | |
cleaned : list of dict | |
A sanitized version of the messages list that follows the correct role alternation rules. | |
""" | |
def enforce_strict_role_alternation(messages): | |
cleaned = [] # List to store the cleaned message sequence | |
last_role = None # Tracks the last valid role added to ensure alternation | |
    for msg in messages:
        role = msg["role"]

        # Skip any message that doesn't have a valid role
        if role not in ("user", "assistant", "system"):
            continue

        # Allow a single 'system' message only at the very beginning;
        # drop any system messages that appear later, since they would
        # break the user/assistant alternation
        if role == "system":
            if not cleaned:
                cleaned.append(msg)
            continue

        # Skip messages with the same role as the previous one (breaks alternation)
        if role == last_role:
            continue

        # Add the valid message to the cleaned list
        cleaned.append(msg)
        last_role = role  # Update the last role for the next iteration

    return cleaned
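# Quick sanity check of the sanitizer (illustrative only; the conversation
# below is made up). Two consecutive 'user' messages collapse to one, and
# the leading 'system' message survives:
#
#   demo = [
#       {"role": "system", "content": "You are helpful."},
#       {"role": "user", "content": "Hi"},
#       {"role": "user", "content": "Hello again"},  # dropped: repeats 'user'
#       {"role": "assistant", "content": "Hey!"},
#   ]
#   enforce_strict_role_alternation(demo)
#   # -> [system, user("Hi"), assistant("Hey!")]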
# A custom model class that wraps Hugging Face's InferenceClient for chat-based models
class HuggingFaceChatModel(Model):
    def __init__(self):
        super().__init__()  # Initialize the smolagents Model base class
        # Set the model ID for the specific Hugging Face model to use
        model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
        # Create an InferenceClient with the model ID and the Hugging Face
        # token from your environment (loaded from .env by load_dotenv above)
        self.client = InferenceClient(model=model_id, token=os.getenv("HF_TOKEN"))
    def generate(self, messages, stop_sequences=None):
        """
        Generate a response from the chat model based on the input message history.

        Parameters
        ----------
        messages : list of dict
            A list of message dicts in OpenAI-style format, e.g.:
            [{"role": "user", "content": "Hello"}, {"role": "assistant", "content": "Hi!"}]
        stop_sequences : list of str, optional
            Strings that stop generation when encountered. Defaults to ["Task"].

        Returns
        -------
        ChatMessage
            A response object with role='assistant' and the model-generated content.
        """
        # Set default stop sequences if none provided
        if stop_sequences is None:
            stop_sequences = ["Task"]
        # 💡 Preprocess: enforce valid alternation of user/assistant messages
        cleaned_messages = enforce_strict_role_alternation(messages)

        # 🧠 Call the Hugging Face chat API with the cleaned messages
        response = self.client.chat_completion(
            messages=cleaned_messages,
            stop=stop_sequences,
            max_tokens=1024,  # Limit the number of tokens generated in the reply
        )

        # 📦 Extract the content from the model response and wrap it in a ChatMessage
        content = response.choices[0].message.content
        return ChatMessage(role="assistant", content=content)
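# Direct usage sketch for the wrapper (illustrative; assumes HF_TOKEN is set
# in your .env and the account can reach the Mixtral endpoint):
#
#   model = HuggingFaceChatModel()
#   reply = model.generate([{"role": "user", "content": "Say hello."}])
#   print(reply.content)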
# ✅ Basic agent built with smolagents
class BasicAgent:
    def __init__(self):
        # Informative log to indicate that the agent is being initialized
        print("✅ BasicAgent initialized with Hugging Face chat model.")

        # Instantiate the custom model that wraps the Hugging Face InferenceClient
        self.model = HuggingFaceChatModel()

        # Create the CodeAgent, which combines the tools with the chat model
        self.agent = CodeAgent(
            tools=[tls.search_tool, tls.calculate_cargo_travel_time],  # Your list of tools
            model=self.model,  # The model that generates tool-using responses
            additional_authorized_imports=["pandas"],  # Optional: allow pandas in generated code
            max_steps=20,  # Limit the number of planning steps (tool calls + reasoning)
        )
    def __call__(self, messages) -> str:
        """
        Handle a call to the agent with either a single question or a message history.

        Parameters
        ----------
        messages : Union[str, List[Dict[str, str]]]
            The input from the chat interface: either a plain string (a single
            question) or a list of dicts, e.g.
            [{"role": "user", "content": "What's the weather?"}]

        Returns
        -------
        str
            The assistant's response as a string.
        """
        # If the input is a chat history (list of messages), take the most recent message
        if isinstance(messages, list):
            question = messages[-1]["content"]  # Extract the last message's content
        else:
            question = messages  # If it's just a string, use it directly

        # Log the input for debugging
        print(f"📥 Received question: {question[:60]}...")

        # Run the CodeAgent to get a response (may include tool use)
        response = self.agent.run(question)

        # Log the response for debugging; run() may return non-string results,
        # so convert to str before slicing
        print(f"📤 Response generated: {str(response)[:60]}...")
        return str(response)  # Return the final result as a string
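# Minimal local entry point (a sketch, not part of the agent itself). It
# assumes tools/tools.py defines search_tool and calculate_cargo_travel_time
# and that HF_TOKEN is set in your .env file; the sample question is arbitrary.
if __name__ == "__main__":
    agent = BasicAgent()
    answer = agent("How long would a cargo plane take to fly from Paris to Tokyo?")
    print(answer)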