import os
from dotenv import load_dotenv
from huggingface_hub import InferenceClient
from smolagents import CodeAgent, Model, ChatMessage
import tools.tools as tls # Your tool definitions
load_dotenv()  # Load environment variables (e.g., HF_TOKEN) from a .env file
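
# tools/tools.py is imported above but not shown here. As a rough, hypothetical
# sketch (the real definitions may differ), a smolagents tool such as
# calculate_cargo_travel_time could look like this:
#
#     from smolagents import tool
#
#     @tool
#     def calculate_cargo_travel_time(distance_km: float, speed_kmh: float) -> float:
#         """Return the travel time in hours for a cargo ship.
#
#         Args:
#             distance_km: Distance to travel, in kilometers.
#             speed_kmh: Average cruising speed, in kilometers per hour.
#         """
#         return distance_km / speed_kmh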
"""
enforce_strict_role_alternation()
Ensures that messages follow the required pattern:
'user/assistant/user/assistant/...', starting with an optional 'system' message.
This is necessary because many chat-based models (e.g., ChatCompletion APIs)
expect the conversation format to alternate strictly between user and assistant roles,
possibly preceded by a single system message.
Parameters:
-----------
messages : list of dict
The message history. Each message is expected to be a dictionary with a 'role' key
('user', 'assistant', or 'system') and a 'content' key.
Returns:
--------
cleaned : list of dict
A sanitized version of the messages list that follows the correct role alternation rules.
"""
def enforce_strict_role_alternation(messages):
cleaned = [] # List to store the cleaned message sequence
last_role = None # Tracks the last valid role added to ensure alternation
for msg in messages:
role = msg["role"]
# Skip any message that doesn't have a valid role
if role not in ("user", "assistant", "system"):
continue
# Allow a single 'system' message only at the very beginning
if role == "system" and not cleaned:
cleaned.append(msg)
continue
# Skip messages with the same role as the previous one (breaks alternation)
if role == last_role:
continue
# Add the valid message to the cleaned list
cleaned.append(msg)
last_role = role # Update the last role for the next iteration
return cleaned
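
# Illustrative example (hypothetical history, not executed as part of the agent):
# a duplicated role and an unsupported role are dropped so the result alternates.
#
#     history = [
#         {"role": "system", "content": "You are helpful."},
#         {"role": "user", "content": "Hi"},
#         {"role": "user", "content": "Hi again"},     # consecutive 'user' -> dropped
#         {"role": "tool", "content": "..."},          # unsupported role -> dropped
#         {"role": "assistant", "content": "Hello!"},
#     ]
#     enforce_strict_role_alternation(history)
#     # -> [{'role': 'system', ...}, {'role': 'user', 'content': 'Hi'},
#     #     {'role': 'assistant', 'content': 'Hello!'}]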


# Custom model class that wraps Hugging Face's InferenceClient for chat-based models
class HuggingFaceChatModel(Model):
    def __init__(self):
        super().__init__()  # Initialize smolagents' base Model state
        # Model ID of the specific Hugging Face model to use
        model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
        # Create an InferenceClient with the model ID and the Hugging Face token from the environment
        self.client = InferenceClient(model=model_id, token=os.getenv("HF_TOKEN"))

    def generate(self, messages, stop_sequences=None, **kwargs):
        """
        Generate a response from the chat model based on the input message history.

        Parameters
        ----------
        messages : list of dict
            A list of message dicts in OpenAI-style format, e.g.:
            [{"role": "user", "content": "Hello"}, {"role": "assistant", "content": "Hi!"}]
        stop_sequences : list of str, optional
            Strings that stop generation when encountered. Defaults to ["Task"].
        **kwargs
            Any extra generation options smolagents may pass; ignored here.

        Returns
        -------
        ChatMessage
            A response object with role='assistant' and the model-generated content.
        """
# Set default stop sequences if none provided
if stop_sequences is None:
stop_sequences = ["Task"]
        # 💡 Preprocess: enforce valid alternation of user/assistant messages
        cleaned_messages = enforce_strict_role_alternation(messages)
        # 🔧 Call the Hugging Face chat API with the cleaned messages
response = self.client.chat_completion(
messages=cleaned_messages,
stop=stop_sequences,
max_tokens=1024 # Limit the number of tokens generated in the reply
)
        # 📦 Extract the content from the model response and wrap it in a ChatMessage object
        content = response.choices[0].message.content
        return ChatMessage(role="assistant", content=content)
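
# Hypothetical direct use of the wrapper above, outside the agent (assumes
# HF_TOKEN is set in the environment):
#
#     model = HuggingFaceChatModel()
#     reply = model.generate([{"role": "user", "content": "Say hello."}])
#     print(reply.content)
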
# ✅ Basic Agent with smolagents
class BasicAgent:
def __init__(self):
# Informative log to indicate that the agent is being initialized
print("βœ… BasicAgent initialized with Hugging Face chat model.")
# Instantiate your custom model that wraps the Hugging Face InferenceClient
self.model = HuggingFaceChatModel()
# Create the CodeAgent, which uses the tools and the chat model
self.agent = CodeAgent(
tools=[tls.search_tool, tls.calculate_cargo_travel_time], # Your list of tools
model=self.model, # The model to generate tool-using responses
additional_authorized_imports=["pandas"], # Optional: allow use of pandas in generated code
max_steps=20, # Limit the number of planning steps (tool calls + reasoning)
)
def __call__(self, messages) -> str:
"""
Handle a call to the agent with either a single question or a message history.
Parameters:
-----------
        messages : Union[str, List[Dict[str, str]]]
            The input from the chat interface: either
            - a plain string (a single message), or
            - a list of dicts, e.g. [{"role": "user", "content": "What's the weather?"}]
Returns:
--------
str
The assistant's response as a string.
"""
# If the input is a chat history (list of messages), get the most recent user message
if isinstance(messages, list):
question = messages[-1]["content"] # Extract last message content
else:
question = messages # If it's just a string, use it directly
        # Log the input for debugging
        print(f"📥 Received question: {question[:60]}...")
        # Run the CodeAgent to get a response (may include tool use).
        # agent.run can return a non-string final answer, so normalize it to str.
        response = str(self.agent.run(question))
# Log the response for debugging
print(f"πŸ“€ Response generated: {response[:60]}...")
return response # Return final result
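

# Minimal usage sketch (assumes HF_TOKEN is set in the environment and that
# tools/tools.py provides the tools imported above):
if __name__ == "__main__":
    agent = BasicAgent()
    answer = agent("How long does a cargo ship need to travel 2000 km at 20 km/h?")
    print(answer)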