jarguello76 committed
Commit 692591b · verified · Parent(s): 6c8d237

Update tools.py

Files changed (1):
  1. tools.py +201 -400
tools.py CHANGED
@@ -1,404 +1,205 @@
- from smolagents import Tool
- import random
- from huggingface_hub import list_models
- import requests
  import os
- import sqlite3
- from googletrans import Translator
- from gtts import gTTS
- import speech_recognition as sr
- import cv2
  import numpy as np
- from textblob import TextBlob
-
- # Initialize the DuckDuckGo search tool
- # search_tool = DuckDuckGoSearchTool()
-
- class WeatherInfoTool(Tool):
-     name = "weather_info"
-     description = "Fetches weather information for a given location."
-     inputs = {
-         "location": {
-             "type": "string",
-             "description": "The location to get weather information for."
-         }
-     }
-     output_type = "string"
-
-     def forward(self, location: str):
-         # Use a real weather API here
-         api_key = os.getenv("WEATHER_API_KEY")
-         if not api_key:
-             return "Weather API key not found."
-
-         try:
-             response = requests.get(f"http://api.weatherapi.com/v1/current.json?key={api_key}&q={location}")
-             response.raise_for_status()
-             data = response.json()
-             condition = data["current"]["condition"]["text"]
-             temp_c = data["current"]["temp_c"]
-             return f"Weather in {location}: {condition}, {temp_c}°C"
-         except Exception as e:
-             return f"Error fetching weather for {location}: {str(e)}"
-
- class HubStatsTool(Tool):
-     name = "hub_stats"
-     description = "Fetches the most downloaded model from a specific author on the Hugging Face Hub."
-     inputs = {
-         "author": {
-             "type": "string",
-             "description": "The username of the model author/organization to find models from."
-         }
-     }
-     output_type = "string"
-
-     def forward(self, author: str):
-         try:
-             # List models from the specified author, sorted by downloads
-             models = list(list_models(author=author, sort="downloads", direction=-1, limit=1))
-
-             if models:
-                 model = models[0]
-                 return f"The most downloaded model by {author} is {model.id} with {model.downloads:,} downloads."
-             else:
-                 return f"No models found for author {author}."
-         except Exception as e:
-             return f"Error fetching models for {author}: {str(e)}"
-
- class CalendarTool(Tool):
-     name = "calendar"
-     description = "Manages and retrieves information about dates and events."
-     inputs = {
-         "action": {
-             "type": "string",
-             "description": "The action to perform (e.g., 'add', 'get', 'delete')."
-         },
-         "date": {
-             "type": "string",
-             "description": "The date of the event (format: YYYY-MM-DD)."
-         },
-         "event": {
-             "type": "string",
-             "description": "The event description.",
-             "nullable": True  # Add this line to specify that 'event' is nullable
-         }
-     }
-     output_type = "string"
-
-     def __init__(self):
-         self.events = {}
-
-     def forward(self, action: str, date: str, event: str = None):
-         if action == "add":
-             self.events[date] = event
-             return f"Event '{event}' added to {date}."
-         elif action == "get":
-             return f"Event on {date}: {self.events.get(date, 'No event found.')}"
-         elif action == "delete":
-             if date in self.events:
-                 del self.events[date]
-                 return f"Event on {date} deleted."
-             else:
-                 return f"No event found on {date}."
-         else:
-             return "Invalid action."
-
-
-
- class CalculatorTool(Tool):
-     name = "calculator"
-     description = "Performs mathematical calculations."
-     inputs = {
-         "expression": {
-             "type": "string",
-             "description": "The mathematical expression to evaluate."
-         }
-     }
-     output_type = "string"
-
-     def forward(self, expression: str):
-         try:
-             result = eval(expression)
-             return f"The result of the expression '{expression}' is {result}."
-         except Exception as e:
-             return f"Error evaluating expression: {str(e)}"

- class EmailTool(Tool):
-     name = "email"
-     description = "Sends and receives emails."
-     inputs = {
-         "action": {
-             "type": "string",
-             "description": "The action to perform (e.g., 'send')."
-         },
-         "to": {
-             "type": "string",
-             "description": "The recipient's email address."
-         },
-         "subject": {
-             "type": "string",
-             "description": "The subject of the email."
-         },
-         "body": {
-             "type": "string",
-             "description": "The body of the email."
-         }
-     }
-     output_type = "string"
-
-     def __init__(self, smtp_server, smtp_port, email, password):
-         self.smtp_server = smtp_server
-         self.smtp_port = smtp_port
-         self.email = email
-         self.password = password
-
-     def forward(self, action: str, to: str, subject: str, body: str):
-         if action == "send":
-             try:
-                 msg = MIMEText(body)
-                 msg['Subject'] = subject
-                 msg['From'] = self.email
-                 msg['To'] = to
-
-                 with smtplib.SMTP(self.smtp_server, self.smtp_port) as server:
-                     server.starttls()
-                     server.login(self.email, self.password)
-                     server.sendmail(self.email, [to], msg.as_string())
-
-                 return f"Email sent to {to}."
-             except Exception as e:
-                 return f"Error sending email: {str(e)}"
-         else:
-             return "Invalid action."
-
- class FileManagementTool(Tool):
-     name = "file_management"
-     description = "Handles file operations like reading, writing, and managing files."
-     inputs = {
-         "action": {
-             "type": "string",
-             "description": "The action to perform (e.g., 'read', 'write', 'delete')."
-         },
-         "file_path": {
-             "type": "string",
-             "description": "The path of the file."
-         },
-         "content": {
-             "type": "string",
-             "description": "The content to write to the file.",
-             "nullable": True  # Add this line to specify that 'content' is nullable
-         }
-     }
-     output_type = "string"
-
-     def forward(self, action: str, file_path: str, content: str = None):
-         if action == "read":
-             try:
-                 with open(file_path, 'r') as file:
-                     content = file.read()
-                 return f"Content of {file_path}: {content}"
-             except Exception as e:
-                 return f"Error reading file: {str(e)}"
-         elif action == "write":
-             try:
-                 with open(file_path, 'w') as file:
-                     file.write(content)
-                 return f"Content written to {file_path}."
-             except Exception as e:
-                 return f"Error writing to file: {str(e)}"
-         elif action == "delete":
-             try:
-                 os.remove(file_path)
-                 return f"File {file_path} deleted."
-             except Exception as e:
-                 return f"Error deleting file: {str(e)}"
          else:
-             return "Invalid action."
-
-
- class DatabaseQueryTool(Tool):
-     name = "database_query"
-     description = "Interacts with databases for storing and retrieving information."
-     inputs = {
-         "action": {
-             "type": "string",
-             "description": "The action to perform (e.g., 'query', 'insert')."
-         },
-         "query": {
-             "type": "string",
-             "description": "The SQL query to execute."
-         }
-     }
-     output_type = "string"
-
-     def __init__(self, db_path):
-         self.db_path = db_path
-
-     def forward(self, action: str, query: str):
-         try:
-             conn = sqlite3.connect(self.db_path)
-             cursor = conn.cursor()
-
-             if action == "query":
-                 cursor.execute(query)
-                 results = cursor.fetchall()
-                 return f"Query results: {results}"
-             elif action == "insert":
-                 cursor.execute(query)
-                 conn.commit()
-                 return "Data inserted successfully."
-             else:
-                 return "Invalid action."
-
-         except Exception as e:
-             return f"Error executing query: {str(e)}"
-         finally:
-             conn.close()
-
- class TranslationTool(Tool):
-     name = "translation"
-     description = "Translates text between different languages."
-     inputs = {
-         "text": {
-             "type": "string",
-             "description": "The text to translate."
-         },
-         "src_lang": {
-             "type": "string",
-             "description": "The source language code."
-         },
-         "dest_lang": {
-             "type": "string",
-             "description": "The destination language code."
-         }
-     }
-     output_type = "string"
-
-     def forward(self, text: str, src_lang: str, dest_lang: str):
-         try:
-             translator = Translator()
-             translation = translator.translate(text, src=src_lang, dest=dest_lang)
-             return f"Translated text: {translation.text}"
-         except Exception as e:
-             return f"Error translating text: {str(e)}"
-
- class TextToSpeechTool(Tool):
-     name = "text_to_speech"
-     description = "Converts text to speech."
-     inputs = {
-         "text": {
-             "type": "string",
-             "description": "The text to convert to speech."
-         }
-     }
-     output_type = "string"
-
-     def forward(self, text: str):
-         try:
-             tts = gTTS(text=text, lang='en')
-             tts.save("output.mp3")
-             return "Text converted to speech and saved as output.mp3."
-         except Exception as e:
-             return f"Error converting text to speech: {str(e)}"
-
- class SpeechToTextTool(Tool):
-     name = "speech_to_text"
-     description = "Converts speech to text."
-     inputs = {
-         "audio_file": {
-             "type": "string",
-             "description": "The path to the audio file to convert to text."
-         }
-     }
-     output_type = "string"
-
-     def forward(self, audio_file: str):
-         try:
-             recognizer = sr.Recognizer()
-             with sr.AudioFile(audio_file) as source:
-                 audio = recognizer.record(source)
-             text = recognizer.recognize_google(audio)
-             return f"Converted speech to text: {text}"
-         except Exception as e:
-             return f"Error converting speech to text: {str(e)}"
-
- class ImageRecognitionTool(Tool):
-     name = "image_recognition"
-     description = "Analyzes and interprets images."
-     inputs = {
-         "image_path": {
-             "type": "string",
-             "description": "The path to the image to analyze."
-         }
-     }
-     output_type = "string"
-
-     def forward(self, image_path: str):
-         try:
-             image = cv2.imread(image_path)
-             gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-             faces = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml').detectMultiScale(gray, 1.3, 5)
-             return f"Found {len(faces)} faces in the image."
-         except Exception as e:
-             return f"Error analyzing image: {str(e)}"
-
- class NLPTool(Tool):
-     name = "nlp"
-     description = "Performs advanced text processing tasks like sentiment analysis, named entity recognition, etc."
-     inputs = {
-         "text": {
-             "type": "string",
-             "description": "The text to analyze."
-         },
-         "task": {
-             "type": "string",
-             "description": "The NLP task to perform (e.g., 'sentiment', 'entities')."
-         }
-     }
-     output_type = "string"
-
-     def forward(self, text: str, task: str):
-         blob = TextBlob(text)
-         if task == "sentiment":
-             sentiment = blob.sentiment
-             return f"Sentiment analysis: Polarity={sentiment.polarity}, Subjectivity={sentiment.subjectivity}"
-         elif task == "entities":
-             entities = blob.noun_phrases
-             return f"Named entities: {entities}"
-         else:
-             return "Invalid task."
-
- class APIIntegrationTool(Tool):
-     name = "api_integration"
-     description = "Interacts with various external APIs for fetching or sending data."
-     inputs = {
-         "api_url": {
-             "type": "string",
-             "description": "The URL of the API endpoint."
-         },
-         "method": {
-             "type": "string",
-             "description": "The HTTP method to use (e.g., 'GET', 'POST')."
-         },
-         "data": {
-             "type": "string",
-             "description": "The data to send with the request.",
-             "nullable": True  # Add this line to specify that 'data' is nullable
-         }
-     }
-     output_type = "string"
-
-     def forward(self, api_url: str, method: str, data: str = None):
-         try:
-             if method == "GET":
-                 response = requests.get(api_url)
-             elif method == "POST":
-                 response = requests.post(api_url, json=data)
-             else:
-                 return "Invalid method."
-
-             response.raise_for_status()
-             return f"API response: {response.json()}"
-         except Exception as e:
-             return f"Error interacting with API: {str(e)}"
-
+ """LangGraph Agent with CSV-based Vector Store"""
+
  import os
+ import ast
+ import pandas as pd
  import numpy as np
+ from sklearn.metrics.pairwise import cosine_similarity
+ from dotenv import load_dotenv
+ from langgraph.graph import START, StateGraph, MessagesState
+ from langgraph.prebuilt import tools_condition, ToolNode
+ from langchain_google_genai import ChatGoogleGenerativeAI
+ from langchain_groq import ChatGroq
+ from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
+ from langchain_community.tools.tavily_search import TavilySearchResults
+ from langchain_community.document_loaders import WikipediaLoader, ArxivLoader
+ from langchain_core.messages import SystemMessage, HumanMessage
+ from langchain_core.tools import tool
+
+ load_dotenv()
+
+ # Math tools
+ @tool
+ def multiply(a: int, b: int) -> int:
+     """Multiply two numbers."""
+     return a * b
+
+ @tool
+ def add(a: int, b: int) -> int:
+     """Add two numbers."""
+     return a + b
+
+ @tool
+ def subtract(a: int, b: int) -> int:
+     """Subtract two numbers."""
+     return a - b
+
+ @tool
+ def divide(a: int, b: int) -> float:
+     """Divide two numbers."""
+     if b == 0:
+         raise ValueError("Cannot divide by zero.")
+     return a / b
+
+ @tool
+ def modulus(a: int, b: int) -> int:
+     """Get the modulus of two numbers."""
+     return a % b
+
+ # Search tools
+ @tool
+ def wiki_search(query: str) -> str:
+     """Search Wikipedia for a query and return maximum 2 results."""
+     search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
+     formatted_search_docs = "\n\n---\n\n".join(
+         [f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
+          for doc in search_docs])
+     return formatted_search_docs
+
+ @tool
+ def web_search(query: str) -> str:
+     """Search Tavily for a query and return maximum 3 results."""
+     search_docs = TavilySearchResults(max_results=3).invoke(query=query)
+     formatted_search_docs = "\n\n---\n\n".join(
+         [f'<Document source="{doc.get("url", "")}" title="{doc.get("title", "")}"/>\n{doc.get("content", "")}\n</Document>'
+          for doc in search_docs])
+     return formatted_search_docs
+
+ @tool
+ def arxiv_search(query: str) -> str:
+     """Search Arxiv for a query and return maximum 3 results."""
+     search_docs = ArxivLoader(query=query, load_max_docs=3).load()
+     formatted_search_docs = "\n\n---\n\n".join(
+         [f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
+          for doc in search_docs])
+     return formatted_search_docs
+
+ # CSV-based Vector Store Class
+ class CSVVectorStore:
+     def __init__(self, csv_file_path: str):
+         """Initialize the CSV vector store."""
+         self.df = pd.read_csv(csv_file_path)
+         # Convert string representation of embeddings to numpy arrays
+         self.df['embedding'] = self.df['embedding'].apply(ast.literal_eval)
+         self.embeddings_matrix = np.array(self.df['embedding'].tolist())
+
+     def similarity_search(self, query_embedding: np.ndarray, k: int = 1):
+         """Find most similar documents to the query embedding."""
+         # Calculate cosine similarity
+         similarities = cosine_similarity([query_embedding], self.embeddings_matrix)[0]
+
+         # Get top k indices
+         top_indices = np.argsort(similarities)[-k:][::-1]
+
+         # Return results in a format similar to LangChain's Document
+         results = []
+         for idx in top_indices:
+             class Document:
+                 def __init__(self, page_content, metadata):
+                     self.page_content = page_content
+                     self.metadata = metadata
+
+             doc = Document(
+                 page_content=self.df.iloc[idx]['content'],
+                 metadata=ast.literal_eval(self.df.iloc[idx]['metadata']) if isinstance(self.df.iloc[idx]['metadata'], str) else self.df.iloc[idx]['metadata']
+             )
+             results.append(doc)
+
+         return results
+
+ # System prompt
+ system_prompt = """You are a helpful assistant tasked with answering questions using a set of tools. Now, I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, do not use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, do not use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string. Your answer should only start with 'FINAL ANSWER: ', then follows with the answer."""
+
+ # Tools list
+ tools = [
+     multiply,
+     add,
+     subtract,
+     divide,
+     modulus,
+     wiki_search,
+     web_search,
+     arxiv_search,
+ ]
+
+ def build_graph(provider: str = "groq", csv_file_path: str = "embeddings.csv"):
+     """Build the graph with CSV-based vector store."""
+
+     # Initialize CSV vector store
+     vector_store = CSVVectorStore(csv_file_path)
+
+     # System message
+     sys_msg = SystemMessage(content=system_prompt)
+
+     # Initialize LLM based on provider
+     if provider == "google":
+         llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0)
+     elif provider == "groq":
+         llm = ChatGroq(model="qwen-qwq-32b", temperature=0)
+     elif provider == "huggingface":
+         llm = ChatHuggingFace(
+             llm=HuggingFaceEndpoint(
+                 url="https://api-inference.huggingface.co/models/Meta-DeepLearning/llama-2-7b-chat-hf",
+                 temperature=0,
+             ),
+         )
+     else:
+         raise ValueError("Invalid provider. Choose 'google', 'groq' or 'huggingface'.")
+
+     # Bind tools to LLM
+     llm_with_tools = llm.bind_tools(tools)
+
+     # Helper function to get query embedding (simplified - you might want to use the same embedding model)
+     def get_query_embedding(query: str) -> np.ndarray:
+         # For now, return a random embedding - in practice, you'd use the same embedding model
+         # that was used to create the CSV embeddings
+         return np.random.rand(768)  # Assuming 768-dim embeddings
+
+     # Nodes
+     def assistant(state: MessagesState):
+         """Assistant node."""
+         return {"messages": [llm_with_tools.invoke(state["messages"])]}
+
+     def retriever(state: MessagesState):
+         """Retriever node using CSV vector store."""
+         query = state["messages"][-1].content if state["messages"] else ""
+
+         # Get query embedding (this is simplified - you'd use proper embedding model)
+         query_embedding = get_query_embedding(query)
+
+         # Search for similar documents
+         similar_docs = vector_store.similarity_search(query_embedding, k=1)
+
+         if similar_docs:
+             example_msg = HumanMessage(
+                 content=f"Here I provide a similar question and answer for reference: \n\n{similar_docs[0].page_content}",
+             )
+             return {"messages": [sys_msg] + state["messages"] + [example_msg]}
          else:
+             return {"messages": [sys_msg] + state["messages"]}
+
+     # Build graph
+     builder = StateGraph(MessagesState)
+     builder.add_node("retriever", retriever)
+     builder.add_node("assistant", assistant)
+     builder.add_node("tools", ToolNode(tools))
+
+     builder.add_edge(START, "retriever")
+     builder.add_edge("retriever", "assistant")
+     builder.add_conditional_edges("assistant", tools_condition)
+     builder.add_edge("tools", "assistant")
+
+     return builder.compile()
+
+ # Test
+ if __name__ == "__main__":
+     question = "When was a picture of St. Thomas Aquinas first added to the Wikipedia page on the Principle of double effect?"
+
+     # Build the graph (you'll need to provide the path to your CSV file)
+     graph = build_graph(provider="groq", csv_file_path="your_embeddings.csv")
+
+     # Run the graph
+     messages = [HumanMessage(content=question)]
+     messages = graph.invoke({"messages": messages})
+
+     for m in messages["messages"]:
+         m.pretty_print()
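
Note: the committed get_query_embedding helper returns a random vector as a placeholder, as its own comments say. A minimal sketch of what a real helper might look like, assuming the 'embedding' column in the CSV was produced by a 768-dimensional sentence-transformers model (the model name below is a hypothetical choice, not taken from this commit):

import numpy as np
from sentence_transformers import SentenceTransformer

# Hypothetical encoder; it must be the same model that produced the
# 'embedding' column in the CSV for the cosine similarities to be meaningful.
_encoder = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")  # 768-dim output

def get_query_embedding(query: str) -> np.ndarray:
    """Encode the query with the same model used to build the CSV embeddings."""
    # encode() returns a NumPy array by default, shape (768,) for this model.
    return _encoder.encode(query)

For this to line up with CSVVectorStore, the CSV is expected to carry 'content', 'metadata', and 'embedding' columns, with each embedding stored as a string-encoded list that the store parses via ast.literal_eval.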