Spaces:
Runtime error
Runtime error
abtsousa
committed on
Commit
·
603a029
1
Parent(s):
335359d
Refactor API configuration and implement rate limiting in agent calls
Browse files
- agent/nodes.py +13 -2
- app.py +6 -31
agent/nodes.py
CHANGED
|
@@ -13,14 +13,23 @@ from agent.prompts import get_system_prompt
|
|
| 13 |
from agent.state import State
|
| 14 |
from langchain_core.messages import SystemMessage, HumanMessage
|
| 15 |
from langgraph.prebuilt import ToolNode
|
|
|
|
| 16 |
|
| 17 |
-
|
| 18 |
-
|
|
|
|
| 19 |
API_KEY_ENV_VAR = "OPENROUTER_API_KEY"
|
| 20 |
if API_KEY_ENV_VAR not in os.environ:
|
| 21 |
print(f"Please set the environment variable {API_KEY_ENV_VAR}.")
|
| 22 |
os.environ[API_KEY_ENV_VAR] = getpass(f"Enter your {API_KEY_ENV_VAR} (will not be echoed): ")
|
| 23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
### Helper functions ###
|
| 25 |
|
| 26 |
def _get_model() -> BaseChatModel:
|
|
@@ -32,10 +41,12 @@ def _get_model() -> BaseChatModel:
|
|
| 32 |
# )
|
| 33 |
|
| 34 |
api_key = os.getenv(API_KEY_ENV_VAR)
|
|
|
|
| 35 |
return ChatOpenAI(
|
| 36 |
api_key=SecretStr(api_key) if api_key else None,
|
| 37 |
base_url=API_BASE_URL,
|
| 38 |
model=MODEL_NAME,
|
|
|
|
| 39 |
metadata={
|
| 40 |
"reasoning": {
|
| 41 |
"effort": "high" # Use high reasoning effort
|
|
|
|
| 13 |
from agent.state import State
|
| 14 |
from langchain_core.messages import SystemMessage, HumanMessage
|
| 15 |
from langgraph.prebuilt import ToolNode
|
| 16 |
+
from langchain_core.rate_limiters import InMemoryRateLimiter
|
| 17 |
|
| 18 |
+
|
| 19 |
+
API_BASE_URL = "https://openrouter.ai/api/v1"
|
| 20 |
+
MODEL_NAME = "openai/gpt-oss-120b"
|
| 21 |
API_KEY_ENV_VAR = "OPENROUTER_API_KEY"
|
| 22 |
if API_KEY_ENV_VAR not in os.environ:
|
| 23 |
print(f"Please set the environment variable {API_KEY_ENV_VAR}.")
|
| 24 |
os.environ[API_KEY_ENV_VAR] = getpass(f"Enter your {API_KEY_ENV_VAR} (will not be echoed): ")
|
| 25 |
|
| 26 |
+
# Global singleton rate limiter
|
| 27 |
+
_rate_limiter = InMemoryRateLimiter(
|
| 28 |
+
requests_per_second=1,
|
| 29 |
+
check_every_n_seconds=0.1, # Wake up every 100 ms to check whether allowed to make a request,
|
| 30 |
+
max_bucket_size=5, # Controls the maximum burst size.
|
| 31 |
+
)
|
| 32 |
+
|
| 33 |
### Helper functions ###
|
| 34 |
|
| 35 |
def _get_model() -> BaseChatModel:
|
|
|
|
| 41 |
# )
|
| 42 |
|
| 43 |
api_key = os.getenv(API_KEY_ENV_VAR)
|
| 44 |
+
|
| 45 |
return ChatOpenAI(
|
| 46 |
api_key=SecretStr(api_key) if api_key else None,
|
| 47 |
base_url=API_BASE_URL,
|
| 48 |
model=MODEL_NAME,
|
| 49 |
+
rate_limiter=_rate_limiter,
|
| 50 |
metadata={
|
| 51 |
"reasoning": {
|
| 52 |
"effort": "high" # Use high reasoning effort
|
app.py
CHANGED
|
@@ -2,24 +2,12 @@ import os
|
|
| 2 |
import gradio as gr
|
| 3 |
import requests
|
| 4 |
import pandas as pd
|
| 5 |
-
from langchain_openai import ChatOpenAI
|
| 6 |
from os import getenv
|
| 7 |
from dotenv import load_dotenv
|
| 8 |
-
from
|
| 9 |
-
from
|
| 10 |
-
|
| 11 |
-
from
|
| 12 |
-
|
| 13 |
-
from langgraph.graph import StateGraph, START, END
|
| 14 |
-
from langgraph.graph.message import add_messages
|
| 15 |
-
import asyncio # Added for async processing
|
| 16 |
-
import time # Added for rate limiting
|
| 17 |
-
|
| 18 |
-
from langchain_community.tools import WikipediaQueryRun
|
| 19 |
-
from langchain_community.utilities.wikipedia import WikipediaAPIWrapper
|
| 20 |
-
from langgraph.prebuilt import tools_condition
|
| 21 |
-
from langgraph.checkpoint.memory import MemorySaver
|
| 22 |
-
from langgraph.prebuilt import create_react_agent
|
| 23 |
|
| 24 |
# Phoenix imports
|
| 25 |
from phoenix.otel import register
|
|
@@ -59,30 +47,17 @@ start_phoenix()
|
|
| 59 |
class BasicAgent:
|
| 60 |
def __init__(self):
|
| 61 |
self.agent = get_agent()
|
| 62 |
-
self._last_request_time = 0
|
| 63 |
-
self._request_lock = asyncio.Lock()
|
| 64 |
|
| 65 |
async def __call__(self, question: str) -> str:
|
| 66 |
print(f"Agent received question: {question}")
|
| 67 |
|
| 68 |
-
# Rate limiting: ensure at least 1 second between requests
|
| 69 |
-
async with self._request_lock:
|
| 70 |
-
current_time = time.time()
|
| 71 |
-
time_since_last_request = current_time - self._last_request_time
|
| 72 |
-
if time_since_last_request < 1.0:
|
| 73 |
-
sleep_time = 1.0 - time_since_last_request
|
| 74 |
-
print(f"Rate limiting: sleeping for {sleep_time:.2f} seconds")
|
| 75 |
-
await asyncio.sleep(sleep_time)
|
| 76 |
-
|
| 77 |
-
self._last_request_time = time.time()
|
| 78 |
-
|
| 79 |
# Create configuration like in main.py
|
| 80 |
config = create_agent_config(app_name=APP_NAME)
|
| 81 |
|
| 82 |
# Call the agent with the question and config (like main.py)
|
| 83 |
answer = await self.agent.ainvoke(
|
| 84 |
-
{"messages": [
|
| 85 |
-
config
|
| 86 |
)
|
| 87 |
|
| 88 |
print(f"Agent returning answer: {answer}")
|
|
|
|
| 2 |
import gradio as gr
|
| 3 |
import requests
|
| 4 |
import pandas as pd
|
|
|
|
| 5 |
from os import getenv
|
| 6 |
from dotenv import load_dotenv
|
| 7 |
+
from langchain_core.messages import HumanMessage
|
| 8 |
+
from langchain_core.runnables import RunnableConfig
|
| 9 |
+
import asyncio
|
| 10 |
+
from typing import cast
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
# Phoenix imports
|
| 13 |
from phoenix.otel import register
|
|
|
|
| 47 |
class BasicAgent:
|
| 48 |
def __init__(self):
|
| 49 |
self.agent = get_agent()
|
|
|
|
|
|
|
| 50 |
|
| 51 |
async def __call__(self, question: str) -> str:
|
| 52 |
print(f"Agent received question: {question}")
|
| 53 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
# Create configuration like in main.py
|
| 55 |
config = create_agent_config(app_name=APP_NAME)
|
| 56 |
|
| 57 |
# Call the agent with the question and config (like main.py)
|
| 58 |
answer = await self.agent.ainvoke(
|
| 59 |
+
{"messages": [HumanMessage(content=question)]},
|
| 60 |
+
cast(RunnableConfig, config)
|
| 61 |
)
|
| 62 |
|
| 63 |
print(f"Agent returning answer: {answer}")
|