abtsousa committed
Commit 603a029 · 1 Parent(s): 335359d

Refactor API configuration and implement rate limiting in agent calls

Files changed (2):
  1. agent/nodes.py +13 -2
  2. app.py +6 -31
agent/nodes.py CHANGED

@@ -13,14 +13,23 @@ from agent.prompts import get_system_prompt
 from agent.state import State
 from langchain_core.messages import SystemMessage, HumanMessage
 from langgraph.prebuilt import ToolNode
+from langchain_core.rate_limiters import InMemoryRateLimiter
 
-API_BASE_URL = "https://api.openrouter.ai/v1"
-MODEL_NAME = "qwen/qwen3-235b-a22b:free"
+
+API_BASE_URL = "https://openrouter.ai/api/v1"
+MODEL_NAME = "openai/gpt-oss-120b"
 API_KEY_ENV_VAR = "OPENROUTER_API_KEY"
 if API_KEY_ENV_VAR not in os.environ:
     print(f"Please set the environment variable {API_KEY_ENV_VAR}.")
     os.environ[API_KEY_ENV_VAR] = getpass(f"Enter your {API_KEY_ENV_VAR} (will not be echoed): ")
 
+# Global singleton rate limiter
+_rate_limiter = InMemoryRateLimiter(
+    requests_per_second=1,
+    check_every_n_seconds=0.1,  # Wake up every 100 ms to check whether allowed to make a request
+    max_bucket_size=5,  # Controls the maximum burst size
+)
+
 ### Helper functions ###
 
 def _get_model() -> BaseChatModel:
@@ -32,10 +41,12 @@ def _get_model() -> BaseChatModel:
     # )
 
     api_key = os.getenv(API_KEY_ENV_VAR)
+
     return ChatOpenAI(
         api_key=SecretStr(api_key) if api_key else None,
         base_url=API_BASE_URL,
         model=MODEL_NAME,
+        rate_limiter=_rate_limiter,
         metadata={
             "reasoning": {
                 "effort": "high"  # Use high reasoning effort
app.py CHANGED

@@ -2,24 +2,12 @@ import os
 import gradio as gr
 import requests
 import pandas as pd
-from langchain_openai import ChatOpenAI
 from os import getenv
 from dotenv import load_dotenv
-from typing import Annotated
-from pydantic import SecretStr
-
-from typing_extensions import TypedDict
-
-from langgraph.graph import StateGraph, START, END
-from langgraph.graph.message import add_messages
-import asyncio  # Added for async processing
-import time  # Added for rate limiting
-
-from langchain_community.tools import WikipediaQueryRun
-from langchain_community.utilities.wikipedia import WikipediaAPIWrapper
-from langgraph.prebuilt import tools_condition
-from langgraph.checkpoint.memory import MemorySaver
-from langgraph.prebuilt import create_react_agent
+from langchain_core.messages import HumanMessage
+from langchain_core.runnables import RunnableConfig
+import asyncio
+from typing import cast
 
 # Phoenix imports
 from phoenix.otel import register
@@ -59,30 +47,17 @@ start_phoenix()
 class BasicAgent:
     def __init__(self):
         self.agent = get_agent()
-        self._last_request_time = 0
-        self._request_lock = asyncio.Lock()
 
     async def __call__(self, question: str) -> str:
         print(f"Agent received question: {question}")
 
-        # Rate limiting: ensure at least 1 second between requests
-        async with self._request_lock:
-            current_time = time.time()
-            time_since_last_request = current_time - self._last_request_time
-            if time_since_last_request < 1.0:
-                sleep_time = 1.0 - time_since_last_request
-                print(f"Rate limiting: sleeping for {sleep_time:.2f} seconds")
-                await asyncio.sleep(sleep_time)
-
-            self._last_request_time = time.time()
-
         # Create configuration like in main.py
         config = create_agent_config(app_name=APP_NAME)
 
         # Call the agent with the question and config (like main.py)
         answer = await self.agent.ainvoke(
-            {"messages": [{"role": "user", "content": question}]},
-            config=config
+            {"messages": [HumanMessage(content=question)]},
+            cast(RunnableConfig, config)
         )
 
         print(f"Agent returning answer: {answer}")