menarini / agent.py
joao-vectara's picture
Update agent.py
9d2951a verified
raw
history blame
5.1 kB
import pandas as pd
import requests
from pydantic import Field, BaseModel
from omegaconf import OmegaConf
from vectara_agentic.agent import Agent
from vectara_agentic.tools import ToolsFactory, VectaraToolFactory
# Greeting text. Not referenced anywhere else in the visible part of this file;
# presumably consumed by the UI layer that imports this module (TODO confirm).
initial_prompt = "How can I help you today?"
# Custom summarization prompt, passed as `vectara_prompt_text` to
# `create_rag_tool` inside `create_assistant_tools`.
# The text is a list of two chat messages (system + user). The user message
# embeds `$vectara...` placeholders and `#foreach` / `#if` / `#else` / `#end`
# directives — this looks like a Velocity-style template that is expanded
# outside this file, not by Python (NOTE(review): confirm against the
# Vectara custom-prompt docs).
# The literal is whitespace-sensitive runtime text: do not re-indent or reflow it.
prompt = """
[
{"role": "system", "content": "You are a search bot that forms a coherent answer to a user query based on search results that are provided to you." },
{"role": "user", "content": " [INSTRUCTIONS]
If the search results are irrelevant to the question respond with *** I do not have enough information to answer this question.***
Search results may include tables in a markdown format. When answering a question using a table be careful about which rows and columns contain the answer and include all relevant information from the relevant rows and columns that the query is asking about.
Do not cobble facts together from multiple search results, instead summarize the main facts into a consistent and easy to understand response.
Do not base your response on information or knowledge that is not in the search results.
Make sure your response is answering the query asked. If the query is related to an entity (such as a person or place), make sure you use search results related to that entity.
For queries where only a short answer is required, you can give a brief response.
Consider that each search result is a partial segment from a bigger text, and may be incomplete.
Your output should always be in a single language - the $vectaraLangName language. Check spelling and grammar for the $vectaraLangName language.
Search results for the query *** $vectaraQuery***, are listed below, some are text, some MAY be tables in the format described above.
#foreach ($qResult in $vectaraQueryResultsDeduped)
[$esc.java($foreach.index + 1)]
#if($qResult.hasTable())
Table Title: $qResult.getTable().title() || Table Description: $qResult.getTable().description() || Table Data:
$qResult.getTable().markdown()
#else
$qResult.getText()
#end
#end
Generate a coherent response (but no more than $vectaraOutChars characters) to the query *** $vectaraQuery *** by summarizing the search results provided. Give a slight preference to search results that appear earlier in the list.
Include as many statistical numerical evidence from the search results in your response.
Only cite relevant search results in your answer following these specific instructions: $vectaraCitationInstructions
If the search results are irrelevant to the query, respond with ***I do not have enough information to answer this question.***. Respond always in the $vectaraLangName language, and only in that language."}
]
"""
def create_assistant_tools(cfg):
    """Assemble the list of tools handed to the agent.

    Builds a single Vectara RAG tool named ``ask_publications`` and returns
    it appended to the standard tools produced by ``ToolsFactory``.

    Args:
        cfg: Configuration object; ``cfg.api_key`` and ``cfg.corpus_key``
            are read to construct the ``VectaraToolFactory``.

    Returns:
        The standard tools followed by the ``ask_publications`` tool.
    """

    class QueryPublicationsArgs(BaseModel):
        # Argument schema for the ask_publications tool: one required query.
        # (Kept as a comment, not a docstring, so the generated schema is
        # not altered.)
        query: str = Field(
            ...,
            description="The user query, always in the form of a question",
            examples=[
                "what are the risks reported?",
                "which drug was use on the and how big was the population?",
            ],
        )

    rag_factory = VectaraToolFactory(
        vectara_api_key=cfg.api_key,
        vectara_corpus_key=cfg.corpus_key,
    )

    publications_tool = rag_factory.create_rag_tool(
        tool_name="ask_publications",
        tool_description=(
            "\n"
            "Responds to an user question about a particular result, based on the publications."
            "\n"
        ),
        tool_args_schema=QueryPublicationsArgs,
        # Retrieval and reranking settings.
        reranker="multilingual_reranker_v1",
        rerank_k=100,
        n_sentences_before=3,
        n_sentences_after=3,
        lambda_val=0.005,
        # Summarization settings; `prompt` is the module-level template.
        summary_num_results=10,
        vectara_summarizer="vectara-summary-table-md-query-ext-jan-2025-gpt-4o",
        include_citations=True,
        vectara_prompt_text=prompt,
        save_history=True,
    )

    return [*ToolsFactory().standard_tools(), publications_tool]
def initialize_agent(_cfg, agent_progress_callback=None):
    """Create the clinical-trial assistant agent, report on it, and return it.

    Args:
        _cfg: Configuration object forwarded to ``create_assistant_tools``.
        agent_progress_callback: Optional callback passed through to
            ``Agent`` unchanged; defaults to ``None``.

    Returns:
        The constructed ``Agent``, after ``report()`` has been called on it.
    """
    # Runtime instruction text for the agent; whitespace-sensitive, so it is
    # kept flush-left and not indented with the surrounding code.
    instructions = """
- You are a helpful clinical trial assistant, with expertise in clinical trial publications, in conversation with a user.
- You always respond to the user with supporting evidence based on the data, with p-values where available.
- Your responses follow a standard statistical format for results of clinical trials.
for example: instead of "Reduced 4-component MACE by 30%" use "Reduced 4-component MACE by 30% (HR: 0.70; p=0.002)"
- Your responses should always include the sample size for the clinical trial and the time to effect when available.
- Call the ask_publications tool as much as needed, with appropiate queries, until you have all the numerical data to respond properly to the user question.
"""

    assistant_tools = create_assistant_tools(_cfg)
    assistant = Agent(
        tools=assistant_tools,
        topic="Drug trials publications",
        custom_instructions=instructions,
        agent_progress_callback=agent_progress_callback,
    )
    assistant.report()
    return assistant