|
import pandas as pd |
|
import requests |
|
from pydantic import Field, BaseModel |
|
|
|
from omegaconf import OmegaConf |
|
|
|
from vectara_agentic.agent import Agent |
|
from vectara_agentic.tools import ToolsFactory, VectaraToolFactory |
|
|
|
# Greeting text exposed as a module-level constant.
initial_prompt = "How can I help you today?"

# Custom summarization prompt; create_assistant_tools passes it to the RAG
# tool as `vectara_prompt_text`.
#
# The text is a JSON-like list of chat messages (one "system", one "user").
# It is NOT formatted by Python: the `$vectara...` placeholders and the
# `#foreach` / `#if` / `#else` / `#end` directives (Velocity-style template
# syntax) are left verbatim for whatever consumes the prompt text.
# The `||` separators on the "Table Title" line are part of the prompt.
prompt = """
[
{"role": "system", "content": "You are a search bot that forms a coherent answer to a user query based on search results that are provided to you." },
{"role": "user", "content": " [INSTRUCTIONS]
If the search results are irrelevant to the question respond with *** I do not have enough information to answer this question.***
Search results may include tables in a markdown format. When answering a question using a table be careful about which rows and columns contain the answer and include all relevant information from the relevant rows and columns that the query is asking about.
Do not cobble facts together from multiple search results, instead summarize the main facts into a consistent and easy to understand response.
Do not base your response on information or knowledge that is not in the search results.
Make sure your response is answering the query asked. If the query is related to an entity (such as a person or place), make sure you use search results related to that entity.
For queries where only a short answer is required, you can give a brief response.
Consider that each search result is a partial segment from a bigger text, and may be incomplete.
Your output should always be in a single language - the $vectaraLangName language. Check spelling and grammar for the $vectaraLangName language.
Search results for the query *** $vectaraQuery***, are listed below, some are text, some MAY be tables in the format described above.
#foreach ($qResult in $vectaraQueryResultsDeduped)
[$esc.java($foreach.index + 1)]
#if($qResult.hasTable())
Table Title: $qResult.getTable().title() || Table Description: $qResult.getTable().description() || Table Data:
$qResult.getTable().markdown()
#else
$qResult.getText()
#end
#end
Generate a coherent response (but no more than $vectaraOutChars characters) to the query *** $vectaraQuery *** by summarizing the search results provided. Give a slight preference to search results that appear earlier in the list.
Include as many statistical numerical evidence from the search results in your response.
Only cite relevant search results in your answer following these specific instructions: $vectaraCitationInstructions
If the search results are irrelevant to the query, respond with ***I do not have enough information to answer this question.***. Respond always in the $vectaraLangName language, and only in that language."}
]
"""
|
|
|
def create_assistant_tools(cfg):
    """Build the list of tools given to the publications assistant.

    Args:
        cfg: Configuration object; only its ``api_key`` and ``corpus_key``
            attributes are read here, and both are forwarded to
            ``VectaraToolFactory``.

    Returns:
        The result of ``ToolsFactory().standard_tools()`` concatenated with a
        one-element list holding the ``ask_publications`` RAG tool.
    """

    class QueryPublicationsArgs(BaseModel):
        # Argument schema handed to create_rag_tool as `tool_args_schema`.
        query: str = Field(
            ...,
            description="The user query, always in the form of a question",
            examples=[
                "what are the risks reported?",
                "which drug was use on the and how big was the population?",
            ],
        )

    vec_factory = VectaraToolFactory(
        vectara_api_key=cfg.api_key,
        vectara_corpus_key=cfg.corpus_key,
    )
    summarizer = 'vectara-summary-table-md-query-ext-jan-2025-gpt-4o'

    # Retrieval / reranking / summarization settings are carried over
    # unchanged; `prompt` is the module-level custom prompt template.
    ask_publications = vec_factory.create_rag_tool(
        tool_name="ask_publications",
        tool_description="""
Responds to an user question about a particular result, based on the publications.
""",
        tool_args_schema=QueryPublicationsArgs,
        reranker="multilingual_reranker_v1",
        rerank_k=100,
        n_sentences_before=3,
        n_sentences_after=3,
        lambda_val=0.005,
        summary_num_results=10,
        vectara_summarizer=summarizer,
        include_citations=True,
        vectara_prompt_text=prompt,
        save_history=True,
    )

    tools_factory = ToolsFactory()
    return (
        tools_factory.standard_tools() +
        [ask_publications]
    )
|
|
|
def initialize_agent(_cfg, agent_progress_callback=None):
    """Construct the clinical-trial publications agent.

    Args:
        _cfg: Configuration object, passed unchanged to
            ``create_assistant_tools`` to build the agent's tools.
        agent_progress_callback: Optional value forwarded to ``Agent`` under
            the same keyword; defaults to ``None``.

    Returns:
        The ``Agent`` instance, after ``agent.report()`` has been called on it.
    """
    # Behavioural instructions given to the agent as `custom_instructions`.
    menarini_bot_instructions = """
- You are a helpful clinical trial assistant, with expertise in clinical trial publications, in conversation with a user.
- You always respond to the user with supporting evidence based on the data, with p-values where available.
- Your responses follow a standard statistical format for results of clinical trials.
for example: instead of "Reduced 4-component MACE by 30%" use "Reduced 4-component MACE by 30% (HR: 0.70; p=0.002)"
- Your responses should always include the sample size for the clinical trial and the time to effect when available.
- Call the ask_publications tool as much as needed, with appropiate queries, until you have all the numerical data to respond properly to the user question.
"""

    agent = Agent(
        tools=create_assistant_tools(_cfg),
        topic="Drug trials publications",
        custom_instructions=menarini_bot_instructions,
        agent_progress_callback=agent_progress_callback,
    )
    agent.report()
    return agent