Spaces:
Sleeping
Sleeping
my latest updates
Browse files
agent.py
CHANGED
|
@@ -1,21 +1,18 @@
|
|
| 1 |
-
import pandas as pd
|
| 2 |
-
import requests
|
| 3 |
from pydantic import Field, BaseModel
|
| 4 |
|
| 5 |
-
from omegaconf import OmegaConf
|
| 6 |
-
|
| 7 |
from vectara_agentic.agent import Agent
|
| 8 |
-
from vectara_agentic.tools import
|
| 9 |
|
| 10 |
initial_prompt = "How can I help you today?"
|
| 11 |
|
| 12 |
-
|
| 13 |
[
|
| 14 |
-
{"role": "system", "content": "You are an AI assistant that forms a
|
| 15 |
{"role": "user", "content": "
|
| 16 |
[INSTRUCTIONS]
|
| 17 |
If the search results are irrelevant to the question respond with *** I do not have enough information to answer this question.***
|
| 18 |
-
Search results may include tables in a markdown format.
|
|
|
|
| 19 |
Do not base your response on information or knowledge that is not in the search results.
|
| 20 |
Make sure your response is answering the query asked. If the query is related to an entity (such as a person or place), make sure you use search results related to that entity.
|
| 21 |
Consider that each search result is a partial segment from a bigger text, and may be incomplete.
|
|
@@ -30,23 +27,62 @@ prompt = """
|
|
| 30 |
$qResult.getText()
|
| 31 |
#end
|
| 32 |
#end
|
| 33 |
-
Generate a
|
| 34 |
Give a slight preference to search results that appear earlier in the list.
|
| 35 |
Include statistical and numerical evidence to support and contextualize your response.
|
| 36 |
-
Your response should
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
If the search results are irrelevant to the query, respond with ***I do not have enough information to answer this question.***.
|
| 40 |
Respond always in the $vectaraLangName language, and only in that language."}
|
| 41 |
]
|
| 42 |
"""
|
| 43 |
|
| 44 |
-
def create_assistant_tools(cfg):
|
| 45 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
class QueryPublicationsArgs(BaseModel):
|
| 48 |
-
query: str = Field(..., description="The user query, always in the form of a question",
|
| 49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
|
| 51 |
vec_factory = VectaraToolFactory(vectara_api_key=cfg.api_key,
|
| 52 |
vectara_corpus_key=cfg.corpus_key)
|
|
@@ -54,57 +90,69 @@ def create_assistant_tools(cfg):
|
|
| 54 |
ask_publications = vec_factory.create_rag_tool(
|
| 55 |
tool_name = "ask_publications",
|
| 56 |
tool_description = """
|
| 57 |
-
Responds to an user question about
|
| 58 |
""",
|
| 59 |
tool_args_schema = QueryPublicationsArgs,
|
| 60 |
-
reranker = "
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
"cutoff": 0.1
|
| 65 |
-
},
|
| 66 |
-
{
|
| 67 |
-
"type": "mmr",
|
| 68 |
-
"diversity_bias": 0.1,
|
| 69 |
-
"limit": 100
|
| 70 |
-
}
|
| 71 |
-
],
|
| 72 |
-
n_sentences_before = 3, n_sentences_after = 3, lambda_val = 0.005,
|
| 73 |
-
summary_num_results = 25,
|
| 74 |
vectara_summarizer = summarizer,
|
| 75 |
include_citations = True,
|
| 76 |
vectara_prompt_text = prompt,
|
| 77 |
save_history = True,
|
| 78 |
-
verbose =
|
| 79 |
)
|
| 80 |
|
| 81 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
return (
|
| 83 |
-
|
| 84 |
-
[ask_publications]
|
| 85 |
)
|
| 86 |
|
| 87 |
def initialize_agent(_cfg, agent_progress_callback=None):
|
| 88 |
menarini_bot_instructions = """
|
| 89 |
- You are an expert in clinical trial and statistical data analysis with extensive experience in designing, analyzing, and interpreting clinical research data.
|
| 90 |
-
- Your
|
| 91 |
-
|
| 92 |
-
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
-
|
| 97 |
-
-
|
| 98 |
-
|
|
|
|
|
|
|
|
|
|
| 99 |
1) Use precise statistical terminology (e.g., randomization, blinding, intention-to-treat, type I/II error, p-values, confidence intervals, Bayesian methods, etc.)
|
| 100 |
and reference common methodologies or guidelines where applicable (e.g., CONSORT, FDA, EMA).
|
| 101 |
-
2)
|
| 102 |
-
When
|
| 103 |
-
|
| 104 |
3) Provide clear explanations of statistical concepts, including assumptions, potential biases, and limitations in the context of clinical trial data.
|
| 105 |
4) Ensure that your analysis is evidence-based and reflects current best practices in the field of clinical research and data analysis.
|
| 106 |
-
|
| 107 |
-
|
| 108 |
"""
|
| 109 |
|
| 110 |
agent = Agent(
|
|
|
|
|
|
|
|
|
|
| 1 |
from pydantic import Field, BaseModel
|
| 2 |
|
|
|
|
|
|
|
| 3 |
from vectara_agentic.agent import Agent
|
| 4 |
+
from vectara_agentic.tools import VectaraToolFactory
|
| 5 |
|
| 6 |
initial_prompt = "How can I help you today?"
|
| 7 |
|
| 8 |
+
prompt_old = """
|
| 9 |
[
|
| 10 |
+
{"role": "system", "content": "You are an AI assistant that forms a detailed and comprehensive answer to a user query based on search results that are provided to you." },
|
| 11 |
{"role": "user", "content": "
|
| 12 |
[INSTRUCTIONS]
|
| 13 |
If the search results are irrelevant to the question respond with *** I do not have enough information to answer this question.***
|
| 14 |
+
Search results may include tables in a markdown format.
|
| 15 |
+
When answering a question using a table be careful about which rows and columns contain the answer and include all relevant information from the relevant rows and columns that the query is asking about.
|
| 16 |
Do not base your response on information or knowledge that is not in the search results.
|
| 17 |
Make sure your response is answering the query asked. If the query is related to an entity (such as a person or place), make sure you use search results related to that entity.
|
| 18 |
Consider that each search result is a partial segment from a bigger text, and may be incomplete.
|
|
|
|
| 27 |
$qResult.getText()
|
| 28 |
#end
|
| 29 |
#end
|
| 30 |
+
Generate a comprehensive response to the query *** $vectaraQuery *** using information and facts in the search results provided.
|
| 31 |
Give a slight preference to search results that appear earlier in the list.
|
| 32 |
Include statistical and numerical evidence to support and contextualize your response.
|
| 33 |
+
Your response should include all relevant information and values from the search results. Do not omit anything relevant.
|
| 34 |
+
Prioritize a long, detailed, thorough and comprehensive response over a short one.
|
| 35 |
+
Cite relevant search results in your answer following these specific instructions: $vectaraCitationInstructions
|
|
|
|
| 36 |
Respond always in the $vectaraLangName language, and only in that language."}
|
| 37 |
]
|
| 38 |
"""
|
| 39 |
|
|
|
|
| 40 |
|
| 41 |
+
prompt = """
|
| 42 |
+
[
|
| 43 |
+
{"role": "system", "content": "
|
| 44 |
+
You are an AI assistant that forms a detailed and comprehensive answer to a user question based on search results that are provided to you.
|
| 45 |
+
You are an expert in clinical trial and statistical data analysis with extensive experience in designing, analyzing, and interpreting clinical research data.
|
| 46 |
+
When asked about baseline characteristics, include as many such characteristics as possible in your response. Be detailed and comprehensive.
|
| 47 |
+
For example, always include in baseline characteristics the sample size (number of patients), population demographics (male/female), age, race, BMI, and any other relevant characteristics.
|
| 48 |
+
Include statistical and numerical evidence to support and contextualize your response.
|
| 49 |
+
Your response should include all relevant information and values from the search results. Do not omit anything relevant.
|
| 50 |
+
Prioritize a long, detailed, thorough and comprehensive response over a short one."
|
| 51 |
+
},
|
| 52 |
+
{"role": "user", "content": "
|
| 53 |
+
[INSTRUCTIONS]
|
| 54 |
+
- Generate a comprehensive response to the question *** $vectaraQuery *** using information and facts in the search results provided.
|
| 55 |
+
- If the search results are irrelevant to the question respond with *** I do not have enough information to answer this question.***
|
| 56 |
+
- Do not base your response on information or knowledge that is not in the search results.
|
| 57 |
+
- Make sure your response is answering the question asked. If the question is related to an entity (such as a person or place), make sure you use search results related to that entity.
|
| 58 |
+
- Consider that each search result is a partial segment from a bigger text, and may be incomplete.
|
| 59 |
+
- Your output should always be in a single language - the $vectaraLangName language. Check spelling and grammar for the $vectaraLangName language.
|
| 60 |
+
- Give a slight preference to search results that appear earlier in the list.
|
| 61 |
+
- Only cite relevant search results in your answer following these specific instructions: $vectaraCitationInstructions
|
| 62 |
+
Search results for the question *** $vectaraQuery***, are listed below, some are text, some MAY be tables in markdown format.
|
| 63 |
+
#foreach ($qResult in $vectaraQueryResultsDeduped)
|
| 64 |
+
[$esc.java($foreach.index + 1)]
|
| 65 |
+
#if($qResult.hasTable())
|
| 66 |
+
Table Title: $qResult.getTable().title() || Table Description: $qResult.getTable().description() || Table Data:
|
| 67 |
+
$qResult.getTable().markdown()
|
| 68 |
+
#else
|
| 69 |
+
$qResult.getText()
|
| 70 |
+
#end
|
| 71 |
+
#end
|
| 72 |
+
Respond always in the $vectaraLangName language, and only in that language."}
|
| 73 |
+
]
|
| 74 |
+
"""
|
| 75 |
+
|
| 76 |
+
def create_assistant_tools(cfg):
|
| 77 |
|
| 78 |
class QueryPublicationsArgs(BaseModel):
    """Argument schema passed as ``tool_args_schema`` for the ``ask_publications`` tool."""

    # NOTE: there must be no trailing comma after ``Field(...)``. With a
    # trailing comma the right-hand side becomes a one-element tuple, so the
    # attribute's default is ``(FieldInfo,)`` rather than a pydantic field and
    # the description/examples/required-ness are silently lost.
    query: str = Field(
        ...,
        description="The user query, always in the form of a question?",
        examples=[
            "what are the risks reported?",
            "which drug was tested?",
            "what is the baseline population in the trial?",
        ],
    )
    name: str = Field(..., description="The name of the clinical trial")
|
| 86 |
|
| 87 |
vec_factory = VectaraToolFactory(vectara_api_key=cfg.api_key,
|
| 88 |
vectara_corpus_key=cfg.corpus_key)
|
|
|
|
| 90 |
ask_publications = vec_factory.create_rag_tool(
|
| 91 |
tool_name = "ask_publications",
|
| 92 |
tool_description = """
|
| 93 |
+
Responds to an user question about clinical trials, focusing on a specific information and data.
|
| 94 |
""",
|
| 95 |
tool_args_schema = QueryPublicationsArgs,
|
| 96 |
+
reranker = "slingshot", rerank_k = 100, rerank_cutoff = 0.1,
|
| 97 |
+
n_sentences_before = 1, n_sentences_after = 1, lambda_val = 0.1,
|
| 98 |
+
summary_num_results = 15,
|
| 99 |
+
max_response_chars = 8192, max_tokens = 4096,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
vectara_summarizer = summarizer,
|
| 101 |
include_citations = True,
|
| 102 |
vectara_prompt_text = prompt,
|
| 103 |
save_history = True,
|
| 104 |
+
verbose = False
|
| 105 |
)
|
| 106 |
|
| 107 |
+
class SearchPublicationsArgs(BaseModel):
    """Argument schema passed as ``tool_args_schema`` for the ``search_publications`` tool."""

    # NOTE: there must be no trailing comma after ``Field(...)``. With a
    # trailing comma the right-hand side becomes a one-element tuple, so the
    # attribute's default is ``(FieldInfo,)`` rather than a pydantic field and
    # the description/examples/required-ness are silently lost.
    query: str = Field(
        ...,
        description="The user query, always in the form of a question?",
        examples=[
            "what are the risks reported?",
            "which drug was tested?",
            "what is the baseline population in the trial?",
        ],
    )
|
| 114 |
+
search_publications = vec_factory.create_search_tool(
|
| 115 |
+
tool_name = "search_publications",
|
| 116 |
+
tool_description = """
|
| 117 |
+
Responds with a list of relevant publications that match the user query
|
| 118 |
+
Use a high value for top_k (3 times what you think is needed) to make sure to get all relevant results.
|
| 119 |
+
""",
|
| 120 |
+
tool_args_schema = SearchPublicationsArgs,
|
| 121 |
+
reranker = "mmr", rerank_k = 100, mmr_diversity_bias = 0.5,
|
| 122 |
+
n_sentences_before = 2, n_sentences_after = 2, lambda_val = 0.3,
|
| 123 |
+
save_history = True,
|
| 124 |
+
verbose = False
|
| 125 |
+
)
|
| 126 |
+
|
| 127 |
+
|
| 128 |
return (
|
| 129 |
+
[ask_publications, search_publications]
|
|
|
|
| 130 |
)
|
| 131 |
|
| 132 |
def initialize_agent(_cfg, agent_progress_callback=None):
|
| 133 |
menarini_bot_instructions = """
|
| 134 |
- You are an expert in clinical trial and statistical data analysis with extensive experience in designing, analyzing, and interpreting clinical research data.
|
| 135 |
+
- Your task is to answer user question, using the tools you have available.
|
| 136 |
+
- use the 'search_publications' tool to get a list of relevant trials or documents that match the user question, but always call it with summarize=False.
|
| 137 |
+
- Call the 'ask_publications' tool to obtain relevant information needed to answer the user question.
|
| 138 |
+
If the 'ask_publications' tool responds that it does not have enough information to answer your query,
|
| 139 |
+
rephrase your query to be more specific and explicit, and call 'ask_publications' again to get the answer you need.
|
| 140 |
+
Retry in this manner up to 10 times.
|
| 141 |
+
- You can specify in your tool query the specific information you are looking for, such as "what is the sample size?" or "what is the percentage of patients with Diabetes".
|
| 142 |
+
- Your response to the user question should be technically rigorous, data-driven, and written for an audience familiar with advanced statistical terminology,
|
| 143 |
+
regulatory standards, and the nuances of clinical trial design.
|
| 144 |
+
- If a tool returns citations or references, include them in your response. Avoid including citations inside table cells.
|
| 145 |
+
- Form queries to tool as questions. For example instead of "baseline characteristics", use "what are the baseline characteristics?"
|
| 146 |
+
- When responding to a user question:
|
| 147 |
1) Use precise statistical terminology (e.g., randomization, blinding, intention-to-treat, type I/II error, p-values, confidence intervals, Bayesian methods, etc.)
|
| 148 |
and reference common methodologies or guidelines where applicable (e.g., CONSORT, FDA, EMA).
|
| 149 |
+
2) When reporting population statistics, always include sample size (number of patients) and other important population characteristics.
|
| 150 |
+
When reporting sample sizes, consider participants who were eligible for the study, those who were randomized, and those who completed the study.
|
| 151 |
+
Never use estimated characteristics, always use the actual values from the study.
|
| 152 |
3) Provide clear explanations of statistical concepts, including assumptions, potential biases, and limitations in the context of clinical trial data.
|
| 153 |
4) Ensure that your analysis is evidence-based and reflects current best practices in the field of clinical research and data analysis.
|
| 154 |
+
6) Provide sources and citations for data and statistical information included in your response, based on citations from the tools.
|
| 155 |
+
7) Be consistent and comprehensive in your responses, ensuring that all relevant information is included.
|
| 156 |
"""
|
| 157 |
|
| 158 |
agent = Agent(
|