Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import requests | |
| from pydantic import Field, BaseModel | |
| from omegaconf import OmegaConf | |
| from vectara_agentic.agent import Agent | |
| from vectara_agentic.tools import ToolsFactory, VectaraToolFactory | |
# Greeting text. It is not referenced anywhere in the visible part of this
# file; presumably the UI shows it as the first assistant message -- confirm
# against the caller.
initial_prompt = "How can I help you today?"
# Custom summarization prompt handed to the RAG tool through
# `vectara_prompt_text` in create_assistant_tools().
# The body is a JSON-like list of chat messages (system + user) containing
# `$vectara...` variables and `#foreach` / `#if` / `#else` / `#end` directives.
# NOTE(review): this looks like Apache Velocity template syntax that is
# expanded on the Vectara side, not by Python -- confirm. Python itself treats
# the whole thing as one opaque string, so do not reformat it: every character
# is part of what is sent.
prompt = """
[
{"role": "system", "content": "You are a search bot that forms a coherent answer to a user query based on search results that are provided to you." },
{"role": "user", "content": " [INSTRUCTIONS]
If the search results are irrelevant to the question respond with *** I do not have enough information to answer this question.***
Search results may include tables in a markdown format. When answering a question using a table be careful about which rows and columns contain the answer and include all relevant information from the relevant rows and columns that the query is asking about.
Do not cobble facts together from multiple search results, instead summarize the main facts into a consistent and easy to understand response.
Do not base your response on information or knowledge that is not in the search results.
Make sure your response is answering the query asked. If the query is related to an entity (such as a person or place), make sure you use search results related to that entity.
For queries where only a short answer is required, you can give a brief response.
Consider that each search result is a partial segment from a bigger text, and may be incomplete.
Your output should always be in a single language - the $vectaraLangName language. Check spelling and grammar for the $vectaraLangName language.
Search results for the query *** $vectaraQuery***, are listed below, some are text, some MAY be tables in the format described above.
#foreach ($qResult in $vectaraQueryResultsDeduped)
[$esc.java($foreach.index + 1)]
#if($qResult.hasTable())
Table Title: $qResult.getTable().title() || Table Description: $qResult.getTable().description() || Table Data:
$qResult.getTable().markdown()
#else
$qResult.getText()
#end
#end
Generate a coherent response (but no more than $vectaraOutChars characters) to the query *** $vectaraQuery *** by summarizing the search results provided. Give a slight preference to search results that appear earlier in the list.
Include as many statistical numerical evidence from the search results in your response.
Only cite relevant search results in your answer following these specific instructions: $vectaraCitationInstructions
If the search results are irrelevant to the query, respond with ***I do not have enough information to answer this question.***. Respond always in the $vectaraLangName language, and only in that language."}
]
"""
def create_assistant_tools(cfg):
    """Assemble the list of tools made available to the agent.

    One Vectara RAG tool, ``ask_publications``, is created from the
    credentials carried by ``cfg`` and appended to the standard tools
    provided by ``ToolsFactory``.

    Args:
        cfg: Configuration object; its ``api_key`` and ``corpus_key``
            attributes are read to build the ``VectaraToolFactory``.

    Returns:
        The result of ``ToolsFactory().standard_tools()`` with the
        ``ask_publications`` tool concatenated at the end.
    """

    class QueryPublicationsArgs(BaseModel):
        # Argument schema of the ask_publications tool: one required field.
        query: str = Field(
            ...,
            description="The user query, always in the form of a question",
            examples=[
                "what are the risks reported?",
                "which drug was use on the and how big was the population?",
            ],
        )

    summarizer_name = 'vectara-summary-table-md-query-ext-jan-2025-gpt-4o'

    # The description is spelled with explicit newlines so the text does not
    # depend on how this source file happens to be indented.
    publications_description = (
        "\n"
        "Responds to an user question about a particular result, "
        "based on the publications.\n"
    )

    publications_tool = VectaraToolFactory(
        vectara_api_key=cfg.api_key,
        vectara_corpus_key=cfg.corpus_key,
    ).create_rag_tool(
        tool_name="ask_publications",
        tool_description=publications_description,
        tool_args_schema=QueryPublicationsArgs,
        reranker="multilingual_reranker_v1",
        rerank_k=100,
        n_sentences_before=3,
        n_sentences_after=3,
        lambda_val=0.005,
        summary_num_results=10,
        vectara_summarizer=summarizer_name,
        include_citations=True,
        # Module-level custom summarization prompt template.
        vectara_prompt_text=prompt,
    )

    return ToolsFactory().standard_tools() + [publications_tool]
def initialize_agent(_cfg, agent_progress_callback=None):
    """Construct the clinical-trial publications agent and return it.

    The agent is built with the tools from ``create_assistant_tools(_cfg)``,
    the topic ``"Drug trials publications"`` and the custom instructions
    defined below. ``report()`` is called on it once before it is returned.

    Args:
        _cfg: Configuration object, forwarded unchanged to
            ``create_assistant_tools``.
        agent_progress_callback: Optional callback forwarded unchanged to
            ``Agent``; defaults to ``None``.

    Returns:
        The constructed ``Agent`` instance.
    """
    # Behavioural instructions for the assistant, one bullet per line. The
    # text opens and closes with a newline, as the original triple-quoted
    # literal did.
    instructions = (
        "\n"
        "- You are a helpful clinical trial assistant, with expertise in "
        "clinical trial publications, in conversation with a user.\n"
        "- You always respond to the user with supporting evidence based on "
        "the data, with p-values where available.\n"
        "- Your responses follow a standard statistical format for results "
        "of clinical trials.\n"
        'for example: instead of "Reduced 4-component MACE by 30%" use '
        '"Reduced 4-component MACE by 30% (HR: 0.70; p=0.002)"\n'
        "- Your responses should always include the sample size for the "
        "clinical trial and the time to effect when available.\n"
        "- Call the ask_publications tool as much as needed, with appropiate "
        "queries, until you have all the numerical data to respond properly "
        "to the user question.\n"
    )

    assistant = Agent(
        tools=create_assistant_tools(_cfg),
        topic="Drug trials publications",
        custom_instructions=instructions,
        agent_progress_callback=agent_progress_callback,
    )
    assistant.report()
    return assistant