update chains
Dockerfile
CHANGED
@@ -1,6 +1,8 @@
-FROM python:3.11-slim-bookworm
+FROM python:3.11
+#slim-bookworm
 
 RUN adduser --uid 1000 --disabled-password --gecos '' appuser
+RUN apt-get update && apt-get install gcc tesseract-ocr -y
 USER 1000
 
 ENV PYTHONDONTWRITEBYTECODE=1 \
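Note on layer order: the apt-get layer added here must come before USER 1000, since package installation requires root. gcc is presumably needed to compile wheels that ship no prebuilt binaries, while tesseract-ocr installs the OCR engine itself. A minimal sketch of how that binary would be exercised from Python, assuming pytesseract and Pillow are available (neither appears in this diff):

import pytesseract
from PIL import Image

# Uses the tesseract binary installed by the Dockerfile layer above.
page = Image.open("scanned_page.png")  # placeholder file name
text = pytesseract.image_to_string(page)  # lang="ita" would also need tesseract-ocr-ita
print(text)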
langchain-streamlit-demo/app.py
CHANGED
@@ -18,8 +18,7 @@ from langchain.schema.retriever import BaseRetriever
 from langchain_community.callbacks import StreamlitCallbackHandler
 from langsmith.client import Client
 from llm_resources import (
-    get_doc_agent,
+    get_agent,
     get_llm,
     get_runnable,
     get_texts_and_multiretriever,
@@ -396,15 +395,19 @@ get_llm_args = dict(
     # },
 )
 get_llm_args_temp_zero = get_llm_args | {"temperature": 0.0}
-st.session_state.llm = get_llm(**get_llm_args)
+st.session_state.llm = get_llm(**get_llm_args_temp_zero)
 
 # --- Chat History ---
 for msg in STMEMORY.messages:
     if msg.content and msg.type in ("ai", "assistant", "human", "user"):
+        content = (
+            # msg.content.split("-" * 50)[1] if ("-" * 50) in msg.content else
+            msg.content
+        )
         st.chat_message(
             msg.type,
             avatar="🦜" if msg.type in ("ai", "assistant") else None,
-        ).write(msg.content)
+        ).write(content)
 
 
 # --- Current Chat ---
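The | in get_llm_args | {"temperature": 0.0} is PEP 584 dict union (Python 3.9+, so fine on the python:3.11 base image); the right-hand operand wins on duplicate keys, which is what makes the temperature override work. A minimal check with placeholder values:

args = {"model_name": "gpt-3.5-turbo", "temperature": 0.7}  # placeholder values
assert (args | {"temperature": 0.0})["temperature"] == 0.0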
@@ -423,10 +426,11 @@ if st.session_state.llm:
 
     # --- Chat Input ---
     prompt = st.chat_input(placeholder="Ask me a question!")
-    if question and question != "--":
+    if question and question != "--" and not prompt:
         prompt = question
-    if prompt:
+    if not uploaded_file:
+        st.error("Please upload a PDF to use the document chat feature.")
+    elif prompt:
         feedback_update = None
         feedback = None
 
@@ -441,7 +445,7 @@ if st.session_state.llm:
         config: Dict[str, Any] = dict(
             callbacks=callbacks,
             tags=["Streamlit Chat"],
-            verbose=True,
+            verbose=False,
             return_intermediate_steps=False,
         )
         if st.session_state.provider == "Anthropic":
@@ -456,8 +460,7 @@ if st.session_state.llm:
         # )
 
         full_response: Union[str, None] = None
-        # callbacks.append(stream_handler)
+
         message_placeholder = st.empty()
         default_tools = [
             # DuckDuckGoSearchRun(),
@@ -470,7 +473,9 @@ if st.session_state.llm:
         # search_llm=get_llm(**get_llm_args_temp_zero),  # type: ignore
         # writer_llm=get_llm(**get_llm_args_temp_zero),  # type: ignore
         # )
-        st_callback = StreamlitCallbackHandler(st.container())
+        st_callback = StreamlitCallbackHandler(
+            st.container(), expand_new_thoughts=False
+        )
         callbacks.append(st_callback)
 
         # @tool("web-research-assistant")
@@ -513,46 +518,58 @@ if st.session_state.llm:
 
         @tool("user-document-chat")
         def doc_chain_tool(input_str: str, callbacks: Callbacks = None):
-            """Always use this tool at least once. Input should be a question."""
-            return st.session_state.doc_chain.invoke(
+            """Usa sempre questo strumento almeno una volta. L'input dovrebbe essere una domanda."""
+
+            # """Always use this tool at least once. Input should be a question."""
+            response = st.session_state.doc_chain.invoke(
                 input_str,
                 config=get_config(callbacks),
             )
+            with st.sidebar.expander("Sources"):
+                for source in response["source_documents"][:3]:
+                    st.markdown("-" * 50)
+                    st.markdown(source.page_content)
+            return response["output_text"]
+
+        # doc_chain_agent = get_doc_agent(
+        #     [doc_chain_tool],
+        #     st.session_state.llm,
+        # )
 
-        @tool("document-question-tool")
-        def doc_question_tool(input_str: str, callbacks: Callbacks = None):
+        # @tool("document-question-tool")
+        # def doc_question_tool(input_str: str, callbacks: Callbacks = None):
+        #     """Questo strumento è un assistente AI con accesso al documento caricato dall'utente.
+        #     L'input dovrebbe essere una o più domande, richieste, istruzioni, ecc.
+        #     """
 
-            return doc_chain_agent.invoke(
-                input_str,
-                config=get_config(callbacks),
-            )
+        #     # """This tool is an AI assistant with access to the user's uploaded document.
+        #     # Input should be one or more questions, requests, instructions, etc.
+        #     # If the user's meaning is unclear, perhaps the answer is here.
+        #     # Generally speaking, try this tool before conducting web research.
+        #     # """
+        #     return doc_chain_agent.invoke(
+        #         input_str,
+        #         config=get_config(callbacks),
+        #     )
 
-        TOOLS =
+        TOOLS = TOOLS + [doc_chain_tool]
 
-        # st.session_state.chain = get_agent(
-        #     TOOLS,
-        #     STMEMORY,
-        #     st.session_state.llm,
-        #     callbacks,
-        # )
-        # else:
-        st.session_state.chain = get_runnable(
-            True,  # use_document_chat,
-            document_chat_chain_type,
+        st.session_state.chain = get_agent(
+            TOOLS,
+            STMEMORY,
             st.session_state.llm,
-            st.session_state.retriever,
-            MEMORY,
-            chat_prompt,
-            prompt,
+            callbacks,
         )
+        # else:
+        #     st.session_state.chain = get_runnable(
+        #         True,  # use_document_chat,
+        #         document_chat_chain_type,
+        #         st.session_state.llm,
+        #         st.session_state.retriever,
+        #         MEMORY,
+        #         chat_prompt,
+        #         prompt,
+        #     )
 
         # --- LLM call ---
         try:
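Worth flagging on the docstring change: with LangChain's @tool decorator, the docstring becomes the tool description that the function-calling model reads when deciding which tool to invoke, so rewriting it in Italian changes agent behavior, not just documentation. A self-contained sketch (the body is a stand-in):

from langchain.tools import tool


@tool("user-document-chat")
def doc_chain_tool(input_str: str) -> str:
    """Usa sempre questo strumento almeno una volta. L'input dovrebbe essere una domanda."""
    return input_str  # stand-in body for illustration


print(doc_chain_tool.name)         # user-document-chat
print(doc_chain_tool.description)  # derived from the docstring above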
langchain-streamlit-demo/defaults.py
CHANGED
@@ -21,16 +21,19 @@ MODEL_DICT = {
 
 SUPPORTED_MODELS = list(MODEL_DICT.keys())
 
-DEFAULT_MODEL = os.environ.get("DEFAULT_MODEL", "gpt-4-turbo-preview")
+DEFAULT_MODEL = os.environ.get(
+    "DEFAULT_MODEL", "gpt-3.5-turbo"
+)  # "gpt-4-turbo-preview")
 
 DEFAULT_SYSTEM_PROMPT = os.environ.get(
     "DEFAULT_SYSTEM_PROMPT",
     # "You are a helpful chatbot. Do not rush. Always plan, think, and act in a step-by-step manner.",
     """
-Comportati come un operatore di call center.
+Comportati come un operatore di call center di Poste Assicurazioni. Ti vengono rivolte domande su polizze, prestazioni, autorizzazioni, rimborsi.
 Hai accesso a tutte le informazioni necessarie per rispondere alle domande dei clienti tramite il tool 'document-question-tool'.
+Usa sempre il 'document-question-tool' per trovare le informazioni necessarie a formulare la risposta. Rispondi sempre in maniera specifica e dettagliata rispetto alla polizza di Poste Assicurazioni accessibile tramite tool.
+Se non sai rispondere ad una domanda, chiedi all'utente le informazioni mancanti oppure rispondi che non hai la risposta e offri di connettere il cliente con un operatore umano. Non consigliare mai di leggere la polizza o di cercare informazioni altrove.
+Rispondi in italiano, usando uno stile amichevole ma formale, e meno di 150 parole per risposta, a meno che non contenga una lunga lista.
     """,
 )
 
@@ -45,6 +48,20 @@ DEFAULT_MAX_TOKENS = int(os.environ.get("DEFAULT_MAX_TOKENS", 1000))
 DEFAULT_LANGSMITH_PROJECT = os.environ.get("LANGCHAIN_PROJECT")
 
 TEST_QUESTIONS = [
+    "Non ho ricevuto le credenziali di accesso all'area riservata: come posso ottenerle?",
+    "Quali prestazioni presenti nel checkup",
+    "La risonanza magnetica è coperta dalla polizza?",
+    "Le visite odontoiatriche sono coperte dalla polizza?",
+    "Come posso richiedere il checkup",
+    "Come posso trovare struttura convenzionata che faccia le risonanze magnetiche",
+    "Come chiedere autorizzazione per fare una risonanza magnetica?",
+    "Come chiedere un rimborso per una risonanza magnetica?",
+    "Quali prestazioni sono incluse nel pacchetto maternità?",
+    "Come chiedere autorizzazione per prestazioni incluse mel pacchetto maternità?",
+    "Come devo procedere per fare l'estensione della copertura al nucleo familiare?",
+]
+
+OLD_TEST_QUESTIONS = [
     "non ho ricevuto le credenziali di accesso all'area riservata: dove posso trovarle?",
     "vorrei informazioni relativamente alle prestazioni presenti nel checkup",
     "la risonanza magnetica è coperta dalla polizza?",
@@ -72,7 +89,7 @@ TEST_QUESTIONS = [
 
 
 SHOW_LANGSMITH_OPTIONS = (
-    os.environ.get("SHOW_LANGSMITH_OPTIONS", "true").lower() == "true"
+    os.environ.get("SHOW_LANGSMITH_OPTIONS", "false").lower() == "true"
 )
 SHOW_AZURE_OPTIONS = os.environ.get("SHOW_AZURE_OPTIONS", "true").lower() == "true"
 
langchain-streamlit-demo/llm_resources.py
CHANGED
@@ -2,19 +2,16 @@ import uuid
 from tempfile import NamedTemporaryFile
 from typing import Dict, List, Optional, Tuple
 
-from defaults import
+from defaults import (
+    DEFAULT_CHUNK_OVERLAP,
+    DEFAULT_CHUNK_SIZE,
+    DEFAULT_RETRIEVER_K,
+    DEFAULT_SYSTEM_PROMPT,
+)
 from langchain.agents import AgentExecutor, AgentType, initialize_agent
 from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent
 from langchain.callbacks.base import BaseCallbackHandler
 from langchain.chains import RetrievalQA
-from langchain.chat_models import (
-    AzureChatOpenAI,
-    ChatAnthropic,
-    ChatAnyscale,
-    ChatOpenAI,
-)
-from langchain.document_loaders import PyPDFLoader
-from langchain.embeddings import OpenAIEmbeddings
 from langchain.llms.base import BaseLLM
 from langchain.memory import ConversationBufferMemory
 from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
@@ -27,8 +24,15 @@ from langchain.schema.runnable import RunnablePassthrough
 from langchain.storage import InMemoryStore
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.tools.base import BaseTool
-from langchain.vectorstores.faiss import FAISS
+from langchain_community.chat_models import (
+    AzureChatOpenAI,
+    ChatAnthropic,
+    ChatAnyscale,
+)
+from langchain_community.document_loaders import PyMuPDFLoader
+from langchain_community.vectorstores.faiss import FAISS
 from langchain_core.messages import SystemMessage
+from langchain_openai import ChatOpenAI, OpenAIEmbeddings
 
 # from qagen import get_rag_qa_gen_chain
 # from summarize import get_rag_summarization_chain
@@ -42,11 +46,12 @@ def get_agent(
 ):
     memory_key = "agent_history"
     system_message = SystemMessage(
-        content=(
-            "Do your best to answer the questions. "
-            "Feel free to use any tools available to look up "
-            "relevant information, only if necessary"
-        ),
+        content=DEFAULT_SYSTEM_PROMPT
+        # (
+        #     "Do your best to answer the questions. "
+        #     "Feel free to use any tools available to look up "
+        #     "relevant information, only if necessary"
+        # ),
     )
     prompt = OpenAIFunctionsAgent.create_prompt(
         system_message=system_message,
@@ -97,11 +102,17 @@ def get_doc_agent(
         (
             "system",
             """
-            You assist a chatbot with answering questions about a document.
-            If necessary, break up incoming questions into multiple parts,
-            and use the tools provided to answer smaller questions before
-            answering the larger question.
+            Assisti un chatbot a rispondere a domande su un documento di polizza.
+            Se necessario, suddividi le domande in più parti
+            e usa gli strumenti forniti per rispondere a domande più piccole
+            prima di rispondere alla domanda più grande.
             """,
+            # """
+            # You assist a chatbot with answering questions about a document.
+            # If necessary, break up incoming questions into multiple parts,
+            # and use the tools provided to answer smaller questions before
+            # answering the larger question.
+            # """,
         ),
         ("user", "{input}"),
         MessagesPlaceholder(variable_name="agent_scratchpad"),
@@ -157,13 +168,20 @@ def get_runnable(
         retriever=retriever,
         output_key="output_text",
         return_source_documents=True,
-    ) | (
-        lambda output: output["output_text"]
-        + "\n\n"
-        + "Source Documents:"
-        + "\n"
-        + "\n".join([x.page_content for x in output["source_documents"][:3]])
     )
+    # ) | (
+    #     lambda output: output["output_text"]
+    #     + "\n\n"
+    #     + ("-" * 50)
+    #     + "\nSource Documents:"
+    #     + "\n"
+    #     + "\n".join(
+    #         [
+    #             f"\nRisorsa {e}:\n" + x.page_content
+    #             for e, x in enumerate(output["source_documents"][:3])
+    #         ]
+    #     )
+    # )
 
 
 def get_llm(
@@ -232,7 +250,7 @@ def get_texts_and_multiretriever(
         temp_file.write(uploaded_file_bytes)
         temp_file.seek(0)
 
-        loader = PyPDFLoader(temp_file.name)
+        loader = PyMuPDFLoader(temp_file.name, clip=(0, 0.10 * 842, 595, 0.85 * 842))
         documents = loader.load()
         text_splitter = RecursiveCharacterTextSplitter(
             chunk_size=10000,
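The clip rectangle is given in PDF points, and 595 x 842 is exactly A4, so (0, 0.10 * 842, 595, 0.85 * 842) keeps the middle band of each page, trimming roughly the top 10% and bottom 15%, presumably to drop running headers and footers before chunking; pages that are not A4 would be clipped in the wrong place. The equivalent raw PyMuPDF call, sketched with a placeholder path:

import fitz  # PyMuPDF

A4_WIDTH, A4_HEIGHT = 595, 842  # A4 in PDF points (1 pt = 1/72 inch)
# Keep the band from 10% to 85% of the page height, cutting header and footer.
band = fitz.Rect(0, 0.10 * A4_HEIGHT, A4_WIDTH, 0.85 * A4_HEIGHT)

with fitz.open("policy.pdf") as doc:  # placeholder path
    text = "\n".join(page.get_text(clip=band) for page in doc)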
@@ -252,7 +270,10 @@ def get_texts_and_multiretriever(
         _text.metadata[id_key] = _id
         sub_texts.extend(_sub_texts)
 
-        embeddings_kwargs = {"openai_api_key": openai_api_key}
+        embeddings_kwargs = {
+            "openai_api_key": openai_api_key,
+            "model": "text-embedding-3-large",
+        }
         # if use_azure and azure_kwargs:
         #     azure_kwargs["azure_endpoint"] = azure_kwargs.pop("openai_api_base")
         #     embeddings_kwargs.update(azure_kwargs)
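text-embedding-3-large is only available through the newer langchain_openai package, which matches the import swap earlier in this file; note that changing the embedding model also invalidates any previously built vector index, since the embedding space changes. Constructor usage, sketched with a placeholder key:

from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(
    openai_api_key="sk-...",  # placeholder
    model="text-embedding-3-large",
)
vectors = embeddings.embed_documents(["testo di esempio della polizza"])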
langchain-streamlit-demo/research_assistant/search/web.py
CHANGED
@@ -6,7 +6,7 @@ from bs4 import BeautifulSoup
 from langchain.llms.base import BaseLLM
 from langchain.prompts import ChatPromptTemplate
 from langchain.retrievers.tavily_search_api import TavilySearchAPIRetriever
-from langchain.utilities.duckduckgo_search import DuckDuckGoSearchAPIWrapper
+from langchain_community.utilities.duckduckgo_search import DuckDuckGoSearchAPIWrapper
 from langchain_core.messages import SystemMessage
 from langchain_core.output_parsers import StrOutputParser
 from langchain_core.runnables import (
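Same migration pattern as in llm_resources.py: utility wrappers moved out of langchain into langchain_community in the 0.1.x split, with the wrapper API itself unchanged. Minimal usage, with a placeholder query:

from langchain_community.utilities.duckduckgo_search import DuckDuckGoSearchAPIWrapper

search = DuckDuckGoSearchAPIWrapper()
results = search.run("polizza sanitaria rimborso")  # placeholder query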
requirements.txt
CHANGED
@@ -6,6 +6,7 @@ duckduckgo-search==4.3.1
 faiss-cpu==1.7.4
 langchain==0.1.4
 langchain-community==0.0.16
+langchain-openai
 langsmith==0.0.84
 # mypy==1.8.0
 numexpr==2.9.0
@@ -22,3 +23,4 @@ tiktoken==0.5.2
 tornado>=6.3.3  # not directly required, pinned by Snyk to avoid a vulnerability
 validators>=0.21.0  # not directly required, pinned by Snyk to avoid a vulnerability
 wikipedia==1.4.0
+pymupdf