File size: 2,357 Bytes
3724ac8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# %%
from pprint import pprint

import httpx

from client_v1.formatting_utils import fixed_width_wrap, format_docs
from client_v1.settings import EmmRetrieversSettings

# %%
# Instantiate API settings — presumably loaded from environment variables /
# a .env file; verify against client_v1.settings.
settings = EmmRetrieversSettings()

# Bare expression: echoes the configured API base when run as a notebook cell.
settings.API_BASE

# The test index configuration: index name and the date range it covers.
TEST_INDEX = "mine_e_emb-rag_live_test_001"
INDEX_MIN = "2024-09-14"
INDEX_MAX = "2024-09-28"

# %%

from client_v1.client import EmmRetrieverV1

# We can build a concrete retriever by specifying all but the actual `query`;
# here, for example, we build a retriever for just a specific date window.
retriever = EmmRetrieverV1(
    settings=settings,
    params={"index": TEST_INDEX},
    route="/r/rag-minimal/query",
    spec={"search_k": 20},  # presumably the number of hits to return — see client_v1.client
    filter={
        # NOTE(review): filter keys are interpreted server-side; semantics
        # assumed from their names — confirm against the retriever API.
        "max_chunk_no": 1,
        "min_chars": 200,
        "start_dt": "2024-09-19",
        "end_dt": "2024-09-20",
    },
)

# %%

# Fixed typo "occuring" -> "occurring" so the retrieval query uses correct
# spelling (better match against indexed text).
EXAMPLE_QUESTION = "What natural disasters are currently occurring?"

# Run the retriever once; returns the retrieved documents for the question.
docs = retriever.invoke(EXAMPLE_QUESTION)

# Bare expression: echoes the documents when run as a notebook cell.
docs
# %%
# Very similar to the dict-based results, except `metadata` is an attribute.
titles = [doc.metadata["title"] for doc in docs]

bullet_lines = [f"- {title}" for title in titles]
print("\n".join(bullet_lines))

# %%

print(format_docs(docs))

# %%
# Using the gpt@jrc language models


from client_v1.jrc_openai import JRCChatOpenAI

# Chat model client pointed at the JRC OpenAI-compatible endpoint.
llm_model = JRCChatOpenAI(
    model="llama-3.1-70b-instruct",
    openai_api_key=settings.OPENAI_API_KEY.get_secret_value(),
    openai_api_base=settings.OPENAI_API_BASE_URL,
)

# %%

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser


# System instructions for the QA assistant; `{context}` is filled with the
# formatted retrieved documents at invocation time.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know."
    "\n\n"
    "{context}"
)

# Two-turn template: system instructions plus the user's `{input}` question.
messages = [
    ("system", system_prompt),
    ("human", "{input}"),
]
prompt = ChatPromptTemplate.from_messages(messages)

# Compose the RAG pipeline: gather + format context in parallel with passing
# the raw question through, fill the prompt, then call the LLM.
context_and_input = {
    "context": retriever | format_docs,
    "input": RunnablePassthrough(),
}
rag_chain = context_and_input | prompt | llm_model

# %%
def _show_answer(response):
    # Helper: print the width-wrapped answer text, a separator, then the
    # response metadata returned by the model.
    print(fixed_width_wrap(response.content))
    print("-" * 42)
    pprint(response.response_metadata)


r = rag_chain.invoke(EXAMPLE_QUESTION)
_show_answer(r)

# %%
r = rag_chain.invoke("Outline the ongoing Health emergencies in Europe")
_show_answer(r)

# %%