File size: 2,676 Bytes
f32179f
25915da
 
 
 
 
 
 
 
 
 
f32179f
3c770ca
25915da
c2e446b
25915da
f32179f
25915da
 
 
 
 
 
 
 
f32179f
 
6f13f4d
3c770ca
25915da
 
3c770ca
25915da
f32179f
8218c57
25915da
f32179f
 
 
 
 
8218c57
25915da
f32179f
3c770ca
25915da
f32179f
3c770ca
25915da
 
eab08c4
 
 
 
 
25915da
8218c57
eab08c4
8218c57
25915da
eab08c4
f32179f
eab08c4
 
 
 
c858739
25915da
eab08c4
 
 
 
 
8218c57
25915da
f32179f
8218c57
73443b9
eab08c4
 
 
 
 
22ad66c
eab08c4
 
69243ad
f32179f
c2e446b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import pandas as pd
import os
import gradio as gr
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# Load the tourism Q&A dataset from the local JSON file.
df = pd.read_json("./tourisme_chatbot.json")

# Randomly sample 20% of the dataset (frac=0.2) to keep embedding/indexing
# cheap; random_state pins the sample so the vector store is reproducible.
# NOTE(review): the original comment said "30%" but frac=0.2 is 20% — the
# code value is kept, the comment is corrected.
sampled_df = df.sample(frac=0.2, random_state=42)

# Build one context string per sampled row, formatted as
# "col0: val0 col1: val1 col2: val2 col3: val3 " (first four columns only,
# trailing space preserved) — these strings are what gets embedded below.
context_data = []
for row in sampled_df.itertuples(index=False):
    pairs = (f"{col}: {val}" for col, val in zip(sampled_df.columns[:4], row))
    context_data.append(" ".join(pairs) + " ")

# Get the Groq API key from the environment.
# NOTE(review): the variable name is lowercase and plural ('groq_api_keys');
# this must match the secret name configured in the deployment environment.
# If unset this returns None and the ChatGroq call below will fail at request time.
groq_api_key = os.environ.get('groq_api_keys')

# Initialize the chat LLM (Groq-hosted Llama 3.1 70B).
llm = ChatGroq(model="llama-3.1-70b-versatile", api_key=groq_api_key)

# Initialize the embedding model used to vectorize the context strings.
embed_model = HuggingFaceEmbeddings(model_name="mixedbread-ai/mxbai-embed-large-v1")

# Create (or reopen) the Chroma vector store.
# NOTE(review): persist_directory="./" writes the Chroma DB files into the
# current working directory — consider a dedicated subdirectory.
vectorstore = Chroma(
    collection_name="tourism_dataset_store",
    embedding_function=embed_model,
    persist_directory="./",
)

# Embed and insert the sampled context strings.
# NOTE(review): this runs on every startup; with a persistent directory,
# re-running the script presumably duplicates entries — verify.
vectorstore.add_texts(context_data)

# Expose the store as a retriever for the RAG chain below.
retriever = vectorstore.as_retriever()

# Prompt template for the RAG chain. {context} is filled with the retrieved
# documents and {question} with the user's query. (Runtime string — unchanged.)
template = """You are a Moroccan tourism expert.
    Use the provided context to answer the question.
    If you don't know the answer, say so. Explain your answer in detail.
    Do not discuss the context in your response; just provide the answer directly.
    Context: {context}
    Question: {question}
    Answer:"""

rag_prompt = PromptTemplate.from_template(template)

# RAG pipeline: the input question is sent both to the retriever (which
# produces the context documents) and, via RunnablePassthrough, straight
# into the prompt's {question} slot; the filled prompt goes to the LLM and
# the model output is reduced to a plain string.
rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | rag_prompt
    | llm
    | StrOutputParser()
)

def rag_memory_stream(text):
    """Stream the RAG chain's answer for `text`, yielding the cumulative
    response after each chunk so the UI can render it in real time."""
    chunks = []
    for piece in rag_chain.stream(text):
        chunks.append(piece)
        yield "".join(chunks)

# --- Gradio UI ---
# Sample prompts shown beneath the input box.
examples = ['Tourist attraction sites in Morocco', 'What are some fun activities to do in Morocco?']

title = "Real-time AI App with Groq API and LangChain to Answer Morocco Tourism questions"

# Simple text-in / text-out interface; the generator fn streams partial
# answers so the output box updates token by token.
demo = gr.Interface(
    fn=rag_memory_stream,
    inputs="text",
    outputs="text",
    title=title,
    examples=examples,
    allow_flagging="never",
)

if __name__ == '__main__':
    # share=True publishes a temporary public Gradio link.
    demo.launch(share=True)