import gradio as gr
from huggingface_hub import InferenceClient
from chromadb import PersistentClient
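
# Client for the Hugging Face Inference API. The model name below is an
# assumption (a common default in ChatInterface demos), not confirmed by this
# file; swap in whichever hosted model the Space actually targets.
inference_client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")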
# Open the persistent ChromaDB store from disk
client_db = PersistentClient(path="./chromadb_directory/chromadb_file")

# Load the existing collection
collection = client_db.get_collection("my_collection")
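
# `get_collection` assumes the collection already exists on disk. A minimal,
# hypothetical sketch of the one-time setup that would create and populate it
# (ids and documents below are placeholders):
#
#     setup_client = PersistentClient(path="./chromadb_directory/chromadb_file")
#     setup_collection = setup_client.get_or_create_collection("my_collection")
#     setup_collection.add(
#         ids=["doc-0", "doc-1"],
#         documents=["First reference text.", "Second reference text."],
#     )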
# Retrieve the most relevant documents from ChromaDB for a query string
def retrieve_from_chromadb(query):
    # query_texts expects a list of query strings; adjust n_results as needed
    results = collection.query(query_texts=[query], n_results=5)
    # results["documents"] holds one list of documents per query; take the
    # first (and only) list and coerce each document to a string
    documents = [str(doc) for doc in results["documents"][0]]
    return documents
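
# Example (illustrative): retrieve_from_chromadb("refund policy") returns up
# to five strings drawn from the indexed documents, ordered by similarity to
# the query.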
# Streaming response function for the chatbot
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    # Start with the system prompt
    messages = [{"role": "system", "content": system_message}]

    # Replay the conversation history as alternating user/assistant turns
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})

    # Retrieve relevant documents from ChromaDB and prepend them as context
    # for the user query
    retrieved_docs = retrieve_from_chromadb(message)
    context = "\n".join(retrieved_docs) + "\nUser: " + message
    messages.append({"role": "user", "content": context})
response = ""
# Generate response using the Inference Client
for message in inference_client.chat_completion(
messages,
max_tokens=max_tokens,
stream=True,
temperature=temperature,
top_p=top_p,
):
token = message.choices[0].delta.content
response += token
yield response
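
# Illustration: on a first turn (empty history) with a single retrieved
# document, the payload sent to the model looks roughly like (contents here
# are hypothetical):
#
#     [{"role": "system", "content": "You are a friendly Chatbot."},
#      {"role": "user",
#       "content": "Refunds are issued within 14 days.\nUser: What is the refund policy?"}]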
# Gradio chat interface exposing the system prompt and sampling controls
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)

if __name__ == "__main__":
    demo.launch()
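
# To run locally (the file name app.py is assumed, as in a typical Space):
#     pip install gradio huggingface_hub chromadb
#     python app.py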