from typing import Generator, List
import os
from dotenv import load_dotenv
import gradio as gr
from huggingface_hub import InferenceClient
# Load environment variables
load_dotenv()
# Read system prompt from file
prompt_path = os.path.join(os.path.dirname(__file__), "prompt.md")
with open(prompt_path, "r", encoding="utf-8") as f:
    SYSTEM_PROMPT = f.read()
# Initialize HuggingFace client with token from env
client = InferenceClient(token=os.getenv("HUGGING_FACE_HUB_TOKEN"))
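
# No model is pinned above, so huggingface_hub typically falls back to a
# recommended text-generation model. The <|system|>/<|user|>/<|assistant|>
# prompt built below follows Zephyr-style chat templates, so pinning a
# compatible model is an option, e.g. (a hypothetical choice, not taken
# from the original code):
# client = InferenceClient(
#     model="HuggingFaceH4/zephyr-7b-beta",
#     token=os.getenv("HUGGING_FACE_HUB_TOKEN"),
# )
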
def format_prompt(message: str, history: List[dict]) -> str:
    """Format the conversation prompt for the model.

    Args:
        message: The current user message.
        history: Past turns in Gradio "messages" format, i.e. dicts
            with "role" and "content" keys.

    Returns:
        str: The formatted prompt for the model.
    """
    prompt = f"<|system|>\n{SYSTEM_PROMPT}</s>"
    # The chatbot is declared with type="messages", so history arrives as
    # role/content dicts rather than (user, assistant) tuples.
    for turn in history:
        role = "user" if turn["role"] == "user" else "assistant"
        prompt += f"<|{role}|>\n{turn['content']}</s>"
    prompt += f"<|user|>\n{message}</s><|assistant|>\n"
    return prompt
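
# Example (illustrative): with one prior exchange in "messages" format,
#   format_prompt("How far is Mars?", [
#       {"role": "user", "content": "Hi"},
#       {"role": "assistant", "content": "Greetings, earthling!"},
#   ])
# returns the single string:
#   "<|system|>\n<contents of prompt.md></s><|user|>\nHi</s>"
#   "<|assistant|>\nGreetings, earthling!</s><|user|>\nHow far is Mars?</s><|assistant|>\n"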
def predict(
    message: str,
    history: List[dict],
    temperature: float = 0.7,
    max_new_tokens: int = 1024,
    top_p: float = 0.9,
) -> str:
    """Generate a complete response to the user message.

    Args:
        message: The current user message.
        history: Past turns in Gradio "messages" format.
        temperature: Sampling temperature.
        max_new_tokens: Maximum number of tokens to generate.
        top_p: Top-p sampling parameter.

    Returns:
        str: Complete generated response.
    """
    response = client.text_generation(
        format_prompt(message, history),
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=1.2,
        do_sample=True,
        seed=42,
    )
    return response
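
# Sketch (an assumption, not part of the original app): the same endpoint can
# stream tokens by passing stream=True to client.text_generation, which then
# yields text chunks instead of returning one string. Gradio's ChatInterface
# accepts generator functions and re-renders the message on every yield, so a
# hypothetical streaming variant of predict could look like this:
def predict_stream(
    message: str,
    history: List[dict],
    temperature: float = 0.7,
    max_new_tokens: int = 1024,
    top_p: float = 0.9,
) -> Generator[str, None, None]:
    """Yield the response incrementally as tokens arrive (hypothetical helper)."""
    partial = ""
    for chunk in client.text_generation(
        format_prompt(message, history),
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=1.2,
        do_sample=True,
        seed=42,
        stream=True,  # with details=False (the default), each chunk is a str
    ):
        partial += chunk
        yield partial  # ChatInterface shows the accumulated text so far
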
# Main Gradio interface
with gr.Blocks(css="styles.css", title="AstroAlien 3000") as demo:
    gr.Markdown("# 👽🛸 Welcome to AstroAlien 3000! 🌌✨", elem_classes="glass-card")
    chatbot = gr.Chatbot(type="messages", elem_classes="glass-card")
    with gr.Row():
        temperature = gr.Slider(0, 1, value=0.7, label="Temperature", elem_classes="glass-card")
        max_tokens = gr.Slider(1, 2048, value=1024, step=1, label="Max Tokens", elem_classes="glass-card")
        top_p = gr.Slider(0, 1, value=0.9, label="Top-p", elem_classes="glass-card")
    gr.ChatInterface(
        predict,
        chatbot=chatbot,
        additional_inputs=[temperature, max_tokens, top_p],
        type="messages",
    )

if __name__ == "__main__":
    demo.launch()