# VoyageX / app.py
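"""VoyageX: a small Gradio Space that generates travel itineraries with a
quantized Llama 3 8B Instruct model served through llama-cpp-python.

Flow (as written in this script): log in to the Hugging Face Hub, download a
GGUF checkpoint, load it on the GPU inside a ZeroGPU-decorated function, and
expose the itinerary generator through a Gradio Interface.
"""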
import os

import gradio as gr
import spaces
from huggingface_hub import hf_hub_download, login
from llama_cpp import Llama

# Authenticate to the Hugging Face Hub (HF_TOKEN must be set as a Space secret).
login(token=os.getenv("HF_TOKEN"))
# Quantized Llama 3 8B Instruct in GGUF format.
repo_id = "QuantFactory/Meta-Llama-3-8B-Instruct-GGUF-v2"
model_id = "Meta-Llama-3-8B-Instruct-v2.Q2_K.gguf"
# Larger alternative:
# repo_id = "QuantFactory/Meta-Llama-3-70B-Instruct-GGUF"
# model_id = "Meta-Llama-3-70B-Instruct.Q2_K.gguf"

# Download the model weights once at startup into a local directory.
local_dir = "models"
hf_hub_download(
    repo_id=repo_id,
    filename=model_id,
    local_dir=local_dir,
)
@spaces.GPU(duration=120)
def get_itinerary(information, maxtokens=1000, temperature=0.9, top_probability=0.9):
    """Generate a travel itinerary from the user-provided trip details."""
    # Load the GGUF model with llama-cpp-python, offloading all layers to the GPU.
    llm = Llama(
        model_path=os.path.join(local_dir, model_id),
        flash_attn=True,
        n_gpu_layers=81,
        n_batch=1024,
        n_ctx=8192,
    )
    prompt = "Please prepare a nice and fancy itinerary for the place and information provided below:"
    output = llm.create_chat_completion(
        messages=[
            # The itinerary instruction goes in the system role; the trip details follow as the user turn.
            {"role": "system", "content": prompt},
            {"role": "user", "content": information},
        ],
        max_tokens=maxtokens,
        temperature=temperature,
        top_p=top_probability,
    )
    return output["choices"][0]["message"]["content"]
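
# Minimal local sanity check (a sketch, not part of the Space's normal flow):
# calling get_itinerary directly bypasses the Gradio UI. The trip details and
# parameter values below are illustrative only.
# if __name__ == "__main__":
#     print(get_itinerary("A 3-day trip to Kyoto in autumn on a mid-range budget.",
#                         maxtokens=500, temperature=0.7))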

# Extra controls exposed in the UI and passed through to get_itinerary.
temp_slider = gr.Slider(minimum=0, maximum=2, value=0.9, label="Temperature Value")
max_tokens = gr.Number(value=1000, label="Max Tokens")

iface = gr.Interface(
    fn=get_itinerary,
    inputs=["text", max_tokens, temp_slider],
    outputs="text",
    title="VoyageX",
    description="This application helps you build a travel itinerary.",
    theme=gr.themes.Soft(),
)
iface.launch()