# VoyageX — Hugging Face Space: LLM-powered travel-itinerary generator.
# (Original lines here were web-UI scraping residue: "Spaces: Sleeping Sleeping".)
# Imports: stdlib first, then third-party (scraping " | |" artifacts removed,
# which previously made every line a syntax error).
import os
import re
import subprocess

import accelerate
import gradio as gr
import spaces
from huggingface_hub import hf_hub_download, login
from llama_cpp import Llama
from tqdm import tqdm

# Authenticate to the Hugging Face Hub; HF_TOKEN must be set as a Space
# secret so gated/private model files can be downloaded.
login(token=os.getenv("HF_TOKEN"))
# Quantized Llama-3-8B-Instruct in GGUF format (Q2_K = smallest 2-bit quant,
# chosen so the model fits in limited Space memory).
repo_id = "QuantFactory/Meta-Llama-3-8B-Instruct-GGUF-v2"
model_id = "Meta-Llama-3-8B-Instruct-v2.Q2_K.gguf"
# Alternative, much larger 70B variant (swap in if the hardware allows):
# repo_id = "QuantFactory/Meta-Llama-3-70B-Instruct-GGUF"
# model_id = "Meta-Llama-3-70B-Instruct.Q2_K.gguf"
local_dir = "models"

# Fetch the model file into ./models (hf_hub_download skips the download
# when the file is already present in the local cache).
hf_hub_download(
    repo_id=repo_id,
    filename=model_id,
    local_dir=local_dir,
)
# Lazily-initialized singleton: loading a multi-GB GGUF model takes many
# seconds, so it must not be reloaded on every request (the original
# constructed a fresh Llama inside each call).
_llm = None


def _load_llm():
    """Load the llama.cpp model once and return the cached instance."""
    global _llm
    if _llm is None:
        _llm = Llama(
            model_path="models/" + model_id,
            flash_attn=True,
            n_gpu_layers=81,   # offload (up to) all layers to GPU
            n_batch=1024,
            n_ctx=8192,
        )
    return _llm


def get_itinerary(information, maxtokens=1000, temperature=0.9, top_probability=0.9):
    """Generate a travel itinerary for the user-provided destination details.

    Args:
        information: Free-text description of the place / trip details.
        maxtokens: Maximum number of tokens to generate.
        temperature: Sampling temperature (higher = more varied output).
        top_probability: Nucleus-sampling top-p value.

    Returns:
        The generated itinerary text (str).
    """
    llm = _load_llm()
    prompt = "Please prepare a nice and fancy itinerary for the place and information provided following: "
    output = llm.create_chat_completion(
        messages=[
            # The instruction belongs in the "system" role; the original
            # mislabeled it as "assistant".
            {"role": "system", "content": prompt},
            {"role": "user", "content": information},
        ],
        max_tokens=maxtokens,
        temperature=temperature,
        # Was accepted as a parameter but silently never forwarded before.
        top_p=top_probability,
    )
    return output['choices'][0]['message']['content']
# UI controls for the generation parameters. In the original these two
# components were created but never attached to the Interface, so users
# could not actually adjust temperature or token budget.
temp_slider = gr.Slider(minimum=0, maximum=2, value=0.9, label="Temperature Value")
max_tokens = gr.Number(value=1000, label="Max Tokens")

iface = gr.Interface(
    fn=get_itinerary,
    # Input order must match get_itinerary(information, maxtokens, temperature);
    # top_probability keeps its default.
    inputs=[
        gr.Textbox(label="Place and trip information"),
        max_tokens,
        temp_slider,
    ],
    outputs='text',
    title='VoyageX',
    description="This application helps building itinerary",
    theme=gr.themes.Soft(),
)
iface.launch()