import os

import gradio as gr
import spaces
from huggingface_hub import hf_hub_download, login
from llama_cpp import Llama

# Authenticate with the Hugging Face Hub using the HF_TOKEN secret.
login(token=os.getenv("HF_TOKEN"))

repo_id = "QuantFactory/Meta-Llama-3-8B-Instruct-GGUF-v2"
model_id = "Meta-Llama-3-8B-Instruct-v2.Q2_K.gguf"
# repo_id = "QuantFactory/Meta-Llama-3-70B-Instruct-GGUF"
# model_id = "Meta-Llama-3-70B-Instruct.Q2_K.gguf"

# Download the quantized GGUF model into the local "models" directory.
local_dir = "models"
hf_hub_download(
    repo_id=repo_id,
    filename=model_id,
    local_dir=local_dir,
)


@spaces.GPU(duration=120)
def get_itinerary(information, maxtokens=1000, temperature=0.9, top_probability=0.9):
    # Load the model inside the GPU-decorated function so the Space only
    # holds the GPU while a request is being served.
    llm = Llama(
        model_path=os.path.join(local_dir, model_id),
        flash_attn=True,
        n_gpu_layers=81,
        n_batch=1024,
        n_ctx=8192,
    )
    prompt = "Please prepare a nice and fancy itinerary for the place and information provided below."
    output = llm.create_chat_completion(
        messages=[
            {"role": "system", "content": prompt},
            {"role": "user", "content": information},
        ],
        max_tokens=maxtokens,
        temperature=temperature,
        top_p=top_probability,
    )
    return output["choices"][0]["message"]["content"]


# UI controls for the generation parameters, wired into the interface
# alongside the free-text input.
temp_slider = gr.Slider(minimum=0, maximum=2, value=0.9, label="Temperature Value")
max_tokens = gr.Number(value=1000, label="Max Tokens", precision=0)

iface = gr.Interface(
    fn=get_itinerary,
    inputs=["text", max_tokens, temp_slider],
    outputs="text",
    title="VoyageX",
    description="This application helps you build a travel itinerary.",
    theme=gr.themes.Soft(),
)

iface.launch()