Update app.py
app.py CHANGED
@@ -23,6 +23,9 @@ from exception import CustomExceptionHandling
 huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
 
 # Download gguf model files
+if not os.path.exists("./models"):
+    os.makedirs("./models")
+
 hf_hub_download(
     repo_id="bartowski/google_gemma-3-1b-it-GGUF",
     filename="google_gemma-3-1b-it-Q6_K.gguf",
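The new directory check only matters if the downloaded GGUF actually lands in ./models, which is where the later Llama(model_path=f"models/{model}") call looks. The hunk is cut off before any local_dir argument, so the following is only a sketch of how the pieces are assumed to fit together; local_dir and token are real hf_hub_download parameters, but their use here is inferred rather than shown in the diff.

import os
from huggingface_hub import hf_hub_download

os.makedirs("./models", exist_ok=True)  # same effect as the new exists()/makedirs() pair
hf_hub_download(
    repo_id="bartowski/google_gemma-3-1b-it-GGUF",
    filename="google_gemma-3-1b-it-Q6_K.gguf",
    local_dir="./models",                  # assumption: not visible in this hunk
    token=os.getenv("HUGGINGFACE_TOKEN"),  # token read at the top of app.py
)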
@@ -66,13 +69,13 @@ llm_model = None
 def respond(
     message: str,
     history: List[Tuple[str, str]],
-    model: str,
-    system_message: str,
-    max_tokens: int,
-    temperature: float,
-    top_p: float,
-    top_k: int,
-    repeat_penalty: float,
+    model: str = "google_gemma-3-1b-it-Q5_K_M.gguf",  # Set default model
+    system_message: str = "You are a helpful assistant.",
+    max_tokens: int = 1024,
+    temperature: float = 0.7,
+    top_p: float = 0.95,
+    top_k: int = 40,
+    repeat_penalty: float = 1.1,
 ):
     """
     Respond to a message using the Gemma3 model via Llama.cpp.
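With defaults on every parameter after history, respond() can now be called with just a message. A hypothetical smoke test, not part of app.py, assuming respond() yields progressively longer strings as the reply streams:

reply = ""
for partial in respond("Hello, who are you?", history=[]):
    reply = partial  # assumption: each yield is the accumulated reply so far
print(reply)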
@@ -96,8 +99,18 @@ def respond(
     global llm
     global llm_model
 
+    # Ensure model is not None
+    if model is None:
+        model = "google_gemma-3-1b-it-Q5_K_M.gguf"
+
     # Load the model
     if llm is None or llm_model != model:
+        # Check if model file exists
+        model_path = f"models/{model}"
+        if not os.path.exists(model_path):
+            yield f"Error: Model file not found at {model_path}. Please check your model path."
+            return
+
         llm = Llama(
             model_path=f"models/{model}",
             flash_attn=False,
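The new checks sit inside the existing global-cache pattern: llm and llm_model keep the loaded model across calls, so a reload only happens when the requested filename changes. A stripped-down sketch of that pattern (assuming llama-cpp-python's Llama class, imported elsewhere in app.py; this is not the full respond() body, and it raises instead of yielding the error string):

import os
from llama_cpp import Llama

llm = None
llm_model = None

def get_llm(model: str) -> Llama:
    global llm, llm_model
    if llm is None or llm_model != model:
        model_path = f"models/{model}"
        if not os.path.exists(model_path):
            raise FileNotFoundError(model_path)
        llm = Llama(model_path=model_path, flash_attn=False)
        llm_model = model
    return llm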
@@ -232,9 +245,16 @@ demo = gr.ChatInterface(
     chatbot=gr.Chatbot(scale=1, show_copy_button=True, resizable=True),
     flagging_mode="never",
     editable=True,
+    cache_examples=False,
 )
 
 
 # Launch the chat interface
 if __name__ == "__main__":
-    demo.launch(
+    demo.launch(
+        share=False,
+        server_name="0.0.0.0",
+        server_port=7860,
+        show_api=False,
+        ssr=False
+    )