Update app.py
app.py CHANGED
@@ -23,6 +23,9 @@ from exception import CustomExceptionHandling
 huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
 
 # Download gguf model files
+if not os.path.exists("./models"):
+    os.makedirs("./models")
+
 hf_hub_download(
     repo_id="bartowski/google_gemma-3-1b-it-GGUF",
     filename="google_gemma-3-1b-it-Q6_K.gguf",
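The new directory check only matters if the downloaded GGUF actually lands in ./models, which is where the later Llama(model_path=f"models/{model}") call looks. The hunk is cut off before any local_dir argument, so the following is only a sketch of how the pieces are assumed to fit together; local_dir and token are real hf_hub_download parameters, but their use here is inferred rather than shown in the diff.

import os
from huggingface_hub import hf_hub_download

os.makedirs("./models", exist_ok=True)  # same effect as the new exists()/makedirs() pair
hf_hub_download(
    repo_id="bartowski/google_gemma-3-1b-it-GGUF",
    filename="google_gemma-3-1b-it-Q6_K.gguf",
    local_dir="./models",                  # assumption: not visible in this hunk
    token=os.getenv("HUGGINGFACE_TOKEN"),  # token read at the top of app.py
)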
@@ -66,13 +69,13 @@ llm_model = None
 def respond(
     message: str,
     history: List[Tuple[str, str]],
-    model: str,
-    system_message: str,
-    max_tokens: int,
-    temperature: float,
-    top_p: float,
-    top_k: int,
-    repeat_penalty: float,
+    model: str = "google_gemma-3-1b-it-Q5_K_M.gguf",  # Set default model
+    system_message: str = "You are a helpful assistant.",
+    max_tokens: int = 1024,
+    temperature: float = 0.7,
+    top_p: float = 0.95,
+    top_k: int = 40,
+    repeat_penalty: float = 1.1,
 ):
     """
     Respond to a message using the Gemma3 model via Llama.cpp.
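With defaults on every parameter after history, respond() can now be called with just a message. A hypothetical smoke test, not part of app.py, assuming respond() yields progressively longer strings as the reply streams:

reply = ""
for partial in respond("Hello, who are you?", history=[]):
    reply = partial  # assumption: each yield is the accumulated reply so far
print(reply)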
@@ -96,8 +99,18 @@ def respond(
     global llm
     global llm_model
 
+    # Ensure model is not None
+    if model is None:
+        model = "google_gemma-3-1b-it-Q5_K_M.gguf"
+
     # Load the model
     if llm is None or llm_model != model:
+        # Check if model file exists
+        model_path = f"models/{model}"
+        if not os.path.exists(model_path):
+            yield f"Error: Model file not found at {model_path}. Please check your model path."
+            return
+
         llm = Llama(
             model_path=f"models/{model}",
             flash_attn=False,
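The new checks sit inside the existing global-cache pattern: llm and llm_model keep the loaded model across calls, so a reload only happens when the requested filename changes. A stripped-down sketch of that pattern (assuming llama-cpp-python's Llama class, imported elsewhere in app.py; this is not the full respond() body, and it raises instead of yielding the error string):

import os
from llama_cpp import Llama

llm = None
llm_model = None

def get_llm(model: str) -> Llama:
    global llm, llm_model
    if llm is None or llm_model != model:
        model_path = f"models/{model}"
        if not os.path.exists(model_path):
            raise FileNotFoundError(model_path)
        llm = Llama(model_path=model_path, flash_attn=False)
        llm_model = model
    return llm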
@@ -232,9 +245,16 @@ demo = gr.ChatInterface(
     chatbot=gr.Chatbot(scale=1, show_copy_button=True, resizable=True),
     flagging_mode="never",
     editable=True,
+    cache_examples=False,
 )
 
 
 # Launch the chat interface
 if __name__ == "__main__":
-    demo.launch(
+    demo.launch(
+        share=False,
+        server_name="0.0.0.0",
+        server_port=7860,
+        show_api=False,
+        ssr=False
+    )