gemma-3-270m-it

Running on Zero

App Files Files Community

anakin87 committed on Aug 16

Commit

76bc95f

1 Parent(s): 329c0e6

fa + good defaults + style

Browse files

Files changed (2) hide show

README.md +6 -9
app.py +27 -29

README.md CHANGED Viewed

@@ -1,13 +1,10 @@
 ---
-title: Phi 3.5 Mini ITA
-emoji: 💬🇮🇹
-colorFrom: green
-colorTo: red
 sdk: gradio
 sdk_version: 5.42.0
 app_file: app.py
-license: mit
-short_description: Chat with an Italian Small Model
----
-An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).

 ---
+title: Gemma 3 270m IT
+emoji: 💎💬
+colorFrom: powder-blue
+colorTo: royal-blue
 sdk: gradio
 sdk_version: 5.42.0
 app_file: app.py
+short_description: Chat with Gemma 3 270m IT
+---

app.py CHANGED Viewed

@@ -8,20 +8,19 @@ import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 import subprocess
-# subprocess.run(
-#     "pip install flash-attn --no-build-isolation",
-#     env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
-#     shell=True,
-# )
 DESCRIPTION = """\
-# Phi 3.5 mini ITA 💬 🇮🇹
-Fine-tuned version of Microsoft/Phi-3.5-mini-instruct to improve the performance on the Italian language.
-Small (3.82 B parameters) but capable model, with 128k context length.
-[🪪 **Model card**](https://huggingface.co/anakin87/Phi-3.5-mini-ITA)
 """
 MAX_MAX_NEW_TOKENS = 2048
@@ -34,12 +33,13 @@ model_id = "google/gemma-3-270m-it"
 tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True,)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
-    torch_dtype="auto",
     device_map="auto",
-    attn_implementation="eager"
 )
-# model.config.sliding_window = 4096
-# model.eval()
 @spaces.GPU(duration=90)
@@ -80,7 +80,7 @@ def generate(
         temperature=temperature,
         num_beams=1,
         repetition_penalty=repetition_penalty,
-        disable_compile=True,
     )
     t = Thread(target=model.generate, kwargs=generate_kwargs)
     t.start()
@@ -111,42 +111,40 @@ chat_interface = gr.ChatInterface(
             minimum=0,
             maximum=4.0,
             step=0.1,
-            value=0.001,
         ),
         gr.Slider(
             label="Top-p (nucleus sampling)",
             minimum=0.05,
             maximum=1.0,
             step=0.05,
-            value=1.0,
         ),
         gr.Slider(
             label="Top-k",
             minimum=1,
             maximum=1000,
             step=1,
-            value=50,
         ),
         gr.Slider(
             label="Repetition penalty",
             minimum=1.0,
             maximum=2.0,
             step=0.05,
-            value=1.0,
         ),
     ],
     stop_btn=None,
-    examples=[
-        ["Ciao! Come stai?"],
-        ["Pro e contro di una relazione a lungo termine. Elenco puntato con max 3 pro e 3 contro sintetici."],
-        ["Quante ore impiega un uomo per mangiare un elicottero?"],
-        ["Come si apre un file JSON in Python?"],
-        ["Fammi un elenco puntato dei pro e contro di vivere in Italia. Massimo 2 pro e 2 contro."],
-        ["Inventa una breve storia con animali sul valore dell'amicizia."],
-        ["Scrivi un articolo di 100 parole sui 'Benefici dell'open-source nella ricerca sull'intelligenza artificiale'"],
-        ["Can you explain briefly to me what is the Python programming language?"],
-        ["How many hours does it take a man to eat a Helicopter?"],
-        ["Write a 100-word article on 'Benefits of Open-Source in AI research'"],
     ],
     cache_examples=False,
 )

 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 import subprocess
+subprocess.run(
+    "pip install flash-attn --no-build-isolation",
+    env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
+    shell=True,
+)
 DESCRIPTION = """\
+# Gemma 3 270m IT 💎💬
+Try this mini model by Google.
+[🪪 **Model card**](https://huggingface.co/google/gemma-3-270m-it)
 """
 MAX_MAX_NEW_TOKENS = 2048
 tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True,)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     device_map="auto",
+    torch_dtype=torch.bfloat16,
+    attn_implementation="flash_attention_2",
+    trust_remote_code=True,
 )
+model.config.sliding_window = 4096
+model.eval()
 @spaces.GPU(duration=90)
         temperature=temperature,
         num_beams=1,
         repetition_penalty=repetition_penalty,
+        disable_compile=True,  #  https://ai.google.dev/gemma/docs/core/huggingface_text_full_finetune#test_model_inference
     )
     t = Thread(target=model.generate, kwargs=generate_kwargs)
     t.start()
             minimum=0,
             maximum=4.0,
             step=0.1,
+            value=1.0,  # default from https://huggingface.co/docs/transformers/en/main_classes/text_generation
         ),
         gr.Slider(
             label="Top-p (nucleus sampling)",
             minimum=0.05,
             maximum=1.0,
             step=0.05,
+            value=0.95,  # from https://huggingface.co/google/gemma-3-270m-it/blob/main/generation_config.json
         ),
         gr.Slider(
             label="Top-k",
             minimum=1,
             maximum=1000,
             step=1,
+            value=64,  # from https://huggingface.co/google/gemma-3-270m-it/blob/main/generation_config.json
         ),
         gr.Slider(
             label="Repetition penalty",
             minimum=1.0,
             maximum=2.0,
             step=0.05,
+            value=1.0,  # default from https://huggingface.co/docs/transformers/en/main_classes/text_generation
         ),
     ],
     stop_btn=None,
+    examples = [
+        ["Hi! How are you?"],
+        ["Pros and cons of a long-term relationship. Bullet list with max 3 pros and 3 cons, concise."],
+        ["How many hours does it take a man to eat a helicopter?"],
+        ["How do you open a JSON file in Python?"],
+        ["Make a bullet list of pros and cons of living in San Francisco. Maximum 2 pros and 2 cons."],
+        ["Invent a short story with animals about the value of friendship."],
+        ["Can you briefly explain what the Python programming language is?"],
+        ["Write a 100-word article on 'Benefits of Open-Source in AI Research'."],
     ],
     cache_examples=False,
 )