# gemma-3-270m / app.py
# hadadrjt's picture
# gemma-3-270m: Initial.
# dd61af5
#
# SPDX-FileCopyrightText: Hadad <[email protected]>
# SPDX-License-Identifier: Apache-2.0
#
import os # Used for accessing environment variables
import gradio as gr # Used to create the user interface
# Gradio user interface
#
# Builds a Blocks app made of a descriptive sidebar and a chat interface
# that talks to an OpenAI-style endpoint, then launches the server.
# Configuration is read from the environment:
#   OLLAMA_API_BASE_URL  endpoint handed to gr.load_chat (required)
#   OLLAMA_API_KEY       token handed to gr.load_chat

# Fail fast with an actionable message: without this check an unset
# variable would reach gr.load_chat as None and only surface later as a
# less obvious client or request error.
_ollama_base_url = os.getenv("OLLAMA_API_BASE_URL")
if not _ollama_base_url:
    raise RuntimeError(
        "The OLLAMA_API_BASE_URL environment variable is not set; "
        "it must point to the OpenAI-style Ollama endpoint."
    )

with gr.Blocks(
    fill_height=True,  # Adjusting to the height of the user's screen
    fill_width=True  # Adjusting to the width of the user's screen
) as app:
    # Sidebar
    with gr.Sidebar():
        # Project description
        gr.HTML(
            """
            This space runs the <b><a href=
            "https://huggingface.co/google/gemma-3-270m"
            target="_blank">Gemma 3 (270M)</a></b> model from
            <b>Google</b>, hosted on a server using <b>Ollama</b> and
            accessed via <b>OpenAI-Style inference.</b><br><br>
            Official <b>documentation</b> for using Ollama with
            OpenAI-Style inference can be found
            <b><a href="https://ollama.com/blog/openai-compatibility"
            target="_blank">here</a></b>.<br><br>
            Gemma 3 (270M) runs entirely on <b>CPU</b>, utilizing only a
            <b>single core</b>. This is sufficient due to the small
            size of the model, which makes it possible to operate
            efficiently on minimal hardware.<br><br>
            The Gemma 3 (270M) model can be viewed or downloaded from
            the official Ollama website,
            <b><a href="https://ollama.com/library/gemma3:270m"
            target="_blank">here</a></b>.<br><br>
            Gemma 3 has multimodal capabilities. However, running on
            CPU with a small number of parameters may limit its
            understanding of context. For this reason, the
            upload functionality has been disabled.<br><br>
            <b>Like this project? Feel free to buy me a <a href=
            "https://ko-fi.com/hadad" target="_blank">
            coffee</a></b>.
            """
        )
    # Load chat interface (placed in the main area, next to the sidebar)
    gr.load_chat(
        _ollama_base_url,  # Endpoint
        token=os.getenv("OLLAMA_API_KEY"),  # API Key
        model="gemma3:270m",  # Model
        chatbot=gr.Chatbot(
            label="Ollama | Gemma 3 (270M)",  # Chatbot title
            type="messages",  # OpenAI-style messages format
            show_copy_button=True,  # Allow users to copy responses
            scale=1  # Relative size weight of the chatbot in the layout
        ),
        file_types=None,  # Disable multimodal (no file uploads)
        examples=[
            ["Please introduce yourself."],
            ["What caused World War II?"],
            ["Give me a short introduction to large language model."],
            ["Explain about quantum computers."]
        ],  # Provide sample inputs for users to try
        cache_examples=False,  # Ensure responses always fresh
        show_api=False  # Hide the chat function from the Gradio API docs
    )
# Start the app
app.launch(
    server_name="0.0.0.0",  # Listen on all network interfaces
    pwa=True  # Progressive Web App
)