# SPDX-FileCopyrightText: Hadad
# SPDX-License-Identifier: Apache-2.0

"""Gradio front-end for the Gemma 3 (270M) model served by Ollama.

Builds a Blocks app with a sidebar description and a chat interface that
talks to an OpenAI-compatible Ollama endpoint, then launches the server.
"""

import os  # Used for accessing environment variables

import gradio as gr  # Used to create the user interface

# Fail fast with a clear message instead of letting gr.load_chat receive
# None and raise an obscure error deep inside gradio.
OLLAMA_API_BASE_URL = os.getenv("OLLAMA_API_BASE_URL")
if not OLLAMA_API_BASE_URL:
    raise RuntimeError("OLLAMA_API_BASE_URL environment variable is not set")

# Gradio user interface
with gr.Blocks(
    fill_height=True,  # Adjust to the height of the user's screen
    fill_width=True,  # Adjust to the width of the user's screen
) as app:
    # Sidebar with the project description
    with gr.Sidebar():
        gr.HTML(
            """
This space runs the Gemma 3 (270M) model from Google, hosted on a server using Ollama and accessed via OpenAI-Style inference.

Official documentation for using Ollama with OpenAI-Style inference can be found here.

Gemma 3 (270M) runs entirely on CPU, utilizing only a single core. This is sufficient due to the small size of the model, which makes it possible to operate efficiently on minimal hardware.

The Gemma 3 (270M) model can be viewed or downloaded from the official Ollama website, here.

Gemma 3 has multimodal capabilities. However, running on CPU with a small number of parameters may limit its understanding of context. For this reason, the upload functionality has been disabled.

Like this project? Feel free to buy me a coffee.
"""
        )

    # Load the chat interface backed by the OpenAI-style Ollama endpoint.
    gr.load_chat(
        OLLAMA_API_BASE_URL,  # Endpoint
        token=os.getenv("OLLAMA_API_KEY"),  # API key (may be optional)
        model="gemma3:270m",  # Model name registered on the Ollama server
        chatbot=gr.Chatbot(
            label="Ollama | Gemma 3 (270M)",  # Chatbot title
            type="messages",  # OpenAI-style messages format
            show_copy_button=True,  # Allow users to copy responses
            scale=1,  # Standard display scaling
        ),
        file_types=None,  # Disable multimodal uploads (CPU-only, small model)
        examples=[
            ["Please introduce yourself."],
            ["What caused World War II?"],
            ["Give me a short introduction to large language model."],
            ["Explain about quantum computers."],
        ],  # Provide sample inputs for users to try
        cache_examples=False,  # Ensure responses are always fresh
        show_api=False,  # Disable the Gradio API
    )

# Start the app
app.launch(
    server_name="0.0.0.0",  # Listen on all network interfaces
    pwa=True,  # Progressive Web App
)