import subprocess
import time
from collections.abc import Iterator

import gradio as gr
from ollama import chat, ChatResponse
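
# Ollama model tag: "phi" resolves to Microsoft Phi-2 (2.7B parameters).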
model_id = "phi"


def interact(message: str, history: list):
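    """Stream a chat completion from Ollama, yielding the partial reply so far."""
    # Wrap the incoming user message in the role/content format Ollama expects.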
    message_dct = {
        "role": "user",
        "content": message,
    }
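    # With type="messages", Gradio already supplies history as a list of
    # {"role": ..., "content": ...} dicts, so it can be copied directly.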
    chat_history = list(history)
    chat_history.append(message_dct)
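
    # With stream=True, chat() returns an iterator of ChatResponse chunks
    # rather than a single ChatResponse.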
    response: Iterator[ChatResponse] = chat(
        model=model_id,
        messages=chat_history,
        stream=True,
    )
    text_response = ""
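
    # Accumulate the chunks and yield the running text so Gradio renders
    # the reply incrementally.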
    for chunk in response:
        text_response += chunk["message"]["content"]
        yield text_response
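

# Chat UI; type="messages" keeps history in the same role/content format
# used by interact() above.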
interface = gr.ChatInterface(
    fn=interact,
    type="messages",
    title="Microsoft Phi Chat Interface",
    description="Model: Microsoft Phi-2 (2.7B params)",
)
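
# Start the Ollama server in the background, then wait a fixed interval
# for it to come up before pulling the model.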
print("\n\nStarting Ollama...\n\n")
subprocess.Popen(["ollama", "serve"])
time.sleep(10)
print("\n\nOllama started successfully!!\n\nPulling Microsoft Phi-2...\n\n")
subprocess.run(["ollama", "pull", model_id], check=True)
print("\n\nMicrosoft Phi-2 pulled successfully!!\n\n")
interface.launch(server_name="0.0.0.0", server_port=7860)