import logging

import gradio as gr
from transformers import AutoModel, AutoTokenizer
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# ------------------------------
# 🔹 Load Bioformer-8L Model
# ------------------------------
BIOFORMER_MODEL = "bioformers/bioformer-8L"

# Bioformer-8L is a compact BERT-style encoder for biomedical text, so it is
# loaded with AutoModel rather than a causal-LM head. It is loaded here but
# not yet used by the chat pipeline below.
bioformer_tokenizer = AutoTokenizer.from_pretrained(BIOFORMER_MODEL)
bioformer_model = AutoModel.from_pretrained(BIOFORMER_MODEL)
# ------------------------------
# 🔹 Load DeepSeek-R1-Distill-Qwen-7B-GGUF Model
# ------------------------------
DEEPSEEK_REPO = "lmstudio-community/DeepSeek-R1-Distill-Qwen-7B-GGUF"
DEEPSEEK_FILENAME = "DeepSeek-R1-Distill-Qwen-7B-Q4_0.gguf"
model_path = hf_hub_download(repo_id=DEEPSEEK_REPO, filename=DEEPSEEK_FILENAME)
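# hf_hub_download caches the weights locally (under ~/.cache/huggingface by
# default), so the roughly 4 GB Q4_0 GGUF file is only fetched on first launch.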
llm = Llama(
    model_path=model_path,
    n_ctx=4096,
    n_gpu_layers=0,  # CPU-only inference; no layers offloaded to a GPU
    n_batch=256,
)
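# NOTE: with a GPU-enabled llama-cpp-python build, decoding is much faster when
# layers are offloaded (e.g. n_gpu_layers=-1 offloads all of them); 0 keeps the
# app runnable on CPU-only Spaces hardware.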
logger.info("Models Loaded Successfully.")
# ------------------------------
# 🔹 Unified Medical Prompt
# ------------------------------
UNIFIED_MEDICAL_PROMPT = """
You are an advanced Medical AI Assistant capable of providing thorough,
comprehensive answers for a wide range of medical specialties:
General Practice, Radiology, Cardiology, Neurology, Psychiatry, Pediatrics,
Endocrinology, Oncology, and more.
You can:
1) Analyze images if provided (Radiology).
2) Retrieve relevant documents from a knowledge base (Vector Store).
3) Provide scientific, evidence-based explanations and references when possible.
Always strive to provide a detailed, helpful, and empathetic response.
"""
# ------------------------------
# 🔹 Chat Function
# ------------------------------
def chat_with_agent(user_query, image_file=None):
    """Stream the assistant's answer for a medical query.

    image_file is accepted from the UI, but image analysis is not implemented
    in this version; the upload is currently ignored.
    """
    # Combine the system prompt and the user's question into one context.
    combined_context = f"""
{UNIFIED_MEDICAL_PROMPT}
Patient Query: "{user_query}"
Your Response:
"""

    # Generate with DeepSeek-R1-Distill, yielding the accumulated text after
    # each chunk so Gradio can stream partial output to the UI.
    response_accumulator = ""
    for token in llm(
        prompt=combined_context,
        max_tokens=1024,
        temperature=0.7,
        top_p=0.9,
        stream=True,
    ):
        response_accumulator += token["choices"][0]["text"]
        yield response_accumulator
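# Programmatic usage sketch: chat_with_agent is a generator, and each yielded
# value is the full response accumulated so far (not a per-token delta), e.g.:
#
#   answer = ""
#   for answer in chat_with_agent("What causes iron-deficiency anemia?"):
#       pass
#   print(answer)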
# ------------------------------
# 🔹 Gradio Interface
# ------------------------------
with gr.Blocks(title="🏥 Medical AI Assistant") as demo:
    gr.Markdown("""
    # 🏥 Medical AI Assistant
    _Your intelligent medical assistant, powered by DeepSeek-R1-Distill-Qwen-7B._
    """)

    with gr.Row():
        user_input = gr.Textbox(label="💬 Ask a medical question", placeholder="Type your question here...")
        image_file = gr.Image(label="📷 Upload Medical Image (Optional)", type="filepath")

    submit_btn = gr.Button("🚀 Submit", variant="primary")
    output_text = gr.Textbox(label="📝 Assistant's Response", interactive=False, lines=25)

    # chat_with_agent is a generator; streaming into the Textbox works because
    # the queue is enabled at launch (see demo.queue() below).
    submit_btn.click(fn=chat_with_agent, inputs=[user_input, image_file], outputs=output_text)
if __name__ == "__main__":
    # Queue enables streaming generator outputs; bind to all interfaces on 7860.
    demo.queue().launch(server_name="0.0.0.0", server_port=7860)