Spaces:
Running
Running
import gradio as gr | |
from gradio_client import Client, handle_file | |
import requests | |
from PIL import Image | |
import io | |
import fitz # PyMuPDF | |
import tempfile | |
import os | |
def extract_from_pdf(pdf_path):
    """Extract all text and embedded images from a PDF file.

    Args:
        pdf_path: Path of the PDF to open with PyMuPDF.

    Returns:
        A ``(text, images)`` tuple. ``text`` is the text of every page
        concatenated in page order; ``images`` is a list of PIL images, one
        per image reference listed on each page. If anything fails, the tuple
        ``("Erro ao processar PDF: <reason>", [])`` is returned instead of
        raising.
    """
    try:
        page_texts = []
        extracted_images = []
        # The context manager closes the document even when a page fails.
        with fitz.open(pdf_path) as doc:
            for page in doc:
                page_texts.append(page.get_text())
                for img in page.get_images(full=True):
                    xref = img[0]  # first entry of the tuple is the xref
                    image_bytes = doc.extract_image(xref)["image"]
                    # The BytesIO owns a copy of the bytes, so the image
                    # stays usable after the document is closed.
                    extracted_images.append(Image.open(io.BytesIO(image_bytes)))
        return "".join(page_texts), extracted_images
    except Exception as e:
        # Callers expect an error message in place of the text, not a raise.
        return f"Erro ao processar PDF: {str(e)}", []
def _save_image_as_temp_png(image):
    """Write a PIL image to a new temporary PNG file and return its path."""
    # PNG cannot store every PIL mode (e.g. CMYK, which PDFs often embed).
    if image.mode not in ("1", "L", "LA", "I", "I;16", "P", "RGB", "RGBA"):
        image = image.convert("RGB")
    tmp_file = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
    try:
        with tmp_file:
            # Write through the open handle rather than reopening by name.
            image.save(tmp_file, format="PNG")
    except Exception:
        # Do not leave a half-written file behind when encoding fails.
        try:
            os.remove(tmp_file.name)
        except OSError:
            pass
        raise
    return tmp_file.name


def predict(file, question, seed, top_p, temperature):
    """Ask the Janus-Pro-7B Space a question about a PDF or an image.

    For a PDF, the first embedded image is sent to the model and any
    extracted text is prepended to the question. Any other input is treated
    as an image given as a local path or an http(s) URL.

    Args:
        file: Local file path or URL of the input. Objects exposing the path
            through a ``name`` attribute are accepted as well.
        question: Question to ask about the file.
        seed: Seed forwarded to the remote endpoint.
        top_p: Top-p value forwarded to the remote endpoint.
        temperature: Temperature forwarded to the remote endpoint.

    Returns:
        The value returned by the remote endpoint, or a message explaining
        why no prediction was made (no file, no image in the PDF, or an
        error raised along the way).
    """
    if file is None:
        return "Nenhum arquivo enviado."

    tmp_path = None
    try:
        # Accept plain paths/URLs as well as file-like wrappers with `.name`.
        if isinstance(file, (str, os.PathLike)):
            file_path = os.fspath(file)
        else:
            file_path = str(getattr(file, "name", file))

        if file_path.lower().endswith(".pdf"):
            extracted_text, extracted_images = extract_from_pdf(file_path)
            if not extracted_images:
                return "Nenhuma imagem encontrada no PDF."
            # Only the first image of the PDF is sent to the model.
            tmp_path = _save_image_as_temp_png(extracted_images[0])
            img_path = handle_file(tmp_path)
            if extracted_text:
                question = f"Texto extraído do PDF:\n{extracted_text}\n\nPergunta: {question}"
        else:
            # handle_file takes local paths and http(s) URLs directly, so no
            # manual download is needed.
            img_path = handle_file(file_path)

        client = Client("deepseek-ai/Janus-Pro-7B")
        return client.predict(
            image=img_path,
            question=question,
            seed=seed,
            top_p=top_p,
            temperature=temperature,
            api_name="/multimodal_understanding",
        )
    except Exception as e:
        return f"Erro durante a predição: {str(e)}"
    finally:
        # The temporary PNG is only needed for the duration of the request.
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except OSError:
                pass
# Input widgets shown in the interface
file_input = gr.File(
    label="Upload PDF or Image",
    file_types=[".pdf", ".png", ".jpg", ".jpeg"],
)
question_input = gr.Textbox(
    label="Question",
    placeholder="Ask something about the file...",
)
seed_slider = gr.Slider(minimum=0, maximum=100, value=42, label="Seed")
top_p_slider = gr.Slider(minimum=0, maximum=1, value=0.95, label="Top-p")
temp_slider = gr.Slider(minimum=0, maximum=1, value=0.1, label="Temperature")

# Wire the widgets to `predict`; their order matches its parameter order
demo = gr.Interface(
    fn=predict,
    inputs=[file_input, question_input, seed_slider, top_p_slider, temp_slider],
    outputs=gr.Textbox(label="Answer"),
    title="Janus-Pro-7B Multimodal Demo",
    description="Ask questions about PDFs or images using the Janus-Pro-7B model",
    examples=[
        [
            "https://raw.githubusercontent.com/gradio-app/gradio/main/test/test_files/bus.png",
            "What's in this image?",
            42,
            0.95,
            0.1,
        ],
    ],
)

# Start the web UI only when the file is run as a script
if __name__ == "__main__":
    demo.launch()