File size: 3,732 Bytes
503de01
 
 
 
 
d6bb942
 
 
503de01
d6bb942
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
503de01
 
d6bb942
 
503de01
 
 
 
 
 
 
 
d6bb942
503de01
 
 
 
 
 
 
d6bb942
503de01
fe6cc61
503de01
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import gradio as gr
from gradio_client import Client, handle_file
import requests
from PIL import Image
import io
import fitz  # PyMuPDF
import tempfile
import os

# Extract the text and the embedded images of a PDF
def extract_from_pdf(pdf_path):
    """Extract all text and every embedded image from a PDF file.

    Args:
        pdf_path: Path of the PDF, passed straight to ``fitz.open``.

    Returns:
        A ``(text, images)`` tuple. ``text`` is the text of every page
        concatenated in page order; ``images`` is a list of ``PIL.Image``
        objects, one per image entry reported for each page.
        If anything goes wrong, the tuple is
        ``("Erro ao processar PDF: <reason>", [])`` instead of an exception.
    """
    try:
        page_texts = []
        extracted_images = []

        # The context manager closes the document even if a page fails,
        # so the underlying file handle is never leaked.
        with fitz.open(pdf_path) as doc:
            for page in doc:
                page_texts.append(page.get_text())

                for img in page.get_images(full=True):
                    # The first item of each entry is the image's xref.
                    xref = img[0]
                    image_bytes = doc.extract_image(xref)["image"]
                    # The image reads from its own in-memory buffer, so it
                    # stays usable after the document has been closed.
                    extracted_images.append(Image.open(io.BytesIO(image_bytes)))

        return "".join(page_texts), extracted_images
    except Exception as e:
        # Callers rely on getting a message back rather than an exception.
        return f"Erro ao processar PDF: {e}", []

# Main entry point: run one prediction for the uploaded file
def predict(file, question, seed, top_p, temperature):
    """Ask the remote Janus-Pro-7B Space a question about a PDF or an image.

    For a PDF, the first embedded image is sent to the model and the text
    extracted from the PDF is prepended to the question. Any other input is
    treated as an image given by local path or http(s) URL.

    Args:
        file: Local file path or URL of the PDF/image.
        question: Question to ask about the file.
        seed: Forwarded unchanged to the remote endpoint.
        top_p: Forwarded unchanged to the remote endpoint.
        temperature: Forwarded unchanged to the remote endpoint.

    Returns:
        Whatever the ``/multimodal_understanding`` endpoint returns, or a
        human-readable message string when the input is unusable or the
        call fails. This function does not raise.
    """
    tmp_path = None
    try:
        if not file:
            # No upload: report it explicitly instead of an AttributeError text.
            return "Erro durante a predição: nenhum arquivo foi enviado."

        # Case-insensitive so that "REPORT.PDF" is also recognised.
        if file.lower().endswith(".pdf"):
            extracted_text, extracted_images = extract_from_pdf(file)

            if not extracted_images:
                # extract_from_pdf reports failures through its text slot;
                # surface that instead of the misleading "no image" message.
                if extracted_text.startswith("Erro ao processar PDF:"):
                    return extracted_text
                return "Nenhuma imagem encontrada no PDF."

            # Only the first image of the PDF is sent to the model.
            image = extracted_images[0]
            if image.mode == "CMYK":
                # PNG cannot store CMYK data; convert before saving.
                image = image.convert("RGB")

            # Close the descriptor before saving so the path can be reopened
            # on every platform; the file is removed in ``finally``.
            fd, tmp_path = tempfile.mkstemp(suffix=".png")
            os.close(fd)
            image.save(tmp_path, format="PNG")
            image_input = handle_file(tmp_path)

            # Give the model the PDF text as context for the question.
            if extracted_text:
                question = f"Texto extraído do PDF:\n{extracted_text}\n\nPergunta: {question}"
        else:
            # handle_file accepts both local paths and http(s) URLs, so no
            # manual download is needed.
            image_input = handle_file(file)

        client = Client("deepseek-ai/Janus-Pro-7B")
        return client.predict(
            image=image_input,
            question=question,
            seed=seed,
            top_p=top_p,
            temperature=temperature,
            api_name="/multimodal_understanding",
        )
    except Exception as e:
        # UI boundary: show the failure in the answer box instead of crashing.
        return f"Erro durante a predição: {e}"
    finally:
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except OSError:
                # Best-effort cleanup; a leftover temp file is harmless.
                pass

# Input widgets, declared in the order predict() receives their values
file_input = gr.File(
    file_types=[".pdf", ".png", ".jpg", ".jpeg"],
    label="Upload PDF or Image",
)
question_input = gr.Textbox(
    placeholder="Ask something about the file...",
    label="Question",
)
seed_slider = gr.Slider(minimum=0, maximum=100, value=42, label="Seed")
top_p_slider = gr.Slider(minimum=0, maximum=1, value=0.95, label="Top-p")
temp_slider = gr.Slider(minimum=0, maximum=1, value=0.1, label="Temperature")

# Wire the widgets to predict() and show its return value in a text box
demo = gr.Interface(
    fn=predict,
    inputs=[file_input, question_input, seed_slider, top_p_slider, temp_slider],
    outputs=gr.Textbox(label="Answer"),
    title="Janus-Pro-7B Multimodal Demo",
    description="Ask questions about PDFs or images using the Janus-Pro-7B model",
    # One example row: file, question, seed, top-p, temperature
    examples=[
        [
            "https://raw.githubusercontent.com/gradio-app/gradio/main/test/test_files/bus.png",
            "What's in this image?",
            42,
            0.95,
            0.1,
        ],
    ],
)

# Start the web UI only when this file is run as a script
if __name__ == "__main__":
    demo.launch()