DHEIVER committed on
Commit
d6bb942
·
verified ·
1 Parent(s): d83db7a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +81 -27
app.py CHANGED
@@ -3,33 +3,86 @@ from gradio_client import Client, handle_file
3
  import requests
4
  from PIL import Image
5
  import io
 
 
 
6
 
7
- def predict(image, question, seed, top_p, temperature):
8
- # Inicializa o cliente do Gradio
9
- client = Client("deepseek-ai/Janus-Pro-7B")
10
-
11
- # Prepara a imagem para envio
12
- if image.startswith('http'):
13
- response = requests.get(image)
14
- img_path = handle_file(io.BytesIO(response.content))
15
- else:
16
- img_path = handle_file(image)
17
-
18
- # Faz a predição
19
- result = client.predict(
20
- image=img_path,
21
- question=question,
22
- seed=seed,
23
- top_p=top_p,
24
- temperature=temperature,
25
- api_name="/multimodal_understanding"
26
- )
27
-
28
- return result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
  # Componentes da interface
31
- image_input = gr.Image(label="Upload Image", type="filepath")
32
- question_input = gr.Textbox(label="Question", placeholder="Ask something about the image...")
33
  seed_slider = gr.Slider(0, 100, value=42, label="Seed")
34
  top_p_slider = gr.Slider(0, 1, value=0.95, label="Top-p")
35
  temp_slider = gr.Slider(0, 1, value=0.1, label="Temperature")
@@ -38,7 +91,7 @@ temp_slider = gr.Slider(0, 1, value=0.1, label="Temperature")
38
  demo = gr.Interface(
39
  fn=predict,
40
  inputs=[
41
- image_input,
42
  question_input,
43
  seed_slider,
44
  top_p_slider,
@@ -46,9 +99,10 @@ demo = gr.Interface(
46
  ],
47
  outputs=gr.Textbox(label="Answer"),
48
  title="Janus-Pro-7B Multimodal Demo",
49
- description="Ask questions about images using the Janus-Pro-7B model",
50
  examples=[
51
- ["https://raw.githubusercontent.com/gradio-app/gradio/main/test/test_files/bus.png", "What's in this image?", 42, 0.95, 0.1]
 
52
  ]
53
  )
54
 
 
3
  import requests
4
  from PIL import Image
5
  import io
6
+ import fitz # PyMuPDF
7
+ import tempfile
8
+ import os
9
 
10
# Extract text and embedded images from a PDF
def extract_from_pdf(pdf_path):
    """Extract all page text and embedded images from a PDF file.

    Args:
        pdf_path: Path of the PDF, passed as-is to ``fitz.open``.

    Returns:
        A ``(text, images)`` tuple. ``text`` is the text of every page
        concatenated in page order; ``images`` is a list of ``PIL.Image``
        objects in the order they are found. This function never raises:
        on any failure it returns ``("Erro ao processar PDF: <reason>", [])``,
        so callers must treat an empty image list plus that prefix as an error.
    """
    try:
        page_texts = []
        extracted_images = []

        # The context manager guarantees the document is closed, even when a
        # page or image fails to parse half-way through.
        with fitz.open(pdf_path) as doc:
            for page in doc:
                page_texts.append(page.get_text())

                for img in page.get_images(full=True):
                    # The first field of each image entry is its xref number
                    xref = img[0]
                    image_bytes = doc.extract_image(xref)["image"]
                    # The bytes are copied out of the document, so the PIL
                    # image stays usable after the document is closed.
                    extracted_images.append(Image.open(io.BytesIO(image_bytes)))

        # Join once instead of growing a string page by page
        return "".join(page_texts), extracted_images
    except Exception as e:
        return f"Erro ao processar PDF: {str(e)}", []
37
+
38
# Download a remote file into a temporary file and return its path
def _download_to_temp(url, suffix):
    """Fetch *url* and store the body in a new temp file ending in *suffix*.

    The caller owns the returned path and is responsible for deleting it.
    Raises whatever ``requests`` raises on network or HTTP errors.
    """
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    fd, tmp_path = tempfile.mkstemp(suffix=suffix)
    with os.fdopen(fd, "wb") as tmp_file:
        tmp_file.write(response.content)
    return tmp_path


# Save a PIL image as a temporary PNG file and return its path
def _save_image_to_temp(image):
    """Write *image* to a new temporary ``.png`` file.

    The caller owns the returned path and is responsible for deleting it.
    """
    # PNG cannot store modes such as CMYK, which are common in PDFs
    if image.mode not in ("1", "L", "LA", "P", "RGB", "RGBA"):
        image = image.convert("RGB")
    fd, tmp_path = tempfile.mkstemp(suffix=".png")
    # Close the handle first: saving into a still-open temp file fails on
    # platforms that lock open files.
    os.close(fd)
    try:
        image.save(tmp_path, format="PNG")
    except Exception:
        os.remove(tmp_path)
        raise
    return tmp_path


# Main prediction function
def predict(file, question, seed, top_p, temperature):
    """Ask the Janus-Pro-7B Space a question about an image or a PDF.

    For a PDF, the first embedded image is sent as the image input and any
    extracted text is prepended to the question. Any other file is sent
    directly as the image.

    Args:
        file: Local path or http(s) URL of the file. An object exposing the
            path as ``.name`` is also accepted (assumed to cover upload
            components that pass file objects -- TODO confirm against the
            Gradio version in use).
        question: Question to ask about the file.
        seed: Seed forwarded to the remote endpoint.
        top_p: Top-p value forwarded to the remote endpoint.
        temperature: Temperature forwarded to the remote endpoint.

    Returns:
        The value returned by the remote ``/multimodal_understanding``
        endpoint, or a human-readable error message string. This function
        never raises.
    """
    path = file if isinstance(file, str) else getattr(file, "name", None)
    if not path:
        return "Erro durante a predição: nenhum arquivo enviado."

    temp_paths = []  # temp files created here, removed in ``finally``
    try:
        is_url = path.startswith(("http://", "https://"))
        # Ignore a URL query string and letter case when reading the extension
        extension = os.path.splitext(path.split("?", 1)[0])[1].lower()

        if extension == ".pdf":
            pdf_path = path
            if is_url:
                # A URL cannot be opened as a local PDF: download it first
                pdf_path = _download_to_temp(path, ".pdf")
                temp_paths.append(pdf_path)

            extracted_text, extracted_images = extract_from_pdf(pdf_path)

            if not extracted_images:
                # extract_from_pdf reports failures through its text value;
                # surface that instead of claiming the PDF has no images.
                if extracted_text.startswith("Erro ao processar PDF:"):
                    return extracted_text
                return "Nenhuma imagem encontrada no PDF."

            # Only the first image of the PDF is sent to the model
            image_tmp_path = _save_image_to_temp(extracted_images[0])
            temp_paths.append(image_tmp_path)
            img_path = handle_file(image_tmp_path)

            # If the PDF has text, add it to the prompt
            if extracted_text:
                question = f"Texto extraído do PDF:\n{extracted_text}\n\nPergunta: {question}"
        else:
            # handle_file accepts both local paths and URLs, so no manual
            # download is needed for images.
            img_path = handle_file(path)

        # Initialise the Gradio client
        client = Client("deepseek-ai/Janus-Pro-7B")

        # Run the prediction
        result = client.predict(
            image=img_path,
            question=question,
            seed=seed,
            top_p=top_p,
            temperature=temperature,
            api_name="/multimodal_understanding"
        )

        return result
    except Exception as e:
        return f"Erro durante a predição: {str(e)}"
    finally:
        # Best-effort cleanup of the temporary files created above
        for tmp_path in temp_paths:
            try:
                os.remove(tmp_path)
            except OSError:
                pass
82
 
83
  # Componentes da interface
84
+ file_input = gr.File(label="Upload PDF or Image", file_types=[".pdf", ".png", ".jpg", ".jpeg"])
85
+ question_input = gr.Textbox(label="Question", placeholder="Ask something about the file...")
86
  seed_slider = gr.Slider(0, 100, value=42, label="Seed")
87
  top_p_slider = gr.Slider(0, 1, value=0.95, label="Top-p")
88
  temp_slider = gr.Slider(0, 1, value=0.1, label="Temperature")
 
91
  demo = gr.Interface(
92
  fn=predict,
93
  inputs=[
94
+ file_input,
95
  question_input,
96
  seed_slider,
97
  top_p_slider,
 
99
  ],
100
  outputs=gr.Textbox(label="Answer"),
101
  title="Janus-Pro-7B Multimodal Demo",
102
+ description="Ask questions about PDFs or images using the Janus-Pro-7B model",
103
  examples=[
104
+ ["https://raw.githubusercontent.com/gradio-app/gradio/main/test/test_files/bus.png", "What's in this image?", 42, 0.95, 0.1],
105
+ ["https://example.com/sample.pdf", "Summarize the text in this PDF.", 42, 0.95, 0.1]
106
  ]
107
  )
108