openfree committed on
Commit
a1dc3ca
·
verified ·
1 Parent(s): 72c2546

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +339 -224
app.py CHANGED
@@ -1,3 +1,7 @@
 
 
 
 
1
  import base64
2
  import json
3
  import os
@@ -8,9 +12,9 @@ import re
8
  import uuid
9
  import pymupdf
10
 
 
11
  os.system('pip uninstall -y magic-pdf')
12
  os.system('pip install git+https://github.com/opendatalab/MinerU.git@dev')
13
-
14
  os.system('wget https://github.com/opendatalab/MinerU/raw/dev/scripts/download_models_hf.py -O download_models_hf.py')
15
  os.system('python download_models_hf.py')
16
 
@@ -26,7 +30,7 @@ with open('/home/user/magic-pdf.json', 'w') as file:
26
  json.dump(data, file, indent=4)
27
 
28
  os.system('cp -r paddleocr /home/user/.paddleocr')
29
- from gradio_pdf import PDF
30
 
31
  import gradio as gr
32
  from loguru import logger
@@ -65,81 +69,9 @@ def create_css():
65
  color: #6B7280;
66
  font-size: 1.1rem;
67
  }
68
- /* ์ปดํฌ๋„ŒํŠธ ์Šคํƒ€์ผ๋ง */
69
- .gr-box, .gr-panel {
70
- border: 2px solid #E0E7FF !important;
71
- border-radius: 12px !important;
72
- box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1) !important;
73
- background: white !important;
74
- }
75
- /* ํŒŒ์ผ ์—…๋กœ๋“œ ์˜์—ญ */
76
- .file-upload {
77
- border: 2px dashed #93C5FD !important;
78
- border-radius: 8px !important;
79
- padding: 2rem !important;
80
- background: #F0F9FF !important;
81
- transition: all 0.3s ease;
82
- }
83
- .file-upload:hover {
84
- background: #E0F2FE !important;
85
- border-color: #60A5FA !important;
86
- }
87
- /* ๋ฒ„ํŠผ ์Šคํƒ€์ผ๋ง */
88
- .gr-button.primary-button {
89
- background: linear-gradient(90deg, #2563EB 0%, #7C3AED 100%) !important;
90
- color: white !important;
91
- border: none !important;
92
- border-radius: 8px !important;
93
- padding: 0.75rem 1.5rem !important;
94
- font-weight: bold !important;
95
- transition: opacity 0.2s !important;
96
- }
97
- .gr-button.primary-button:hover {
98
- opacity: 0.9 !important;
99
- }
100
- .gr-button.secondary-button {
101
- background: white !important;
102
- color: #4B5563 !important;
103
- border: 1px solid #D1D5DB !important;
104
- border-radius: 8px !important;
105
- padding: 0.75rem 1.5rem !important;
106
- }
107
- .gr-button.secondary-button:hover {
108
- background: #F9FAFB !important;
109
- }
110
- /* ์Šฌ๋ผ์ด๋” ์Šคํƒ€์ผ๋ง */
111
- .gr-slider {
112
- background: #E0E7FF !important;
113
- }
114
- .gr-slider .gr-slider-handle {
115
- background: #4F46E5 !important;
116
- }
117
- /* ์ฒดํฌ๋ฐ•์Šค ์Šคํƒ€์ผ๋ง */
118
- .gr-checkbox {
119
- border-color: #6366F1 !important;
120
- }
121
- .gr-checkbox:checked {
122
- background-color: #4F46E5 !important;
123
- }
124
- /* ํƒญ ์Šคํƒ€์ผ๋ง */
125
- .gr-tabs {
126
- border-bottom: 2px solid #E0E7FF !important;
127
- }
128
- .gr-tab-button {
129
- color: #6B7280 !important;
130
- padding: 0.75rem 1rem !important;
131
- font-weight: 500 !important;
132
- }
133
- .gr-tab-button.selected {
134
- color: #4F46E5 !important;
135
- border-bottom: 2px solid #4F46E5 !important;
136
- }
137
- /* ๋งˆํฌ๋‹ค์šด ์ถœ๋ ฅ ์˜์—ญ */
138
- .markdown-output {
139
- background: white !important;
140
- border-radius: 8px !important;
141
- padding: 1rem !important;
142
- box-shadow: inset 0 2px 4px rgba(0, 0, 0, 0.05) !important;
143
  }
144
  """
145
 
@@ -202,41 +134,56 @@ def replace_image_with_base64(markdown_text, image_dir_path):
202
  return f"![{relative_path}](data:image/jpeg;base64,{base64_image})"
203
  return re.sub(pattern, replace, markdown_text)
204
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
  def to_markdown(file_path, end_pages, is_ocr, layout_mode, formula_enable, table_enable, language):
 
 
 
 
206
  file_path = to_pdf(file_path)
207
  if end_pages > 20:
208
  end_pages = 20
209
  local_md_dir, file_name = parse_pdf(file_path, './output', end_pages - 1, is_ocr,
210
- layout_mode, formula_enable, table_enable, language)
211
  archive_zip_path = os.path.join("./output", compute_sha256(local_md_dir) + ".zip")
212
  zip_archive_success = compress_directory_to_zip(local_md_dir, archive_zip_path)
213
  if zip_archive_success == 0:
214
  logger.info("์••์ถ• ์„ฑ๊ณต")
215
  else:
216
  logger.error("์••์ถ• ์‹คํŒจ")
 
217
  md_path = os.path.join(local_md_dir, file_name + ".md")
218
  with open(md_path, 'r', encoding='utf-8') as f:
219
  txt_content = f.read()
 
220
  md_content = replace_image_with_base64(txt_content, local_md_dir)
221
- new_pdf_path = os.path.join(local_md_dir, file_name + "_layout.pdf")
222
- return md_content, txt_content, archive_zip_path, new_pdf_path
223
 
224
- def to_pdf(file_path):
225
- with pymupdf.open(file_path) as f:
226
- if f.is_pdf:
227
- return file_path
228
- else:
229
- pdf_bytes = f.convert_to_pdf()
230
- unique_filename = f"{uuid.uuid4()}.pdf"
231
- tmp_file_path = os.path.join(os.path.dirname(file_path), unique_filename)
232
- with open(tmp_file_path, 'wb') as tmp_pdf_file:
233
- tmp_pdf_file.write(pdf_bytes)
234
- return tmp_file_path
235
 
236
- latex_delimiters = [{"left": "$$", "right": "$$", "display": True},
237
- {"left": '$', "right": '$', "display": False}]
 
 
238
 
239
  def init_model():
 
 
 
240
  from magic_pdf.model.doc_analyze_by_custom_model import ModelSingleton
241
  try:
242
  model_manager = ModelSingleton()
@@ -272,142 +219,310 @@ other_lang = ['ch', 'en', 'korean', 'japan', 'chinese_cht', 'ta', 'te', 'ka']
272
  all_lang = ['', 'auto']
273
  all_lang.extend([*other_lang, *latin_lang, *arabic_lang, *cyrillic_lang, *devanagari_lang])
274
 
275
- if __name__ == "__main__":
276
- with gr.Blocks(title="OCR FLEX", css=create_css()) as demo:
277
- # ํƒ€์ดํ‹€ ์˜์—ญ
278
- with gr.Row(elem_classes="title-area"):
279
- gr.HTML("""
280
- <h1>OCR FLEX</h1>
281
- <p>PDF์™€ ์ด๋ฏธ์ง€์—์„œ ํ…์ŠคํŠธ๋ฅผ ๋น ๋ฅด๊ณ  ์ •ํ™•ํ•˜๊ฒŒ ์ถ”์ถœํ•˜์„ธ์š”</p>
282
- """)
283
-
284
- with gr.Row():
285
- # ์™ผ์ชฝ ํŒจ๋„
286
- with gr.Column(variant='panel', scale=5):
287
- file = gr.File(
288
- label="PDF ๋˜๋Š” ์ด๋ฏธ์ง€ ํŒŒ์ผ์„ ์—…๋กœ๋“œํ•˜์„ธ์š”",
289
- file_types=[".pdf", ".png", ".jpeg", ".jpg"],
290
- elem_classes="file-upload"
291
- )
292
-
293
- max_pages = gr.Slider(
294
- 1, 20, 10,
295
- step=1,
296
- label='์ตœ๋Œ€ ๋ณ€ํ™˜ ํŽ˜์ด์ง€ ์ˆ˜',
297
- elem_classes="custom-slider"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
298
  )
299
-
300
- with gr.Row():
301
- layout_mode = gr.Dropdown(
302
- ["layoutlmv3", "doclayout_yolo"],
303
- label="๋ ˆ์ด์•„์›ƒ ๋ชจ๋ธ",
304
- value="doclayout_yolo",
305
- elem_classes="custom-dropdown"
306
- )
307
- language = gr.Dropdown(
308
- all_lang,
309
- label="์–ธ์–ด",
310
- value='auto',
311
- elem_classes="custom-dropdown"
312
- )
313
-
314
- with gr.Row():
315
- formula_enable = gr.Checkbox(
316
- label="์ˆ˜์‹ ์ธ์‹ ํ™œ์„ฑํ™”",
317
- value=True,
318
- elem_classes="custom-checkbox"
319
- )
320
- is_ocr = gr.Checkbox(
321
- label="OCR ๊ฐ•์ œ ํ™œ์„ฑํ™”",
322
- value=False,
323
- elem_classes="custom-checkbox"
324
- )
325
- table_enable = gr.Checkbox(
326
- label="ํ‘œ ์ธ์‹ ํ™œ์„ฑํ™”(ํ…Œ์ŠคํŠธ)",
327
- value=True,
328
- elem_classes="custom-checkbox"
329
- )
330
-
331
- with gr.Row():
332
- change_bu = gr.Button(
333
- "๋ณ€ํ™˜",
334
- elem_classes="primary-button"
335
- )
336
- clear_bu = gr.ClearButton(
337
- value="์ดˆ๊ธฐํ™”",
338
- elem_classes="secondary-button"
339
  )
340
-
341
- pdf_show = PDF(
342
- label='PDF ๋ฏธ๋ฆฌ๋ณด๊ธฐ',
343
- interactive=False,
344
- visible=True,
345
- height=800,
346
- elem_classes="pdf-preview"
347
  )
348
-
349
- with gr.Accordion("์˜ˆ์ œ:", open=False):
350
- example_root = os.path.join(os.path.dirname(__file__), "examples")
351
- gr.Examples(
352
- examples=[os.path.join(example_root, _) for _ in os.listdir(example_root) if
353
- _.endswith("pdf")],
354
- inputs=file
355
- )
 
 
 
 
 
 
356
 
357
- # ์˜ค๋ฅธ์ชฝ ํŒจ๋„
358
- with gr.Column(variant='panel', scale=5):
359
- output_file = gr.File(
360
- label="๋ณ€ํ™˜ ๊ฒฐ๊ณผ",
361
- interactive=False,
362
- elem_classes="output-file"
363
  )
364
-
365
- with gr.Tabs() as tabs:
366
- with gr.Tab("๋งˆํฌ๋‹ค์šด ๋ Œ๋”๋ง"):
367
- md = gr.Markdown(
368
- label="๋งˆํฌ๋‹ค์šด ๋ Œ๋”๋ง",
369
- height=1100,
370
- show_copy_button=True,
371
- latex_delimiters=latex_delimiters,
372
- line_breaks=True,
373
- elem_classes="markdown-output"
374
- )
375
-
376
- with gr.Tab("๋งˆํฌ๋‹ค์šด ํ…์ŠคํŠธ"):
377
- md_text = gr.TextArea(
378
- lines=45,
379
- show_copy_button=True,
380
- elem_classes="markdown-text"
381
- )
382
-
383
- # ์ด๋ฒคํŠธ ํ•ธ๋“ค๋Ÿฌ
384
- file.change(
385
- fn=to_pdf,
386
- inputs=file,
387
- outputs=pdf_show
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
388
  )
389
-
390
- change_bu.click(
391
- fn=to_markdown,
392
- inputs=[
393
- file,
394
- max_pages,
395
- is_ocr,
396
- layout_mode,
397
- formula_enable,
398
- table_enable,
399
- language
400
- ],
401
- outputs=[
402
- md,
403
- md_text,
404
- output_file,
405
- pdf_show
406
- ],
407
- api_name=False
408
  )
409
-
410
- clear_bu.add([file, md, pdf_show, md_text, output_file, is_ocr])
411
 
412
- # ์•ฑ ์‹คํ–‰
413
- demo.launch(ssr_mode=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ##############################
2
+ # 1) ๊ธฐ์กด PDF ์ฒ˜๋ฆฌ ์ฝ”๋“œ
3
+ ##############################
4
+
5
  import base64
6
  import json
7
  import os
 
12
  import uuid
13
  import pymupdf
14
 
15
+ # ์›๋ž˜ ์ฝ”๋“œ์— ์žˆ๋˜ os.system() ํ˜ธ์ถœ๋“ค
16
  os.system('pip uninstall -y magic-pdf')
17
  os.system('pip install git+https://github.com/opendatalab/MinerU.git@dev')
 
18
  os.system('wget https://github.com/opendatalab/MinerU/raw/dev/scripts/download_models_hf.py -O download_models_hf.py')
19
  os.system('python download_models_hf.py')
20
 
 
30
  json.dump(data, file, indent=4)
31
 
32
  os.system('cp -r paddleocr /home/user/.paddleocr')
33
+ # from gradio_pdf import PDF # PDF ๋ฏธ๋ฆฌ๋ณด๊ธฐ๋ฅผ ์œ„ํ•œ ์ปดํฌ๋„ŒํŠธ์ด์ง€๋งŒ, ์ง€๊ธˆ์€ ์ˆจ๊ธธ ์˜ˆ์ •
34
 
35
  import gradio as gr
36
  from loguru import logger
 
69
  color: #6B7280;
70
  font-size: 1.1rem;
71
  }
72
+ /* ์ˆจ๊ธธ ์˜ˆ์ •์ธ ์ปดํฌ๋„ŒํŠธ ์Šคํƒ€์ผ */
73
+ .invisible {
74
+ display: none !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  }
76
  """
77
 
 
134
  return f"![{relative_path}](data:image/jpeg;base64,{base64_image})"
135
  return re.sub(pattern, replace, markdown_text)
136
 
137
def to_pdf(file_path):
    """
    Return the path of a PDF version of ``file_path``.

    The file is opened with pymupdf. When pymupdf reports it as a PDF the
    input path is returned unchanged. Otherwise (for example a PNG or JPG
    upload) the document is converted to PDF bytes and written into the same
    directory as the input under a random ``<uuid4>.pdf`` name; that new path
    is returned.
    """
    with pymupdf.open(file_path) as document:
        # Already a PDF: nothing to convert.
        if document.is_pdf:
            return file_path

        converted_bytes = document.convert_to_pdf()
        # A UUID-based name avoids clobbering other files in the upload directory.
        target_name = f"{uuid.uuid4()}.pdf"
        target_path = os.path.join(os.path.dirname(file_path), target_name)
        with open(target_path, 'wb') as target_file:
            target_file.write(converted_bytes)
        return target_path
151
+
152
def to_markdown(file_path, end_pages, is_ocr, layout_mode, formula_enable, table_enable, language):
    """
    Extract markdown from an uploaded file and return it with inline images.

    The input is first normalised to PDF via ``to_pdf``, then handed to
    ``parse_pdf`` together with the recognition options. The output directory
    is additionally zipped under ``./output`` (the archive name is derived from
    ``compute_sha256(local_md_dir)``); a failed archive is only logged.

    Args:
        file_path: Path of the uploaded PDF or image.
        end_pages: Maximum number of pages to convert; values above 20 are
            clamped to 20.
        is_ocr, layout_mode, formula_enable, table_enable, language: Passed
            through to ``parse_pdf`` unchanged.

    Returns:
        The markdown text produced by ``parse_pdf`` after
        ``replace_image_with_base64`` has processed its image references.
    """
    file_path = to_pdf(file_path)
    end_pages = min(end_pages, 20)

    # NOTE(review): ``end_pages - 1`` looks like a zero-based last-page index;
    # confirm against parse_pdf.
    local_md_dir, file_name = parse_pdf(
        file_path, './output', end_pages - 1, is_ocr,
        layout_mode, formula_enable, table_enable, language,
    )

    archive_zip_path = os.path.join("./output", compute_sha256(local_md_dir) + ".zip")
    zip_archive_success = compress_directory_to_zip(local_md_dir, archive_zip_path)
    # A return value of 0 is treated as success; anything else is logged as a failure.
    if zip_archive_success == 0:
        logger.info("압축 성공")
    else:
        logger.error("압축 실패")

    md_path = os.path.join(local_md_dir, file_name + ".md")
    with open(md_path, 'r', encoding='utf-8') as f:
        txt_content = f.read()

    # Only the final markdown text (with embedded images) is returned.
    md_content = replace_image_with_base64(txt_content, local_md_dir)
    return md_content
 
 
 
 
 
 
 
 
 
 
177
 
178
+ latex_delimiters = [
179
+ {"left": "$$", "right": "$$", "display": True},
180
+ {"left": '$', "right": '$', "display": False}
181
+ ]
182
 
183
  def init_model():
184
+ """
185
+ magic_pdf์˜ ๋ชจ๋ธ์„ ๋ฏธ๋ฆฌ ์ดˆ๊ธฐํ™”.
186
+ """
187
  from magic_pdf.model.doc_analyze_by_custom_model import ModelSingleton
188
  try:
189
  model_manager = ModelSingleton()
 
219
  all_lang = ['', 'auto']
220
  all_lang.extend([*other_lang, *latin_lang, *arabic_lang, *cyrillic_lang, *devanagari_lang])
221
 
222
+
223
+ ##############################
224
+ # 2) Gemini LLM ์ฑ— ์ฝ”๋“œ
225
+ ##############################
226
+
227
+ # (์ค‘๋ณต import์ด์ง€๋งŒ "๋ˆ„๋ฝ ์—†์ด" ์ถœ๋ ฅํ•ด์•ผ ํ•˜๋ฏ€๋กœ ์ฃผ์„ ์ฒ˜๋ฆฌ)
228
+ # import os
229
+ # import gradio as gr
230
+ from gradio import ChatMessage
231
+ from typing import Iterator
232
+ import google.generativeai as genai
233
+ import time
234
+
235
+ # get Gemini API Key from the environ variable
236
+ GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
237
+ genai.configure(api_key=GEMINI_API_KEY)
238
+
239
+ # we will be using the Gemini 2.0 Flash model with Thinking capabilities
240
+ model = genai.GenerativeModel("gemini-2.0-flash-thinking-exp-1219")
241
+
242
def format_chat_history(messages: list) -> list:
    """
    Convert chat history entries into the ``{"role", "parts"}`` dicts passed to Gemini.

    Entries may be dict-like (anything exposing ``.get``) or attribute-style
    objects such as the ``ChatMessage`` instances this file appends to the
    history. "Thinking" entries are skipped: an assistant entry is treated as
    a thinking entry when, for dict-like entries, it has a ``"metadata"`` key,
    or, for objects, its ``metadata`` attribute is non-empty.

    Args:
        messages: Chat history entries exposing ``role``, ``content`` and
            optionally ``metadata``.

    Returns:
        A new list of ``{"role": ..., "parts": [content]}`` dicts, in order.
    """
    def _is_mapping(message) -> bool:
        return callable(getattr(message, "get", None))

    def _field(message, key, default=None):
        # Read one field regardless of whether the entry is a dict or an object.
        if _is_mapping(message):
            return message.get(key, default)
        return getattr(message, key, default)

    def _is_thinking(message) -> bool:
        if _field(message, "role") != "assistant":
            return False
        if _is_mapping(message):
            return "metadata" in message
        return bool(getattr(message, "metadata", None))

    formatted_history = []
    for message in messages:
        if _is_thinking(message):
            continue
        # NOTE(review): the Gemini API documents the non-user role as "model";
        # "assistant" is kept here to preserve existing behaviour - verify.
        role = "user" if _field(message, "role") == "user" else "assistant"
        formatted_history.append({
            "role": role,
            "parts": [_field(message, "content", "")],
        })
    return formatted_history
255
+
256
def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
    """
    Stream the model's thoughts and answer for a text-only request.

    ``messages`` is mutated in place and yielded after every update: first a
    "thinking" assistant entry (carrying a metadata title) that grows as
    thought chunks arrive, then a separate assistant entry holding the answer.
    A chunk with two parts marks the transition: its first part completes the
    thought and its second part starts the answer.

    Args:
        user_message: The text to send. Empty/blank (or ``None``) input is
            rejected with an assistant message instead of calling the model.
        messages: Chat history; prior turns are converted with
            ``format_chat_history`` and used as the chat's history.

    Yields:
        The same ``messages`` list after each update.

    Any exception raised while talking to the model is reported as an
    assistant message rather than propagated.
    """
    # NOTE(review): the submit chain in this file clears the textbox in the
    # step before this handler runs - verify the value wired to
    # ``user_message`` is not always empty.
    thinking_title = "⚙️ Thinking: *The thoughts produced by the model are experimental"

    if not user_message or not user_message.strip():
        messages.append(ChatMessage(role="assistant", content="Please provide a non-empty text message. Empty input is not allowed."))
        yield messages
        return

    try:
        print("\n=== New Request (Text) ===")
        print(f"User message: {user_message}")

        chat_history = format_chat_history(messages)
        chat = model.start_chat(history=chat_history)
        response = chat.send_message(user_message, stream=True)

        thought_buffer = ""
        response_buffer = ""
        thinking_complete = False

        # Placeholder thinking entry, replaced in place as chunks arrive.
        messages.append(
            ChatMessage(
                role="assistant",
                content="",
                metadata={"title": thinking_title}
            )
        )

        for chunk in response:
            parts = chunk.candidates[0].content.parts
            if not parts:
                # Nothing to display; indexing parts[0] would otherwise abort the stream.
                continue
            current_chunk = parts[0].text

            if len(parts) == 2 and not thinking_complete:
                # Complete thought and start response
                thought_buffer += current_chunk
                print(f"\n=== Complete Thought ===\n{thought_buffer}")

                messages[-1] = ChatMessage(
                    role="assistant",
                    content=thought_buffer,
                    metadata={"title": thinking_title}
                )
                yield messages

                # Start response
                response_buffer = parts[1].text
                print(f"\n=== Starting Response ===\n{response_buffer}")

                messages.append(
                    ChatMessage(
                        role="assistant",
                        content=response_buffer
                    )
                )
                thinking_complete = True

            elif thinking_complete:
                response_buffer += current_chunk
                print(f"\n=== Response Chunk ===\n{current_chunk}")

                messages[-1] = ChatMessage(
                    role="assistant",
                    content=response_buffer
                )

            else:
                thought_buffer += current_chunk
                print(f"\n=== Thinking Chunk ===\n{current_chunk}")

                messages[-1] = ChatMessage(
                    role="assistant",
                    content=thought_buffer,
                    metadata={"title": thinking_title}
                )

            yield messages

        print(f"\n=== Final Response ===\n{response_buffer}")

    except Exception as e:
        # Top-level boundary for the UI: surface the failure in the chat.
        print(f"\n=== Error ===\n{str(e)}")
        messages.append(
            ChatMessage(
                role="assistant",
                content=f"I apologize, but I encountered an error: {str(e)}"
            )
        )
        yield messages
346
+
347
def user_message(msg: str, history: list) -> tuple[str, list]:
    """
    Append ``msg`` to ``history`` as a user turn.

    Returns an empty string together with the (mutated) history list.
    """
    user_turn = ChatMessage(role="user", content=msg)
    history.append(user_turn)
    emptied_text = ""
    return emptied_text, history
351
+
352
+
353
+ ######################################################
354
+ # 3) ํ†ตํ•ฉ Gradio ์•ฑ ๊ตฌ์„ฑ
355
+ # - PDF ์—…๋กœ๋“œ๋งŒ ๋ณด์ด๊ฒŒ ํ•˜๊ณ (๋‚˜๋จธ์ง€๋Š” hidden)
356
+ # - ์—…๋กœ๋“œ ํ›„ "๋ณ€ํ™˜" ๋ฒ„ํŠผ ํด๋ฆญ ์‹œ, ๋งˆํฌ๋‹ค์šด์„ ๋งŒ๋“ค์–ด
357
+ # Chatbot๊ณผ ๋Œ€ํ™”ํ•  ์ˆ˜ ์žˆ๋„๋ก ์ „๋‹ฌ
358
+ ######################################################
359
+ with gr.Blocks(title="ํ†ตํ•ฉ OCR & Gemini Chat", css=create_css(), theme=gr.themes.Soft(primary_hue="teal", secondary_hue="slate", neutral_hue="neutral")) as demo:
360
+ gr.HTML("""
361
+ <div class="title-area">
362
+ <h1>OCR FLEX + Gemini Chat</h1>
363
+ <p>PDF/์ด๋ฏธ์ง€ -> ํ…์ŠคํŠธ(๋งˆํฌ๋‹ค์šด) ๋ณ€ํ™˜ ํ›„, LLM Gemini์™€ ๋Œ€ํ™”</p>
364
+ </div>
365
+ """)
366
+
367
+ # ๋‚ด๋ถ€ ์ƒํƒœ(๋งˆํฌ๋‹ค์šด ํ…์ŠคํŠธ)
368
+ md_state = gr.State("")
369
+ chat_history = gr.State([]) # Gemini ์ฑ— ๊ธฐ๋ก ์ƒํƒœ
370
+
371
+ # 1) ํŒŒ์ผ ์—…๋กœ๋“œ UI
372
+ with gr.Row():
373
+ file = gr.File(
374
+ label="PDF ๋˜๋Š” ์ด๋ฏธ์ง€ ํŒŒ์ผ ์—…๋กœ๋“œ",
375
+ file_types=[".pdf", ".png", ".jpeg", ".jpg"],
376
+ interactive=True
377
  )
378
+ convert_btn = gr.Button(
379
+ "๋ณ€ํ™˜",
380
+ elem_classes="primary-button"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
381
  )
 
 
382
 
383
+ # 2) ์›๋ž˜ ์กด์žฌํ•˜๋˜ ์Šฌ๋ผ์ด๋”, ์ฒดํฌ๋ฐ•์Šค ๋“ฑ์€ ์ „๋ถ€ hidden
384
+ max_pages = gr.Slider(
385
+ 1, 20, 10,
386
+ step=1,
387
+ label='์ตœ๋Œ€ ๋ณ€ํ™˜ ํŽ˜์ด์ง€ ์ˆ˜',
388
+ elem_classes="invisible",
389
+ visible=False
390
+ )
391
+ layout_mode = gr.Dropdown(
392
+ ["layoutlmv3", "doclayout_yolo"],
393
+ label="๋ ˆ์ด์•„์›ƒ ๋ชจ๋ธ",
394
+ value="doclayout_yolo",
395
+ elem_classes="invisible",
396
+ visible=False
397
+ )
398
+ language = gr.Dropdown(
399
+ all_lang,
400
+ label="์–ธ์–ด",
401
+ value='auto',
402
+ elem_classes="invisible",
403
+ visible=False
404
+ )
405
+ formula_enable = gr.Checkbox(
406
+ label="์ˆ˜์‹ ์ธ์‹ ํ™œ์„ฑํ™”",
407
+ value=True,
408
+ elem_classes="invisible",
409
+ visible=False
410
+ )
411
+ is_ocr = gr.Checkbox(
412
+ label="OCR ๊ฐ•์ œ ํ™œ์„ฑํ™”",
413
+ value=False,
414
+ elem_classes="invisible",
415
+ visible=False
416
+ )
417
+ table_enable = gr.Checkbox(
418
+ label="ํ‘œ ์ธ์‹ ํ™œ์„ฑํ™”(ํ…Œ์ŠคํŠธ)",
419
+ value=True,
420
+ elem_classes="invisible",
421
+ visible=False
422
+ )
423
+
424
+ # 3) ์ถœ๋ ฅ ๊ฒฐ๊ณผ(ํŒŒ์ผ, ๋งˆํฌ๋‹ค์šด ๋“ฑ)๋„ ์ˆจ๊น€
425
+ # ํ•„์š”ํ•˜๋ฉด ์ฃผ์„ ํ•ด์ œํ•˜์—ฌ ํ™•์ธ ๊ฐ€๋Šฅ
426
+ # output_file = gr.File(
427
+ # label="๋ณ€ํ™˜ ๊ฒฐ๊ณผ",
428
+ # interactive=False,
429
+ # visible=False
430
+ # )
431
+ # md = gr.Markdown(
432
+ # label="๋งˆํฌ๋‹ค์šด ๋ Œ๋”๋ง",
433
+ # visible=False
434
+ # )
435
+ # md_text = gr.TextArea(
436
+ # lines=45,
437
+ # visible=False
438
+ # )
439
+ # pdf_show = PDF(
440
+ # label='PDF ๋ฏธ๋ฆฌ๋ณด๊ธฐ',
441
+ # interactive=False,
442
+ # visible=False,
443
+ # height=800
444
+ # )
445
+
446
+ # 4) ํŒŒ์ผ ์—…๋กœ๋“œ -> '๋ณ€ํ™˜' ๋ฒ„ํŠผ ํด๋ฆญ์‹œ ๋™์ž‘:
447
+ # to_markdown ํ•จ์ˆ˜๋ฅผ ํ†ตํ•ด md_state์— ๋งˆํฌ๋‹ค์šด ์ €์žฅ
448
+ convert_btn.click(
449
+ fn=to_markdown,
450
+ inputs=[file, max_pages, is_ocr, layout_mode, formula_enable, table_enable, language],
451
+ outputs=md_state
452
+ )
453
+
454
+ # ==========================
455
+ # Gemini Chat ๋ถ€๋ถ„
456
+ # ==========================
457
+ gr.Markdown("## Gemini 2.0 Flash (With Thinking) Chat")
458
+
459
+ chatbot = gr.Chatbot(
460
+ label="Gemini2.0 Chatbot (Streaming Output)",
461
+ render_markdown=True,
462
+ height=400
463
+ )
464
+
465
+ with gr.Row():
466
+ chat_input = gr.Textbox(
467
+ lines=1,
468
+ label="์งˆ๋ฌธ ์ž…๋ ฅ",
469
+ placeholder="์ถ”์ถœ๋œ ๋ฌธ์„œ(๋งˆํฌ๋‹ค์šด ๋‚ด์šฉ)์— ๋Œ€ํ•ด ๊ถ๊ธˆํ•œ ์ ์„ ๋ฌผ์–ด๋ณด์„ธ์š”..."
470
+ )
471
+ clear_button = gr.Button("๋Œ€ํ™” ์ดˆ๊ธฐํ™”")
472
+
473
+ # ์‚ฌ์šฉ์ž๊ฐ€ ์งˆ๋ฌธ -> user_message -> Gemini ์ฒ˜๋ฆฌ -> stream_gemini_response
474
+ def user_message_wrapper(msg, history, doc_text):
475
+ """
476
+ ์‚ฌ์šฉ์ž๊ฐ€ ์ž…๋ ฅํ•  ๋•Œ๋งˆ๋‹ค, doc_text(๋งˆํฌ๋‹ค์šด)๋ฅผ ์ฐธ๊ณ ํ•˜๋„๋ก
477
+ ์งˆ๋ฌธ์„ ์•ฝ๊ฐ„ ๋ณ€ํ˜•ํ•ด์„œ history์— ์ถ”๊ฐ€ํ•˜๋Š” ๋ฐฉ์‹(๊ฐ„๋‹จ ์˜ˆ์‹œ).
478
+ """
479
+ if not doc_text:
480
+ # ์•„์ง ๋ณ€ํ™˜๋œ ๋ฌธ์„œ๊ฐ€ ์—†๋‹ค๋ฉด ๊ทธ๋ƒฅ ์งˆ๋ฌธ
481
+ user_query = msg
482
+ else:
483
+ # ๋ฌธ์„œ ๋‚ด์šฉ(doc_text)์„ "์ฐธ๊ณ " ์š”์ฒญํ•˜๋Š” ๊ฐ„๋‹จ ํ”„๋กฌํ”„ํŠธ ์˜ˆ์‹œ
484
+ user_query = f"๋‹ค์Œ ๋ฌธ์„œ๋ฅผ ์ฐธ๊ณ ํ•˜์—ฌ ๋‹ต๋ณ€:\n\n{doc_text}\n\n์งˆ๋ฌธ: {msg}"
485
+
486
+ history.append(ChatMessage(role="user", content=user_query))
487
+ return "", history
488
+
489
+ chat_input.submit(
490
+ fn=user_message_wrapper,
491
+ inputs=[chat_input, chat_history, md_state],
492
+ outputs=[chat_input, chat_history]
493
+ ).then(
494
+ fn=stream_gemini_response,
495
+ inputs=[chat_input, chat_history],
496
+ outputs=chat_history
497
+ ).then(
498
+ fn=lambda h: h,
499
+ inputs=chat_history,
500
+ outputs=chatbot
501
+ )
502
+
503
+ clear_button.click(
504
+ fn=lambda: ([], ""),
505
+ inputs=[],
506
+ outputs=[chat_history, md_state]
507
+ ).then(
508
+ fn=lambda: [],
509
+ inputs=[],
510
+ outputs=chatbot
511
+ )
512
+
513
+
514
+ ##############################
515
+ # 4) ์‹ค์ œ ์‹คํ–‰
516
+ ##############################
517
+ if __name__ == "__main__":
518
+ # ์ฒซ ๋ฒˆ์งธ demo.launch() - ํ†ตํ•ฉ ์•ฑ ์‹คํ–‰
519
+ demo.launch(ssr_mode=True, debug=True)
520
+
521
+
522
+ ###############################################
523
+ # ์•„๋ž˜๋Š” "Gemini ์ฑ— ์ฝ”๋“œ" ์›๋ณธ์— ์žˆ๋˜
524
+ # ๋ณ„๋„์˜ demo.launch() ๋ถ€๋ถ„ (๋ˆ„๋ฝ ์—†์ด ์ฃผ์„ ๋ณด์กด)
525
+ ###############################################
526
+ # if __name__ == "__main__":
527
+ # demo.launch(debug=True)
528
+