File size: 1,704 Bytes
717b6b1
 
 
ebe3e23
717b6b1
1f78813
 
 
521a314
 
717b6b1
 
521a314
 
 
1f78813
521a314
 
1f78813
521a314
1f78813
717b6b1
 
75183d4
521a314
717b6b1
 
521a314
 
 
ebe3e23
521a314
75183d4
 
521a314
717b6b1
75183d4
 
521a314
75183d4
521a314
1f78813
75183d4
 
521a314
717b6b1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import gradio as gr
import pytesseract
from pdf2image import convert_from_path
from PIL import Image

# List of languages that should be supported simultaneously.
# (The corresponding language packs must be installed for Tesseract.)
# Passed as the `lang` argument to pytesseract.image_to_string; codes are joined with "+".
AUTO_LANGS = "eng+fas+ara+rus+spa+fra"

def ocr_auto(input_file):
    """Extract text from a PDF path or a PIL image using Tesseract.

    Args:
        input_file: Either a string path ending in ``.pdf`` (matched
            case-insensitively) or a ``PIL.Image.Image`` instance.

    Returns:
        The recognised text with surrounding whitespace stripped. For a PDF,
        each page's text is preceded by a ``--- Page N ---`` header. Any other
        kind of input yields an empty string.
    """
    # Compare the extension case-insensitively so "scan.PDF" is not silently
    # ignored (it previously matched neither branch and returned "").
    if isinstance(input_file, str) and input_file.lower().endswith(".pdf"):
        page_images = convert_from_path(input_file)
        # Collect per-page chunks and join once instead of repeated `+=`.
        chunks = [
            f"\n--- Page {page_number} ---\n"
            f"{pytesseract.image_to_string(image, lang=AUTO_LANGS)}"
            for page_number, image in enumerate(page_images, start=1)
        ]
        return "".join(chunks).strip()

    if isinstance(input_file, Image.Image):
        return pytesseract.image_to_string(input_file, lang=AUTO_LANGS).strip()

    # Unsupported input: keep the original best-effort behaviour.
    return ""

def gradio_interface():
    """Build the Gradio OCR interface and launch it.

    The interface takes an input-type radio button and an uploaded file
    (PDF, PNG, JPG or JPEG) and shows the extracted text in a read-only
    textbox.
    """
    input_type = gr.Radio(["PDF", "Image"], label="Choose Input Type", value="PDF")
    file_input = gr.File(label="Upload PDF/Image", file_types=[".pdf", ".png", ".jpg", ".jpeg"])
    output_text = gr.Textbox(label="Extracted Text", interactive=False)

    def process(input_type, file):
        """Run OCR on the uploaded file and return the extracted text.

        Args:
            input_type: The radio selection, ``"PDF"`` or ``"Image"``. Used
                only when the file extension does not identify the type.
            file: The uploaded file: an object exposing the path as
                ``.name``, or the path itself.

        Returns:
            The extracted text, or a warning message when no file was given.
        """
        if not file:
            return "⚠️ Please upload a file first."

        # Accept both file-like objects with a `.name` path and plain paths.
        path = str(getattr(file, "name", file))
        lowered = path.lower()

        # Trust the file extension over the radio button: a mismatch used to
        # produce empty output (image sent down the PDF path) or an error
        # (PDF handed to Image.open).
        if lowered.endswith(".pdf"):
            treat_as_pdf = True
        elif lowered.endswith((".png", ".jpg", ".jpeg")):
            treat_as_pdf = False
        else:
            treat_as_pdf = input_type == "PDF"

        if treat_as_pdf:
            return ocr_auto(path)

        # Close the image file handle once OCR is done.
        with Image.open(path) as image:
            return ocr_auto(image)

    gr.Interface(
        fn=process,
        inputs=[input_type, file_input],
        outputs=[output_text],
        title="Auto OCR (PDF/Image)",
        description="Upload a PDF or Image. OCR will automatically detect and extract text in multiple languages."
    ).launch()

# Run: build the interface and launch it whenever this module is loaded
# (the call is unconditional, so importing the file also launches the app).
gradio_interface()