Spaces:

kkpathak91
/

Mtech_Project_Image_processing_and_Fact_verification

Runtime error

Mtech_Project_Image_processing_and_Fact_verification

File size: 5,656 Bytes

import os
# Install the OCR packages from inside the script at start-up.
# NOTE(review): presumably needed so the `paddleocr` import below succeeds on
# a fresh environment — confirm. The exit status of os.system is ignored, so
# a failed install would only surface later as an ImportError.
os.system('pip install paddlepaddle')
os.system('pip install paddleocr')
from paddleocr import PaddleOCR, draw_ocr
from PIL import Image
import gradio as gr
import torch

# Download the sample picture to 'example.jpg', the file name used by the
# demo's `examples` list.
torch.hub.download_url_to_file('https://i.imgur.com/aqMBT0i.jpg', 'example.jpg')

# OCR engines keyed by language code. Constructing PaddleOCR is the expensive
# part of a request, so each language's engine is built once and reused.
_OCR_ENGINES = {}


def _get_ocr(lang):
    """Return the cached PaddleOCR engine for *lang*, creating it on first use."""
    engine = _OCR_ENGINES.get(lang)
    if engine is None:
        engine = PaddleOCR(use_angle_cls=True, lang=lang, use_gpu=False)
        _OCR_ENGINES[lang] = engine
    return engine


def _is_ocr_line(item):
    """Return True if *item* looks like one ``[box, (text, score)]`` entry."""
    try:
        _box, recognised = item
        text, _score = recognised
    except (TypeError, ValueError):
        return False
    return isinstance(text, str)


def _ocr_lines(result):
    """Flatten the OCR result of a single image into a list of line entries.

    Accepts both result shapes: a flat list of ``[box, (text, score)]``
    entries, and the nested form used by newer PaddleOCR releases where the
    outer list holds one list of entries per input image. ``None`` (nothing
    detected) becomes an empty list.
    """
    if not result:
        return []
    if _is_ocr_line(result[0]):
        return list(result)
    # Nested form: only one image was submitted, so take its entry list.
    return list(result[0] or [])


def inference(img, lang):
    """Run OCR on an uploaded image and return the path of the annotated copy.

    Args:
        img: Uploaded file object; only its ``name`` attribute (the path on
            disk) is used.
        lang: PaddleOCR language code selected in the UI (e.g. ``'en'``).

    Returns:
        The string ``'result.jpg'``, the file the annotated image is saved to.
    """
    img_path = img.name
    result = _get_ocr(lang).ocr(img_path, cls=True)
    image = Image.open(img_path).convert('RGB')

    lines = _ocr_lines(result)
    boxes = [line[0] for line in lines]
    txts = [line[1][0] for line in lines]

    im_show = draw_ocr(image, boxes, txts,
                       font_path='simfang.ttf')
    im_show = Image.fromarray(im_show)
    im_show.save('result.jpg')
    return 'result.jpg'

# Static texts and styling shown on the OCR demo page.
title = 'A Framework for Data-Driven Document Evaluation and scoring - Image to Text Extraction '
description = 'Demo for Optical character recognition(OCR)'
article = ""
# One sample row in the order of the inputs below: image path, language code.
examples = [['example.jpg', 'en']]
css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;}"

# Input widgets: the uploaded picture (handed over as a file object) and the
# OCR language to use.
ocr_inputs = [
    gr.inputs.Image(type='file', label='Input'),
    gr.inputs.Dropdown(
        choices=['ch', 'en', 'fr', 'german', 'korean', 'japan'],
        type="value",
        default='en',
        label='language',
    ),
]
# Output widget: the annotated picture, supplied by `inference` as a file path.
ocr_output = gr.outputs.Image(type='file', label='Output')

ocr_demo = gr.Interface(
    inference,
    ocr_inputs,
    ocr_output,
    title=title,
    description=description,
    article=article,
    examples=examples,
    css=css,
    enable_queue=True,
)
# NOTE(review): launch(debug=True) presumably blocks until the server stops,
# in which case nothing below this line runs while the OCR demo is up — confirm.
ocr_demo.launch(debug=True)


##########################################################################################################
    
import os
import gradio as gr
from huggingface_hub import snapshot_download
from prettytable import PrettyTable
import pandas as pd
import torch
import traceback

# Settings passed to the Loren pipeline; the model directory entries
# ('fc_dir', 'mrc_dir', 'er_dir') are added once the snapshot is downloaded.
config = dict(
    model_type="roberta",
    model_name_or_path="roberta-large",
    logic_lambda=0.5,
    prior="random",
    mask_rate=0.0,
    cand_k=1,
    max_seq1_length=256,
    max_seq2_length=128,
    max_num_questions=8,
    do_lower_case=False,
    seed=42,
    # Number of CUDA devices visible to torch (0 on a CPU-only host).
    n_gpu=torch.cuda.device_count(),
)

# Clone the project repository, delete its data/, results/ and models/
# directories, then move the remaining contents into the working directory.
# NOTE(review): presumably this is what makes the `src` package importable
# further down — confirm. Exit codes of the shell commands are not checked.
os.system('git clone https://github.com/kkpathak91/project_metch/')
os.system('rm -r project_metch/data/')
os.system('rm -r project_metch/results/')
os.system('rm -r project_metch/models/')
os.system('mv project_metch/* ./')

# Download the 'kkpathak91/FVM' snapshot and record the sub-directory of each
# component in the config.
model_dir = snapshot_download('kkpathak91/FVM')
config['fc_dir'] = os.path.join(model_dir, 'fact_checking/roberta-large/')
config['mrc_dir'] = os.path.join(model_dir, 'mrc_seq2seq/bart-base/')
config['er_dir'] = os.path.join(model_dir, 'evidence_retrieval/')


# Imported only after the shell commands above have run.
from src.loren import Loren


loren = Loren(config, verbose=False)
# Check one fixed claim at import time; any failure is re-raised as
# ValueError, which stops the script before the interface is created.
try:
    js = loren.check('Donald Trump won the 2020 U.S. presidential election.')
except Exception as e:
    raise ValueError(e)


def highlight_phrase(text, phrase):
    """Fill the ``<mask>`` slot of *text* with *phrase* in bold-italic markup.

    The text is first passed through the fact-checking tokenizer's
    ``clean_up_tokenization``; every ``<mask>`` in the cleaned text is then
    replaced by ``<i><b>phrase</b></i>``.
    """
    cleaned = loren.fc_client.tokenizer.clean_up_tokenization(text)
    emphasized = f'<i><b>{phrase}</b></i>'
    return cleaned.replace('<mask>', emphasized)


def highlight_entity(text, entity):
    """Wrap every occurrence of *entity* in *text* in bold-italic HTML tags.

    Args:
        text: The string to search.
        entity: The substring to emphasise.

    Returns:
        *text* with each occurrence of *entity* replaced by
        ``<i><b>entity</b></i>``. When *entity* is empty or ``None`` the text
        is returned unchanged.
    """
    if not entity:
        # str.replace('') matches between every character and would scatter
        # empty highlight tags through the whole text.
        return text
    return text.replace(entity, f'<i><b>{entity}</b></i>')


def gradio_formatter(js, output_type):
    """Render part of a verification result as a zebra-striped HTML table.

    Args:
        js: Result mapping. For ``'e'`` the keys ``'evidence'`` and
            ``'entities'`` are read; for ``'z'`` the keys
            ``'phrase_veracity'``, ``'claim_phrases'``, ``'cloze_qs'`` and
            ``'evidential'`` are read.
        output_type: ``'e'`` for the evidence table, ``'z'`` for the
            per-phrase veracity table.

    Returns:
        An HTML string consisting of a ``<style>`` header followed by the table.

    Raises:
        NotImplementedError: If *output_type* is neither ``'e'`` nor ``'z'``.
    """
    zebra_css = '''
    tr:nth-child(even) {
        background: #f1f1f1;
    }
    thead{
        background: #f1f1f1;
    }'''
    if output_type == 'e':
        data = {'Evidence': [highlight_entity(x, e) for x, e in zip(js['evidence'], js['entities'])]}
    elif output_type == 'z':
        p_sup, p_ref, p_nei = [], [], []
        for probs in js['phrase_veracity']:
            max_idx = torch.argmax(torch.tensor(probs)).tolist()
            cells = ['%.4f' % p for p in probs]
            # Emphasise the most probable label of this phrase.
            cells[max_idx] = f'<i><b>{cells[max_idx]}</b></i>'
            # Index 0 feeds the p_REF column, 1 p_NEI and 2 p_SUP.
            p_sup.append(cells[2])
            p_ref.append(cells[0])
            p_nei.append(cells[1])

        data = {
            'Claim Phrase': js['claim_phrases'],
            'Local Premise': [highlight_phrase(q, x[0]) for q, x in zip(js['cloze_qs'], js['evidential'])],
            'p_SUP': p_sup,
            'p_REF': p_ref,
            'p_NEI': p_nei,
        }
    else:
        raise NotImplementedError(f'Unsupported output_type: {output_type!r}')

    # Going through a DataFrame turns the columns into rows.
    data = pd.DataFrame(data)
    pt = PrettyTable(field_names=list(data.columns),
                     align='l', border=True, hrules=1, vrules=1)
    for row in data.values:
        pt.add_row(row)
    html = pt.get_html_string(attributes={
        'style': 'border-width: 2px; border-color: black'
    }, format=True)

    # The table HTML contains escaped cell text, so the highlight tags added
    # above arrive as '&lt;i&gt;...'. Restore only those known tags: other
    # '<' / '>' characters coming from claim or evidence text stay escaped
    # and cannot inject markup into the page.
    html = (html
            .replace('&lt;i&gt;&lt;b&gt;', '<i><b>')
            .replace('&lt;/b&gt;&lt;/i&gt;', '</b></i>'))
    return f'<head> <style type="text/css"> {zebra_css} </style> </head>\n' + html


def run(claim):
    """Verify *claim* and return the three values shown by the interface.

    Returns:
        A tuple ``(label, z_html, ev_html)``: the claim veracity label, the
        phrase-level table and the evidence table. If ``loren.check`` raises,
        the claim and the traceback are logged as errors and
        ``('Oops, something went wrong.', '', '')`` is returned instead.
    """
    try:
        result = loren.check(claim)
    except Exception as error_msg:
        trace = traceback.format_exc()
        report = f'[Error]: {error_msg}.\n[Traceback]: {trace}'
        loren.logger.error(claim)
        loren.logger.error(report)
        return 'Oops, something went wrong.', '', ''
    else:
        verdict = result['claim_veracity']
        # The full result is written to the log at warning level.
        loren.logger.warning(verdict + str(result))
        evidence_table = gradio_formatter(result, 'e')
        phrase_table = gradio_formatter(result, 'z')
        return verdict, phrase_table, evidence_table


# Options of the claim-verification demo. The three outputs line up with the
# tuple returned by `run`: label text, phrase table HTML, evidence table HTML.
fact_check_options = dict(
    fn=run,
    inputs="text",
    outputs=['text', 'html', 'html'],
    examples=[
        'Kanpur is a city in Nepal',
        'PV Sindhu is an Indian Badminton Player.',
    ],
    title="A Framework for Data-Driven Document Evaluation and Scoring",
    layout='horizontal',
    description="[Student Name: Karan Kumar Pathak] " " [Roll No.: 2020fc04334] ",
    flagging_dir='results/flagged/',
    allow_flagging=True,
    flagging_options=[
        'Interesting!',
        'Error: Claim Phrase Parsing',
        'Error: Local Premise',
        'Error: Require Commonsense',
        'Error: Evidence Retrieval',
    ],
    enable_queue=True,
)
iface = gr.Interface(**fact_check_options)
iface.launch()