File size: 1,609 Bytes
a450bc7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import sys
sys.path.append("/home/pstar7/Documents/gradio/src")

from transformers import BertTokenizerFast
from gradio_pdf import PDF
from BertModel import *
from pdf_predict import *
import gradio as gr

# Class index -> entity tag. Indices 0-11 are the B_-prefixed tags, 12-23 the
# matching I_-prefixed tags, and 24 is the 'O' tag.
ids_to_labels = {
    0: 'B_ADVO',
    1: 'B_ARTV',
    2: 'B_CRIA',
    3: 'B_DEFN',
    4: 'B_JUDG',
    5: 'B_JUDP',
    6: 'B_PENA',
    7: 'B_PROS',
    8: 'B_PUNI',
    9: 'B_REGI',
    10: 'B_TIMV',
    11: 'B_VERN',
    12: 'I_ADVO',
    13: 'I_ARTV',
    14: 'I_CRIA',
    15: 'I_DEFN',
    16: 'I_JUDG',
    17: 'I_JUDP',
    18: 'I_PENA',
    19: 'I_PROS',
    20: 'I_PUNI',
    21: 'I_REGI',
    22: 'I_TIMV',
    23: 'I_VERN',
    24: 'O',
}

# Pretrained checkpoint identifiers for the two IndoBERT variants offered in
# the UI.
indolem = 'indolem/indobert-base-uncased'
indonlu = 'indobenchmark/indobert-base-p2'

# One model per checkpoint, each sized to the number of labels above.
model_indolem = BertModel(indolem, len(ids_to_labels))
model_indonlu = BertModel(indonlu, len(ids_to_labels))

# Tokenizers loaded from the same checkpoints as their models.
tokenizer_indolem = BertTokenizerFast.from_pretrained(indolem)
tokenizer_indonlu = BertTokenizerFast.from_pretrained(indonlu)

def predict(doc: str, model: str) -> str:
    """Run entity prediction on an uploaded PDF with the chosen IndoBERT model.

    Args:
        doc: Value delivered by the ``PDF`` input component (annotated as a
            string; presumably a path to the uploaded file -- confirm against
            ``pdf_predict``).
        model: Label picked in the "Model" dropdown. An empty selection
            (``None`` or ``''``) is treated as ``'IndoBERT (IndoLEM)'``, the
            default advertised in the dropdown's info text. Any other label
            that is not the IndoLEM one selects the IndoNLU model.

    Returns:
        The result of ``pdf_predict``, which is shown in the output textbox.
    """
    # Without this, an untouched dropdown would silently pick IndoNLU and
    # forward None as the model name, contradicting the documented default.
    if not model:
        model = 'IndoBERT (IndoLEM)'

    if model == 'IndoBERT (IndoLEM)':
        use_model, use_tokenizer = model_indolem, tokenizer_indolem
    else:
        use_model, use_tokenizer = model_indonlu, tokenizer_indonlu

    return pdf_predict(use_model, use_tokenizer, doc, ids_to_labels, model)

# Gradio UI: a PDF upload plus a model selector, wired to `predict`, with the
# result rendered in a textbox.
iface = gr.Interface(
    fn=predict,
    inputs=[
        PDF(label="Document"),
        gr.Dropdown(
            ['IndoBERT (IndoLEM)', 'IndoBERT (IndoNLU)'],
            # Preselect the model the info text names as the default, so the
            # widget state matches what the user is told.
            value='IndoBERT (IndoLEM)',
            label='Model',
            info='Pilih Model yang ingin digunakan *Default : IndoBERT (IndoLEM)',
        ),
    ],
    outputs="textbox",
    title="Legal NER",
    description="Upload File PDF Putusan Pidana",
    allow_flagging='never',
)

# Start the web server only when executed as a script, not on import.
if __name__ == "__main__":
    iface.launch()