Nikhil Singh committed on
Commit
1afbc3a
·
1 Parent(s): 9fe2871
Files changed (1) hide show
  1. app.py +13 -1
app.py CHANGED
@@ -4,6 +4,7 @@ from mailparser import parse_from_file
4
  from bs4 import BeautifulSoup
5
  from gliner import GLiNER
6
  from typing import Dict, Union, List
 
7
 
8
  import spacy
9
  import re
@@ -11,6 +12,9 @@ import os
11
  import en_core_web_sm
12
  nlp = en_core_web_sm.load()
13
 
 
 
 
14
  _MODEL = {}
15
  _CACHE_DIR = os.environ.get("CACHE_DIR", None)
16
 
@@ -58,6 +62,13 @@ def parse_query(sentences: List[str], labels: List[str], threshold: float = 0.3,
58
 
59
  return results
60
 
 
 
 
 
 
 
 
61
  def present(email_file, labels, multilingual=False):
62
  email = accept_mail(email_file)
63
  cleaned_text = clean_email(email)
@@ -96,7 +107,8 @@ demo = gr.Interface(
96
  gr.components.Textbox(label="From"),
97
  gr.components.Textbox(label="To"),
98
  gr.components.Textbox(label="Date"),
99
- gr.components.Dataframe(headers=["Text", "Label"], label="Extracted Entities")
 
100
  ],
101
  title="Email Info Extractor",
102
  description="Upload an email file (.eml) to extract its details and detected entities."
 
4
  from bs4 import BeautifulSoup
5
  from gliner import GLiNER
6
  from typing import Dict, Union, List
7
+ from transformers import T5Tokenizer, T5ForConditionalGeneration
8
 
9
  import spacy
10
  import re
 
12
  import en_core_web_sm
13
  nlp = en_core_web_sm.load()
14
 
15
+ t5_tokenizer = T5Tokenizer.from_pretrained("t5-small")
16
+ t5_model = T5ForConditionalGeneration.from_pretrained("t5-small")
17
+
18
  _MODEL = {}
19
  _CACHE_DIR = os.environ.get("CACHE_DIR", None)
20
 
 
62
 
63
  return results
64
 
65
def refine_entities_with_t5(entities: List[dict]) -> str:
    """Run the extracted entities through the T5 model and return its output.

    Each entity is rendered as ``"<text> as <label>"``; the pieces are joined
    with ``" ; "`` behind a ``"refine entities: "`` prefix, and the resulting
    prompt is passed through the module-level ``t5_tokenizer`` and
    ``t5_model``.

    Args:
        entities: Entity mappings, each providing at least the ``"text"`` and
            ``"label"`` keys.

    Returns:
        The first generated sequence decoded with special tokens stripped, or
        an empty string when there are no entities to refine.

    Raises:
        KeyError: If an entity mapping lacks ``"text"`` or ``"label"``.
    """
    # Nothing to refine: skip the model call instead of generating text from
    # a bare "refine entities: " prompt.
    if not entities:
        return ""

    prompt = "refine entities: " + " ; ".join(
        f"{entity['text']} as {entity['label']}" for entity in entities
    )
    # Truncate to the tokenizer's configured maximum length so an email with
    # many entities cannot produce an over-long input sequence.
    input_ids = t5_tokenizer.encode(
        prompt,
        return_tensors="pt",
        add_special_tokens=True,
        truncation=True,
    )
    # NOTE(review): generate() runs with its default generation settings, so
    # the output length is whatever the model's generation config allows.
    outputs = t5_model.generate(input_ids)
    return t5_tokenizer.decode(outputs[0], skip_special_tokens=True)
71
+
72
  def present(email_file, labels, multilingual=False):
73
  email = accept_mail(email_file)
74
  cleaned_text = clean_email(email)
 
107
  gr.components.Textbox(label="From"),
108
  gr.components.Textbox(label="To"),
109
  gr.components.Textbox(label="Date"),
110
+ gr.components.Dataframe(headers=["Text", "Label"], label="Extracted Entities"),
111
+ gr.components.Textbox(label="Refined Entities")
112
  ],
113
  title="Email Info Extractor",
114
  description="Upload an email file (.eml) to extract its details and detected entities."