|
import spacy |
|
import pandas as pd |
|
|
|
def inference(
    model: "spacy.language.Language",
    texts: list,
    batch_size: int = 8,
) -> pd.DataFrame:
    """
    Run batched entity extraction over texts and tabulate the results.

    Parameters:
        model: loaded spaCy pipeline; only its ``pipe(texts, batch_size=...)``
            method is used here.
        texts: input texts to run through the pipeline.
        batch_size: number of texts handed to the pipeline per batch.

    Returns:
        data: pandas.DataFrame indexed by ``(text, id)`` with the columns
            ``span``, ``entity``, ``start`` and ``end``. ``id`` is the
            1-based position of the text in ``texts``. Each detected entity
            yields one row; a text without entities yields a single row
            whose four columns are all None. ``start``/``end`` are copied
            from the entity's ``start``/``end`` attributes (for spaCy spans
            these are token indices, not character offsets). Empty
            ``texts`` gives an empty frame with the same index and columns.
    """
    # Explicit column list so that an empty result still has the 'text' and
    # 'id' columns that set_index() needs (otherwise it raises KeyError).
    columns = ['id', 'text', 'span', 'entity', 'start', 'end']

    docs = model.pipe(texts, batch_size=batch_size)

    # One flat list of row dicts; avoids the quadratic sum(list_of_lists, []).
    records = []
    for doc_id, doc in enumerate(docs, start=1):
        if doc.ents:
            records.extend(
                {
                    'id': doc_id,
                    'text': doc.text,
                    'span': entity.text,
                    'entity': entity.label_,
                    'start': entity.start,
                    'end': entity.end,
                }
                for entity in doc.ents
            )
        else:
            # Keep a placeholder row so every input text appears in the output.
            records.append(
                {
                    'id': doc_id,
                    'text': doc.text,
                    'span': None,
                    'entity': None,
                    'start': None,
                    'end': None,
                }
            )

    data = pd.DataFrame(records, columns=columns).set_index(['text', 'id'])

    return data
|
|
|
|
|
|