# NOTE(review): the lines "Spaces:" / "Runtime error" here were stray
# Hugging Face Spaces page-scrape artifacts, not code; converted to a comment
# so the module parses. This file is a Gradio Space app.
# --- Imports: stdlib / third-party / project-local ------------------------
import json
import os

import gradio as gr
import huggingface_hub
from openai import OpenAI
from sentence_transformers import SentenceTransformer
from tqdm import tqdm
from transformers import pipeline

from evaluate_data import store_sample_data, get_metrics_trf
from metrics import calc_metrics

# Authenticate with the Hugging Face Hub.
# Deliberately uses os.environ[...] (not .get) so a missing HF_TOKEN fails fast.
hf_token = os.environ['HF_TOKEN']
huggingface_hub.login(hf_token)

# Transformer-based NER pipeline; aggregation_strategy="first" merges
# sub-word tokens back into whole-word entity spans.
pipe = pipeline(
    "token-classification",
    model="elshehawy/finer-ord-transformers",
    aggregation_strategy="first",
)

# Chat model used for the LLM-based extractor.
# Previously tried: 'gpt-3.5-turbo-0125', 'gpt-4-0125-preview'.
llm_model = 'gpt-3.5-turbo-0301'

client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),
)
def get_completion(prompt, model=llm_model):
    """Send *prompt* as a single user message and return the reply text.

    Temperature is pinned to 0 so repeated calls with the same prompt are
    as deterministic as the API allows.
    """
    chat_response = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model=model,
        temperature=0,
    )
    return chat_response.choices[0].message.content
def find_orgs_gpt(sentence):
    """Extract organization names from *sentence* via the chat model.

    Returns a list of organization-name strings (possibly empty).
    Raises json.JSONDecodeError if the model reply is not valid JSON.
    """
    # Fixed typos in the instruction ("a a json object that containes").
    prompt = f"""
    In context of named entity recognition (NER), find all organizations in the text delimited by triple backticks.

    text:
    ```
    {sentence}
    ```

    Your output should be a json object that contains the extracted organizations.

    Output example 1:
    {{"Organizations": ["Organization 1", "Organization 2", "Organization 3"]}}

    Output example 2:
    {{"Organizations": []}}
    """

    sent_orgs_str = get_completion(prompt)
    sent_orgs = json.loads(sent_orgs_str)

    # .get guards against a well-formed JSON reply that omits the key,
    # which previously raised KeyError.
    return sent_orgs.get('Organizations', [])
# Sample sentence for manually exercising the extractors.
# NOTE(review): not referenced anywhere in this chunk — confirm it is used
# elsewhere before removing.
example = """
My latest exclusive for The Hill : Conservative frustration over Republican efforts to force a House vote on reauthorizing the Export - Import Bank boiled over Wednesday during a contentious GOP meeting.
"""
def find_orgs(uploaded_file):
    """Gradio handler: score both extractors on an uploaded JSON dataset.

    uploaded_file: raw bytes of the uploaded file (UploadButton type='binary');
        must decode to JSON understood by store_sample_data/get_metrics_trf.
    Returns a dict with 'gpt' (LLM extractor) and 'trf' (transformer pipeline)
    metric entries.
    """
    # Removed leftover debug prints of the input type and partial results.
    uploaded_data = json.loads(uploaded_file)

    sample_data = store_sample_data(uploaded_data)

    gpt_orgs, true_orgs = [], []
    for sent in tqdm(sample_data):
        gpt_orgs.append(find_orgs_gpt(sent['text']))
        true_orgs.append(sent['orgs'])

    # Embedding model used by calc_metrics for fuzzy matching of predicted
    # vs. gold organization names (cosine-similarity threshold 0.85).
    sim_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

    all_metrics = {
        'gpt': calc_metrics(true_orgs, gpt_orgs, sim_model, threshold=0.85),
        'trf': get_metrics_trf(uploaded_data),
    }
    return all_metrics
# Gradio UI: one upload button feeding find_orgs; the returned metrics dict
# is rendered as text. type='binary' hands find_orgs the raw file bytes.
upload_btn = gr.UploadButton(label='Upload a json file.', type='binary')
iface = gr.Interface(fn=find_orgs, inputs=upload_btn, outputs="text")
# share=True exposes a public gradio.live URL in addition to the local server.
iface.launch(share=True)