elshehawy's picture
remove json option from model gpt 3.5
76268a8
from metrics import calc_metrics
import gradio as gr
from openai import OpenAI
import os
from transformers import pipeline
# from dotenv import load_dotenv, find_dotenv
import huggingface_hub
import json
from evaluate_data import store_sample_data, get_metrics_trf
from sentence_transformers import SentenceTransformer
from tqdm import tqdm
# Hugging Face access token. Indexing os.environ directly means a missing
# HF_TOKEN raises KeyError as soon as this module is imported.
hf_token= os.environ['HF_TOKEN']
# Log in to the Hugging Face Hub with that token before the model below is loaded.
huggingface_hub.login(hf_token)
# Token-classification pipeline for the fine-tuned NER model.
# NOTE(review): `pipe` is not referenced anywhere else in the visible code --
# confirm whether it is still needed here.
pipe = pipeline("token-classification", model="elshehawy/finer-ord-transformers", aggregation_strategy="first")
# Default chat model for get_completion(); the commented-out assignments are
# alternative model names kept for reference.
# llm_model = 'gpt-3.5-turbo-0125'
# llm_model = 'gpt-4-0125-preview'
llm_model = 'gpt-3.5-turbo-0301'
# openai.api_key = os.environ['OPENAI_API_KEY']
# OpenAI client shared by every request. os.environ.get() yields None when
# OPENAI_API_KEY is unset, so a missing key is not detected on this line.
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),
)
def get_completion(prompt, model=llm_model):
    """Send ``prompt`` to the chat model as one user message and return the reply.

    Args:
        prompt: Text placed in the single ``user`` message of the request.
        model: Name of the chat model to query; defaults to the module-level
            ``llm_model``.

    Returns:
        The ``content`` of the first choice in the completion response.

    The request uses ``temperature=0``. No ``response_format`` is passed, so
    the reply is plain text rather than guaranteed JSON.
    """
    completion = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
def _parse_orgs_reply(reply):
    """Extract the ``Organizations`` list from a raw model reply.

    The request does not ask for JSON mode, so the reply may be wrapped in
    markdown fences or surrounded by prose. Only the outermost ``{...}`` span
    is parsed. Any reply that cannot be turned into a list is reported with
    ``print`` and treated as "no organizations found", so one malformed
    answer does not abort a whole evaluation run.

    Args:
        reply: Text returned by the model (may be ``None`` or empty).

    Returns:
        The list stored under ``"Organizations"``, or ``[]`` when the reply is
        empty, is not valid JSON, or lacks a list under that key.
    """
    if not reply:
        print('find_orgs_gpt: empty reply from the model')
        return []

    # Keep only the outermost JSON object; fall back to the raw text when no
    # brace pair is present so json.loads reports the failure below.
    start, end = reply.find('{'), reply.rfind('}')
    candidate = reply[start:end + 1] if 0 <= start < end else reply

    try:
        parsed = json.loads(candidate)
    except json.JSONDecodeError:
        print(f'find_orgs_gpt: reply is not valid JSON: {reply[:200]!r}')
        return []

    orgs = parsed.get('Organizations') if isinstance(parsed, dict) else None
    if not isinstance(orgs, list):
        print(f'find_orgs_gpt: no "Organizations" list in reply: {reply[:200]!r}')
        return []
    return orgs


def find_orgs_gpt(sentence):
    """Ask the chat model for the organizations mentioned in ``sentence``.

    Args:
        sentence: Text to scan; it is embedded in the prompt between triple
            backticks.

    Returns:
        The list of organization names reported by the model, or ``[]`` when
        the reply cannot be parsed (see ``_parse_orgs_reply``).
    """
    # The prompt lines are flush-left on purpose: they are part of the string
    # sent to the model, so indenting them would change the request.
    prompt = f"""
In context of named entity recognition (NER), find all organizations in the text delimited by triple backticks.
text:
```
{sentence}
```
Your output should be a a json object that containes the extracted organizations.
Output example 1:
{{\"Organizations\": [\"Organization 1\", \"Organization 2\", \"Organization 3\"]}}
Output example 2:
{{\"Organizations\": []}}
"""
    sent_orgs_str = get_completion(prompt)
    return _parse_orgs_reply(sent_orgs_str)
# Sample input sentence. It is not referenced by any other code visible in
# this file.
example = """
My latest exclusive for The Hill : Conservative frustration over Republican efforts to force a House vote on reauthorizing the Export - Import Bank boiled over Wednesday during a contentious GOP meeting.
"""
def find_orgs(uploaded_file):
    """Score GPT and the transformer model on an uploaded JSON dataset.

    Args:
        uploaded_file: JSON document accepted by ``json.loads`` (the upload
            button is configured with ``type='binary'``).

    Returns:
        A dict with two entries: ``'gpt'`` holds the result of
        ``calc_metrics`` for the GPT predictions, and ``'trf'`` holds the
        result of ``get_metrics_trf`` for the uploaded data.
    """
    print(type(uploaded_file))
    uploaded_data = json.loads(uploaded_file)
    sample_data = store_sample_data(uploaded_data)

    # One GPT request per sample, paired with that sample's reference
    # organizations so both lists stay aligned.
    pairs = [
        (find_orgs_gpt(sample['text']), sample['orgs'])
        for sample in tqdm(sample_data)
    ]
    gpt_orgs = [predicted for predicted, _ in pairs]
    true_orgs = [reference for _, reference in pairs]

    sim_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
    all_metrics = {
        'gpt': calc_metrics(true_orgs, gpt_orgs, sim_model, threshold=0.85),
    }
    print(all_metrics)

    all_metrics['trf'] = get_metrics_trf(uploaded_data)
    print(all_metrics)
    return all_metrics
# Upload control for the dataset. NOTE(review): with type='binary' the callback
# presumably receives the raw file bytes, which find_orgs passes straight to
# json.loads -- confirm against the installed Gradio version.
upload_btn = gr.UploadButton(label='Upload a json file.', type='binary')
# Wire the upload to find_orgs and show the returned metrics as text.
iface = gr.Interface(fn=find_orgs, inputs=upload_btn, outputs="text")
# Runs at import time: starts the app and requests a public share link.
iface.launch(share=True)