Spaces:
Sleeping
Sleeping
File size: 5,517 Bytes
9356181 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 |
import pandas as pd
import gradio as gr
import hashlib, base64
import openai
# querying OpenAI for generation
from openAI_manager import initOpenAI, examples_to_prompt, genChatGPT, generateTestSentences
# bias testing manager
import mgr_bias_scoring as bt_mgr
import mgr_sentences as smgr
# error messages
from error_messages import *
# hashing
def getHashForString(text):
    """Return a URL-safe base64 encoding of the MD5 digest of `text`.

    Used to build short, filesystem/URL-safe identifiers from arbitrary
    specification strings.
    """
    digest = hashlib.md5(text.encode('utf-8')).digest()
    encoded = base64.urlsafe_b64encode(digest)
    return encoded.decode('utf-8')
def getBiasName(gr1_lst, gr2_lst, att1_lst, att2_lst):
    """Build a short, unique name for a bias specification.

    Combines the first term of each group/attribute list (spaces replaced
    with dashes for safety) with a hash of the full concatenated term lists,
    so distinct specifications get distinct names.

    Args:
        gr1_lst, gr2_lst: non-empty lists of group terms.
        att1_lst, att2_lst: non-empty lists of attribute terms.

    Returns:
        str: "<g1>_<g2>__<a1>_<a2>_<hash>" identifier.
    """
    full_spec = ''.join(gr1_lst) + ''.join(gr2_lst) + ''.join(att1_lst) + ''.join(att2_lst)
    # renamed from `hash` to avoid shadowing the builtin
    spec_hash = getHashForString(full_spec)
    bias_name = f"{gr1_lst[0].replace(' ','-')}_{gr2_lst[0].replace(' ','-')}__{att1_lst[0].replace(' ','-')}_{att2_lst[0].replace(' ','-')}_{spec_hash}"
    return bias_name
def _generateOnline(bias_spec, progress, key, isSaving=False):
    """Generate bias-test sentences via the OpenAI API.

    Initializes the OpenAI client with `key`, generates one smoke-test
    sentence, then generates test sentences for every group/attribute term
    pair in `bias_spec`. Optionally prepares (and, when enabled, saves) the
    generations as a DataFrame of sentence templates/pairs.

    Args:
        bias_spec: bias specification dict understood by `bt_mgr.get_words`.
        progress: Gradio progress callback forwarded to the generator.
        key: OpenAI API key.
        isSaving: when True, log the prepared save DataFrame (the actual
            upload call is currently commented out).

    Returns:
        list[list[str]]: rows of [sentence, group_term, attribute_term].

    Raises:
        gr.Error: if OpenAI authentication fails.
    """
    # Initiate with key
    try:
        models = initOpenAI(key)
        model_names = [m['id'] for m in models['data']]
        print(f"Model names: {model_names}")
    except openai.error.AuthenticationError as err:
        # chain the cause so the original auth failure stays in the traceback
        raise gr.Error(OPENAI_INIT_ERROR.replace("<ERR>", str(err))) from err

    if "gpt-3.5-turbo" in model_names:
        print("Access to ChatGPT")
    if "gpt-4" in model_names:
        print("Access to GPT-4")

    # NOTE(review): hard-coded regardless of the availability checks above
    model_name = "gpt-3.5-turbo"

    # Generate one example (smoke test of the generation pipeline)
    gen = genChatGPT(model_name, ["man","math"], 2, 5,
                     [{"Keywords": ["sky","blue"], "Sentence": "the sky is blue"}
                      ],
                     temperature=0.8)
    print(f"Test gen: {gen}")

    # Generate all test sentences
    print(f"Bias spec dict: {bias_spec}")

    g1, g2, a1, a2 = bt_mgr.get_words(bias_spec)
    gens = generateTestSentences(model_name, g1 + g2, a1 + a2, progress)
    print("--GENS--")
    print(gens)

    # reorder generator tuples (group, attribute, sentence) into rows of
    # [sentence, group, attribute]
    test_sentences = [[s, gt, at] for gt, at, s in gens]

    # save the generations immediately
    print("Saving generations to HF DF...")
    save_df = pd.DataFrame(test_sentences, columns=["Test sentence", 'Group term', "Attribute term"])

    ## make the templates to save
    # 1. bias specification
    print(f"Bias spec dict: {bias_spec}")

    # 2. convert to templates
    save_df['Template'] = save_df.apply(bt_mgr.sentence_to_template, axis=1)
    print(f"Data with template: {save_df}")

    # 3. convert to pairs
    test_pairs_df = bt_mgr.convert2pairs(bias_spec, save_df)
    print(f"Test pairs cols: {list(test_pairs_df.columns)}")

    bias_name = getBiasName(g1, g2, a1, a2)

    save_df = save_df.rename(columns={'Group term': 'org_grp_term',
                                      "Attribute term": 'att_term',
                                      "Test sentence": 'sentence',
                                      "Template": "template"})

    save_df['grp_term1'] = test_pairs_df['att_term_1']
    save_df['grp_term2'] = test_pairs_df['att_term_2']
    save_df['label_1'] = test_pairs_df['label_1']
    save_df['label_2'] = test_pairs_df['label_2']
    save_df['bias_spec'] = bias_name
    save_df['type'] = 'tool'
    save_df['gen_model'] = model_name

    if isSaving:  # idiomatic truthiness test instead of `== True`
        print(f"Save cols: {list(save_df.columns)}")
        print(f"Save: {save_df.head(1)}")
        #smgr.saveSentences(save_df) #[["Group term","Attribute term","Test sentence"]])

    num_sentences = len(test_sentences)
    print(f"Returned num sentences: {num_sentences}")
    return test_sentences
def _getSavedSentences(bias_spec, progress, use_paper_sentences):
    """Retrieve previously saved test sentences matching a bias specification.

    For each group term in `bias_spec`, fetches saved sentences via
    `smgr.getSavedSentences`, filters them by source/model, and keeps only
    rows whose attribute term matches one of the spec's attribute terms
    (including dashed and no-space variants of multi-word terms).

    Args:
        bias_spec: bias specification dict understood by `bt_mgr.get_words`.
        progress: Gradio progress callback (fraction, desc=...).
        use_paper_sentences: when True, restrict to sentences of type
            'paper' from the allowed generator models; otherwise allow any
            type but still restrict to the allowed generator models.

    Returns:
        list[list[str]]: rows of [sentence, group_term, attribute_term].
    """
    test_sentences = []

    print(f"Bias spec dict: {bias_spec}")

    g1, g2, a1, a2 = bt_mgr.get_words(bias_spec)
    grp_terms = g1 + g2

    # The attribute-matching set is identical for every group term, so build
    # it once outside the loop (was rebuilt per iteration).
    # match "-" and no space variants of multi-word attribute terms
    att_list = a1 + a2
    att_list.extend([t.replace(' ', '-') for t in att_list])
    att_list.extend([t.replace(' ', '') for t in att_list])
    att_list = list(set(att_list))

    # only take from paper & gpt3.5
    flt_gen_models = ["gpt-3.5", "gpt-3.5-turbo"]

    for gi, g_term in enumerate(grp_terms):
        progress(gi / len(grp_terms), desc=f"{g_term}")

        _, sentence_df, _ = smgr.getSavedSentences(g_term)

        print(f"Before filter: {sentence_df.shape[0]}")
        if use_paper_sentences:
            if 'type' in list(sentence_df.columns):
                sentence_df = sentence_df.query("type=='paper' and gen_model in @flt_gen_models")
                print(f"After filter: {sentence_df.shape[0]}")
        else:
            if 'type' in list(sentence_df.columns):
                # only use GPT-3.5 generations for now - todo: add settings option for this
                sentence_df = sentence_df.query("gen_model in @flt_gen_models")
                print(f"After filter: {sentence_df.shape[0]}")

        if sentence_df.shape[0] > 0:
            sentence_df = sentence_df[['org_grp_term', 'att_term', 'sentence']]
            sentence_df = sentence_df.rename(columns={'org_grp_term': "Group term",
                                                      "att_term": "Attribute term",
                                                      "sentence": "Test sentence"})

            sel = sentence_df[sentence_df['Attribute term'].isin(att_list)].values
            # iterating an empty selection is a no-op, so no length guard needed
            for gt, at, s in sel:
                test_sentences.append([s, gt, at])
        else:
            print("Test sentences empty!")
            #raise gr.Error(NO_SENTENCES_ERROR)

    return test_sentences
|