Jordan committed · Commit 10f417b · Parent(s): e16fb05
Unbias - Version one push

Files changed:
- app.py +27 -4
- bias_check.py +47 -0
- combine_modules.py +17 -0
- img2txt.py +19 -0
- interpret_model_pt.py +8 -0
- load_model_pt.py +15 -0
- video2txt.py +22 -0
app.py
CHANGED
@@ -1,7 +1,30 @@
+import combine_modules
 import gradio as gr
 
-
-
+with gr.Blocks() as demo:
+    gr.Markdown("Welcome to Bias Checking Demo !")
+    with gr.Tab("Text"):
+        text_input = gr.Textbox()
+        text_output = gr.Textbox()
+        text_output_interpret = gr.TextArea()
+        text_button = gr.Button("Check Bias in your Text")
+
+    with gr.Tab("Video"):
+        vid_input = gr.Video()
+        vid_output = gr.Textbox()
+        vid_output_interpret = gr.TextArea()
+        vid_button = gr.Button("Check Bias in your Video")
 
-
-
+    with gr.Tab("Image"):
+        img_input = gr.Image()
+        img_output = gr.Textbox()
+        img_output_interpret = gr.TextArea()
+        img_button = gr.Button("Check Bias in your Image")
+
+    text_button.click(combine_modules.app_nlp_start, inputs=text_input, outputs=[text_output, text_output_interpret])
+    vid_button.click(combine_modules.app_video_start, inputs=vid_input, outputs=[vid_output, vid_output_interpret])
+    img_button.click(combine_modules.app_image_start, inputs=img_input, outputs=[img_output, img_output_interpret])
+
+
+if __name__ == "__main__":
+    demo.launch()
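Each click handler above passes one tab's input component to a function in combine_modules and expects a (result, interpretation) pair back, which Gradio spreads over the Textbox and TextArea outputs. A minimal sketch of exercising the text path outside the UI (the sample sentence is hypothetical, and the modules from this commit are assumed to be on the import path):

import combine_modules

# Hypothetical input; any sentence works.
statement = "The senator's reckless plan will obviously ruin the economy."

result, interpretation = combine_modules.app_nlp_start(statement)
print(result)          # what the "Text" tab shows in text_output
print(interpretation)  # what it shows in text_output_interpret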
bias_check.py
ADDED
@@ -0,0 +1,47 @@
+import load_model_pt
+import interpret_model_pt
+
+
+def sub_pipeline(raw_input, pretrained_model):
+    tokenizer, model = load_model_pt.load_models_from_pretrained(pretrained_model)
+    output_ = load_model_pt.load_pipeline(raw_input, pretrained_model)
+    words_weightages = interpret_model_pt.explainer(raw_input, model, tokenizer)
+    return output_, words_weightages
+
+def bias_checker(input_statement):
+    pretrained_model_basic_check = "valurank/distilroberta-bias"
+    pretrained_model_political = "valurank/distilroberta-mbfc-bias"
+    pretrained_model_gender = "monologg/koelectra-base-v3-gender-bias"
+
+    raw_input = input_statement
+    # print("Checking if the input has any primary bias ?..")
+    output_stmt_zero, words_interpreted = sub_pipeline(raw_input, pretrained_model_basic_check)
+    print(output_stmt_zero)
+    return_var = " "
+    interpret_var = " "
+
+    if (output_stmt_zero["label"] == "BIASED" and output_stmt_zero["score"] >= 0.7) or (output_stmt_zero["label"] == "NEUTRAL" and output_stmt_zero["score"] < 0.6):
+        # print(output_stmt_zero)
+        # print("\n The statement seems biased, lets investigate ! \n")
+        # print(words_interpreted)
+        # print("\n Checking for political propaganda... \n")
+        output_stmt_political, words_interpreted_political = sub_pipeline(raw_input, pretrained_model_political)
+        # print(output_stmt_political, "\n")
+        # print(words_interpreted_political, "\n")
+        # print("\n Let's check for gender bias, shall we ? \n")
+        output_stmt_gender, words_interpreted_gender = sub_pipeline(raw_input, pretrained_model_gender)
+        # print(output_stmt_gender, "\n")
+        # print(words_interpreted_gender, "\n")
+        return_var = ("Generic:", output_stmt_zero, "\n", "Gender:", output_stmt_gender, "\n", "Political:", output_stmt_political)
+        interpret_var = ("Generic:", words_interpreted, "\n", "Gender:", words_interpreted_gender, "\n", "Political:", words_interpreted_political)
+    else:
+        # print("The statement seems ok as of now, please input another statement!")
+        return_var = "The statement seems ok as of now, please input another statement!"
+        interpret_var = " "
+
+    return return_var, interpret_var
+
+
+if __name__ == "__main__":
+    input_stmt = "Nevertheless, Trump and other Republicans have tarred the protests as havens for terrorists intent on destroying property."
+    bias_checker(input_stmt)
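bias_checker branches on the dict that load_model_pt.load_pipeline returns, i.e. the first element of a transformers text-classification result with a label and a score. A small sketch of that gate with hypothetical scores (the 0.7 and 0.6 thresholds are the ones used above):

def looks_biased(out):
    # Same condition as in bias_checker: a confident BIASED call, or a
    # low-confidence NEUTRAL call, triggers the political and gender checks.
    return (out["label"] == "BIASED" and out["score"] >= 0.7) or \
           (out["label"] == "NEUTRAL" and out["score"] < 0.6)

print(looks_biased({"label": "BIASED", "score": 0.91}))   # True  -> deeper checks run
print(looks_biased({"label": "NEUTRAL", "score": 0.55}))  # True  -> deeper checks run
print(looks_biased({"label": "NEUTRAL", "score": 0.95}))  # False -> "The statement seems ok ..."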
combine_modules.py
ADDED
@@ -0,0 +1,17 @@
+from bias_check import bias_checker
+from video2txt import read_video_file_and_return_text as rtxt
+from img2txt import load_image_pipeline as img_pipe
+
+def app_nlp_start(statement):
+    output_txt, interpreted_txt = bias_checker(statement)
+    return output_txt, interpreted_txt
+
+def app_video_start(video_path):
+    return_text = rtxt(video_path)
+    output_txt = bias_checker(return_text)
+    return output_txt
+
+def app_image_start(image_path):
+    text_generated = img_pipe(image_path)
+    output_txt = bias_checker(text_generated)
+    return output_txt
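app_video_start and app_image_start return bias_checker's (result, interpretation) tuple as a single value; Gradio unpacks it across the two output components wired up in app.py, so all three tabs behave the same way. A sketch of the video path called directly (the clip name is hypothetical):

from combine_modules import app_video_start

out = app_video_start("sample_clip.mp4")  # hypothetical local video file
result, interpretation = out              # the 2-tuple produced by bias_checker
print(result)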
img2txt.py
ADDED
@@ -0,0 +1,19 @@
+from transformers import pipeline
+from PIL import Image
+import os
+pretrained_img_model = "nlpconnect/vit-gpt2-image-captioning"
+
+
+def load_image_pipeline(img_path):
+    img_path_read = Image.fromarray(img_path)
+    img_path_read.save("temp_img.jpg")
+    image_to_text = pipeline("image-to-text", model=pretrained_img_model, framework="pt")
+    generated_text = image_to_text("temp_img.jpg")[0]["generated_text"]
+    os.remove("temp_img.jpg")
+    return generated_text
+
+
+if __name__ == "__main__":
+    imgpath = r"C:\Users\Shringar\Pictures\ar.jpg"
+    img_text_generated = load_image_pipeline(imgpath)
+    print(img_text_generated)
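Despite the img_path name, load_image_pipeline expects an image array rather than a file path: it calls Image.fromarray on its argument, which matches the numpy array that gr.Image() hands to the click handler. The __main__ demo above would therefore need to load its file into an array first; a hedged sketch (the image path is hypothetical):

import numpy as np
from PIL import Image
from img2txt import load_image_pipeline

# Load a local file into the array form the function expects.
frame = np.asarray(Image.open("example.jpg"))  # hypothetical image file
caption = load_image_pipeline(frame)
print(caption)  # short caption generated by the ViT-GPT2 model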
interpret_model_pt.py
ADDED
@@ -0,0 +1,8 @@
+from transformers_interpret import SequenceClassificationExplainer, MultiLabelClassificationExplainer
+
+def explainer(input_statement, model, tokenizer):
+    # cls_explainer = SequenceClassificationExplainer(model, tokenizer)
+    cls_explainer = MultiLabelClassificationExplainer(model, tokenizer)
+    word_attributions = cls_explainer(input_statement)
+    return dict(word_attributions)
+
load_model_pt.py
ADDED
@@ -0,0 +1,15 @@
+from transformers import pipeline
+from transformers import AutoTokenizer
+from transformers import AutoModelForSequenceClassification
+
+
+def load_pipeline(input_statement, pretrained_model_name):
+    classifier_ = pipeline("text-classification", model=pretrained_model_name, framework="pt")
+    cls_output = classifier_(input_statement)[0]
+    return cls_output
+
+def load_models_from_pretrained(checkpoint):
+    checkpoint_local = checkpoint
+    tokenizer = AutoTokenizer.from_pretrained(checkpoint_local)
+    model = AutoModelForSequenceClassification.from_pretrained(checkpoint_local)
+    return tokenizer, model
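Note that sub_pipeline in bias_check.py ends up loading each checkpoint twice per call: once via load_models_from_pretrained for the explainer, and again inside load_pipeline, which re-resolves the checkpoint name. One possible way to reuse the already-loaded objects (a sketch, not part of this commit):

from transformers import pipeline

def load_pipeline_from_objects(input_statement, model, tokenizer):
    # pipeline() also accepts an already-instantiated model and tokenizer,
    # avoiding a second load of the same checkpoint.
    classifier_ = pipeline("text-classification", model=model, tokenizer=tokenizer, framework="pt")
    return classifier_(input_statement)[0]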
video2txt.py
ADDED
@@ -0,0 +1,22 @@
+import speech_recognition as sr
+import moviepy.editor as mp
+import os
+
+
+def convert_video_to_audio(file_path):
+    vid_clip = mp.VideoFileClip(file_path)
+    vid_clip.audio.write_audiofile("temp_converted_mp3.wav")
+
+def convert_audio_to_text():
+    recogniser = sr.Recognizer()
+    audio_file = sr.AudioFile(r"temp_converted_mp3.wav")
+    with audio_file as source:
+        audio_source = recogniser.record(source)
+    audio_result = recogniser.recognize_google(audio_source)
+    os.remove("temp_converted_mp3.wav")
+    return audio_result
+
+def read_video_file_and_return_text(filepath=r"C:\Users\Shringar\Documents\Python Scripts\hface\course\emma_1.mp4"):
+    convert_video_to_audio(filepath)
+    converted_text = convert_audio_to_text()
+    return converted_text
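convert_audio_to_text uses recognize_google, which sends the extracted audio to Google's Web Speech API, so the app needs outbound network access at runtime; the default filepath also points to a local Windows path from the author's machine rather than anything present in the Space. A minimal usage sketch with an explicit path (the clip name is hypothetical):

from video2txt import read_video_file_and_return_text

text = read_video_file_and_return_text("sample_clip.mp4")  # hypothetical local clip
print(text)  # transcript that bias_checker then scores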