import torch
from transformers import RobertaForMaskedLM, RobertaTokenizer
import gradio as gr

# Load the pretrained RoBERTa masked-language model and its tokenizer
model = RobertaForMaskedLM.from_pretrained('roberta-base')
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')


def sentence_perplexity(sentence):
    # Tokenize the sentence and run it through the model without gradients
    tokenized_sentence = tokenizer(sentence, return_tensors='pt')
    input_ids = tokenized_sentence['input_ids']
    with torch.no_grad():
        output = model(input_ids)
    logits = output.logits

    # Probability the model assigns to each actual token given the full,
    # unmasked context -- a fast approximation rather than a true masked
    # pseudo-perplexity
    probabilities = torch.softmax(logits, dim=-1)
    true_token_probabilities = torch.gather(
        probabilities, 2, input_ids.unsqueeze(-1)
    ).squeeze(-1)
    log_probs = torch.log(true_token_probabilities)

    # Ignore RoBERTa's special <s> and </s> tokens at the start and end
    log_probs = log_probs[:, 1:-1]

    # Perplexity is the exponential of the mean negative log-probability
    perplexity = torch.exp(-log_probs.mean()).item()
    return perplexity


def weird_score(sentence):
    perplexity = sentence_perplexity(sentence)
    # Map perplexity (>= 1) onto a 0-100% "weirdness" scale
    score = (perplexity - 1) / (perplexity + 1) * 100
    return f"Weird Score: {score:.2f}%"


# Example usage
sentence = "This is a normal sentence."
print(weird_score(sentence))

sentence = "Giraffes are known to be fluent in six languages."
print(weird_score(sentence))

iface = gr.Interface(
    fn=weird_score,
    inputs=gr.Textbox(lines=2, placeholder="Enter a sentence..."),  # gr.inputs.* was removed in Gradio 3+
    outputs="text",
    title="RoBERTa Weird Score Calculator",
    description="This app calculates the weird score percentage of a sentence using RoBERTa."
)
iface.launch()
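
# A quick sanity check on the normalization in weird_score(): the mapping
# (perplexity - 1) / (perplexity + 1) * 100 sends a perplexity of 1 to 0%
# and approaches 100% as perplexity grows, so more surprising sentences
# score as "weirder". A minimal sketch of that curve, exercising only the
# formula (the sample perplexity values below are illustrative assumptions):
#
#   for ppl in (1.0, 2.0, 5.0, 20.0, 100.0):
#       score = (ppl - 1) / (ppl + 1) * 100
#       print(f"perplexity={ppl:6.1f} -> weird score {score:5.1f}%")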