|
--- |
|
license: mit |
|
datasets: |
|
- ZachW/GPT-BookSum |
|
language: |
|
- en |
|
metrics: |
|
- accuracy |
|
base_model: |
|
- FacebookAI/roberta-base |
|
pipeline_tag: zero-shot-classification |
|
tags: |
|
- pacing |
|
- concreteness |
|
- text-evalutaion |
|
--- |
|
# Pacing-Judge |
|
|
|
[\[project page\]](https://github.com/YichenZW/Pacing) |
|
|
|
## Overview |
|
|
|
This is the **concreteness evaluator** developed in the paper [Improving Pacing in Long-Form Story Planning](https://arxiv.org/abs/2311.04459) (EMNLP 2023). |
|
|
|
## Quick Start |
|
|
|
A simple usage: Input a pair of texts (text_ex_1, text_ex_2) with \<sep\> as the separator to the model. The output is whether the first or the second is more concrete. |
|
|
|
```python |
|
import torch.nn.functional as F |
|
from transformers import AutoModelForSequenceClassification, AutoTokenizer |
|
|
|
model_name = "ZachW/pacing-judge" |
|
model = AutoModelForSequenceClassification.from_pretrained(model_name) |
|
tokenizer = AutoTokenizer.from_pretrained(model_name) |
|
text_ex_1 = "The Duke then focused on securing his power and looking to future threats. The Duke eventually turned his attention to acquiring Tuscany but struggled." |
|
text_ex_2 = "Lord Bacon mentioned his book \"The History of Henry VII,\" in the conversation noting that King Charles had conquered Naples without resistance, implying that the conquest was like a dream." |
|
inputs = tokenizer(text_ex_1 + " <sep> " + text_ex_2, return_tensors="pt") |
|
outputs = model(**inputs) |
|
output = int(F.softmax(outputs.logits, dim=1)[:, 0].squeeze(-1).detach().cpu().numpy() > 0.5) |
|
print(f"Output Binary = {output}") |
|
if output: |
|
print("The second text is more concrete.") |
|
else: |
|
print("The first text is more concrete.") |
|
``` |
|
|
|
## Usage |
|
|
|
We have designed this Ranker, which enables fair pairwise comparison (independent of sequence order) and ranking among candidates. We **recommend** using our model via the Ranker. |
|
|
|
```python |
|
import torch.nn.functional as F |
|
from transformers import AutoModelForSequenceClassification, AutoTokenizer |
|
|
|
class Ranker: |
|
def __init__(self): |
|
print(f"*** Loading Model from Huggingface ***") |
|
model_name = "ZachW/pacing-judge" |
|
self.model = AutoModelForSequenceClassification.from_pretrained(model_name) |
|
self.tokenizer = AutoTokenizer.from_pretrained(model_name) |
|
|
|
def compare(self, t1, t2): |
|
text_pair = [t1 + ' <sep> ' + t2, t2 + ' <sep> ' + t1] |
|
pair_dataset = self.tokenizer(text_pair, padding=True, truncation=True, return_tensors="pt") |
|
score = self.run_model(pair_dataset) |
|
if score < 0.5: |
|
return 0 # first is more concrete |
|
else: |
|
return 1 # second is more concrete |
|
|
|
def compare_logits(self, t1, t2): |
|
text_pair = [t1 + ' <sep> ' + t2, t2 + ' <sep> ' + t1] |
|
pair_dataset = self.tokenizer(text_pair, padding=True, truncation=True, return_tensors="pt") |
|
score = self.run_model(pair_dataset) |
|
return score |
|
|
|
def run_model(self, dataset): |
|
outputs = self.model(**dataset) |
|
scores = F.softmax(outputs.logits, dim=1)[:, 0].squeeze(-1).detach().cpu().numpy() |
|
aver_score = (scores[0] + (1 - scores[1]))/2 |
|
return aver_score |
|
|
|
def rank(self, texts_list): # input a list of texts |
|
def quicksort(arr): |
|
if len(arr) <= 1: |
|
return arr |
|
else: |
|
pivot = arr[0] |
|
less = [] |
|
greater = [] |
|
for t in arr[1:]: |
|
cmp = self.compare(pivot, t) |
|
if cmp == 0: |
|
less.append(t) |
|
elif cmp == 1: |
|
greater.append(t) |
|
return quicksort(greater) + [pivot] + quicksort(less) |
|
return quicksort(texts_list) |
|
# most concrete -> lest concrete |
|
|
|
def rank_idx(self, texts_list): # input a list of texts |
|
def quicksort(arr): |
|
if len(arr) <= 1: |
|
return arr |
|
else: |
|
pivot = arr[0] |
|
less = [] |
|
greater = [] |
|
for t in arr[1:]: |
|
cmp = self.compare(texts_list[pivot], texts_list[t]) |
|
if cmp == 0: |
|
less.append(t) |
|
elif cmp == 1: |
|
greater.append(t) |
|
return quicksort(greater) + [pivot] + quicksort(less) |
|
return quicksort(list(range(len(texts_list)))) |
|
|
|
def rank_idx_conpletely(self, texts_list): |
|
n = len(texts_list) |
|
texts_idx = list(range(n)) |
|
scores = [[0] * n for _ in range(n)] |
|
self_score = [0] * n |
|
for i in texts_idx: |
|
scores[i][i] = self.compare_logits(texts_list[i], texts_list[i]) |
|
self_score[i] = scores[i][i] |
|
for j in texts_idx: |
|
if j < i: |
|
scores[i][j] = 1 - scores[j][i] |
|
continue |
|
if j == i: |
|
continue |
|
scores[i][j] = self.compare_logits(texts_list[i], texts_list[j]) |
|
# average score is, smaller is more concrete |
|
average_score = [ sum(s)/len(s) for s in scores] |
|
output_score = [ a + 0.5 - s for a, s in zip(average_score, self_score)] |
|
sorted_indices = sorted(range(len(output_score)), key=lambda x: output_score[x]) |
|
return sorted_indices |
|
|
|
def rank_idx_conpletely_wlogits(self, texts_list, logger=None): |
|
n = len(texts_list) |
|
texts_idx = list(range(n)) |
|
scores = [[0] * n for _ in range(n)] |
|
self_score = [0] * n |
|
for i in texts_idx: |
|
scores[i][i] = self.compare_logits(texts_list[i], texts_list[i]) |
|
self_score[i] = scores[i][i] |
|
for j in texts_idx: |
|
if j < i: |
|
scores[i][j] = 1 - scores[j][i] |
|
continue |
|
if j == i: |
|
continue |
|
scores[i][j] = self.compare_logits(texts_list[i], texts_list[j]) |
|
# average score is, smaller is more concrete |
|
average_score = [ sum(s)/len(s) for s in scores] |
|
output_score = [ a + 0.5 - s for a, s in zip(average_score, self_score)] |
|
sorted_indices = sorted(range(len(output_score)), key=lambda x: output_score[x]) |
|
return sorted_indices, output_score |
|
|
|
def compare_w_neighbors(self, t, cand): |
|
score = 0.0 |
|
for c in cand: |
|
score += self.compare_logits(t, c) |
|
score /= len(cand) |
|
return score |
|
``` |
|
|
|
```python |
|
text_ex_1 = "The Duke then focused on securing his power and looking to future threats. The Duke eventually turned his attention to acquiring Tuscany but struggled." |
|
text_ex_2 = "Lord Bacon mentioned his book \"The History of Henry VII,\" in the conversation noting that King Charles had conquered Naples without resistance, implying that the conquest was like a dream." |
|
|
|
ranker = Ranker() |
|
output = ranker.compare(text_ex_1, text_ex_2) # it is equvilant to (text_ex_2, text_ex_1) |
|
print(f"Output Binary = {output}") |
|
if output: |
|
print("The second text is more concrete.") |
|
else: |
|
print("The first text is more concrete.") |
|
|
|
output_logits = ranker.compare_logits(text_ex_1, text_ex_2) |
|
print(f"Output Logits = {output_logits:.4f}") |
|
``` |
|
|
|
**For more details on the evaluator usage (e.g., pacing planning and control in generation) and training process, please refer to our [paper](https://arxiv.org/abs/2311.04459)!** |
|
|