File size: 3,107 Bytes
21cf18c 9430328 d1d4e7c 21cf18c 9430328 21cf18c d1d4e7c 9430328 21cf18c c15ac09 824dde5 d1056da 824dde5 3e30d08 824dde5 d1056da 824dde5 d1056da 61bb5be d1056da 824dde5 d1056da 824dde5 f19a32c d1056da f19a32c d1056da 824dde5 d1056da 824dde5 c32e183 d1d4e7c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
---
library_name: transformers
license: apache-2.0
datasets:
- mtc/absinth_german_faithfulness_detection_dataset
language:
- de
pipeline_tag: text-classification
---
# Model Card for mtc/mbert-absinth-3-epochs
This model is a fine-tuned version of [google-bert/bert-base-multilingual-cased](https://huggingface.co/google-bert/bert-base-multilingual-cased). It was fine-tuned on the Absinth dataset to predict, for a given German news article and a summary sentence, a label indicating whether the sentence is faithful to the article.
## Installation
Install necessary packages:
```
# torch is needed as well: the usage snippet below imports it directly
pip3 install transformers torch
```
## Usage
Below is a minimal code snippet that runs classification for a given article and its summary sentences. For each sentence, the model outputs one of **Faithful**, **Intrinsic Hallucination** or **Extrinsic Hallucination**. For more information about the labels, see [here](https://huggingface.co/datasets/mtc/absinth_german_faithfulness_detection_dataset).
```python
from typing import List, Dict
import torch
from transformers import pipeline
def generate_prompts_for_classification(article: str, summary_sentences: List[str]) -> List[Dict]:
    """Pair the article with each summary sentence for sentence-pair classification.

    Args:
        article: Source article text; used as the ``"text"`` entry of every prompt.
        summary_sentences: Sentences to check against the article; each one
            becomes the ``"text_pair"`` entry of exactly one prompt.

    Returns:
        One ``{"text": article, "text_pair": sentence}`` dict per sentence, in
        input order. An empty ``summary_sentences`` yields an empty list.
    """
    return [{"text": article, "text_pair": sentence} for sentence in summary_sentences]
def predict_with_hf_classification_pipeline(prompts: List[Dict], model_name: str, max_context_length: int = 512,
                                            batch_size: int = 2) -> List[str]:
    """Classify every prompt with a Hugging Face text-classification pipeline.

    Args:
        prompts: Dicts with ``"text"`` and ``"text_pair"`` keys, e.g. as built by
            ``generate_prompts_for_classification``.
        model_name: Model identifier handed to ``pipeline(..., model=model_name)``.
        max_context_length: Passed to the pipeline call as ``max_length``;
            together with ``truncation=True`` longer inputs are truncated.
        batch_size: Batch size handed to the pipeline.

    Returns:
        The ``'label'`` field of each pipeline result, in the order of ``prompts``.
        An empty ``prompts`` list returns ``[]`` without loading the model.
    """
    # Nothing to classify: skip the (potentially expensive) model download/load.
    if not prompts:
        return []

    # Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    text_classification_pipeline = pipeline(
        "text-classification",
        model=model_name,
        device=device,
        batch_size=batch_size,
    )
    batch_output = text_classification_pipeline(prompts, truncation=True, max_length=max_context_length)
    return [result['label'] for result in batch_output]
def main():
    """Run the example: classify two summary sentences against a short German article.

    Builds one prompt per summary sentence, runs them through the
    ``mtc/mbert-absinth-3-epochs`` classifier and prints the list of
    predicted labels.
    """
    model_name = "mtc/mbert-absinth-3-epochs"
    # Passed as max_length with truncation enabled: tokenized inputs
    # (article plus sentence) longer than this are truncated
    max_context_length = 512
    # Adjust batch_size according to your local GPU memory
    batch_size = 2
    article = "Ein neuer Zirkus ist gestern in Zürich angekommen. Viele Familien besuchten das grosse Zelt, um die Vorstellung zu sehen. Es gab Akrobaten, Clowns und Tiere, die das Publikum begeisterten. Der Zirkus bleibt noch eine Woche in der Stadt und bietet täglich Vorstellungen an."
    summary_sentences = [
        "Ein Zirkus ist in Basel angekommen.",
        "Der Zirkus, der in 1950 gegründet wurde, wird von vielen Familien besucht.",
    ]
    prompts = generate_prompts_for_classification(article=article, summary_sentences=summary_sentences)
    predictions = predict_with_hf_classification_pipeline(
        prompts=prompts,
        model_name=model_name,
        max_context_length=max_context_length,
        batch_size=batch_size,
    )
    print(predictions)
# Run the example only when the file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
```
When executing the code above, the output should look like this:
```
['Intrinsic Hallucination', 'Extrinsic Hallucination']
``` |