|
--- |
|
license: mit |
|
datasets: |
|
- sartajbhuvaji/gutenberg |
|
language: |
|
- en |
|
base_model: |
|
- openai-community/gpt2 |
|
pipeline_tag: text-classification |
|
library_name: transformers |
|
tags: |
|
- text-classification |
|
--- |
|
|
|
```python |
|
from transformers import GPT2ForSequenceClassification, GPT2Tokenizer |
|
from datasets import load_dataset |
|
from transformers import pipeline |
|
import pandas as pd |
|
|
|
# Load the fine-tuned classifier and its tokenizer from the Hugging Face Hub.
model = GPT2ForSequenceClassification.from_pretrained("sartajbhuvaji/gutenberg-gpt2")
tokenizer = GPT2Tokenizer.from_pretrained("sartajbhuvaji/gutenberg-gpt2")

# Create a text classification pipeline.
classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)

# Test the pipeline on a short string.
result = classifier("This is a great book!")
print(result)  # example output: [{'label': 'LABEL_7', 'score': 0.8302432298660278}]

# Load the dataset the model was trained on so a real document can be classified.
# Calling load_dataset without `split` returns a mapping of split name -> Dataset;
# the first available split is used here. Pick another split if you need one.
dataset_splits = load_dataset("sartajbhuvaji/gutenberg")
df = next(iter(dataset_splits.values())).to_pandas()

# Test the pipeline on a document.
doc_id = 1
matching_texts = df.loc[df["DocID"] == doc_id, "Text"]
if matching_texts.empty:
    raise ValueError(f"No document with DocID == {doc_id} in the loaded split")
doc_text = matching_texts.iloc[0]

# The character slice keeps the input short; truncation=True additionally makes
# the tokenizer cap the input at the model's maximum length, because a
# 1024-character string is not guaranteed to be at most 1024 tokens.
result = classifier(doc_text[:1024], truncation=True)
print(result)  # example output: [{'label': 'LABEL_4', 'score': 0.6285566091537476}]
|
``` |