---
license: mit
datasets:
- HuggingFaceFW/fineweb-edu
language:
- en
library_name: transformers
---

- Trainable parameters: 30,142,848
- Embedding parameters: 19,298,688
- Non-embedding parameters: 10,844,160
- Tokenizer: GPT-2
- Vocabulary size: 50,257
- Compute: single T4 GPU
- Total train time: 2 hours and 40 minutes
- Total train tokens: 136,000,000
- Epochs: 2
- Final train loss: 2.9811
- Final test loss: 2.7963
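
The parameter breakdown is easy to reproduce: instantiating the same architecture untrained (the config used in the inference script below) and counting tensors gives the same figures. A minimal sketch; note that the LM head is weight-tied to the token embeddings, so positional embeddings fall under the non-embedding count here:

```python
from transformers import GPT2Config, GPT2LMHeadModel

# Same architecture as the inference script below, randomly initialized
config = GPT2Config(
    vocab_size=50257, n_positions=512, n_ctx=512,
    n_embd=384, n_layer=6, n_head=8,
)
model = GPT2LMHeadModel(config)

total = sum(p.numel() for p in model.parameters())  # 30,142,848
embedding = model.transformer.wte.weight.numel()    # 50,257 * 384 = 19,298,688
print(f"Trainable parameters: {total:,}")
print(f"Embedding parameters: {embedding:,}")
print(f"Non-embedding parameters: {total - embedding:,}")  # 10,844,160
```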

_________________________________________

Try the following script for inference:
```bash
pip install huggingface_hub transformers torch
```

```python
import torch
from huggingface_hub import hf_hub_download
from transformers import GPT2Config, GPT2LMHeadModel, GPT2Tokenizer

# Model repo name
model_name = "Mizule/Dense-30M"

# Authenticate
token = input("Enter your Hugging Face token: ")

# Download the raw state dict
model_file = hf_hub_download(repo_id=model_name, filename="Dense-30M.pth", token=token)

# GPT-2 tokenizer (50,257-token vocabulary)
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

# Custom config matching the trained architecture
config = GPT2Config(
    vocab_size=tokenizer.vocab_size,
    n_positions=512,
    n_ctx=512,
    n_embd=384,
    n_layer=6,
    n_head=8,
)

# Load model
model = GPT2LMHeadModel(config)
model.load_state_dict(torch.load(model_file, map_location="cpu"))
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
model.eval()

# Inference settings
def generate_text(prompt, max_length=128, temperature=0.2, top_k=50, top_p=0.9):
    inputs = tokenizer(prompt, return_tensors="pt")
    inputs = {key: value.to(device) for key, value in inputs.items()}
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_length=max_length,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
            num_return_sequences=1,
            no_repeat_ngram_size=2,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,  # GPT-2 has no pad token
        )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Interactive loop (it's an undertrained base model, don't expect it to chat)
while True:
    prompt = input("Prompt: ")
    if prompt.lower() == "exit":
        break
    output = generate_text(prompt)
    print(f"Generated text: {output}")
```