Mizule committed · verified · commit 5e7d47e · 1 parent: 51577b1

Update README.md

Files changed (1): README.md (+86 −3)

---
license: mit
datasets:
- HuggingFaceFW/fineweb-edu
language:
- en
---

Dense-30M is a small GPT-2-style language model (~30M parameters) trained on fineweb-edu.

- Trainable parameters: 30,142,848
- Embedding parameters: 19,298,688
- Non-embedding parameters: 10,844,160
- Vocabulary size: 50,257
- Total training tokens: 136,000,000
- Epochs: 2
- Final train loss: 2.9811
- Final test loss: 2.7963
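
These counts are mutually consistent with the GPT-2 configuration used in the inference script below: the token-embedding matrix alone is 50,257 × 384 = 19,298,688 parameters. As an optional sanity check, here is a minimal sketch (not part of the original card) that rebuilds the same architecture and counts parameters:

```python
from transformers import GPT2Config, GPT2LMHeadModel

# Same architecture as in the inference script below
config = GPT2Config(vocab_size=50257, n_positions=512, n_embd=384, n_layer=6, n_head=8)
model = GPT2LMHeadModel(config)

total = sum(p.numel() for p in model.parameters())   # 30,142,848 (lm_head is tied to wte)
embedding = model.transformer.wte.weight.numel()     # 50,257 * 384 = 19,298,688
# Note: "non-embedding" as reported above still includes the 512 * 384 position embeddings
print(total, embedding, total - embedding)           # 30142848 19298688 10844160
```

If the reported losses are per-token cross-entropy in nats (the usual convention), they correspond to perplexities of roughly exp(2.9811) ≈ 19.7 on train and exp(2.7963) ≈ 16.4 on test.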

---

Try the following script for inference (the `!pip` line assumes a notebook environment such as Colab):

```python
# Install dependencies first (notebook shell escape; drop the "!" in a terminal)
!pip install huggingface_hub transformers torch

from transformers import GPT2Tokenizer, GPT2Config, GPT2LMHeadModel
from huggingface_hub import hf_hub_download
import torch

# Repo name
model_name = 'Mizule/Dense-30M'

# Authenticate
token = input("Enter your Hugging Face token: ")

# Download the raw state dict
model_file = hf_hub_download(repo_id=model_name, filename="Dense-30M.pth", token=token)

# Tokenizer and custom config (must match the training architecture)
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

config = GPT2Config(
    vocab_size=tokenizer.vocab_size,
    n_positions=512,
    n_embd=384,
    n_layer=6,
    n_head=8
)

# Load model
model = GPT2LMHeadModel(config)
model.load_state_dict(torch.load(model_file, map_location="cpu"))
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
model.eval()

# Inference settings
def generate_text(prompt, max_length=128, temperature=0.2, top_k=50, top_p=0.9):
    inputs = tokenizer(prompt, return_tensors="pt")
    inputs = {key: value.to(device) for key, value in inputs.items()}
    outputs = model.generate(
        **inputs,
        max_length=max_length,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id  # GPT-2 has no pad token
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Interactive loop (it's an undertrained base model, don't expect it to chat)
while True:
    prompt = input("Prompt: ")
    if prompt.lower() == 'exit':
        break
    output = generate_text(prompt)
    print(f"Generated text: {output}")
```
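
The download above is a raw PyTorch state dict rather than a standard Transformers checkpoint, so plain `from_pretrained("Mizule/Dense-30M")` is unlikely to work on the repo directly. As an optional follow-up (a sketch continuing from the script above, not something the card provides), you can save the loaded model once in the standard layout and then reload it through the usual high-level API:

```python
from transformers import pipeline

# Save config + weights + tokenizer in the standard layout (local path is arbitrary)
save_dir = "dense-30m-hf"
model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)

# Reload with the pipeline API; device=0 is the first GPU, device=-1 the CPU
generator = pipeline(
    "text-generation",
    model=save_dir,
    device=0 if torch.cuda.is_available() else -1,
)
result = generator("The history of Rome", max_length=64, do_sample=True, temperature=0.7)
print(result[0]["generated_text"])
```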