Mizule committed · verified · commit 5e7d47e · 1 parent: 51577b1

Update README.md

Files changed (1): README.md (+86 −3)

---
license: mit
datasets:
- HuggingFaceFW/fineweb-edu
language:
- en
---

Dense-30M is a small GPT-2-style language model (~30M parameters) trained on fineweb-edu.

- Trainable parameters: 30,142,848
- Embedding parameters: 19,298,688
- Non-embedding parameters: 10,844,160
- Vocabulary size: 50,257
- Total training tokens: 136,000,000
- Epochs: 2
- Final train loss: 2.9811
- Final test loss: 2.7963
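
These counts are mutually consistent with the GPT-2 configuration used in the inference script below: the token-embedding matrix alone is 50,257 × 384 = 19,298,688 parameters. As an optional sanity check, here is a minimal sketch (not part of the original card) that rebuilds the same architecture and counts parameters:

```python
from transformers import GPT2Config, GPT2LMHeadModel

# Same architecture as in the inference script below
config = GPT2Config(vocab_size=50257, n_positions=512, n_embd=384, n_layer=6, n_head=8)
model = GPT2LMHeadModel(config)

total = sum(p.numel() for p in model.parameters())   # 30,142,848 (lm_head is tied to wte)
embedding = model.transformer.wte.weight.numel()     # 50,257 * 384 = 19,298,688
# Note: "non-embedding" as reported above still includes the 512 * 384 position embeddings
print(total, embedding, total - embedding)           # 30142848 19298688 10844160
```

If the reported losses are per-token cross-entropy in nats (the usual convention), they correspond to perplexities of roughly exp(2.9811) ≈ 19.7 on train and exp(2.7963) ≈ 16.4 on test.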

---

Try the following script for inference (the `!pip` line assumes a notebook environment such as Colab):

```python
# Install dependencies first (notebook shell escape; drop the "!" in a terminal)
!pip install huggingface_hub transformers torch

from transformers import GPT2Tokenizer, GPT2Config, GPT2LMHeadModel
from huggingface_hub import hf_hub_download
import torch

# Repo name
model_name = 'Mizule/Dense-30M'

# Authenticate
token = input("Enter your Hugging Face token: ")

# Download the raw state dict
model_file = hf_hub_download(repo_id=model_name, filename="Dense-30M.pth", token=token)

# Tokenizer and custom config (must match the training architecture)
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

config = GPT2Config(
    vocab_size=tokenizer.vocab_size,
    n_positions=512,
    n_embd=384,
    n_layer=6,
    n_head=8
)

# Load model
model = GPT2LMHeadModel(config)
model.load_state_dict(torch.load(model_file, map_location="cpu"))
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
model.eval()

# Inference settings
def generate_text(prompt, max_length=128, temperature=0.2, top_k=50, top_p=0.9):
    inputs = tokenizer(prompt, return_tensors="pt")
    inputs = {key: value.to(device) for key, value in inputs.items()}
    outputs = model.generate(
        **inputs,
        max_length=max_length,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id  # GPT-2 has no pad token
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Interactive loop (it's an undertrained base model, don't expect it to chat)
while True:
    prompt = input("Prompt: ")
    if prompt.lower() == 'exit':
        break
    output = generate_text(prompt)
    print(f"Generated text: {output}")
```
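
The download above is a raw PyTorch state dict rather than a standard Transformers checkpoint, so plain `from_pretrained("Mizule/Dense-30M")` is unlikely to work on the repo directly. As an optional follow-up (a sketch continuing from the script above, not something the card provides), you can save the loaded model once in the standard layout and then reload it through the usual high-level API:

```python
from transformers import pipeline

# Save config + weights + tokenizer in the standard layout (local path is arbitrary)
save_dir = "dense-30m-hf"
model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)

# Reload with the pipeline API; device=0 is the first GPU, device=-1 the CPU
generator = pipeline(
    "text-generation",
    model=save_dir,
    device=0 if torch.cuda.is_available() else -1,
)
result = generator("The history of Rome", max_length=64, do_sample=True, temperature=0.7)
print(result[0]["generated_text"])
```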