jonngan committed on
Commit fc9d193 · verified · 1 Parent(s): 2a68855

Upload 11 files

config.json ADDED
@@ -0,0 +1,46 @@
+ {
+   "_name_or_path": "distilgpt2",
+   "_num_labels": 1,
+   "activation_function": "gelu_new",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 50256,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 50256,
+   "id2label": {
+     "0": "LABEL_0"
+   },
+   "initializer_range": 0.02,
+   "label2id": {
+     "LABEL_0": 0
+   },
+   "layer_norm_epsilon": 1e-05,
+   "model_type": "gpt2",
+   "n_ctx": 1024,
+   "n_embd": 768,
+   "n_head": 12,
+   "n_inner": null,
+   "n_layer": 6,
+   "n_positions": 1024,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "task_specific_params": {
+     "text-generation": {
+       "do_sample": true,
+       "max_length": 50
+     }
+   },
+   "torch_dtype": "float32",
+   "transformers_version": "4.47.0",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
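
This config mirrors the stock distilgpt2 architecture: 6 transformer layers, 12 attention heads, 768-dimensional embeddings, a 50,257-token vocabulary, and the GPT2LMHeadModel head. As a quick sanity check, the file can be inspected with transformers once the repo is downloaded — a minimal sketch, assuming the uploaded files sit in a local directory named ./lockin_model (the same path main.py below uses):

from transformers import AutoConfig

config = AutoConfig.from_pretrained("./lockin_model")  # reads config.json from the local directory
print(config.model_type, config.n_layer, config.n_head, config.n_embd)  # gpt2 6 12 768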
data.json ADDED
@@ -0,0 +1,76 @@
+ [
+   {"input": "What are your thoughts on $lockin?", "output": "Does $lockin seem like a good investment?"},
+   {"input": "Do you think $lockin will increase in value?", "output": "Is $lockin poised for a big price jump?"},
+   {"input": "How is $lockin doing in the market right now?", "output": "Is $lockin outperforming other assets?"},
+   {"input": "Should I buy more $lockin now?", "output": "Is now a good time to buy $lockin?"},
+   {"input": "What’s the future outlook for $lockin?", "output": "Will $lockin rise in the coming months?"},
+   {"input": "Is $lockin a safe investment?", "output": "Can $lockin be considered a safe bet?"},
+   {"input": "Do you recommend holding onto $lockin?", "output": "Should I hold $lockin long term?"},
+   {"input": "How does $lockin compare to other cryptocurrencies?", "output": "Is $lockin a better choice than other cryptocurrencies?"},
+   {"input": "Can $lockin reach $100 in the next year?", "output": "Will $lockin reach $100 within a year?"},
+   {"input": "What is the general market sentiment about $lockin?", "output": "Is the market positive about $lockin's future?"},
+   {"input": "What is the potential of $lockin?", "output": "Does $lockin have strong potential for growth?"},
+   {"input": "Is $lockin's price likely to increase this month?", "output": "Will $lockin’s price rise in the next 30 days?"},
+   {"input": "Should I invest in $lockin now or wait?", "output": "Is it wise to invest in $lockin right now?"},
+   {"input": "What is the current trend for $lockin?", "output": "Is $lockin currently in an uptrend?"},
+   {"input": "What are experts saying about $lockin?", "output": "Do experts think $lockin will continue to grow?"},
+   {"input": "Can $lockin become a top 10 cryptocurrency?", "output": "Is $lockin on track to become a top 10 cryptocurrency?"},
+   {"input": "How does $lockin perform in a bear market?", "output": "Does $lockin hold up well in a bear market?"},
+   {"input": "Is $lockin a risky investment?", "output": "Is $lockin considered a high-risk investment?"},
+   {"input": "What’s the best way to acquire $lockin?", "output": "Is buying $lockin through exchanges the best option?"},
+   {"input": "Does $lockin have any partnerships or collaborations?", "output": "Has $lockin formed any significant partnerships?"},
+   {"input": "What are the latest developments with $lockin?", "output": "Is $lockin involved in any recent significant developments?"},
+   {"input": "How has $lockin’s performance been this year?", "output": "Is $lockin performing well this year?"},
+   {"input": "What are the advantages of investing in $lockin?", "output": "Does $lockin have a strong competitive advantage?"},
+   {"input": "Is $lockin’s tokenomics solid?", "output": "Can $lockin’s tokenomics support long-term growth?"},
+   {"input": "Should I diversify my portfolio with $lockin?", "output": "Is $lockin a good addition to a diversified portfolio?"},
+   {"input": "What are the risks of holding $lockin?", "output": "Is there significant risk in holding $lockin?"},
+   {"input": "Is $lockin’s community growing?", "output": "Is $lockin’s community gaining traction?"},
+   {"input": "Should I stake $lockin for passive income?", "output": "Is staking $lockin a good way to earn passive income?"},
+   {"input": "What’s the market capitalization of $lockin?", "output": "Is $lockin’s market cap increasing?"},
+   {"input": "How long do you think $lockin will last?", "output": "Is $lockin expected to have long-term staying power?"},
+   {"input": "Is $lockin experiencing high volatility?", "output": "Is $lockin’s price highly volatile?"},
+   {"input": "Will $lockin be used for everyday transactions in the future?", "output": "Can $lockin become a mainstream currency?"},
+   {"input": "Is $lockin a deflationary token?", "output": "Does $lockin have a deflationary supply model?"},
+   {"input": "What’s the role of $lockin in decentralized finance?", "output": "Is $lockin widely used in DeFi protocols?"},
+   {"input": "What are the unique features of $lockin?", "output": "Does $lockin have any unique selling points?"},
+   {"input": "Should I trade $lockin on multiple exchanges?", "output": "Is it worth trading $lockin on different platforms?"},
+   {"input": "How secure is the $lockin network?", "output": "Is the $lockin blockchain highly secure?"},
+   {"input": "Can $lockin be used in smart contracts?", "output": "Is $lockin supported by smart contracts?"},
+   {"input": "How easy is it to buy $lockin?", "output": "Is purchasing $lockin straightforward?"},
+   {"input": "Will $lockin’s price recover after a dip?", "output": "Is $lockin likely to recover after a price dip?"},
+   {"input": "How does $lockin handle scalability?", "output": "Is $lockin capable of scaling effectively?"},
+   {"input": "How does $lockin’s technology compare to others?", "output": "Is $lockin’s technology superior to its competitors?"},
+   {"input": "Should I sell my $lockin holdings now?", "output": "Is now a good time to sell $lockin?"},
+   {"input": "Is $lockin gaining adoption in the market?", "output": "Is $lockin becoming more widely adopted?"},
+   {"input": "Can $lockin achieve a market cap of $1 billion?", "output": "Is $lockin on track to reach a $1 billion market cap?"},
+   {"input": "How decentralized is $lockin?", "output": "Is $lockin fully decentralized?"},
+   {"input": "Will $lockin surpass Bitcoin in value?", "output": "Can $lockin overtake Bitcoin in the future?"},
+   {"input": "How well does $lockin perform during bull markets?", "output": "Does $lockin perform well in bull markets?"},
+   {"input": "Is $lockin a good hedge against inflation?", "output": "Can $lockin protect against inflationary risks?"},
+   {"input": "What are the staking rewards for $lockin?", "output": "Are staking rewards for $lockin attractive?"},
+   {"input": "Is $lockin a good asset for day trading?", "output": "Can $lockin be profitably day-traded?"},
+   {"input": "Should I consider using $lockin for yield farming?", "output": "Is $lockin a good option for yield farming?"},
+   {"input": "How does $lockin compare to Ethereum in terms of scalability?", "output": "Is $lockin more scalable than Ethereum?"},
+   {"input": "Can $lockin be used as collateral in DeFi?", "output": "Is $lockin accepted as collateral in decentralized finance?"},
+   {"input": "Will $lockin be listed on more exchanges?", "output": "Is $lockin likely to be listed on additional exchanges?"},
+   {"input": "How does $lockin maintain its value?", "output": "Is $lockin's value supported by strong fundamentals?"},
+   {"input": "Is $lockin a stablecoin?", "output": "Is $lockin designed to be a stablecoin?"},
+   {"input": "Can $lockin be used in gaming applications?", "output": "Is $lockin useful in the gaming industry?"},
+   {"input": "What’s the adoption rate of $lockin in fintech?", "output": "Is $lockin gaining traction in the fintech industry?"},
+   {"input": "Is $lockin backed by any assets?", "output": "Is $lockin backed by real-world assets?"},
+   {"input": "Should I stake my $lockin for long-term gains?", "output": "Is staking $lockin for the long term a good strategy?"},
+   {"input": "Does $lockin have a governance token?", "output": "Is there a governance token for $lockin?"},
+   {"input": "What is $lockin’s token supply model?", "output": "Does $lockin have a deflationary token supply model?"},
+   {"input": "Is $lockin integrated with DeFi protocols?", "output": "Can $lockin be used in popular DeFi protocols?"},
+   {"input": "What are the chances of $lockin succeeding in the long term?", "output": "Does $lockin have long-term potential?"},
+   {"input": "Is $lockin a community-driven project?", "output": "Is $lockin driven by a strong community?"},
+   {"input": "Should I consider $lockin for my retirement fund?", "output": "Is $lockin a good investment for long-term savings?"},
+   {"input": "Does $lockin have any partnerships with big companies?", "output": "Has $lockin formed any major partnerships?"},
+   {"input": "How much has $lockin appreciated in value this year?", "output": "Has $lockin seen a significant increase in value this year?"},
+   {"input": "Can $lockin’s technology disrupt traditional finance?", "output": "Does $lockin have the potential to disrupt traditional finance?"},
+   {"input": "Will $lockin become a mainstream currency?", "output": "Is $lockin on track to become widely accepted?"},
+   {"input": "What’s the risk/reward ratio for investing in $lockin?", "output": "Is the risk/reward ratio for $lockin favorable?"},
+   {"input": "Is $lockin a viable alternative to Bitcoin?", "output": "Can $lockin replace Bitcoin as a store of value?"}
+ ]
+
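
Each of the 73 records pairs a free-form prompt about $lockin with a yes/no rephrasing, stored as a single top-level JSON array. A minimal sketch of how the load_dataset call in train.py reads this file (the field names input and output come straight from the records above; the printed values are illustrative):

from datasets import load_dataset

ds = load_dataset("json", data_files={"train": "data.json"})["train"]  # one array of {"input", "output"} objects
print(ds.column_names)  # ['input', 'output']
print(ds[0]["input"])   # "What are your thoughts on $lockin?"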
generation_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 50256,
+   "eos_token_id": 50256,
+   "transformers_version": "4.47.0"
+ }
main.py ADDED
@@ -0,0 +1,33 @@
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+
+ # Load your fine-tuned model and tokenizer
+ tokenizer = AutoTokenizer.from_pretrained("./lockin_model")
+ model = AutoModelForCausalLM.from_pretrained("./lockin_model")
+
+ # Function to generate yes/no questions
+ def generate_question(input_text):
+     # Add padding and attention mask
+     inputs = tokenizer(
+         input_text,
+         return_tensors="pt",
+         padding=True,
+         truncation=True,
+         return_attention_mask=True
+     )
+
+     output = model.generate(
+         inputs["input_ids"],
+         attention_mask=inputs["attention_mask"],  # Add attention mask
+         max_new_tokens=100,
+         do_sample=True,
+         temperature=1.5,
+         top_p=0.8,
+         top_k=50,
+         pad_token_id=tokenizer.eos_token_id  # Explicitly set pad token ID
+     )
+     return tokenizer.decode(output[0], skip_special_tokens=True)
+
+ # Example usage
+ prompt = "What the fuck"
+ question = generate_question(prompt)
+ print("Generated Question:", question)
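
Note that generate() returns the prompt tokens followed by the sampled continuation, so the decoded string from generate_question starts by repeating the input. If only the newly generated question is wanted, one option (a small sketch reusing the tokenizer and model loaded above) is to slice off the prompt tokens before decoding:

def generate_question_only(input_text):
    # Same sampling settings as generate_question above
    inputs = tokenizer(input_text, return_tensors="pt", padding=True, return_attention_mask=True)
    output = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=100,
        do_sample=True,
        temperature=1.5,
        top_p=0.8,
        top_k=50,
        pad_token_id=tokenizer.eos_token_id,
    )
    new_tokens = output[0][inputs["input_ids"].shape[1]:]  # drop the prompt tokens
    return tokenizer.decode(new_tokens, skip_special_tokens=True)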
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:28828ab346afb345a6049fdbb9ec31e8595edd1d5ac2182d6cf4b8a264f69181
+ size 327657928
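
The weights themselves are stored through Git LFS, so only this pointer file appears in the diff. The 327,657,928-byte size is consistent with distilgpt2's roughly 81.9M parameters saved in float32 (81.9M × 4 bytes ≈ 327.7 MB, plus a small safetensors header).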
special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "bos_token": "<|endoftext|>",
+   "eos_token": "<|endoftext|>",
+   "pad_token": "<|endoftext|>",
+   "unk_token": "<|endoftext|>"
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,21 @@
+ {
+   "add_prefix_space": false,
+   "added_tokens_decoder": {
+     "50256": {
+       "content": "<|endoftext|>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<|endoftext|>",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "<|endoftext|>",
+   "extra_special_tokens": {},
+   "model_max_length": 1024,
+   "pad_token": "<|endoftext|>",
+   "tokenizer_class": "GPT2Tokenizer",
+   "unk_token": "<|endoftext|>"
+ }
train.py ADDED
@@ -0,0 +1,47 @@
+ from transformers import Trainer, TrainingArguments, AutoModelForCausalLM, AutoTokenizer
+ from datasets import load_dataset
+
+ # Load a small pre-trained model and tokenizer
+ model_name = "distilgpt2"  # or choose another small model
+ model = AutoModelForCausalLM.from_pretrained(model_name)
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+ # Add a pad token (setting it to eos_token is one common approach for GPT-based models)
+ tokenizer.pad_token = tokenizer.eos_token  # Or you can choose to add a new pad token, e.g., '[PAD]'
+
+ # Load the dataset (Make sure data.json is in the correct location)
+ train_data = load_dataset("json", data_files={"train": "data.json"})
+
+ # Preprocess the dataset
+ def preprocess_function(examples):
+     inputs = examples["input"]
+     outputs = examples["output"]
+     model_inputs = tokenizer(inputs, max_length=512, truncation=True, padding="max_length")
+     labels = tokenizer(outputs, max_length=512, truncation=True, padding="max_length")
+     model_inputs["labels"] = labels["input_ids"]
+     return model_inputs
+
+ # Preprocess the train dataset using the map function
+ train_dataset = train_data["train"].map(preprocess_function, batched=True)
+
+ # Define training arguments
+ training_args = TrainingArguments(
+     output_dir="./results",
+     num_train_epochs=3,
+     per_device_train_batch_size=4,
+     logging_dir="./logs",
+ )
+
+ # Initialize Trainer
+ trainer = Trainer(
+     model=model,
+     args=training_args,
+     train_dataset=train_dataset,
+ )
+
+ # Train the model
+ trainer.train()
+
+ # Save the fine-tuned model
+ model.save_pretrained("./lockin_model")
+ tokenizer.save_pretrained("./lockin_model")
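
One caveat on preprocess_function: the input_ids come from the prompt while the labels come from the separately tokenized target, so the two sequences are aligned position-by-position rather than concatenated, and the eos padding in the labels also counts toward the loss. A common alternative for causal-LM fine-tuning (a sketch only, not necessarily what was intended here) is to join prompt and target into one sequence and mask padded positions with -100 so the loss ignores them:

def preprocess_concat(examples):
    # Join each prompt and its target into a single causal-LM training sequence
    texts = [i + tokenizer.eos_token + o for i, o in zip(examples["input"], examples["output"])]
    enc = tokenizer(texts, max_length=512, truncation=True, padding="max_length")
    # Copy input_ids to labels, but mark padding as -100 so it does not contribute to the loss
    enc["labels"] = [
        [tok if mask == 1 else -100 for tok, mask in zip(ids, attn)]
        for ids, attn in zip(enc["input_ids"], enc["attention_mask"])
    ]
    return enc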
vocab.json ADDED
The diff for this file is too large to render. See raw diff