Upload 11 files
- config.json +46 -0
- data.json +76 -0
- generation_config.json +6 -0
- main.py +33 -0
- merges.txt +0 -0
- model.safetensors +3 -0
- special_tokens_map.json +6 -0
- tokenizer.json +0 -0
- tokenizer_config.json +21 -0
- train.py +47 -0
- vocab.json +0 -0
config.json
ADDED
@@ -0,0 +1,46 @@
{
  "_name_or_path": "distilgpt2",
  "_num_labels": 1,
  "activation_function": "gelu_new",
  "architectures": [
    "GPT2LMHeadModel"
  ],
  "attn_pdrop": 0.1,
  "bos_token_id": 50256,
  "embd_pdrop": 0.1,
  "eos_token_id": 50256,
  "id2label": {
    "0": "LABEL_0"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0
  },
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt2",
  "n_ctx": 1024,
  "n_embd": 768,
  "n_head": 12,
  "n_inner": null,
  "n_layer": 6,
  "n_positions": 1024,
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.1,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "task_specific_params": {
    "text-generation": {
      "do_sample": true,
      "max_length": 50
    }
  },
  "torch_dtype": "float32",
  "transformers_version": "4.47.0",
  "use_cache": true,
  "vocab_size": 50257
}
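This is a stock distilgpt2 configuration (6 layers, 768-dim hidden states, the 50,257-token GPT-2 vocabulary). A minimal sketch of inspecting it, assuming the uploaded files sit in a local ./lockin_model directory (the path main.py below expects):

from transformers import AutoConfig

# Assumption: the files from this upload live locally in ./lockin_model.
config = AutoConfig.from_pretrained("./lockin_model")
print(config.model_type)  # "gpt2"
print(config.n_layer)     # 6 transformer blocks (distilgpt2)
print(config.n_embd)      # 768-dimensional hidden states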
data.json
ADDED
@@ -0,0 +1,76 @@
[
  {"input": "What are your thoughts on $lockin?", "output": "Does $lockin seem like a good investment?"},
  {"input": "Do you think $lockin will increase in value?", "output": "Is $lockin poised for a big price jump?"},
  {"input": "How is $lockin doing in the market right now?", "output": "Is $lockin outperforming other assets?"},
  {"input": "Should I buy more $lockin now?", "output": "Is now a good time to buy $lockin?"},
  {"input": "What’s the future outlook for $lockin?", "output": "Will $lockin rise in the coming months?"},
  {"input": "Is $lockin a safe investment?", "output": "Can $lockin be considered a safe bet?"},
  {"input": "Do you recommend holding onto $lockin?", "output": "Should I hold $lockin long term?"},
  {"input": "How does $lockin compare to other cryptocurrencies?", "output": "Is $lockin a better choice than other cryptocurrencies?"},
  {"input": "Can $lockin reach $100 in the next year?", "output": "Will $lockin reach $100 within a year?"},
  {"input": "What is the general market sentiment about $lockin?", "output": "Is the market positive about $lockin's future?"},
  {"input": "What is the potential of $lockin?", "output": "Does $lockin have strong potential for growth?"},
  {"input": "Is $lockin's price likely to increase this month?", "output": "Will $lockin’s price rise in the next 30 days?"},
  {"input": "Should I invest in $lockin now or wait?", "output": "Is it wise to invest in $lockin right now?"},
  {"input": "What is the current trend for $lockin?", "output": "Is $lockin currently in an uptrend?"},
  {"input": "What are experts saying about $lockin?", "output": "Do experts think $lockin will continue to grow?"},
  {"input": "Can $lockin become a top 10 cryptocurrency?", "output": "Is $lockin on track to become a top 10 cryptocurrency?"},
  {"input": "How does $lockin perform in a bear market?", "output": "Does $lockin hold up well in a bear market?"},
  {"input": "Is $lockin a risky investment?", "output": "Is $lockin considered a high-risk investment?"},
  {"input": "What’s the best way to acquire $lockin?", "output": "Is buying $lockin through exchanges the best option?"},
  {"input": "Does $lockin have any partnerships or collaborations?", "output": "Has $lockin formed any significant partnerships?"},
  {"input": "What are the latest developments with $lockin?", "output": "Is $lockin involved in any recent significant developments?"},
  {"input": "How has $lockin’s performance been this year?", "output": "Is $lockin performing well this year?"},
  {"input": "What are the advantages of investing in $lockin?", "output": "Does $lockin have a strong competitive advantage?"},
  {"input": "Is $lockin’s tokenomics solid?", "output": "Can $lockin’s tokenomics support long-term growth?"},
  {"input": "Should I diversify my portfolio with $lockin?", "output": "Is $lockin a good addition to a diversified portfolio?"},
  {"input": "What are the risks of holding $lockin?", "output": "Is there significant risk in holding $lockin?"},
  {"input": "Is $lockin’s community growing?", "output": "Is $lockin’s community gaining traction?"},
  {"input": "Should I stake $lockin for passive income?", "output": "Is staking $lockin a good way to earn passive income?"},
  {"input": "What’s the market capitalization of $lockin?", "output": "Is $lockin’s market cap increasing?"},
  {"input": "How long do you think $lockin will last?", "output": "Is $lockin expected to have long-term staying power?"},
  {"input": "Is $lockin experiencing high volatility?", "output": "Is $lockin’s price highly volatile?"},
  {"input": "Will $lockin be used for everyday transactions in the future?", "output": "Can $lockin become a mainstream currency?"},
  {"input": "Is $lockin a deflationary token?", "output": "Does $lockin have a deflationary supply model?"},
  {"input": "What’s the role of $lockin in decentralized finance?", "output": "Is $lockin widely used in DeFi protocols?"},
  {"input": "What are the unique features of $lockin?", "output": "Does $lockin have any unique selling points?"},
  {"input": "Should I trade $lockin on multiple exchanges?", "output": "Is it worth trading $lockin on different platforms?"},
  {"input": "How secure is the $lockin network?", "output": "Is the $lockin blockchain highly secure?"},
  {"input": "Can $lockin be used in smart contracts?", "output": "Is $lockin supported by smart contracts?"},
  {"input": "How easy is it to buy $lockin?", "output": "Is purchasing $lockin straightforward?"},
  {"input": "Will $lockin’s price recover after a dip?", "output": "Is $lockin likely to recover after a price dip?"},
  {"input": "How does $lockin handle scalability?", "output": "Is $lockin capable of scaling effectively?"},
  {"input": "How does $lockin’s technology compare to others?", "output": "Is $lockin’s technology superior to its competitors?"},
  {"input": "Should I sell my $lockin holdings now?", "output": "Is now a good time to sell $lockin?"},
  {"input": "Is $lockin gaining adoption in the market?", "output": "Is $lockin becoming more widely adopted?"},
  {"input": "Can $lockin achieve a market cap of $1 billion?", "output": "Is $lockin on track to reach a $1 billion market cap?"},
  {"input": "How decentralized is $lockin?", "output": "Is $lockin fully decentralized?"},
  {"input": "Will $lockin surpass Bitcoin in value?", "output": "Can $lockin overtake Bitcoin in the future?"},
  {"input": "How well does $lockin perform during bull markets?", "output": "Does $lockin perform well in bull markets?"},
  {"input": "Is $lockin a good hedge against inflation?", "output": "Can $lockin protect against inflationary risks?"},
  {"input": "What are the staking rewards for $lockin?", "output": "Are staking rewards for $lockin attractive?"},
  {"input": "Is $lockin a good asset for day trading?", "output": "Can $lockin be profitably day-traded?"},
  {"input": "Should I consider using $lockin for yield farming?", "output": "Is $lockin a good option for yield farming?"},
  {"input": "How does $lockin compare to Ethereum in terms of scalability?", "output": "Is $lockin more scalable than Ethereum?"},
  {"input": "Can $lockin be used as collateral in DeFi?", "output": "Is $lockin accepted as collateral in decentralized finance?"},
  {"input": "Will $lockin be listed on more exchanges?", "output": "Is $lockin likely to be listed on additional exchanges?"},
  {"input": "How does $lockin maintain its value?", "output": "Is $lockin's value supported by strong fundamentals?"},
  {"input": "Is $lockin a stablecoin?", "output": "Is $lockin designed to be a stablecoin?"},
  {"input": "Can $lockin be used in gaming applications?", "output": "Is $lockin useful in the gaming industry?"},
  {"input": "What’s the adoption rate of $lockin in fintech?", "output": "Is $lockin gaining traction in the fintech industry?"},
  {"input": "Is $lockin backed by any assets?", "output": "Is $lockin backed by real-world assets?"},
  {"input": "Should I stake my $lockin for long-term gains?", "output": "Is staking $lockin for the long term a good strategy?"},
  {"input": "Does $lockin have a governance token?", "output": "Is there a governance token for $lockin?"},
  {"input": "What is $lockin’s token supply model?", "output": "Does $lockin have a deflationary token supply model?"},
  {"input": "Is $lockin integrated with DeFi protocols?", "output": "Can $lockin be used in popular DeFi protocols?"},
  {"input": "What are the chances of $lockin succeeding in the long term?", "output": "Does $lockin have long-term potential?"},
  {"input": "Is $lockin a community-driven project?", "output": "Is $lockin driven by a strong community?"},
  {"input": "Should I consider $lockin for my retirement fund?", "output": "Is $lockin a good investment for long-term savings?"},
  {"input": "Does $lockin have any partnerships with big companies?", "output": "Has $lockin formed any major partnerships?"},
  {"input": "How much has $lockin appreciated in value this year?", "output": "Has $lockin seen a significant increase in value this year?"},
  {"input": "Can $lockin’s technology disrupt traditional finance?", "output": "Does $lockin have the potential to disrupt traditional finance?"},
  {"input": "Will $lockin become a mainstream currency?", "output": "Is $lockin on track to become widely accepted?"},
  {"input": "What’s the risk/reward ratio for investing in $lockin?", "output": "Is the risk/reward ratio for $lockin favorable?"},
  {"input": "Is $lockin a viable alternative to Bitcoin?", "output": "Can $lockin replace Bitcoin as a store of value?"}
]
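The dataset is 73 input/output pairs that rephrase free-form $lockin questions into yes/no form. A quick sanity check before training, sketched under the assumption that data.json sits in the working directory:

import json

# Load the training pairs and confirm every record has both expected keys.
with open("data.json") as f:
    pairs = json.load(f)

assert all({"input", "output"} <= set(p) for p in pairs)
print(len(pairs))  # 73 pairs in this upload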
generation_config.json
ADDED
@@ -0,0 +1,6 @@
{
  "_from_model_config": true,
  "bos_token_id": 50256,
  "eos_token_id": 50256,
  "transformers_version": "4.47.0"
}
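These defaults were derived from the model config ("_from_model_config": true). A sketch of loading them explicitly, again assuming the same local directory:

from transformers import GenerationConfig

# Assumption: ./lockin_model is a local copy of this repo.
gen_config = GenerationConfig.from_pretrained("./lockin_model")
print(gen_config.bos_token_id, gen_config.eos_token_id)  # 50256 50256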
main.py
ADDED
@@ -0,0 +1,33 @@
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load your fine-tuned model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("./lockin_model")
model = AutoModelForCausalLM.from_pretrained("./lockin_model")

# Function to generate yes/no questions
def generate_question(input_text):
    # Add padding and attention mask
    inputs = tokenizer(
        input_text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        return_attention_mask=True
    )

    output = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],  # Add attention mask
        max_new_tokens=100,
        do_sample=True,
        temperature=1.5,
        top_p=0.8,
        top_k=50,
        pad_token_id=tokenizer.eos_token_id  # Explicitly set pad token ID
    )
    return tokenizer.decode(output[0], skip_special_tokens=True)

# Example usage
prompt = "What the fuck"
question = generate_question(prompt)
print("Generated Question:", question)
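With do_sample=True and temperature=1.5, each call is nondeterministic. A small sketch of pinning the RNG for repeatable output, reusing generate_question from above:

from transformers import set_seed

set_seed(42)  # seeds python, numpy, and torch so sampled generations repeat
print(generate_question("Is $lockin trending?"))  # hypothetical prompt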
merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:28828ab346afb345a6049fdbb9ec31e8595edd1d5ac2182d6cf4b8a264f69181
size 327657928
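This is a Git LFS pointer, not the weights themselves; the ~328 MB safetensors file is fetched on checkout. A sketch of verifying a downloaded copy against the pointer's digest:

import hashlib

# Hash the downloaded weights in 1 MiB chunks and compare to the LFS pointer.
h = hashlib.sha256()
with open("model.safetensors", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)
print(h.hexdigest() == "28828ab346afb345a6049fdbb9ec31e8595edd1d5ac2182d6cf4b8a264f69181")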
special_tokens_map.json
ADDED
@@ -0,0 +1,6 @@
{
  "bos_token": "<|endoftext|>",
  "eos_token": "<|endoftext|>",
  "pad_token": "<|endoftext|>",
  "unk_token": "<|endoftext|>"
}
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
tokenizer_config.json
ADDED
@@ -0,0 +1,21 @@
{
  "add_prefix_space": false,
  "added_tokens_decoder": {
    "50256": {
      "content": "<|endoftext|>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "bos_token": "<|endoftext|>",
  "clean_up_tokenization_spaces": false,
  "eos_token": "<|endoftext|>",
  "extra_special_tokens": {},
  "model_max_length": 1024,
  "pad_token": "<|endoftext|>",
  "tokenizer_class": "GPT2Tokenizer",
  "unk_token": "<|endoftext|>"
}
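Per GPT-2 convention, <|endoftext|> (id 50256) serves as bos, eos, pad, and unk at once, matching special_tokens_map.json above. A sketch confirming the setup, assuming the local ./lockin_model path:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("./lockin_model")  # assumed local path
assert tok.bos_token == tok.eos_token == tok.pad_token == tok.unk_token == "<|endoftext|>"
print(tok.eos_token_id)  # 50256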
train.py
ADDED
@@ -0,0 +1,47 @@
from transformers import Trainer, TrainingArguments, AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset

# Load a small pre-trained model and tokenizer
model_name = "distilgpt2"  # or choose another small model
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Add a pad token (setting it to eos_token is one common approach for GPT-based models)
tokenizer.pad_token = tokenizer.eos_token  # Or you can choose to add a new pad token, e.g., '[PAD]'

# Load the dataset (Make sure data.json is in the correct location)
train_data = load_dataset("json", data_files={"train": "data.json"})

# Preprocess the dataset
def preprocess_function(examples):
    inputs = examples["input"]
    outputs = examples["output"]
    model_inputs = tokenizer(inputs, max_length=512, truncation=True, padding="max_length")
    labels = tokenizer(outputs, max_length=512, truncation=True, padding="max_length")
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

# Preprocess the train dataset using the map function
train_dataset = train_data["train"].map(preprocess_function, batched=True)

# Define training arguments
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=4,
    logging_dir="./logs",
)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
)

# Train the model
trainer.train()

# Save the fine-tuned model
model.save_pretrained("./lockin_model")
tokenizer.save_pretrained("./lockin_model")
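One caveat in preprocess_function above: labels are padded with the eos token and never masked, so the loss is also computed over padding. The usual convention is to set padded label positions to -100, which Trainer's cross-entropy loss ignores. A hedged sketch of that variant:

# Variant of preprocess_function that masks padding out of the loss.
def preprocess_function(examples):
    model_inputs = tokenizer(examples["input"], max_length=512,
                             truncation=True, padding="max_length")
    labels = tokenizer(examples["output"], max_length=512,
                       truncation=True, padding="max_length")
    # attention_mask is 1 for real tokens, 0 for padding; -100 is ignored by the loss.
    model_inputs["labels"] = [
        [tok if mask == 1 else -100 for tok, mask in zip(seq, attn)]
        for seq, attn in zip(labels["input_ids"], labels["attention_mask"])
    ]
    return model_inputs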
vocab.json
ADDED
The diff for this file is too large to render.
See raw diff