Update model (ChatTS-14B-0801) for better reasoning and Chinese QA ability.
This new model introduces minor bug fixes and code changes. We added a position_embedding to the TimeSeriesEmbedding module for a better representation of positions within each time series. We also retrained the model with additional reasoning and Chinese entries in the training dataset to improve its reasoning and Chinese capabilities.
If you want to reproduce the results in the paper, please download the old version of ChatTS-14B. Compared with the old model, this new model achieves nearly identical evaluation results on categorical metrics and better results on statistical and reasoning metrics.
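For reference, a minimal loading sketch (not part of this commit): the local path is illustrative, and passing the tokenizer into the processor is an assumption about Qwen2TSProcessor's constructor. The processor is resolved through the `auto_map` entries added to `processor_config.json` and `tokenizer_config.json` in this commit, so `trust_remote_code=True` is required.

```python
from transformers import AutoModelForCausalLM, AutoProcessor, AutoTokenizer

MODEL_PATH = "./ChatTS-14B"  # illustrative path to this checkpoint

# trust_remote_code lets transformers load Qwen2TSForCausalLM (modeling_qwen2.py)
# and Qwen2TSProcessor (processing_qwen2_ts.py) shipped with this repo.
# To reproduce the paper numbers, load the previous checkpoint instead
# (e.g. by pinning an earlier revision via the `revision` argument).
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH, trust_remote_code=True, device_map="auto", torch_dtype="float16"
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
processor = AutoProcessor.from_pretrained(
    MODEL_PATH, trust_remote_code=True, tokenizer=tokenizer  # tokenizer kwarg is an assumption
)
```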
- config.json +8 -5
- configuration_qwen2.py +1 -5
- generation_config.json +1 -1
- modeling_qwen2.py +110 -13
- processing_qwen2_ts.py +1 -1
- processor_config.json +6 -0
- pytorch_model-00001-of-00006.bin +1 -1
- pytorch_model-00002-of-00006.bin +1 -1
- pytorch_model-00003-of-00006.bin +1 -1
- pytorch_model-00004-of-00006.bin +1 -1
- pytorch_model-00005-of-00006.bin +1 -1
- pytorch_model-00006-of-00006.bin +2 -2
- pytorch_model.bin.index.json +3 -2
- tokenizer_config.json +5 -1
config.json
CHANGED
@@ -1,5 +1,4 @@
 {
-  "_name_or_path": "chatts_release",
   "architectures": [
     "Qwen2TSForCausalLM"
   ],
@@ -19,7 +18,7 @@
   "intermediate_size": 13824,
   "max_position_embeddings": 32768,
   "max_window_layers": 70,
-  "model_type": "
+  "model_type": "qwen2",
   "num_attention_heads": 40,
   "num_hidden_layers": 48,
   "num_key_value_heads": 8,
@@ -29,13 +28,17 @@
   "sliding_window": 131072,
   "tie_word_embeddings": false,
   "torch_dtype": "float16",
-  "transformers_version": "4.
+  "transformers_version": "4.52.4",
   "ts": {
+    "embedding_dim": 16,
     "hidden_size": 5120,
+    "max_length": 32768,
+    "max_sequence_length": 32768,
     "num_features": 2,
     "num_layers": 5,
-    "patch_size": 
-    "
+    "patch_size": 8,
+    "use_position_embedding": true,
+    "use_position_idx": false
   },
   "ts_token_end_index": 151666,
   "ts_token_start_index": 151665,
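For orientation, a small sketch (not from the repo) of what the new `ts` settings imply for the encoder's per-patch input width; the formula mirrors the `input_size` selection in `TimeSeriesEmbedding.__init__` in `modeling_qwen2.py` below.

```python
# Values from the updated "ts" section of config.json.
patch_size = 8
embedding_dim = 16
use_position_embedding = True

# Each patch contributes patch_size raw values, plus patch_size position
# embeddings of width embedding_dim when use_position_embedding is enabled.
if use_position_embedding:
    input_size = 1 * patch_size + embedding_dim * patch_size  # 8 + 128 = 136
else:
    input_size = 1 * patch_size  # 8

print(input_size)  # 136
```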
configuration_qwen2.py
CHANGED
@@ -1,5 +1,4 @@
 # coding=utf-8
-# The following code are reused from the QWen project (https://huggingface.co/Qwen/Qwen2.5-14B-Instruct) of Alibaba Cloud.
 # Copyright 2024 The Qwen team, Alibaba Group and the HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -13,9 +12,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-# The code is modified by ByteDance and Tsinghua University from the original implementation of Qwen:
-# - We changed Qwen2Config to Qwen2TSConfig to support time series modeling.
 """ Qwen2 model configuration"""
 
 from transformers import PretrainedConfig
@@ -93,7 +89,7 @@ class Qwen2TSConfig(PretrainedConfig):
     >>> configuration = model.config
     ```"""
 
-    model_type = "
+    model_type = "qwen2"
     keys_to_ignore_at_inference = ["past_key_values"]
 
     def __init__(
generation_config.json
CHANGED
@@ -10,5 +10,5 @@
   "temperature": 0.7,
   "top_k": 20,
   "top_p": 0.8,
-  "transformers_version": "4.
+  "transformers_version": "4.52.4"
 }
modeling_qwen2.py
CHANGED
@@ -106,6 +106,7 @@ class Qwen2TSCausalLMOutputWithPast(ModelOutput):
     attentions: Optional[Tuple[torch.FloatTensor]] = None
     attention_mask: Optional[torch.FloatTensor] = None
 
+
 ########################Naive TS Embedding#####################
 class TimeSeriesEmbedding(nn.Module):
     def __init__(self, config):
@@ -114,10 +115,23 @@ class TimeSeriesEmbedding(nn.Module):
         self.num_layers = config['num_layers']
         self.hidden_size = config['hidden_size']
         self.num_features = config['num_features']
-
+        self.max_sequence_length = config['max_sequence_length']  # Maximum time series length
+        self.use_position_embedding = config.get('use_position_embedding', False)
+        self.use_position_idx = config.get('use_position_idx', False)
+        self.embedding_dim = config.get('embedding_dim', 16)  # Embedding dimension
+
+        if self.use_position_embedding:
+            # Extended vocabulary: [0, max_sequence_length) for real positions, max_sequence_length for padding
+            self.position_embedding = nn.Embedding(self.max_sequence_length + 1, self.embedding_dim)
+            self.padding_idx = self.max_sequence_length  # Special index for padding
+            input_size = 1 * self.patch_size + self.embedding_dim * self.patch_size
+        elif self.use_position_idx:
+            input_size = 2 * self.patch_size
+        else:
+            input_size = 1 * self.patch_size
+
+        # Build MLP layers
         layers = []
-        input_size = 1 * self.patch_size
-
         for _ in range(self.num_layers - 1):
             layers.append(nn.Linear(input_size, self.hidden_size))
             layers.append(nn.GELU())
@@ -130,30 +144,100 @@ class TimeSeriesEmbedding(nn.Module):
         batch_size = x.size(0)
         x = x.reshape(batch_size, -1, self.num_features)
 
+        # Extract mask and calculate valid lengths
         mask = x[:, :, -1].long()
-        valid_lengths = mask.sum(dim=1).long()
-
-        patch_cnt = (valid_lengths + self.patch_size - 1) // self.patch_size  # round up
+        valid_lengths = mask.sum(dim=1).long()
+        patch_cnt = (valid_lengths + self.patch_size - 1) // self.patch_size
 
         patches_list = []
+        # Collect position indices for batch embedding lookup
+        all_position_indices = []
+        patch_info_list = []  # Store metadata for each patch group
+
         for i in range(batch_size):
            vl = valid_lengths[i].item()
            pc = patch_cnt[i].item()
            if pc == 0:
                continue
-
+
+            # Extract time series data (excluding mask)
+            xi = x[i, :vl, :1]  # Time-series data
            total_padded_length = pc * self.patch_size
            padding_length = total_padded_length - vl
+
+            # Create position indices: real positions for actual data, special index for padding
+            position_indices = torch.arange(vl, device=x.device)
+
            if padding_length > 0:
-
+                # Pad with last value
+                last_value = xi[-1:, :]
+                padding = last_value.repeat(padding_length, 1)
                xi = torch.cat([xi, padding], dim=0)
-
-
-
+
+                # Use special padding index for padding positions
+                padding_positions = torch.full((padding_length,), self.padding_idx, device=x.device)
+                position_indices = torch.cat([position_indices, padding_positions], dim=0)
+
+            # Reshape to patches
+            xi = xi.reshape(pc, self.patch_size)  # (num_patches, patch_size)
+            position_indices = position_indices.reshape(pc, self.patch_size)  # (num_patches, patch_size)
+
+            if self.use_position_embedding:
+                # Collect position indices instead of calling embedding immediately
+                all_position_indices.append(position_indices)
+                patch_info_list.append({
+                    'xi': xi,
+                    'pc': pc,
+                    'sample_idx': i
+                })
+            elif self.use_position_idx:
+                # Normalize position indices
+                pos_indices = torch.arange(vl, device=x.device).unsqueeze(1)
+                pos_indices = pos_indices / max(1, valid_lengths.max().item() - 1)
+                if padding_length > 0:
+                    # Use -1 for padding positions
+                    padding_indices = torch.full((padding_length, 1), -1, device=x.device)
+                    pos_indices = torch.cat([pos_indices, padding_indices], dim=0)
+                # Combine time series data with position indices
+                xi_combined = torch.cat([xi.reshape(-1, 1), pos_indices], dim=1)
+                patch_input = xi_combined.reshape(pc, self.patch_size * 2)
+                patches_list.append(patch_input)
+            else:
+                # No position embedding, use raw patches
+                patch_input = xi
+                patches_list.append(patch_input)
+
+        # Batch process position embeddings if needed
+        if self.use_position_embedding and all_position_indices:
+            # Concatenate all position indices for batch embedding lookup
+            batch_position_indices = torch.cat(all_position_indices, dim=0)
+            # print(f"{x.shape=}, {x.device=}, {len(all_position_indices)=}, {batch_position_indices=}")
+            batch_pos_emb = self.position_embedding(batch_position_indices)  # Single embedding call
+
+            # Split embeddings back and create patch inputs
+            emb_start_idx = 0
+            for patch_info in patch_info_list:
+                xi = patch_info['xi']
+                pc = patch_info['pc']
+
+                # Extract corresponding embeddings
+                pos_emb = batch_pos_emb[emb_start_idx:emb_start_idx + pc]
+                emb_start_idx += pc
+
+                # Flatten and concatenate
+                xi = xi.unsqueeze(-1)  # (num_patches, patch_size, 1)
+                patch_input = torch.cat([
+                    xi.flatten(1),       # (num_patches, patch_size)
+                    pos_emb.flatten(1)   # (num_patches, patch_size * embedding_dim)
+                ], dim=1)
+                patches_list.append(patch_input)
+
+        # Process all patches through MLP
        if patches_list:
-            x_patches = torch.cat(patches_list, dim=0)
+            x_patches = torch.cat(patches_list, dim=0)
            x = self.mlp(x_patches)
        else:
+            # Handle empty case
            x = torch.empty(0, self.hidden_size, device=x.device)
 
        return x, patch_cnt
@@ -1389,6 +1473,17 @@ class Qwen2TSForCausalLM(Qwen2PreTrainedModel):
        >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
        "Hey, are you conscious? Can you talk to me?\nI'm not conscious, but I can talk to you."
        ```"""
+
+        # if input_ids is not None and timeseries is not None:
+        #     # Batch decode the input
+        #     input_text = self.tokenizer.batch_decode(input_ids, skip_special_tokens=False)
+        #     # Print the input text
+        #     print("=================================================================")
+        #     print("Input text:", input_text)
+        #     print("Timeseries shape:", timeseries.shape)
+        #     print("=================================================================\n\n")
+        # else:
+        #     print("Time series is None!!!!")
 
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
@@ -1401,12 +1496,14 @@ class Qwen2TSForCausalLM(Qwen2PreTrainedModel):
 
        if timeseries is not None and timeseries.shape[0] > 0:
            # use_cache = False
+            # print(f"timeseries shape: {timeseries.shape=}, {input_ids.shape=}")
            ts_features, patch_cnt = self.ts_encoder(timeseries)
            inputs_embeds = inputs_embeds.to(ts_features.dtype)
 
            inputs_embeds, attention_mask, position_ids, labels = self._merge_input_ids_with_time_series_features(
                ts_features, inputs_embeds, input_ids, attention_mask, labels, patch_cnt
            )
+            # print(f"{inputs_embeds.shape=}, {attention_mask.shape=}, {position_ids.shape=}, {labels.shape=}")
 
            outputs = self.model(
                attention_mask=attention_mask,
@@ -1700,4 +1797,4 @@ class Qwen2ForSequenceClassification(Qwen2PreTrainedModel):
            past_key_values=transformer_outputs.past_key_values,
            hidden_states=transformer_outputs.hidden_states,
            attentions=transformer_outputs.attentions,
-        )
+        )
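A quick illustration of the patching logic above, using toy numbers rather than repo code: a valid length of 20 with patch_size=8 yields ceil(20/8)=3 patches, 4 padded steps that repeat the last value, and position indices that use the reserved padding index (max_sequence_length) for the padded slots.

```python
import torch

patch_size = 8
max_sequence_length = 32768          # from the "ts" section of config.json
padding_idx = max_sequence_length    # special index reserved for padding

vl = 20                                      # toy valid length
pc = (vl + patch_size - 1) // patch_size     # ceil division -> 3 patches
padding_length = pc * patch_size - vl        # 4 padded steps

position_indices = torch.arange(vl)
position_indices = torch.cat(
    [position_indices, torch.full((padding_length,), padding_idx)]
)
print(pc, padding_length, position_indices.reshape(pc, patch_size).shape)
# 3 4 torch.Size([3, 8])
```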
processing_qwen2_ts.py
CHANGED
@@ -41,7 +41,7 @@ def sp_encoding(timeseries: np.ndarray, eots_token: bool = True) -> Tuple[np.nda
     scale_factor = np.max(np.abs(scaled_timeseries)) / 3.0
     scaled_timeseries /= scale_factor
 
-    prompt = f"[
+    prompt = f"[offset={-mean:.4f}|scaling={scale_factor:.4f}|length={len(timeseries)}|max={max(timeseries):.4f}|min={min(timeseries):.4f}|left={timeseries[0]:.4f}|right={timeseries[-1]:.4f}]<ts>"
     if eots_token:
         prompt += '<ts/>'
 
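A toy illustration of the value-preserving prompt that sp_encoding now emits. The mean/scale handling below is a simplified stand-in (the real normalization happens in the unchanged part of the function); it is only meant to show the string format.

```python
import numpy as np

timeseries = np.array([1.0, 2.0, 4.0, 8.0, 4.0, 2.0])

# Simplified stand-ins for the normalization done earlier in sp_encoding (assumption).
mean = timeseries.mean()
scaled_timeseries = timeseries - mean
scale_factor = np.max(np.abs(scaled_timeseries)) / 3.0

prompt = (f"[offset={-mean:.4f}|scaling={scale_factor:.4f}|length={len(timeseries)}|"
          f"max={max(timeseries):.4f}|min={min(timeseries):.4f}|"
          f"left={timeseries[0]:.4f}|right={timeseries[-1]:.4f}]<ts>")
print(prompt)
# [offset=-3.5000|scaling=1.5000|length=6|max=8.0000|min=1.0000|left=1.0000|right=2.0000]<ts>
```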
processor_config.json
ADDED
@@ -0,0 +1,6 @@
+{
+  "auto_map": {
+    "AutoProcessor": "processing_qwen2_ts.Qwen2TSProcessor"
+  },
+  "processor_class": "Qwen2TSProcessor"
+}
pytorch_model-00001-of-00006.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:9fe54d5f99a2398419df0b9f4f8c48f1db148b3a074b28024569751fa99780a6
 size 4986229446
pytorch_model-00002-of-00006.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:aab4c20520e32cfce07b6b860c8a72b1ef11e02f187fbf3d054ebee67a2c67bd
 size 4954871698
pytorch_model-00003-of-00006.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:c46a806289c2b53ab2158a06c3c2d6fb99a8be5e6061e5434976c79129df29d3
 size 4954871762
pytorch_model-00004-of-00006.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:e038cc5ed0ba96941ae800272311084affa493262cde6a20fad1b6ad8a253610
 size 4954871762
pytorch_model-00005-of-00006.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:db8502c0a92a3545639afbb260c529efcd0733f5b972163462114c08dc5386dd
 size 4954871762
pytorch_model-00006-of-00006.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size 
+oid sha256:1b28791c5b49a4899902a7e079460d7a41e4377214ad18f6f5b88b1d53a40394
+size 4946759662
pytorch_model.bin.index.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size": 
+    "total_size": 29752274976
   },
   "weight_map": {
     "lm_head.weight": "pytorch_model-00006-of-00006.bin",
@@ -591,6 +591,7 @@
     "ts_encoder.mlp.6.bias": "pytorch_model-00006-of-00006.bin",
     "ts_encoder.mlp.6.weight": "pytorch_model-00006-of-00006.bin",
     "ts_encoder.mlp.8.bias": "pytorch_model-00006-of-00006.bin",
-    "ts_encoder.mlp.8.weight": "pytorch_model-00006-of-00006.bin"
+    "ts_encoder.mlp.8.weight": "pytorch_model-00006-of-00006.bin",
+    "ts_encoder.position_embedding.weight": "pytorch_model-00006-of-00006.bin"
   }
 }
tokenizer_config.json
CHANGED
@@ -199,14 +199,18 @@
     "<ts>",
     "<ts/>"
   ],
+  "auto_map": {
+    "AutoProcessor": "processing_qwen2_ts.Qwen2TSProcessor"
+  },
   "bos_token": null,
-  "chat_template": "{% set system_message = 'You are a helpful assistant.' %}{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ '<|im_start|>system\n' + system_message + '<|im_end|>\n' }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|im_start|>user\n' + content + '<|im_end|>\n<|im_start|>assistant\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|im_end|>' + '\n' }}{% endif %}{% endfor %}",
   "clean_up_tokenization_spaces": false,
   "eos_token": "<|im_end|>",
   "errors": "replace",
+  "extra_special_tokens": {},
   "model_max_length": 131072,
   "pad_token": "<|endoftext|>",
   "padding_side": "right",
+  "processor_class": "Qwen2TSProcessor",
   "split_special_tokens": false,
   "tokenizer_class": "Qwen2Tokenizer",
   "unk_token": null