---
base_model: []
library_name: transformers
tags:
- mergekit
- merge
- llama
- conversational
license: llama3
---

# L3-Tyche-8B-v1.0

![Tyche](https://huggingface.co/Azazelle/L3-Tyche-8B-v1.0/resolve/main/TPr9FmCtT0OxPuz2MXxjgw.webp)
## About:

This is a merge of pre-trained language models created using [mergekit](https://github.com/cg123/mergekit).

**Recommended Samplers:**
```
Temperature - 1.3
TFS - 0.96
Smoothing Factor - 0.3
Smoothing Curve - 1.1
Repetition Penalty - 1.08
```
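These settings map onto backends in different ways: temperature and repetition penalty are standard `transformers` generation arguments, while TFS and the smoothing factor/curve are typically exposed by frontends and backends such as SillyTavern, text-generation-webui, or koboldcpp rather than by plain `transformers`. As a minimal sketch, assuming you only want the `transformers`-supported subset (the prompt and the other generation arguments here are illustrative, not part of this card):

```python
# Minimal sketch: load the model with transformers and apply the subset of the
# recommended samplers that `generate` supports directly (temperature and
# repetition penalty). TFS and smoothing factor/curve need a backend that
# implements them (e.g. text-generation-webui or koboldcpp).
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "Azazelle/L3-Tyche-8B-v1.0"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype="auto")

messages = [{"role": "user", "content": "Introduce yourself in one sentence."}]
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

output = model.generate(
    input_ids,
    max_new_tokens=256,
    do_sample=True,
    temperature=1.3,          # from the recommended samplers above
    repetition_penalty=1.08,  # from the recommended samplers above
)
print(tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True))
```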
### Merge Method

This model was built through a series of model stock and LoRA merges, followed by ExPO and an attention swap. It uses a mix of smart and roleplay-centered models to improve performance.
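The ExPO stage in the config below uses `task_arithmetic` with a single input model, `weight: 1.3`, and `normalize: false`, which extrapolates past the base model in the direction of the model-stock merge rather than interpolating between them. A minimal sketch of that arithmetic, assuming mergekit's usual task-vector formulation (the function and variable names are illustrative, not mergekit internals):

```python
# Illustrative sketch of the ExPO / task-arithmetic step: with weight = 1.3 and
# normalize = false, each output tensor is base + 1.3 * (merged - base), i.e. an
# extrapolation beyond the base model. Hypothetical names; not mergekit's code.
import torch

def expo_extrapolate(base: torch.Tensor, merged: torch.Tensor, weight: float = 1.3) -> torch.Tensor:
    task_vector = merged - base          # "task vector" of the model-stock merge relative to the base
    return base + weight * task_vector   # weight > 1.0 pushes past the merged checkpoint

# Toy example on a single weight matrix:
base_w = torch.randn(4, 4)
merged_w = base_w + 0.1 * torch.randn(4, 4)
final_w = expo_extrapolate(base_w, merged_w)
```

With `weight: 1.0` this would simply reproduce the `hq_rp` merge; pushing the weight above 1.0 is what provides the ExPO effect.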
### Configuration

The following YAML configuration was used to produce this model:
```yaml
---
models:
  - model: Nitral-AI/Hathor_Tahsin-L3-8B-v0.85
  - model: Nitral-AI/Hathor_Respawn-L3-8B-v0.8
  - model: ChaoticNeutrals/Hathor_RP-v.01-L3-8B
  - model: Sao10K/L3-8B-Stheno-v3.2
  - model: yodayo-ai/nephra_v1.0
  - model: HiroseKoichi/L3-8B-Lunar-Stheno
  - model: Jellywibble/lora_120k_pref_data_ep2
  - model: Jellywibble/qlora_120k_pref_data_ep1
  - model: Jellywibble/meseca-20062024-c1
  - model: Hastagaras/Jamet-8B-L3-MK.V-Blackroot
  - model: Cas-Warehouse/Llama-3-SOVL-MopeyMule-Blackroot-8B
  - model: ResplendentAI/Nymph_8B+Azazelle/RP_Format_QuoteAsterisk_Llama3
  - model: R136a1/Bungo-L3-8B
  - model: maldv/badger-mu-llama-3-8b
  - model: TheDrummer/Llama-3SOME-8B-v2
  - model: Magpie-Align/Llama-3-8B-Magpie-Align-v0.1+Azazelle/Llama3_RP_ORPO_LoRA
  - model: grimjim/Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge+Azazelle/Llama-3-8B-Abomination-LORA
  - model: NousResearch/Hermes-2-Pro-Llama-3-8B+mpasila/Llama-3-Instruct-LiPPA-LoRA-8B
  - model: MaziyarPanahi/Llama-3-8B-Instruct-v0.8+Azazelle/Llama-3-Sunfall-8b-lora
  - model: openchat/openchat-3.6-8b-20240522+Azazelle/BlueMoon_Llama3
  - model: collaiborateorg/Collaiborator-MEDLLM-Llama-3-8B-v2+Azazelle/llama3-8b-hikikomori-v0.4
  - model: grimjim/Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge+grimjim/Llama-3-Instruct-abliteration-LoRA-8B
merge_method: model_stock
base_model: failspy/Meta-Llama-3-8B-Instruct-abliterated-v3
dtype: float32
vocab_type: bpe
name: hq_rp

---
# ExPO
models:
  - model: hq_rp
    parameters:
      weight: 1.3
merge_method: task_arithmetic
base_model: failspy/Meta-Llama-3-8B-Instruct-abliterated-v3
parameters:
  normalize: false
dtype: float32
vocab_type: bpe
name: pre

---
# Attention Donor
models:
  - model: Nitral-AI/Hathor_Tahsin-L3-8B-v0.85
  - model: Sao10K/L3-8B-Stheno-v3.2
merge_method: model_stock
base_model: failspy/Meta-Llama-3-8B-Instruct-abliterated-v3
dtype: float32
vocab_type: bpe
name: donor

---
# Attention swap?
models:
  - model: pre
merge_method: slerp
base_model: donor
parameters:
  t:
    - filter: mlp
      value: 0
    - value: 1
dtype: float32
vocab_type: bpe
```
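The config above is a multi-document YAML file: each `---` section is its own merge, and the `name:` fields (`hq_rp`, `pre`, `donor`) let later stages refer to earlier outputs. A rough sketch of reproducing the chain with mergekit's Python API (`run_merge` and `MergeConfiguration` appear in mergekit's README; the config filename and the name-to-output-path substitution below are my own assumptions about how the stages could be wired together when run one at a time):

```python
# Hypothetical sketch: run each YAML document in order, feeding earlier named
# merges (hq_rp, pre, donor) into later stages by path. Not the exact pipeline
# used to build this model, just one way to reproduce the chain manually.
import yaml
from mergekit.config import MergeConfiguration
from mergekit.merge import MergeOptions, run_merge

with open("tyche_config.yaml", "r", encoding="utf-8") as fp:  # assumed filename for the config above
    stages = list(yaml.safe_load_all(fp))  # one dict per `---` document

outputs = {}  # maps a stage's `name` to the directory it was merged into
for i, stage in enumerate(stages):
    name = stage.pop("name", None)  # `name` only labels intermediates; not part of a single merge config
    # Point references to earlier named merges at their output directories.
    for entry in stage.get("models", []):
        if entry.get("model") in outputs:
            entry["model"] = outputs[entry["model"]]
    if stage.get("base_model") in outputs:
        stage["base_model"] = outputs[stage["base_model"]]

    out_dir = f"./stage_{i}_{name or 'final'}"
    run_merge(
        MergeConfiguration.model_validate(stage),
        out_path=out_dir,
        options=MergeOptions(),  # defaults; enable CUDA, lazy unpickling, etc. as needed
    )
    if name:
        outputs[name] = out_dir
```

The last stage has no `name:`, so its output directory holds the final model.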
|