cortexso
/

llama3

+# Model identity
+name: Llama 3
+model: llama3:8B
+version: 1
+# Model files. Presumably llama_model_path is the GGUF weights file the
+# engine loads (relative path) — TODO confirm against the consumer.
+files:
+  - llama_model_path: model.gguf
+# Results Preferences
+# Sampling controls
+temperature: 0.7
+top_p: 0.95
+# Repetition penalties
+presence_penalty: 0
+frequency_penalty: 0
+# Accepted values: true | false
+stream: true
+# Infer from base config.json -> max_position_embeddings
+max_tokens: 8192
+# Engine / Model Settings
+# NOTE(review): the inline note below derives ngl from num_attention_heads.
+# ngl presumably is the number of layers offloaded to the GPU, which would
+# correspond to num_hidden_layers in config.json instead — confirm which
+# field is intended before reusing this note for another model.
+ngl: 32 # Infer from base config.json -> num_attention_heads
+ctx_len: 8192 # Infer from base config.json -> max_position_embeddings
+engine: cortex.llamacpp
+# Prompt template: can only be retrieved from the instruct model.
+# - Source: https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct/blob/main/tokenizer_config.json#L2053
+# - Requires a Jinja format parser (presumably to read the upstream
+#   template at the link above — TODO confirm).
+# {system_message} and {prompt} are the placeholders used in the string.
+prompt_template: "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"