Upload 16 files

- defaults.py +22 -11
- pages/2_👩🏼🔬 Describe Domain.py +10 -2
- pages/3_🌱 Generate Dataset.py +65 -65
- pipeline.py +29 -5
- pipeline.yaml +6 -6
- project_config.json +1 -1

defaults.py
CHANGED

@@ -1,12 +1,14 @@
+import os
 import json
 
 SEED_DATA_PATH = "seed_data.json"
 PIPELINE_PATH = "pipeline.yaml"
-REMOTE_CODE_PATHS = ["defaults.py", "domain.py", "pipeline.py"]
+REMOTE_CODE_PATHS = ["defaults.py", "domain.py", "pipeline.py", "requirements.txt"]
 DIBT_PARENT_APP_URL = "https://argilla-domain-specific-datasets-welcome.hf.space/"
 N_PERSPECTIVES = 5
 N_TOPICS = 5
 N_EXAMPLES = 5
+CODELESS_DISTILABEL = os.environ.get("CODELESS_DISTILABEL", True)
 
 ################################################
 # DEFAULTS ON FARMING
@@ -25,14 +27,23 @@ DEFAULT_SYSTEM_PROMPT = DEFAULT_DATA["domain_expert_prompt"]
 # PROJECT CONFIG FROM PARENT APP
 ################################################
 
-with open("project_config.json") as f:
-    PROJECT_CONFIG = json.load(f)
+try:
+    with open("project_config.json") as f:
+        PROJECT_CONFIG = json.load(f)
 
-PROJECT_NAME = PROJECT_CONFIG["project_name"]
-ARGILLA_SPACE_REPO_ID = PROJECT_CONFIG["argilla_space_repo_id"]
-DATASET_REPO_ID = PROJECT_CONFIG["dataset_repo_id"]
-ARGILLA_SPACE_NAME = ARGILLA_SPACE_REPO_ID.replace("/", "-").replace("_", "-")
-ARGILLA_URL = f"https://{ARGILLA_SPACE_NAME}.hf.space"
-PROJECT_SPACE_REPO_ID = PROJECT_CONFIG["project_space_repo_id"]
-DATASET_URL = f"https://huggingface.co/datasets/{DATASET_REPO_ID}"
-HUB_USERNAME = DATASET_REPO_ID.split("/")[0]
+    PROJECT_NAME = PROJECT_CONFIG["project_name"]
+    ARGILLA_SPACE_REPO_ID = PROJECT_CONFIG["argilla_space_repo_id"]
+    DATASET_REPO_ID = PROJECT_CONFIG["dataset_repo_id"]
+    ARGILLA_SPACE_NAME = ARGILLA_SPACE_REPO_ID.replace("/", "-").replace("_", "-")
+    ARGILLA_URL = f"https://{ARGILLA_SPACE_NAME}.hf.space"
+    PROJECT_SPACE_REPO_ID = PROJECT_CONFIG["project_space_repo_id"]
+    DATASET_URL = f"https://huggingface.co/datasets/{DATASET_REPO_ID}"
+    HUB_USERNAME = DATASET_REPO_ID.split("/")[0]
+except FileNotFoundError:
+    PROJECT_NAME = "DEFAULT_DOMAIN"
+    ARGILLA_SPACE_REPO_ID = ""
+    DATASET_REPO_ID = ""
+    ARGILLA_URL = ""
+    PROJECT_SPACE_REPO_ID = ""
+    DATASET_URL = ""
+    HUB_USERNAME = ""
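
A note on the new CODELESS_DISTILABEL default: os.environ.get returns a string whenever the variable is set, so any non-empty value (including "false" or "0") stays truthy in the later `if CODELESS_DISTILABEL:` checks. A minimal sketch of a stricter parse, assuming you want the Space variable to be able to switch the codeless path off; the helper name is illustrative and not part of this commit:

import os


def env_flag(name: str, default: bool = True) -> bool:
    """Interpret an environment variable as a boolean flag (illustrative helper)."""
    raw = os.environ.get(name)
    if raw is None:
        return default
    # Treat common "off" spellings as False; everything else as True.
    return raw.strip().lower() not in ("0", "false", "no", "off", "")


CODELESS_DISTILABEL = env_flag("CODELESS_DISTILABEL", default=True)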

pages/2_👩🏼🔬 Describe Domain.py
CHANGED

@@ -14,7 +14,6 @@ from defaults import (
     N_TOPICS,
     SEED_DATA_PATH,
     PIPELINE_PATH,
-    PROJECT_NAME,
     DATASET_REPO_ID,
 )
 from utils import project_sidebar
@@ -231,9 +230,18 @@ if st.button("🤗 Push Dataset Seed") and all(
         pipeline_path=PIPELINE_PATH,
     )
 
-    st.
+    st.success(
         f"Dataset seed created and pushed to the Hub. Check it out [here](https://huggingface.co/datasets/{hub_username}/{project_name})"
     )
+
+    st.write("You can now move on to runnning your distilabel pipeline.")
+
+    st.page_link(
+        page="pages/3_🌱 Generate Dataset.py",
+        label="Generate Dataset",
+        icon="🌱",
+    )
+
 else:
     st.info(
         "Please fill in all the required domain fields to push the dataset seed to the Hub"

pages/3_🌱 Generate Dataset.py
CHANGED

@@ -1,17 +1,13 @@
 import streamlit as st
-from streamlit.errors import EntryNotFoundError
 
 from hub import pull_seed_data_from_repo, push_pipeline_to_hub
 from defaults import (
     DEFAULT_SYSTEM_PROMPT,
     PIPELINE_PATH,
     PROJECT_NAME,
-    ARGILLA_SPACE_REPO_ID,
-    DATASET_REPO_ID,
-    ARGILLA_SPACE_NAME,
     ARGILLA_URL,
-    PROJECT_SPACE_REPO_ID,
     HUB_USERNAME,
+    CODELESS_DISTILABEL,
 )
 from utils import project_sidebar
 
@@ -75,20 +71,21 @@ st.divider()
 st.markdown("### Run the pipeline")
 
 st.write(
-    "Once you've defined the pipeline configuration, you can run the pipeline
+    "Once you've defined the pipeline configuration, you can run the pipeline from your local machine."
 )
 
+if CODELESS_DISTILABEL:
+    st.write(
+        """We recommend running the pipeline locally if you're planning on generating a large dataset. \
+        But running the pipeline on this space is a handy way to get started quickly. Your synthetic
+        samples will be pushed to Argilla and available for review.
+        """
+    )
+    st.write(
+        """If you're planning on running the pipeline on the space, be aware that it \
+        will take some time to complete and you will need to maintain a \
+        connection to the space."""
+    )
 
 
 if st.button("💻 Run pipeline locally", key="run_pipeline_local"):
@@ -147,13 +144,16 @@ if st.button("💻 Run pipeline locally", key="run_pipeline_local"):
         hub_token=hub_token,
         pipeline_config_path=PIPELINE_PATH,
         argilla_dataset_name=argilla_dataset_name,
+        argilla_api_key=argilla_api_key,
+        argilla_api_url=argilla_url,
     )
     st.code(
        f"""
        pip install git+https://github.com/argilla-io/distilabel.git
-       git clone https://huggingface.co/{hub_username}/{project_name}
+       git clone https://huggingface.co/datasets/{hub_username}/{project_name}
        cd {project_name}
+       pip install -r requirements.txt
+       {' '.join(["python"] + command_to_run[1:])}
        """,
        language="bash",
    )
@@ -163,57 +163,57 @@ if st.button("💻 Run pipeline locally", key="run_pipeline_local"):
 ###############################################################
 # SPACE
 ###############################################################
-if st.button("🔥 Run pipeline right here, right now!"):
-    try:
+if CODELESS_DISTILABEL:
+    if st.button("🔥 Run pipeline right here, right now!"):
+        if all(
+            [
+                argilla_api_key,
+                argilla_url,
+                base_url,
+                hub_username,
+                project_name,
+                hub_token,
+                argilla_dataset_name,
+            ]
+        ):
+            with st.spinner("Pulling seed data from the Hub..."):
                seed_data = pull_seed_data_from_repo(
                    repo_id=f"{hub_username}/{project_name}",
                    hub_token=hub_token,
                )
+            domain = seed_data["domain"]
+            perspectives = seed_data["perspectives"]
+            topics = seed_data["topics"]
+            examples = seed_data["examples"]
+            domain_expert_prompt = seed_data["domain_expert_prompt"]
+
+            with st.spinner("Serializing the pipeline configuration..."):
+                serialize_pipeline(
+                    argilla_api_key=argilla_api_key,
+                    argilla_dataset_name=argilla_dataset_name,
+                    argilla_api_url=argilla_url,
+                    topics=topics,
+                    perspectives=perspectives,
+                    pipeline_config_path=PIPELINE_PATH,
+                    domain_expert_prompt=domain_expert_prompt or DEFAULT_SYSTEM_PROMPT,
+                    hub_token=hub_token,
+                    endpoint_base_url=base_url,
+                    examples=examples,
                )
 
-            argilla_api_key=argilla_api_key,
-            argilla_dataset_name=argilla_dataset_name,
-            argilla_api_url=argilla_url,
-            topics=topics,
-            perspectives=perspectives,
-            pipeline_config_path=PIPELINE_PATH,
-            domain_expert_prompt=domain_expert_prompt or DEFAULT_SYSTEM_PROMPT,
-            hub_token=hub_token,
-            endpoint_base_url=base_url,
-            examples=examples,
-        )
-        with st.spinner("Starting the pipeline..."):
-            logs = run_pipeline(PIPELINE_PATH)
+            with st.spinner("Starting the pipeline..."):
+                logs = run_pipeline(
+                    pipeline_config_path=PIPELINE_PATH,
+                    argilla_api_key=argilla_api_key,
+                    argilla_api_url=argilla_url,
+                    hub_token=hub_token,
+                    argilla_dataset_name=argilla_dataset_name,
+                )
 
+            st.success(f"Pipeline started successfully! 🚀")
 
+            with st.expander(label="View Logs", expanded=True):
+                for out in logs:
+                    st.text(out)
+        else:
+            st.error("Please fill all the required fields.")
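
On the new `{' '.join(["python"] + command_to_run[1:])}` line in the st.code snippet: it drops the first element of the command list (which, per pipeline.py below, is sys.executable inside the Space) and substitutes a plain `python` so the copied command works on the reader's machine. A small sketch with illustrative values only; the real list comes from create_pipelines_run_command() in pipeline.py and includes several --param flags omitted here:

# Placeholder list; flags abridged for brevity.
command_to_run = [
    "/usr/local/bin/python",  # sys.executable inside the Space
    "-m",
    "distilabel",
    "pipeline",
    "run",
    "--config",
    "pipeline.yaml",
]

# What the st.code block renders for local use:
print(" ".join(["python"] + command_to_run[1:]))
# python -m distilabel pipeline run --config pipeline.yaml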

pipeline.py
CHANGED

@@ -1,5 +1,5 @@
-import os
 import subprocess
+import sys
 import time
 from typing import List
 
@@ -82,10 +82,11 @@ def define_pipeline(
         input_batch_size=8,
         input_mappings={"instruction": "evolved_questions"},
         output_mappings={"generation": "domain_expert_answer"},
-        _system_prompt=domain_expert_prompt,
-        _template=template,
     )
 
+    domain_expert._system_prompt = domain_expert_prompt
+    domain_expert._template = template
+
     keep_columns = KeepColumns(
         name="keep_columns",
         columns=["model_name", "evolved_questions", "domain_expert_answer"],
@@ -142,12 +143,15 @@ def serialize_pipeline(
 
 
 def create_pipelines_run_command(
+    hub_token: str,
+    argilla_api_key: str,
+    argilla_api_url: str,
     pipeline_config_path: str = "pipeline.yaml",
    argilla_dataset_name: str = "domain_specific_datasets",
 ):
     """Create the command to run the pipeline."""
     command_to_run = [
+        sys.executable,
        "-m",
        "distilabel",
        "pipeline",
@@ -156,24 +160,44 @@
         pipeline_config_path,
         "--param",
         f"text_generation_to_argilla.dataset_name={argilla_dataset_name}",
+        "--param",
+        f"text_generation_to_argilla.api_key={argilla_api_key}",
+        "--param",
+        f"text_generation_to_argilla.api_url={argilla_api_url}",
+        "--param",
+        f"self-instruct.llm.api_key={hub_token}",
+        "--param",
+        f"evol_instruction_complexity.llm.api_key={hub_token}",
+        "--param",
+        f"domain_expert.llm.api_key={hub_token}",
+        "--ignore-cache",
     ]
     return command_to_run
 
 
 def run_pipeline(
+    hub_token: str,
+    argilla_api_key: str,
+    argilla_api_url: str,
     pipeline_config_path: str = "pipeline.yaml",
     argilla_dataset_name: str = "domain_specific_datasets",
 ):
     """Run the pipeline and yield the output as a generator of logs."""
 
     command_to_run = create_pipelines_run_command(
+        hub_token=hub_token,
         pipeline_config_path=pipeline_config_path,
         argilla_dataset_name=argilla_dataset_name,
+        argilla_api_key=argilla_api_key,
+        argilla_api_url=argilla_api_url,
    )
 
     # Run the script file
     process = subprocess.Popen(
-        command_to_run,
+        args=command_to_run,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        env={"HF_TOKEN": hub_token},
    )
 
    while process.stdout and process.stdout.readable():
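
One caveat worth noting on the new subprocess.Popen call: passing env={"HF_TOKEN": hub_token} replaces the child's entire environment rather than adding a single variable, which can drop PATH, HOME, and any other Space secrets the pipeline depends on. A minimal sketch of the more conservative pattern, as an aside rather than what the commit does (the command and token below are placeholders):

import os
import subprocess
import sys

command_to_run = [sys.executable, "-m", "distilabel", "--help"]  # placeholder command

process = subprocess.Popen(
    args=command_to_run,
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,  # merge stderr so errors land in the same log stream
    text=True,                 # decode bytes to str before displaying
    env={**os.environ, "HF_TOKEN": "hf_xxx"},  # extend, rather than replace, the environment
)

# Stream logs line by line, matching the `while process.stdout ...` loop kept above.
assert process.stdout is not None
for line in process.stdout:
    print(line.rstrip())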

pipeline.yaml
CHANGED

@@ -1,5 +1,5 @@
 distilabel:
-  version: 1.0.
+  version: 1.0.1
 pipeline:
   name: farming
   description: null
@@ -54,7 +54,7 @@ pipeline:
        model_id: null
        endpoint_name: null
        endpoint_namespace: null
-       base_url: https://
+       base_url: https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2
        tokenizer_id: null
        model_display_name: null
        use_openai_client: false
@@ -163,7 +163,7 @@ pipeline:
        model_id: null
        endpoint_name: null
        endpoint_namespace: null
-       base_url: https://
+       base_url: https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2
        tokenizer_id: null
        model_display_name: null
        use_openai_client: false
@@ -390,7 +390,7 @@ pipeline:
        model_id: null
        endpoint_name: null
        endpoint_namespace: null
-       base_url: https://
+       base_url: https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2
        tokenizer_id: null
        model_display_name: null
        use_openai_client: false
@@ -489,9 +489,9 @@ pipeline:
        generation: domain_expert_answer
      output_mappings: {}
      input_batch_size: 50
-     dataset_name:
+     dataset_name: test_3
      dataset_workspace: admin
-     api_url: https://argilla-
+     api_url: https://burtenshaw-test-3-argilla-space.hf.space
      runtime_parameters_info:
      - name: input_batch_size
        optional: true

project_config.json
CHANGED

@@ -1 +1 @@
-{"project_name": "
+{"project_name": "test_3", "argilla_space_repo_id": "burtenshaw/test_3_argilla_space", "project_space_repo_id": "burtenshaw/test_3_config_space", "dataset_repo_id": "burtenshaw/test_3"}
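
For reference, these are the values that defaults.py (above) turns into its derived constants. A quick check using the committed config and the same replace/split logic shows why the hard-coded api_url in pipeline.yaml matches this project:

ARGILLA_SPACE_REPO_ID = "burtenshaw/test_3_argilla_space"
DATASET_REPO_ID = "burtenshaw/test_3"

ARGILLA_SPACE_NAME = ARGILLA_SPACE_REPO_ID.replace("/", "-").replace("_", "-")
ARGILLA_URL = f"https://{ARGILLA_SPACE_NAME}.hf.space"
DATASET_URL = f"https://huggingface.co/datasets/{DATASET_REPO_ID}"
HUB_USERNAME = DATASET_REPO_ID.split("/")[0]

print(ARGILLA_URL)   # https://burtenshaw-test-3-argilla-space.hf.space
print(DATASET_URL)   # https://huggingface.co/datasets/burtenshaw/test_3
print(HUB_USERNAME)  # burtenshaw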