Commit
·
32d8669
1
Parent(s):
f5ab4cb
update examples
Browse files
examples/argilla_deployment.py
CHANGED
@@ -4,7 +4,9 @@ import os
|
|
4 |
from synthetic_dataset_generator import launch
|
5 |
|
6 |
# Follow https://docs.argilla.io/latest/getting_started/quickstart/ to get your Argilla API key and URL
|
7 |
-
os.environ["ARGILLA_API_URL"] =
|
8 |
-
|
|
|
|
|
9 |
|
10 |
launch()
|
|
|
4 |
from synthetic_dataset_generator import launch
|
5 |
|
6 |
# Follow https://docs.argilla.io/latest/getting_started/quickstart/ to get your Argilla API key and URL
|
7 |
+
os.environ["ARGILLA_API_URL"] = (
|
8 |
+
"https://[your-owner-name]-[your_space_name].hf.space" # argilla base url
|
9 |
+
)
|
10 |
+
os.environ["ARGILLA_API_KEY"] = "my_api_key" # argilla api key
|
11 |
|
12 |
launch()
|
examples/ollama_deployment.py
CHANGED
@@ -6,12 +6,9 @@ import os
|
|
6 |
from synthetic_dataset_generator import launch
|
7 |
|
8 |
assert os.getenv("HF_TOKEN") # push the data to huggingface
|
9 |
-
os.environ["OLLAMA_BASE_URL"] = "http://127.0.0.1:11434/"
|
10 |
-
os.environ["MODEL"] = "llama3.1:8b-instruct-q8_0"
|
11 |
-
os.environ["TOKENIZER_ID"] = "meta-llama/Llama-3.1-8B-Instruct"
|
12 |
-
os.environ["MAGPIE_PRE_QUERY_TEMPLATE"] = "llama3"
|
13 |
-
os.environ["MAX_NUM_ROWS"] = "10000"
|
14 |
-
os.environ["DEFAULT_BATCH_SIZE"] = "5"
|
15 |
-
os.environ["MAX_NUM_TOKENS"] = "2048"
|
16 |
|
17 |
launch()
|
|
|
6 |
from synthetic_dataset_generator import launch
|
7 |
|
8 |
assert os.getenv("HF_TOKEN") # push the data to huggingface
|
9 |
+
os.environ["OLLAMA_BASE_URL"] = "http://127.0.0.1:11434/" # ollama base url
|
10 |
+
os.environ["MODEL"] = "llama3.1:8b-instruct-q8_0" # model id
|
11 |
+
os.environ["TOKENIZER_ID"] = "meta-llama/Llama-3.1-8B-Instruct" # tokenizer id
|
12 |
+
os.environ["MAGPIE_PRE_QUERY_TEMPLATE"] = "llama3" # magpie template
|
|
|
|
|
|
|
13 |
|
14 |
launch()
|
examples/openai_deployment.py
CHANGED
@@ -4,9 +4,9 @@ import os
|
|
4 |
from synthetic_dataset_generator import launch
|
5 |
|
6 |
assert os.getenv("HF_TOKEN") # push the data to huggingface
|
7 |
-
os.environ["OPENAI_BASE_URL"] = "https://api.openai.com/v1/"
|
8 |
-
os.environ["API_KEY"] = os.getenv("OPENAI_API_KEY")
|
9 |
-
os.environ["MODEL"] = "gpt-4o"
|
10 |
os.environ["MAGPIE_PRE_QUERY_TEMPLATE"] = None # chat data not supported with OpenAI
|
11 |
|
12 |
launch()
|
|
|
4 |
from synthetic_dataset_generator import launch
|
5 |
|
6 |
assert os.getenv("HF_TOKEN") # push the data to huggingface
|
7 |
+
os.environ["OPENAI_BASE_URL"] = "https://api.openai.com/v1/" # openai base url
|
8 |
+
os.environ["API_KEY"] = os.getenv("OPENAI_API_KEY") # openai api key
|
9 |
+
os.environ["MODEL"] = "gpt-4o" # model id
|
10 |
os.environ["MAGPIE_PRE_QUERY_TEMPLATE"] = None # chat data not supported with OpenAI
|
11 |
|
12 |
launch()
|
examples/tgi_or_hf_dedicated.py
CHANGED
@@ -4,8 +4,8 @@ import os
|
|
4 |
from synthetic_dataset_generator import launch
|
5 |
|
6 |
assert os.getenv("HF_TOKEN") # push the data to huggingface
|
7 |
-
os.environ["HUGGINGFACE_BASE_URL"] = "http://127.0.0.1:3000/"
|
8 |
-
os.environ["MAGPIE_PRE_QUERY_TEMPLATE"] = "llama3"
|
9 |
os.environ["TOKENIZER_ID"] = (
|
10 |
"meta-llama/Llama-3.1-8B-Instruct" # tokenizer for model hosted on endpoint
|
11 |
)
|
|
|
4 |
from synthetic_dataset_generator import launch
|
5 |
|
6 |
assert os.getenv("HF_TOKEN") # push the data to huggingface
|
7 |
+
os.environ["HUGGINGFACE_BASE_URL"] = "http://127.0.0.1:3000/" # dedicated endpoint/TGI
|
8 |
+
os.environ["MAGPIE_PRE_QUERY_TEMPLATE"] = "llama3" # magpie template
|
9 |
os.environ["TOKENIZER_ID"] = (
|
10 |
"meta-llama/Llama-3.1-8B-Instruct" # tokenizer for model hosted on endpoint
|
11 |
)
|
src/synthetic_dataset_generator/constants.py
CHANGED
@@ -22,12 +22,10 @@ if HUGGINGFACE_BASE_URL and MODEL:
|
|
22 |
raise ValueError(
|
23 |
"`HUGGINGFACE_BASE_URL` and `MODEL` cannot be set at the same time. Use a model id for serverless inference and a base URL dedicated to Hugging Face Inference Endpoints."
|
24 |
)
|
25 |
-
if
|
26 |
-
if
|
27 |
raise ValueError("`MODEL` is not set. Please provide a model id for inference.")
|
28 |
|
29 |
-
|
30 |
-
|
31 |
# Check if multiple base URLs are provided
|
32 |
base_urls = [
|
33 |
url for url in [OPENAI_BASE_URL, OLLAMA_BASE_URL, HUGGINGFACE_BASE_URL] if url
|
|
|
22 |
raise ValueError(
|
23 |
"`HUGGINGFACE_BASE_URL` and `MODEL` cannot be set at the same time. Use a model id for serverless inference and a base URL dedicated to Hugging Face Inference Endpoints."
|
24 |
)
|
25 |
+
if not MODEL:
|
26 |
+
if OPENAI_BASE_URL or OLLAMA_BASE_URL:
|
27 |
raise ValueError("`MODEL` is not set. Please provide a model id for inference.")
|
28 |
|
|
|
|
|
29 |
# Check if multiple base URLs are provided
|
30 |
base_urls = [
|
31 |
url for url in [OPENAI_BASE_URL, OLLAMA_BASE_URL, HUGGINGFACE_BASE_URL] if url
|