davidberenstein1957 HF staff committed on
Commit
32d8669
·
1 Parent(s): f5ab4cb

update examples

Browse files
examples/argilla_deployment.py CHANGED
@@ -4,7 +4,9 @@ import os
4
  from synthetic_dataset_generator import launch
5
 
6
  # Follow https://docs.argilla.io/latest/getting_started/quickstart/ to get your Argilla API key and URL
7
- os.environ["ARGILLA_API_URL"] = "https://[your-owner-name]-[your_space_name].hf.space"
8
- os.environ["ARGILLA_API_KEY"] = "my_api_key"
 
 
9
 
10
  launch()
 
4
  from synthetic_dataset_generator import launch
5
 
6
  # Follow https://docs.argilla.io/latest/getting_started/quickstart/ to get your Argilla API key and URL
7
+ os.environ["ARGILLA_API_URL"] = (
8
+ "https://[your-owner-name]-[your_space_name].hf.space" # argilla base url
9
+ )
10
+ os.environ["ARGILLA_API_KEY"] = "my_api_key" # argilla api key
11
 
12
  launch()
examples/ollama_deployment.py CHANGED
@@ -6,12 +6,9 @@ import os
6
  from synthetic_dataset_generator import launch
7
 
8
  assert os.getenv("HF_TOKEN") # push the data to huggingface
9
- os.environ["OLLAMA_BASE_URL"] = "http://127.0.0.1:11434/"
10
- os.environ["MODEL"] = "llama3.1:8b-instruct-q8_0"
11
- os.environ["TOKENIZER_ID"] = "meta-llama/Llama-3.1-8B-Instruct"
12
- os.environ["MAGPIE_PRE_QUERY_TEMPLATE"] = "llama3"
13
- os.environ["MAX_NUM_ROWS"] = "10000"
14
- os.environ["DEFAULT_BATCH_SIZE"] = "5"
15
- os.environ["MAX_NUM_TOKENS"] = "2048"
16
 
17
  launch()
 
6
  from synthetic_dataset_generator import launch
7
 
8
  assert os.getenv("HF_TOKEN") # push the data to huggingface
9
+ os.environ["OLLAMA_BASE_URL"] = "http://127.0.0.1:11434/" # ollama base url
10
+ os.environ["MODEL"] = "llama3.1:8b-instruct-q8_0" # model id
11
+ os.environ["TOKENIZER_ID"] = "meta-llama/Llama-3.1-8B-Instruct" # tokenizer id
12
+ os.environ["MAGPIE_PRE_QUERY_TEMPLATE"] = "llama3" # magpie template
 
 
 
13
 
14
  launch()
examples/openai_deployment.py CHANGED
@@ -4,9 +4,9 @@ import os
4
  from synthetic_dataset_generator import launch
5
 
6
  assert os.getenv("HF_TOKEN") # push the data to huggingface
7
- os.environ["OPENAI_BASE_URL"] = "https://api.openai.com/v1/"
8
- os.environ["API_KEY"] = os.getenv("OPENAI_API_KEY")
9
- os.environ["MODEL"] = "gpt-4o"
10
  os.environ["MAGPIE_PRE_QUERY_TEMPLATE"] = None # chat data not supported with OpenAI
11
 
12
  launch()
 
4
  from synthetic_dataset_generator import launch
5
 
6
  assert os.getenv("HF_TOKEN") # push the data to huggingface
7
+ os.environ["OPENAI_BASE_URL"] = "https://api.openai.com/v1/" # openai base url
8
+ os.environ["API_KEY"] = os.getenv("OPENAI_API_KEY") # openai api key
9
+ os.environ["MODEL"] = "gpt-4o" # model id
10
  os.environ["MAGPIE_PRE_QUERY_TEMPLATE"] = None # chat data not supported with OpenAI
11
 
12
  launch()
examples/tgi_or_hf_dedicated.py CHANGED
@@ -4,8 +4,8 @@ import os
4
  from synthetic_dataset_generator import launch
5
 
6
  assert os.getenv("HF_TOKEN") # push the data to huggingface
7
- os.environ["HUGGINGFACE_BASE_URL"] = "http://127.0.0.1:3000/"
8
- os.environ["MAGPIE_PRE_QUERY_TEMPLATE"] = "llama3"
9
  os.environ["TOKENIZER_ID"] = (
10
  "meta-llama/Llama-3.1-8B-Instruct" # tokenizer for model hosted on endpoint
11
  )
 
4
  from synthetic_dataset_generator import launch
5
 
6
  assert os.getenv("HF_TOKEN") # push the data to huggingface
7
+ os.environ["HUGGINGFACE_BASE_URL"] = "http://127.0.0.1:3000/" # dedicated endpoint/TGI
8
+ os.environ["MAGPIE_PRE_QUERY_TEMPLATE"] = "llama3" # magpie template
9
  os.environ["TOKENIZER_ID"] = (
10
  "meta-llama/Llama-3.1-8B-Instruct" # tokenizer for model hosted on endpoint
11
  )
src/synthetic_dataset_generator/constants.py CHANGED
@@ -22,12 +22,10 @@ if HUGGINGFACE_BASE_URL and MODEL:
22
  raise ValueError(
23
  "`HUGGINGFACE_BASE_URL` and `MODEL` cannot be set at the same time. Use a model id for serverless inference and a base URL dedicated to Hugging Face Inference Endpoints."
24
  )
25
- if OPENAI_BASE_URL or OLLAMA_BASE_URL:
26
- if not MODEL:
27
  raise ValueError("`MODEL` is not set. Please provide a model id for inference.")
28
 
29
-
30
-
31
  # Check if multiple base URLs are provided
32
  base_urls = [
33
  url for url in [OPENAI_BASE_URL, OLLAMA_BASE_URL, HUGGINGFACE_BASE_URL] if url
 
22
  raise ValueError(
23
  "`HUGGINGFACE_BASE_URL` and `MODEL` cannot be set at the same time. Use a model id for serverless inference and a base URL dedicated to Hugging Face Inference Endpoints."
24
  )
25
+ if not MODEL:
26
+ if OPENAI_BASE_URL or OLLAMA_BASE_URL:
27
  raise ValueError("`MODEL` is not set. Please provide a model id for inference.")
28
 
 
 
29
  # Check if multiple base URLs are provided
30
  base_urls = [
31
  url for url in [OPENAI_BASE_URL, OLLAMA_BASE_URL, HUGGINGFACE_BASE_URL] if url