lhoestq HF staff committed on
Commit
93ec714
·
1 Parent(s): 06ee5d2

add -1k suffix

Browse files
Files changed (1) hide show
  1. app.py +3 -2
app.py CHANGED
@@ -29,6 +29,7 @@ NAMESPACE = "dataset-rewriter"
29
 
30
  NUM_ROWS_PREVIEW = 3
31
  MAX_NUM_ROWS_TO_REWRITE = 1000
 
32
  NUM_PARALLEL_CALLS = 10
33
  NUM_ROWS_PER_CALL = 10
34
  MAX_PROGRESS_UPDATES_PER_SECOND = 4
@@ -187,7 +188,7 @@ with gr.Blocks(css=css) as demo:
187
  out = dataset.split("/")[-1] + out.split("should be", 1)[1].replace(" ", "-").replace(".", "").replace(",", "")
188
  else:
189
  out = dataset.split("/")[-1] + prompt.replace(" ", "-")
190
- return out[:90] + "-" + Hasher.hash(prompt)[:4]
191
 
192
  def _write_generator_to_queue(queue: Queue, func: Callable[..., Iterable], kwargs: dict) -> None:
193
  for i, result in enumerate(func(**kwargs)):
@@ -359,7 +360,7 @@ with gr.Blocks(css=css) as demo:
359
  print(f"Done ReWriting {dataset} (full dataset) with instruction '{prompt}'")
360
 
361
  output_rows = [{k: json.loads(row[k]) for k in output_format_df["column"]} for rows in parallel_output_rows for row in rows]
362
- repo_id = namespace + "/" + find_new_name(dataset, prompt)
363
  yield {full_dataset_generation_label: gr.Label({f"✅ ReWriting {dataset}": len(output_rows) / total, f"⚙️ Saving to {repo_id}": 0.})}
364
  token = oauth_token.token if oauth_token else save_dataset_hf_token
365
  print(f"Saving {repo_id}")
 
29
 
30
  NUM_ROWS_PREVIEW = 3
31
  MAX_NUM_ROWS_TO_REWRITE = 1000
32
+ PARTIAL_SUFFIX = "-1k"
33
  NUM_PARALLEL_CALLS = 10
34
  NUM_ROWS_PER_CALL = 10
35
  MAX_PROGRESS_UPDATES_PER_SECOND = 4
 
188
  out = dataset.split("/")[-1] + out.split("should be", 1)[1].replace(" ", "-").replace(".", "").replace(",", "")
189
  else:
190
  out = dataset.split("/")[-1] + prompt.replace(" ", "-")
191
+ return out[:80] + "-" + Hasher.hash(prompt)[:4]
192
 
193
  def _write_generator_to_queue(queue: Queue, func: Callable[..., Iterable], kwargs: dict) -> None:
194
  for i, result in enumerate(func(**kwargs)):
 
360
  print(f"Done ReWriting {dataset} (full dataset) with instruction '{prompt}'")
361
 
362
  output_rows = [{k: json.loads(row[k]) for k in output_format_df["column"]} for rows in parallel_output_rows for row in rows]
363
+ repo_id = namespace + "/" + find_new_name(dataset + (PARTIAL_SUFFIX if num_examples > total else ""), prompt)
364
  yield {full_dataset_generation_label: gr.Label({f"✅ ReWriting {dataset}": len(output_rows) / total, f"⚙️ Saving to {repo_id}": 0.})}
365
  token = oauth_token.token if oauth_token else save_dataset_hf_token
366
  print(f"Saving {repo_id}")