Spaces:
Sleeping
add -1k suffix
Browse files
app.py
CHANGED
@@ -29,6 +29,7 @@ NAMESPACE = "dataset-rewriter"
|
|
29 |
|
30 |
NUM_ROWS_PREVIEW = 3
|
31 |
MAX_NUM_ROWS_TO_REWRITE = 1000
|
|
|
32 |
NUM_PARALLEL_CALLS = 10
|
33 |
NUM_ROWS_PER_CALL = 10
|
34 |
MAX_PROGRESS_UPDATES_PER_SECOND = 4
|
@@ -187,7 +188,7 @@ with gr.Blocks(css=css) as demo:
|
|
187 |
out = dataset.split("/")[-1] + out.split("should be", 1)[1].replace(" ", "-").replace(".", "").replace(",", "")
|
188 |
else:
|
189 |
out = dataset.split("/")[-1] + prompt.replace(" ", "-")
|
190 |
-
return out[:
|
191 |
|
192 |
def _write_generator_to_queue(queue: Queue, func: Callable[..., Iterable], kwargs: dict) -> None:
|
193 |
for i, result in enumerate(func(**kwargs)):
|
@@ -359,7 +360,7 @@ with gr.Blocks(css=css) as demo:
|
|
359 |
print(f"Done ReWriting {dataset} (full dataset) with instruction '{prompt}'")
|
360 |
|
361 |
output_rows = [{k: json.loads(row[k]) for k in output_format_df["column"]} for rows in parallel_output_rows for row in rows]
|
362 |
-
repo_id = namespace + "/" + find_new_name(dataset, prompt)
|
363 |
yield {full_dataset_generation_label: gr.Label({f"✅ ReWriting {dataset}": len(output_rows) / total, f"⚙️ Saving to {repo_id}": 0.})}
|
364 |
token = oauth_token.token if oauth_token else save_dataset_hf_token
|
365 |
print(f"Saving {repo_id}")
|
|
|
29 |
|
30 |
NUM_ROWS_PREVIEW = 3
|
31 |
MAX_NUM_ROWS_TO_REWRITE = 1000
|
32 |
+
PARTIAL_SUFFIX = "-1k"
|
33 |
NUM_PARALLEL_CALLS = 10
|
34 |
NUM_ROWS_PER_CALL = 10
|
35 |
MAX_PROGRESS_UPDATES_PER_SECOND = 4
|
|
|
188 |
out = dataset.split("/")[-1] + out.split("should be", 1)[1].replace(" ", "-").replace(".", "").replace(",", "")
|
189 |
else:
|
190 |
out = dataset.split("/")[-1] + prompt.replace(" ", "-")
|
191 |
+
return out[:80] + "-" + Hasher.hash(prompt)[:4]
|
192 |
|
193 |
def _write_generator_to_queue(queue: Queue, func: Callable[..., Iterable], kwargs: dict) -> None:
|
194 |
for i, result in enumerate(func(**kwargs)):
|
|
|
360 |
print(f"Done ReWriting {dataset} (full dataset) with instruction '{prompt}'")
|
361 |
|
362 |
output_rows = [{k: json.loads(row[k]) for k in output_format_df["column"]} for rows in parallel_output_rows for row in rows]
|
363 |
+
repo_id = namespace + "/" + find_new_name(dataset + (PARTIAL_SUFFIX if num_examples > total else ""), prompt)
|
364 |
yield {full_dataset_generation_label: gr.Label({f"✅ ReWriting {dataset}": len(output_rows) / total, f"⚙️ Saving to {repo_id}": 0.})}
|
365 |
token = oauth_token.token if oauth_token else save_dataset_hf_token
|
366 |
print(f"Saving {repo_id}")
|