Spaces:
Runtime error
Runtime error
Commit
ยท
b8a81f2
1
Parent(s):
187357b
fix distrubution of labels
Browse files
src/synthetic_dataset_generator/apps/textcat.py
CHANGED
@@ -126,10 +126,12 @@ def generate_dataset(
|
|
126 |
inputs = []
|
127 |
for _ in range(batch_size):
|
128 |
if multi_label:
|
129 |
-
k = int(random.
|
130 |
else:
|
131 |
k = 1
|
132 |
-
|
|
|
|
|
133 |
random.shuffle(sampled_labels)
|
134 |
inputs.append(
|
135 |
{
|
|
|
126 |
inputs = []
|
127 |
for _ in range(batch_size):
|
128 |
if multi_label:
|
129 |
+
k = int(random.betavariate(alpha=2, beta=3) * len(labels))
|
130 |
else:
|
131 |
k = 1
|
132 |
+
|
133 |
+
print(k)
|
134 |
+
sampled_labels = random.sample(labels, min(k, len(labels)))
|
135 |
random.shuffle(sampled_labels)
|
136 |
inputs.append(
|
137 |
{
|