Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -76,130 +76,6 @@ class SyntheticDataGenerator:
|
|
76 |
except Exception as e:
|
77 |
return None, f"Synthetic data generation failed with error: {str(e)}"
|
78 |
|
79 |
-
def get_quality_report_file(self) -> Optional[str]:
    """
    Build/export the quality report and return a **ZIP** file path for download.

    The report object comes from ``self.generator.reports(display=False)``.
    Because the shape of that object is not known here, several strategies
    are tried in order until one yields a file:

    1. the report is a string path to an existing file;
    2. the report exposes a path-like attribute;
    3. the report exposes bytes or a readable stream;
    4. the report exposes a save/export style method;
    5. a ``.zip`` modified in the last 10 minutes is found on disk;
    6. ``str(report)`` is wrapped into a minimal zip.

    Returns:
        ``"/tmp/quality_report.zip"`` on success, or ``None`` when no
        generator is set or any unexpected error occurs.
    """
    if not self.generator:
        return None

    import shutil
    import time
    import zipfile

    target_zip = "/tmp/quality_report.zip"

    def _is_target(path: str) -> bool:
        """Return True when *path* refers to the same file as the target zip."""
        try:
            return os.path.exists(target_zip) and os.path.samefile(path, target_zip)
        except OSError:
            return False

    def _remove_stale_target() -> None:
        """Delete a leftover target so its existence proves a fresh write."""
        try:
            os.remove(target_zip)
        except OSError:
            # Missing (or undeletable) leftovers are not fatal for a best-effort export.
            pass

    def _file_to_target(path: str) -> str:
        """Copy (or zip) an existing file into the target zip and return the target."""
        if _is_target(path):
            # shutil.copy2 raises SameFileError when source and destination
            # coincide; the file is already where it needs to be.
            return target_zip
        if path.lower().endswith(".zip"):
            shutil.copy2(path, target_zip)
        else:
            # Any other file type is wrapped so the download is always a zip.
            with zipfile.ZipFile(target_zip, "w", zipfile.ZIP_DEFLATED) as zf:
                zf.write(path, arcname=os.path.basename(path))
        return target_zip

    def _write_bytes_to_zip(b: bytes, out_path: str) -> Optional[str]:
        """Persist raw bytes as a zip at *out_path*; return None on failure."""
        try:
            # If b already is a zip (starts with PK header), just write it
            if b[:2] == b"PK":
                with open(out_path, "wb") as f:
                    f.write(b)
                return out_path
            # Otherwise, wrap it into a zip as a single file payload. The raw
            # bytes are stored untouched so non-UTF-8 content is not lost.
            with zipfile.ZipFile(out_path, "w", zipfile.ZIP_DEFLATED) as zf:
                zf.writestr("report.txt", b)
            return out_path
        except Exception:
            # Best effort: the caller falls through to the next strategy.
            return None

    try:
        os.makedirs("/tmp", exist_ok=True)

        # 0) Generate report object
        rep = self.generator.reports(display=False)

        # 1) If a usable string path is returned and exists (ideally .zip)
        if isinstance(rep, str):
            p = rep.strip()
            if os.path.exists(p):
                return _file_to_target(p)

        # 2) Path-like attributes
        for attr in ("archive_path", "zip_path", "path", "file_path"):
            p = getattr(rep, attr, None)
            if isinstance(p, str) and os.path.exists(p):
                return _file_to_target(p)

        # 3) Bytes-like content (some SDKs provide .content/.data/.bytes/.buffer)
        for attr in ("content", "data", "bytes", "buffer"):
            if not hasattr(rep, attr):
                continue
            b = getattr(rep, attr)
            if isinstance(b, (bytes, bytearray)):
                path = _write_bytes_to_zip(bytes(b), target_zip)
                if path:
                    return path
            if hasattr(b, "read"):
                raw = b.read()
                if isinstance(raw, (bytes, bytearray)):
                    path = _write_bytes_to_zip(bytes(raw), target_zip)
                    if path:
                        return path

        # 4) Save/export API
        for method in ("save", "export", "to_zip", "write"):
            m = getattr(rep, method, None)
            if not callable(m):
                continue
            # A target left over from an earlier call (or a failed attempt)
            # must not be mistaken for the output of this method.
            _remove_stale_target()
            try:
                try:
                    m(target_zip)  # try with path argument
                except TypeError:
                    out = m()  # maybe returns bytes or a path
                    if isinstance(out, (bytes, bytearray)):
                        path = _write_bytes_to_zip(bytes(out), target_zip)
                        if path:
                            return path
                    if isinstance(out, str) and os.path.exists(out):
                        return _file_to_target(out)
                if os.path.exists(target_zip):
                    return target_zip
            except Exception:
                # Deliberate best effort: try the next candidate method.
                continue

        # 5) Heuristic: search for a fresh .zip the SDK might have emitted (last 10 min)
        # NOTE(review): this walks broad directories and may pick up an unrelated
        # archive; kept for backward compatibility.
        cutoff = time.time() - 10 * 60
        candidates = []
        for root in ("/tmp", "/home", "/workspace", os.getcwd()):
            if not os.path.exists(root):
                continue
            for dirpath, _, filenames in os.walk(root):
                for fn in filenames:
                    if not fn.lower().endswith(".zip"):
                        continue
                    fp = os.path.join(dirpath, fn)
                    try:
                        st = os.stat(fp)
                    except OSError:
                        continue
                    # Skip our own previous output: it is not a fresh SDK export
                    # and copying it onto itself would fail.
                    if st.st_mtime >= cutoff and not _is_target(fp):
                        candidates.append((st.st_mtime, st.st_size, fp))
        if candidates:
            # newest then largest
            src = max(candidates, key=lambda c: (c[0], c[1]))[2]
            return _file_to_target(src)

        # 6) Last resort: wrap textual representation into a minimal zip (avoid plain .txt)
        with zipfile.ZipFile(target_zip, "w", zipfile.ZIP_DEFLATED) as zf:
            zf.writestr("report.txt", str(rep))
        return target_zip

    except Exception:
        # Callers treat None as "no report available".
        return None
|
202 |
-
|
203 |
def estimate_memory_usage(self, df: pd.DataFrame) -> str:
|
204 |
if df is None or df.empty:
|
205 |
return "No data available to analyze."
|
@@ -345,12 +221,6 @@ def create_interface():
|
|
345 |
with gr.Column():
|
346 |
train_status = gr.Textbox(label="Training Status", interactive=False)
|
347 |
|
348 |
-
# --- Quality report: button + hidden File with download link ---
|
349 |
-
with gr.Row():
|
350 |
-
get_report_btn = gr.Button("Get Quality Report", variant="secondary")
|
351 |
-
with gr.Group(visible=False) as report_group:
|
352 |
-
report_file = gr.File(label="Quality Report", interactive=False)
|
353 |
-
|
354 |
with gr.Tab("Generate Data"):
|
355 |
gr.Markdown("### Generate synthetic data from your trained model")
|
356 |
with gr.Row():
|
@@ -362,7 +232,6 @@ def create_interface():
|
|
362 |
|
363 |
synthetic_data = gr.Dataframe(label="Synthetic Data", interactive=False)
|
364 |
|
365 |
-
# --- CSV download: button + hidden File with download link ---
|
366 |
with gr.Row():
|
367 |
csv_download_btn = gr.Button("Download CSV", variant="secondary")
|
368 |
with gr.Group(visible=False) as csv_group:
|
@@ -378,37 +247,10 @@ def create_interface():
|
|
378 |
outputs=[train_status],
|
379 |
)
|
380 |
|
381 |
-
# Quality report: on click, return filepath to gr.File and reveal its group
|
382 |
-
def _prepare_report_for_download():
    """Return the quality-report path plus a visibility update for its group.

    Asks ``generator.get_quality_report_file()`` for a file path. A truthy
    path is passed through and the group is revealed; otherwise the file
    value is cleared and the group stays hidden.
    """
    report_path = generator.get_quality_report_file()
    if not report_path:
        # Nothing to offer: clear the file and keep the group hidden.
        return None, gr.update(visible=False)
    return report_path, gr.update(visible=True)
|
389 |
-
|
390 |
-
get_report_btn.click(
|
391 |
-
_prepare_report_for_download,
|
392 |
-
outputs=[report_file, report_group],
|
393 |
-
)
|
394 |
-
|
395 |
-
# Generate data
|
396 |
generate_btn.click(generate_data, inputs=[gen_size], outputs=[synthetic_data, gen_status])
|
397 |
|
398 |
-
# Build comparison plot when both datasets are available
|
399 |
synthetic_data.change(create_comparison_plot, inputs=[uploaded_data, synthetic_data], outputs=[comparison_plot])
|
400 |
|
401 |
-
# CSV download: write CSV and reveal the file link
|
402 |
-
def download_csv_prepare():
    """Dump the latest synthetic DataFrame to CSV and return the file path.

    Reads the module-level ``_last_synth_df``. Returns ``None`` when it is
    unset or empty; otherwise writes it (without the index) to
    ``/tmp/synthetic_data.csv`` and returns that path.
    """
    global _last_synth_df
    frame = _last_synth_df
    has_rows = frame is not None and not frame.empty
    if not has_rows:
        return None
    csv_path = "/tmp/synthetic_data.csv"
    os.makedirs("/tmp", exist_ok=True)
    frame.to_csv(csv_path, index=False)
    return csv_path
|
411 |
-
|
412 |
def _prepare_csv_for_download():
|
413 |
path = download_csv_prepare()
|
414 |
if path:
|
@@ -421,7 +263,6 @@ def create_interface():
|
|
421 |
outputs=[csv_file, csv_group],
|
422 |
)
|
423 |
|
424 |
-
# File upload handler
|
425 |
def process_uploaded_file(file):
|
426 |
if file is None:
|
427 |
return None, "No file uploaded.", gr.update(visible=False)
|
|
|
76 |
except Exception as e:
|
77 |
return None, f"Synthetic data generation failed with error: {str(e)}"
|
78 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
def estimate_memory_usage(self, df: pd.DataFrame) -> str:
|
80 |
if df is None or df.empty:
|
81 |
return "No data available to analyze."
|
|
|
221 |
with gr.Column():
|
222 |
train_status = gr.Textbox(label="Training Status", interactive=False)
|
223 |
|
|
|
|
|
|
|
|
|
|
|
|
|
224 |
with gr.Tab("Generate Data"):
|
225 |
gr.Markdown("### Generate synthetic data from your trained model")
|
226 |
with gr.Row():
|
|
|
232 |
|
233 |
synthetic_data = gr.Dataframe(label="Synthetic Data", interactive=False)
|
234 |
|
|
|
235 |
with gr.Row():
|
236 |
csv_download_btn = gr.Button("Download CSV", variant="secondary")
|
237 |
with gr.Group(visible=False) as csv_group:
|
|
|
247 |
outputs=[train_status],
|
248 |
)
|
249 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
250 |
generate_btn.click(generate_data, inputs=[gen_size], outputs=[synthetic_data, gen_status])
|
251 |
|
|
|
252 |
synthetic_data.change(create_comparison_plot, inputs=[uploaded_data, synthetic_data], outputs=[comparison_plot])
|
253 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
254 |
def _prepare_csv_for_download():
|
255 |
path = download_csv_prepare()
|
256 |
if path:
|
|
|
263 |
outputs=[csv_file, csv_group],
|
264 |
)
|
265 |
|
|
|
266 |
def process_uploaded_file(file):
|
267 |
if file is None:
|
268 |
return None, "No file uploaded.", gr.update(visible=False)
|