ZennyKenny committed on
Commit
d71f074
·
verified ·
1 Parent(s): 977078a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -159
app.py CHANGED
@@ -76,130 +76,6 @@ class SyntheticDataGenerator:
76
  except Exception as e:
77
  return None, f"Synthetic data generation failed with error: {str(e)}"
78
 
79
- def get_quality_report_file(self) -> Optional[str]:
80
- """
81
- Build/export the quality report and return a **ZIP** file path for download.
82
- Strongly prefers .zip; tries multiple SDK shapes before falling back.
83
- """
84
- if not self.generator:
85
- return None
86
-
87
- import shutil
88
- import zipfile
89
- from datetime import datetime, timedelta
90
-
91
- def _write_bytes_to_zip(b: bytes, out_path: str) -> Optional[str]:
92
- try:
93
- # If b already is a zip (starts with PK header), just write it
94
- if len(b) >= 2 and b[:2] == b"PK":
95
- with open(out_path, "wb") as f:
96
- f.write(b)
97
- return out_path
98
- # Otherwise, wrap it into a zip as a single file payload
99
- with zipfile.ZipFile(out_path, "w", zipfile.ZIP_DEFLATED) as zf:
100
- zf.writestr("report.txt", b.decode("utf-8", errors="ignore"))
101
- return out_path
102
- except Exception:
103
- return None
104
-
105
- try:
106
- os.makedirs("/tmp", exist_ok=True)
107
- target_zip = "/tmp/quality_report.zip"
108
-
109
- # 0) Generate report object
110
- rep = self.generator.reports(display=False)
111
-
112
- # 1) If a usable string path is returned and exists (ideally .zip)
113
- if isinstance(rep, str):
114
- p = rep.strip()
115
- if os.path.exists(p):
116
- if p.lower().endswith(".zip"):
117
- shutil.copy2(p, target_zip)
118
- return target_zip
119
- # If it's some other file, zip it
120
- with zipfile.ZipFile(target_zip, "w", zipfile.ZIP_DEFLATED) as zf:
121
- zf.write(p, arcname=os.path.basename(p))
122
- return target_zip
123
-
124
- # 2) Path-like attributes
125
- for attr in ("archive_path", "zip_path", "path", "file_path"):
126
- if hasattr(rep, attr):
127
- p = getattr(rep, attr)
128
- if isinstance(p, str) and os.path.exists(p):
129
- if p.lower().endswith(".zip"):
130
- shutil.copy2(p, target_zip)
131
- return target_zip
132
- with zipfile.ZipFile(target_zip, "w", zipfile.ZIP_DEFLATED) as zf:
133
- zf.write(p, arcname=os.path.basename(p))
134
- return target_zip
135
-
136
- # 3) Bytes-like content (some SDKs provide .content/.data/.bytes/.buffer)
137
- for attr in ("content", "data", "bytes", "buffer"):
138
- if hasattr(rep, attr):
139
- b = getattr(rep, attr)
140
- if isinstance(b, (bytes, bytearray)):
141
- path = _write_bytes_to_zip(bytes(b), target_zip)
142
- if path:
143
- return path
144
- if hasattr(b, "read"):
145
- raw = b.read()
146
- if isinstance(raw, (bytes, bytearray)):
147
- path = _write_bytes_to_zip(bytes(raw), target_zip)
148
- if path:
149
- return path
150
-
151
- # 4) Save/export API
152
- for method in ("save", "export", "to_zip", "write"):
153
- if hasattr(rep, method):
154
- try:
155
- m = getattr(rep, method)
156
- try:
157
- m(target_zip) # try with path argument
158
- except TypeError:
159
- out = m() # maybe returns bytes or a path
160
- if isinstance(out, (bytes, bytearray)):
161
- path = _write_bytes_to_zip(bytes(out), target_zip)
162
- if path:
163
- return path
164
- if isinstance(out, str) and os.path.exists(out):
165
- shutil.copy2(out, target_zip)
166
- return target_zip
167
- if os.path.exists(target_zip):
168
- return target_zip
169
- except Exception:
170
- pass
171
-
172
- # 5) Heuristic: search for a fresh .zip the SDK might have emitted (last 10 min)
173
- candidates = []
174
- cutoff = datetime.utcnow() - timedelta(minutes=10)
175
- for root in ("/tmp", "/home", "/workspace", os.getcwd()):
176
- if not os.path.exists(root):
177
- continue
178
- for dirpath, _, filenames in os.walk(root):
179
- for fn in filenames:
180
- if fn.lower().endswith(".zip"):
181
- fp = os.path.join(dirpath, fn)
182
- try:
183
- st = os.stat(fp)
184
- mtime = datetime.utcfromtimestamp(st.st_mtime)
185
- if mtime >= cutoff:
186
- candidates.append((mtime, st.st_size, fp))
187
- except Exception:
188
- pass
189
- if candidates:
190
- candidates.sort(key=lambda x: (x[0], x[1]), reverse=True) # newest then largest
191
- src = candidates[0][2]
192
- shutil.copy2(src, target_zip)
193
- return target_zip
194
-
195
- # 6) Last resort: wrap textual representation into a minimal zip (avoid plain .txt)
196
- with zipfile.ZipFile(target_zip, "w", zipfile.ZIP_DEFLATED) as zf:
197
- zf.writestr("report.txt", str(rep))
198
- return target_zip
199
-
200
- except Exception:
201
- return None
202
-
203
  def estimate_memory_usage(self, df: pd.DataFrame) -> str:
204
  if df is None or df.empty:
205
  return "No data available to analyze."
@@ -345,12 +221,6 @@ def create_interface():
345
  with gr.Column():
346
  train_status = gr.Textbox(label="Training Status", interactive=False)
347
 
348
- # --- Quality report: button + hidden File with download link ---
349
- with gr.Row():
350
- get_report_btn = gr.Button("Get Quality Report", variant="secondary")
351
- with gr.Group(visible=False) as report_group:
352
- report_file = gr.File(label="Quality Report", interactive=False)
353
-
354
  with gr.Tab("Generate Data"):
355
  gr.Markdown("### Generate synthetic data from your trained model")
356
  with gr.Row():
@@ -362,7 +232,6 @@ def create_interface():
362
 
363
  synthetic_data = gr.Dataframe(label="Synthetic Data", interactive=False)
364
 
365
- # --- CSV download: button + hidden File with download link ---
366
  with gr.Row():
367
  csv_download_btn = gr.Button("Download CSV", variant="secondary")
368
  with gr.Group(visible=False) as csv_group:
@@ -378,37 +247,10 @@ def create_interface():
378
  outputs=[train_status],
379
  )
380
 
381
- # Quality report: on click, return filepath to gr.File and reveal its group
382
- def _prepare_report_for_download():
383
- path = generator.get_quality_report_file()
384
- if path:
385
- return path, gr.update(visible=True)
386
- else:
387
- # keep group hidden and clear file if nothing
388
- return None, gr.update(visible=False)
389
-
390
- get_report_btn.click(
391
- _prepare_report_for_download,
392
- outputs=[report_file, report_group],
393
- )
394
-
395
- # Generate data
396
  generate_btn.click(generate_data, inputs=[gen_size], outputs=[synthetic_data, gen_status])
397
 
398
- # Build comparison plot when both datasets are available
399
  synthetic_data.change(create_comparison_plot, inputs=[uploaded_data, synthetic_data], outputs=[comparison_plot])
400
 
401
- # CSV download: write CSV and reveal the file link
402
- def download_csv_prepare():
403
- """Return a path to the latest synthetic CSV; used as output to gr.File."""
404
- global _last_synth_df
405
- if _last_synth_df is None or _last_synth_df.empty:
406
- return None
407
- os.makedirs("/tmp", exist_ok=True)
408
- path = "/tmp/synthetic_data.csv"
409
- _last_synth_df.to_csv(path, index=False)
410
- return path
411
-
412
  def _prepare_csv_for_download():
413
  path = download_csv_prepare()
414
  if path:
@@ -421,7 +263,6 @@ def create_interface():
421
  outputs=[csv_file, csv_group],
422
  )
423
 
424
- # File upload handler
425
  def process_uploaded_file(file):
426
  if file is None:
427
  return None, "No file uploaded.", gr.update(visible=False)
 
76
  except Exception as e:
77
  return None, f"Synthetic data generation failed with error: {str(e)}"
78
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  def estimate_memory_usage(self, df: pd.DataFrame) -> str:
80
  if df is None or df.empty:
81
  return "No data available to analyze."
 
221
  with gr.Column():
222
  train_status = gr.Textbox(label="Training Status", interactive=False)
223
 
 
 
 
 
 
 
224
  with gr.Tab("Generate Data"):
225
  gr.Markdown("### Generate synthetic data from your trained model")
226
  with gr.Row():
 
232
 
233
  synthetic_data = gr.Dataframe(label="Synthetic Data", interactive=False)
234
 
 
235
  with gr.Row():
236
  csv_download_btn = gr.Button("Download CSV", variant="secondary")
237
  with gr.Group(visible=False) as csv_group:
 
247
  outputs=[train_status],
248
  )
249
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
250
  generate_btn.click(generate_data, inputs=[gen_size], outputs=[synthetic_data, gen_status])
251
 
 
252
  synthetic_data.change(create_comparison_plot, inputs=[uploaded_data, synthetic_data], outputs=[comparison_plot])
253
 
 
 
 
 
 
 
 
 
 
 
 
254
  def _prepare_csv_for_download():
255
  path = download_csv_prepare()
256
  if path:
 
263
  outputs=[csv_file, csv_group],
264
  )
265
 
 
266
  def process_uploaded_file(file):
267
  if file is None:
268
  return None, "No file uploaded.", gr.update(visible=False)