Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -78,45 +78,124 @@ class SyntheticDataGenerator:
|
|
78 |
|
79 |
def get_quality_report_file(self) -> Optional[str]:
|
80 |
"""
|
81 |
-
Build/export the quality report and return a file path for download.
|
82 |
-
|
83 |
"""
|
84 |
if not self.generator:
|
85 |
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
86 |
try:
|
|
|
|
|
|
|
|
|
87 |
rep = self.generator.reports(display=False)
|
88 |
|
89 |
-
#
|
90 |
-
if isinstance(rep, str)
|
91 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
|
|
|
93 |
for attr in ("archive_path", "zip_path", "path", "file_path"):
|
94 |
if hasattr(rep, attr):
|
95 |
p = getattr(rep, attr)
|
96 |
if isinstance(p, str) and os.path.exists(p):
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
try:
|
103 |
-
rep.save(target_zip)
|
104 |
-
if os.path.exists(target_zip):
|
105 |
-
return target_zip
|
106 |
-
except Exception:
|
107 |
-
pass
|
108 |
-
if hasattr(rep, "export"):
|
109 |
-
try:
|
110 |
-
rep.export(target_zip)
|
111 |
-
if os.path.exists(target_zip):
|
112 |
return target_zip
|
113 |
-
except Exception:
|
114 |
-
pass
|
115 |
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
120 |
|
121 |
except Exception:
|
122 |
return None
|
@@ -320,6 +399,16 @@ def create_interface():
|
|
320 |
synthetic_data.change(create_comparison_plot, inputs=[uploaded_data, synthetic_data], outputs=[comparison_plot])
|
321 |
|
322 |
# CSV download: write CSV and reveal the file link
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
323 |
def _prepare_csv_for_download():
|
324 |
path = download_csv_prepare()
|
325 |
if path:
|
|
|
78 |
|
79 |
def get_quality_report_file(self) -> Optional[str]:
    """
    Build/export the quality report and return a **ZIP** file path for download.

    The report object returned by ``self.generator.reports(display=False)`` is
    probed in order, and the first strategy that yields a file wins:

    1. the report is itself a string path that exists;
    2. it exposes a path attribute (``archive_path``, ``zip_path``, ``path``,
       ``file_path``);
    3. it exposes raw bytes or a readable stream (``content``, ``data``,
       ``bytes``, ``buffer``);
    4. it has a ``save``/``export``/``to_zip``/``write`` method;
    5. a recently modified ``.zip`` is found on disk;
    6. ``str(report)`` is wrapped into a minimal archive.

    Returns:
        ``"/tmp/quality_report.zip"`` on success, or ``None`` when there is no
        generator or an unexpected error occurs (the error is logged).
    """
    if not self.generator:
        return None

    import io
    import logging
    import shutil
    import time
    import zipfile

    log = logging.getLogger(__name__)
    target_zip = "/tmp/quality_report.zip"
    fresh_window_s = 10 * 60  # step 5 only considers archives this recent

    def _same_path(a: str, b: str) -> bool:
        # Compare normalised paths so we never copy a file onto itself.
        return os.path.abspath(a) == os.path.abspath(b)

    def _write_bytes_to_zip(b: bytes, out_path: str) -> Optional[str]:
        # Persist a bytes payload as a zip; returns None on failure.
        try:
            # Validate the archive structure instead of sniffing two bytes:
            # plain text that merely starts with "PK" is not a zip.
            if zipfile.is_zipfile(io.BytesIO(b)):
                with open(out_path, "wb") as f:
                    f.write(b)
                return out_path
            # Store the raw bytes: decoding with errors="ignore" would
            # silently drop data from non-UTF-8 payloads.
            with zipfile.ZipFile(out_path, "w", zipfile.ZIP_DEFLATED) as zf:
                zf.writestr("report.txt", b)
            return out_path
        except Exception:
            log.debug("Could not write report bytes to %s", out_path, exc_info=True)
            return None

    def _path_to_zip(src: str) -> str:
        # Turn an existing path into target_zip: copy real .zip files,
        # wrap anything else in a new archive.
        if _same_path(src, target_zip):
            # The SDK already wrote to our target; copying would raise
            # shutil.SameFileError.
            return target_zip
        if os.path.isfile(src) and src.lower().endswith(".zip"):
            shutil.copy2(src, target_zip)
            return target_zip
        with zipfile.ZipFile(target_zip, "w", zipfile.ZIP_DEFLATED) as zf:
            zf.write(src, arcname=os.path.basename(src))
        return target_zip

    try:
        os.makedirs("/tmp", exist_ok=True)
        # Drop any archive left by a previous call so a stale report can
        # never be mistaken for the output of this one.
        try:
            os.remove(target_zip)
        except FileNotFoundError:
            pass
        except OSError:
            log.debug("Could not remove stale %s", target_zip, exc_info=True)

        # 0) Generate report object
        rep = self.generator.reports(display=False)

        # 1) If a usable string path is returned and exists (ideally .zip)
        if isinstance(rep, str):
            p = rep.strip()
            if os.path.exists(p):
                return _path_to_zip(p)

        # 2) Path-like attributes
        for attr in ("archive_path", "zip_path", "path", "file_path"):
            p = getattr(rep, attr, None)
            if isinstance(p, os.PathLike):
                p = os.fspath(p)
            if isinstance(p, str) and os.path.exists(p):
                return _path_to_zip(p)

        # 3) Bytes-like content (some SDKs provide .content/.data/.bytes/.buffer)
        for attr in ("content", "data", "bytes", "buffer"):
            payload = getattr(rep, attr, None)
            if payload is None:
                continue
            if not isinstance(payload, (bytes, bytearray)) and hasattr(payload, "read"):
                try:
                    payload = payload.read()
                except Exception:
                    log.debug("Reading report.%s failed", attr, exc_info=True)
                    continue
            if isinstance(payload, (bytes, bytearray)):
                path = _write_bytes_to_zip(bytes(payload), target_zip)
                if path:
                    return path

        # 4) Save/export API
        for method in ("save", "export", "to_zip", "write"):
            m = getattr(rep, method, None)
            if not callable(m):
                continue
            try:
                try:
                    out = m(target_zip)  # try with path argument
                except TypeError:
                    out = m()  # maybe returns bytes or a path
                # The stale file was removed above, so an existing target
                # here was produced by this call.
                if os.path.exists(target_zip):
                    return target_zip
                if isinstance(out, (bytes, bytearray)):
                    path = _write_bytes_to_zip(bytes(out), target_zip)
                    if path:
                        return path
                elif isinstance(out, str) and os.path.exists(out):
                    # Same treatment as steps 1-2: non-zip files get zipped
                    # rather than copied under a misleading .zip name.
                    return _path_to_zip(out)
            except Exception:
                log.debug("report.%s() failed", method, exc_info=True)

        # 5) Heuristic: search for a fresh .zip the SDK might have emitted (last 10 min)
        # NOTE(review): this can pick up an unrelated archive (e.g. a user
        # upload under /tmp) — consider narrowing the search roots.
        candidates = []
        cutoff = time.time() - fresh_window_s
        for root in dict.fromkeys(("/tmp", "/home", "/workspace", os.getcwd())):
            if not os.path.exists(root):
                continue
            for dirpath, _, filenames in os.walk(root):
                for fn in filenames:
                    if not fn.lower().endswith(".zip"):
                        continue
                    fp = os.path.join(dirpath, fn)
                    if _same_path(fp, target_zip):
                        continue
                    try:
                        st = os.stat(fp)
                    except OSError:
                        continue
                    if st.st_mtime >= cutoff:
                        candidates.append((st.st_mtime, st.st_size, fp))
        if candidates:
            # newest then largest
            src = max(candidates, key=lambda c: (c[0], c[1]))[2]
            shutil.copy2(src, target_zip)
            return target_zip

        # 6) Last resort: wrap textual representation into a minimal zip (avoid plain .txt)
        with zipfile.ZipFile(target_zip, "w", zipfile.ZIP_DEFLATED) as zf:
            zf.writestr("report.txt", str(rep))
        return target_zip

    except Exception:
        log.exception("Failed to build quality report archive")
        return None
|
|
|
399 |
synthetic_data.change(create_comparison_plot, inputs=[uploaded_data, synthetic_data], outputs=[comparison_plot])
|
400 |
|
401 |
# CSV download: write CSV and reveal the file link
|
402 |
+
def download_csv_prepare():
    """Write the latest synthetic DataFrame to CSV and return the file path.

    Returns None when no synthetic data is available (the module-level frame
    is unset or empty). The returned path is meant to feed a gr.File output.
    """
    global _last_synth_df
    frame = _last_synth_df
    has_rows = frame is not None and not frame.empty
    if not has_rows:
        return None

    csv_path = "/tmp/synthetic_data.csv"
    os.makedirs("/tmp", exist_ok=True)
    frame.to_csv(csv_path, index=False)
    return csv_path
|
411 |
+
|
412 |
def _prepare_csv_for_download():
|
413 |
path = download_csv_prepare()
|
414 |
if path:
|