Spaces:

mostlyai
/

synthetic-sdk-demo

Running

App Files Community

ZennyKenny committed 11 days ago

Commit

d71f074

verified ·

1 Parent(s): 977078a

Update app.py

Browse files

Files changed (1) hide show

app.py +0 -159

app.py CHANGED Viewed

@@ -76,130 +76,6 @@ class SyntheticDataGenerator:
         except Exception as e:
             return None, f"Synthetic data generation failed with error: {str(e)}"
-    def get_quality_report_file(self) -> Optional[str]:
-        """
-        Build/export the quality report and return a **ZIP** file path for download.
-        Strongly prefers .zip; tries multiple SDK shapes before falling back.
-        """
-        if not self.generator:
-            return None
-        import shutil
-        import zipfile
-        from datetime import datetime, timedelta
-        def _write_bytes_to_zip(b: bytes, out_path: str) -> Optional[str]:
-            try:
-                # If b already is a zip (starts with PK header), just write it
-                if len(b) >= 2 and b[:2] == b"PK":
-                    with open(out_path, "wb") as f:
-                        f.write(b)
-                    return out_path
-                # Otherwise, wrap it into a zip as a single file payload
-                with zipfile.ZipFile(out_path, "w", zipfile.ZIP_DEFLATED) as zf:
-                    zf.writestr("report.txt", b.decode("utf-8", errors="ignore"))
-                return out_path
-            except Exception:
-                return None
-        try:
-            os.makedirs("/tmp", exist_ok=True)
-            target_zip = "/tmp/quality_report.zip"
-            # 0) Generate report object
-            rep = self.generator.reports(display=False)
-            # 1) If a usable string path is returned and exists (ideally .zip)
-            if isinstance(rep, str):
-                p = rep.strip()
-                if os.path.exists(p):
-                    if p.lower().endswith(".zip"):
-                        shutil.copy2(p, target_zip)
-                        return target_zip
-                    # If it's some other file, zip it
-                    with zipfile.ZipFile(target_zip, "w", zipfile.ZIP_DEFLATED) as zf:
-                        zf.write(p, arcname=os.path.basename(p))
-                    return target_zip
-            # 2) Path-like attributes
-            for attr in ("archive_path", "zip_path", "path", "file_path"):
-                if hasattr(rep, attr):
-                    p = getattr(rep, attr)
-                    if isinstance(p, str) and os.path.exists(p):
-                        if p.lower().endswith(".zip"):
-                            shutil.copy2(p, target_zip)
-                            return target_zip
-                        with zipfile.ZipFile(target_zip, "w", zipfile.ZIP_DEFLATED) as zf:
-                            zf.write(p, arcname=os.path.basename(p))
-                        return target_zip
-            # 3) Bytes-like content (some SDKs provide .content/.data/.bytes/.buffer)
-            for attr in ("content", "data", "bytes", "buffer"):
-                if hasattr(rep, attr):
-                    b = getattr(rep, attr)
-                    if isinstance(b, (bytes, bytearray)):
-                        path = _write_bytes_to_zip(bytes(b), target_zip)
-                        if path:
-                            return path
-                    if hasattr(b, "read"):
-                        raw = b.read()
-                        if isinstance(raw, (bytes, bytearray)):
-                            path = _write_bytes_to_zip(bytes(raw), target_zip)
-                            if path:
-                                return path
-            # 4) Save/export API
-            for method in ("save", "export", "to_zip", "write"):
-                if hasattr(rep, method):
-                    try:
-                        m = getattr(rep, method)
-                        try:
-                            m(target_zip)  # try with path argument
-                        except TypeError:
-                            out = m()  # maybe returns bytes or a path
-                            if isinstance(out, (bytes, bytearray)):
-                                path = _write_bytes_to_zip(bytes(out), target_zip)
-                                if path:
-                                    return path
-                            if isinstance(out, str) and os.path.exists(out):
-                                shutil.copy2(out, target_zip)
-                                return target_zip
-                        if os.path.exists(target_zip):
-                            return target_zip
-                    except Exception:
-                        pass
-            # 5) Heuristic: search for a fresh .zip the SDK might have emitted (last 10 min)
-            candidates = []
-            cutoff = datetime.utcnow() - timedelta(minutes=10)
-            for root in ("/tmp", "/home", "/workspace", os.getcwd()):
-                if not os.path.exists(root):
-                    continue
-                for dirpath, _, filenames in os.walk(root):
-                    for fn in filenames:
-                        if fn.lower().endswith(".zip"):
-                            fp = os.path.join(dirpath, fn)
-                            try:
-                                st = os.stat(fp)
-                                mtime = datetime.utcfromtimestamp(st.st_mtime)
-                                if mtime >= cutoff:
-                                    candidates.append((mtime, st.st_size, fp))
-                            except Exception:
-                                pass
-            if candidates:
-                candidates.sort(key=lambda x: (x[0], x[1]), reverse=True)  # newest then largest
-                src = candidates[0][2]
-                shutil.copy2(src, target_zip)
-                return target_zip
-            # 6) Last resort: wrap textual representation into a minimal zip (avoid plain .txt)
-            with zipfile.ZipFile(target_zip, "w", zipfile.ZIP_DEFLATED) as zf:
-                zf.writestr("report.txt", str(rep))
-            return target_zip
-        except Exception:
-            return None
     def estimate_memory_usage(self, df: pd.DataFrame) -> str:
         if df is None or df.empty:
             return "No data available to analyze."
@@ -345,12 +221,6 @@ def create_interface():
                 with gr.Column():
                     train_status = gr.Textbox(label="Training Status", interactive=False)
-            # --- Quality report: button + hidden File with download link ---
-            with gr.Row():
-                get_report_btn = gr.Button("Get Quality Report", variant="secondary")
-                with gr.Group(visible=False) as report_group:
-                    report_file = gr.File(label="Quality Report", interactive=False)
         with gr.Tab("Generate Data"):
             gr.Markdown("### Generate synthetic data from your trained model")
             with gr.Row():
@@ -362,7 +232,6 @@ def create_interface():
             synthetic_data = gr.Dataframe(label="Synthetic Data", interactive=False)
-            # --- CSV download: button + hidden File with download link ---
             with gr.Row():
                 csv_download_btn = gr.Button("Download CSV", variant="secondary")
                 with gr.Group(visible=False) as csv_group:
@@ -378,37 +247,10 @@ def create_interface():
             outputs=[train_status],
         )
-        # Quality report: on click, return filepath to gr.File and reveal its group
-        def _prepare_report_for_download():
-            path = generator.get_quality_report_file()
-            if path:
-                return path, gr.update(visible=True)
-            else:
-                # keep group hidden and clear file if nothing
-                return None, gr.update(visible=False)
-        get_report_btn.click(
-            _prepare_report_for_download,
-            outputs=[report_file, report_group],
-        )
-        # Generate data
         generate_btn.click(generate_data, inputs=[gen_size], outputs=[synthetic_data, gen_status])
-        # Build comparison plot when both datasets are available
         synthetic_data.change(create_comparison_plot, inputs=[uploaded_data, synthetic_data], outputs=[comparison_plot])
-        # CSV download: write CSV and reveal the file link
-        def download_csv_prepare():
-            """Return a path to the latest synthetic CSV; used as output to gr.File."""
-            global _last_synth_df
-            if _last_synth_df is None or _last_synth_df.empty:
-                return None
-            os.makedirs("/tmp", exist_ok=True)
-            path = "/tmp/synthetic_data.csv"
-            _last_synth_df.to_csv(path, index=False)
-            return path
         def _prepare_csv_for_download():
             path = download_csv_prepare()
             if path:
@@ -421,7 +263,6 @@ def create_interface():
             outputs=[csv_file, csv_group],
         )
-        # File upload handler
         def process_uploaded_file(file):
             if file is None:
                 return None, "No file uploaded.", gr.update(visible=False)

         except Exception as e:
             return None, f"Synthetic data generation failed with error: {str(e)}"
     def estimate_memory_usage(self, df: pd.DataFrame) -> str:
         if df is None or df.empty:
             return "No data available to analyze."
                 with gr.Column():
                     train_status = gr.Textbox(label="Training Status", interactive=False)
         with gr.Tab("Generate Data"):
             gr.Markdown("### Generate synthetic data from your trained model")
             with gr.Row():
             synthetic_data = gr.Dataframe(label="Synthetic Data", interactive=False)
             with gr.Row():
                 csv_download_btn = gr.Button("Download CSV", variant="secondary")
                 with gr.Group(visible=False) as csv_group:
             outputs=[train_status],
         )
         generate_btn.click(generate_data, inputs=[gen_size], outputs=[synthetic_data, gen_status])
         synthetic_data.change(create_comparison_plot, inputs=[uploaded_data, synthetic_data], outputs=[comparison_plot])
         def _prepare_csv_for_download():
             path = download_csv_prepare()
             if path:
             outputs=[csv_file, csv_group],
         )
         def process_uploaded_file(file):
             if file is None:
                 return None, "No file uploaded.", gr.update(visible=False)