TorchTransformers-CV-SFT

Sleeping

App Files Community

awacke1 committed on Mar 22

Commit

fc736fc

verified ·

1 Parent(s): a9c30d8

Update app.py

Browse files

Files changed (1) hide show

app.py +170 -13

app.py CHANGED Viewed

@@ -7,11 +7,13 @@ import shutil
 import streamlit as st
 import pandas as pd
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModel
 from diffusers import StableDiffusionPipeline
 from torch.utils.data import Dataset, DataLoader
 import csv
-import fitz  # PyMuPDF
 import requests
 from PIL import Image
 import cv2
@@ -46,7 +48,7 @@ st.set_page_config(
     menu_items={
         'Get Help': 'https://huggingface.co/awacke1',
         'Report a Bug': 'https://huggingface.co/spaces/awacke1',
-        'About': "AI Vision & SFT Titans: PDFs, OCR, Image Gen, Line Drawings, and SFT on CPU! 🌌"
     }
 )
@@ -114,6 +116,87 @@ class DiffusionDataset(Dataset):
     def __getitem__(self, idx):
         # Return sample ``idx`` as a dict pairing the stored image with its text.
         return {"image": self.images[idx], "text": self.texts[idx]}
 # Model Builders
 class ModelBuilder:
     def __init__(self):
@@ -343,22 +426,18 @@ async def process_pdf_snapshot(pdf_path, mode="thumbnail"):
     start_time = time.time()
     status = st.empty()
     status.text(f"Processing PDF Snapshot ({mode})... (0s)")
-    doc = fitz.open(pdf_path)
     output_files = []
     if mode == "thumbnail":
-        page = doc[0]
-        pix = page.get_pixmap(matrix=fitz.Matrix(0.5, 0.5))
         output_file = generate_filename("thumbnail", "png")
-        pix.save(output_file)
         output_files.append(output_file)
     elif mode == "twopage":
-        for i in range(min(2, len(doc))):
-            page = doc[i]
-            pix = page.get_pixmap(matrix=fitz.Matrix(1.0, 1.0))
             output_file = generate_filename(f"twopage_{i}", "png")
-            pix.save(output_file)
             output_files.append(output_file)
-    doc.close()
     elapsed = int(time.time() - start_time)
     status.text(f"PDF Snapshot ({mode}) completed in {elapsed}s!")
     for file in output_files:
@@ -383,12 +462,55 @@ async def process_ocr(image, output_file):
     update_gallery()
     return result
 # Main App
 st.title("AI Vision & SFT Titans 🚀")
 # Sidebar
 st.sidebar.header("Captured Files 📜")
 gallery_size = st.sidebar.slider("Gallery Size", 1, 10, 4)
 update_gallery()
 st.sidebar.subheader("Model Management 🗂️")
@@ -416,9 +538,9 @@ with history_container:
         st.write(entry)
 # Tabs
-tab1, tab2, tab3, tab4, tab5, tab6, tab7, tab8 = st.tabs([
     "Camera Snap 📷", "Download PDFs 📥", "Build Titan 🌱", "Fine-Tune Titan 🔧",
-    "Test Titan 🧪", "Agentic RAG Party 🌐", "Test OCR 🔍", "Test Image Gen 🎨"
 ])
 with tab1:
@@ -669,5 +791,40 @@ with tab8:
     else:
         st.warning("No images captured yet. Use Camera Snap or Download PDFs first!")
 # Initial Gallery Update
 update_gallery()

 import streamlit as st
 import pandas as pd
 import torch
+import torch.nn as nn
+import torch.nn.functional as F
 from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModel
 from diffusers import StableDiffusionPipeline
 from torch.utils.data import Dataset, DataLoader
 import csv
+from pdf2image import convert_from_path  # Replaced fitz with pdf2image
 import requests
 from PIL import Image
 import cv2
     menu_items={
         'Get Help': 'https://huggingface.co/awacke1',
         'Report a Bug': 'https://huggingface.co/spaces/awacke1',
+        'About': "AI Vision & SFT Titans: PDFs, OCR, Image Gen, Line Drawings, Custom Diffusion, and SFT on CPU! 🌌"
     }
 )
     def __getitem__(self, idx):
         # Return sample ``idx`` as a dict pairing the stored image with its text.
         return {"image": self.images[idx], "text": self.texts[idx]}
class TinyDiffusionDataset(Dataset):
    """Dataset of image tensors prepared for the tiny diffusion trainer.

    Every input image is converted to RGB, rearranged from height-width-channel
    to channel-height-width order, cast to float32 and divided by 255.
    The conversion happens eagerly in the constructor.
    """

    def __init__(self, images):
        prepared = []
        for picture in images:
            hwc = np.array(picture.convert("RGB"))
            chw = hwc.transpose(2, 0, 1)
            prepared.append(torch.tensor(chw, dtype=torch.float32) / 255.0)
        self.images = prepared

    def __len__(self):
        """Return the number of stored image tensors."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return the pre-computed tensor at position ``idx``."""
        return self.images[idx]
+# Custom Tiny Diffusion Model
class TinyUNet(nn.Module):
    """Very small U-Net style network used as the diffusion noise predictor.

    Layout: one full-resolution conv, one stride-2 conv, a bottleneck conv,
    a transposed conv back to full resolution, and a final conv over the
    concatenation of the upsampled features with the first-layer features
    (skip connection).

    Args:
        in_channels: Channels of the input image tensor.
        out_channels: Channels of the predicted output tensor.
    """

    def __init__(self, in_channels=3, out_channels=3):
        super().__init__()
        self.down1 = nn.Conv2d(in_channels, 32, 3, padding=1)
        self.down2 = nn.Conv2d(32, 64, 3, padding=1, stride=2)
        self.mid = nn.Conv2d(64, 128, 3, padding=1)
        self.up1 = nn.ConvTranspose2d(128, 64, 3, stride=2, padding=1, output_padding=1)
        self.up2 = nn.Conv2d(64 + 32, 32, 3, padding=1)
        self.out = nn.Conv2d(32, out_channels, 3, padding=1)
        # The embedding is added to the bottleneck activation, so its width must
        # equal ``mid``'s 128 output channels (a 64-wide embedding cannot
        # broadcast against 128 channels and makes every forward pass fail).
        self.time_embed = nn.Linear(1, 128)

    def forward(self, x, t):
        """Predict a tensor shaped like ``x`` conditioned on timestep ``t``.

        Args:
            x: Image batch of shape ``(batch, in_channels, H, W)``.
            t: Float tensor of shape ``(batch,)`` holding one timestep per sample.

        Returns:
            Tensor of shape ``(batch, out_channels, H, W)``.
        """
        # (batch,) -> (batch, 1) -> (batch, 128) -> (batch, 128, 1, 1) for broadcasting.
        t_embed = F.relu(self.time_embed(t.unsqueeze(-1)))
        t_embed = t_embed.view(t_embed.size(0), t_embed.size(1), 1, 1)
        x1 = F.relu(self.down1(x))
        x2 = F.relu(self.down2(x1))
        x_mid = F.relu(self.mid(x2)) + t_embed
        x_up1 = F.relu(self.up1(x_mid))
        # The transposed conv always yields an even size (2 * downsampled size);
        # for odd H or W that is one pixel too large, so crop to the skip
        # tensor's spatial size before concatenating.
        x_up1 = x_up1[..., :x1.size(-2), :x1.size(-1)]
        x_up2 = F.relu(self.up2(torch.cat([x_up1, x1], dim=1)))
        return self.out(x_up2)
class TinyDiffusion:
    """Minimal DDPM-style trainer and sampler around a noise-prediction model.

    The noise schedule is linear: ``beta`` runs from 1e-4 to 0.02 over
    ``timesteps`` steps, ``alpha = 1 - beta`` and ``alpha_cumprod`` is the
    running product of ``alpha``.

    Args:
        model: Callable module invoked as ``model(x_noisy, t)`` that returns a
            tensor shaped like ``x_noisy``.
        timesteps: Number of entries in the noise schedule.
    """

    def __init__(self, model, timesteps=100):
        self.model = model
        self.timesteps = timesteps
        self.beta = torch.linspace(0.0001, 0.02, timesteps)
        self.alpha = 1 - self.beta
        self.alpha_cumprod = torch.cumprod(self.alpha, dim=0)

    def train(self, images, epochs=50):
        """Fit the model to predict the noise added to ``images``.

        Trains on CPU with Adam (lr 1e-4) and batch size 1, logging the mean
        loss of every epoch.

        Args:
            images: Non-empty sequence of images accepted by
                ``TinyDiffusionDataset``.
            epochs: Number of passes over the images.

        Returns:
            ``self``, so calls can be chained.

        Raises:
            ValueError: If ``images`` is empty.
        """
        # Fail early with a clear message; an empty dataset otherwise makes the
        # shuffling DataLoader raise an obscure sampler error.
        if len(images) == 0:
            raise ValueError("TinyDiffusion.train requires at least one image")
        dataset = TinyDiffusionDataset(images)
        dataloader = DataLoader(dataset, batch_size=1, shuffle=True)
        optimizer = torch.optim.Adam(self.model.parameters(), lr=1e-4)
        device = torch.device("cpu")
        self.model.to(device)
        for epoch in range(epochs):
            total_loss = 0
            for x in dataloader:
                x = x.to(device)
                # One random timestep per sample; kept as float for the model's
                # time embedding and cast to long only for schedule indexing.
                t = torch.randint(0, self.timesteps, (x.size(0),), device=device).float()
                noise = torch.randn_like(x)
                alpha_t = self.alpha_cumprod[t.long()].view(-1, 1, 1, 1)
                x_noisy = torch.sqrt(alpha_t) * x + torch.sqrt(1 - alpha_t) * noise
                pred_noise = self.model(x_noisy, t)
                loss = F.mse_loss(pred_noise, noise)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                total_loss += loss.item()
            logger.info(f"Epoch {epoch + 1}/{epochs}, Loss: {total_loss / len(dataloader):.4f}")
        return self

    def generate(self, size=(64, 64), steps=100):
        """Sample one image by iteratively denoising Gaussian noise.

        Args:
            size: ``(height, width)`` of the generated image.
            steps: Number of reverse steps; values above ``self.timesteps`` are
                capped because the schedule has no entries beyond that.

        Returns:
            A PIL RGB image built from the final tensor scaled by 255 and
            clamped to ``[0, 255]``.
        """
        device = torch.device("cpu")
        # Indexing the schedule at t >= timesteps would raise IndexError.
        steps = min(steps, self.timesteps)
        x = torch.randn(1, 3, size[0], size[1], device=device)
        # Sampling needs no gradients; without no_grad every one of the model
        # calls would keep its autograd graph alive.
        with torch.no_grad():
            for t in reversed(range(steps)):
                t_tensor = torch.full((1,), t, device=device, dtype=torch.float32)
                alpha_t = self.alpha_cumprod[t].view(-1, 1, 1, 1)
                pred_noise = self.model(x, t_tensor)
                x = (x - (1 - self.alpha[t]) / torch.sqrt(1 - alpha_t) * pred_noise) / torch.sqrt(self.alpha[t])
                # No noise is re-injected on the final step (t == 0).
                if t > 0:
                    x += torch.sqrt(self.beta[t]) * torch.randn_like(x)
        x = torch.clamp(x * 255, 0, 255).byte()
        return Image.fromarray(x.squeeze(0).permute(1, 2, 0).cpu().numpy())

    def upscale(self, image, scale_factor=2):
        """Return ``image`` enlarged by ``scale_factor`` using bilinear interpolation.

        Args:
            image: PIL image; it is converted to RGB first.
            scale_factor: Multiplier passed to ``F.interpolate``.

        Returns:
            A new PIL RGB image.
        """
        img_tensor = torch.tensor(np.array(image.convert("RGB")).transpose(2, 0, 1), dtype=torch.float32).unsqueeze(0) / 255.0
        upscaled = F.interpolate(img_tensor, scale_factor=scale_factor, mode='bilinear', align_corners=False)
        upscaled = torch.clamp(upscaled * 255, 0, 255).byte()
        return Image.fromarray(upscaled.squeeze(0).permute(1, 2, 0).cpu().numpy())
 # Model Builders
 class ModelBuilder:
     def __init__(self):
     start_time = time.time()
     status = st.empty()
     status.text(f"Processing PDF Snapshot ({mode})... (0s)")
+    images = convert_from_path(pdf_path, dpi=200)  # Convert PDF to images
     output_files = []
     if mode == "thumbnail":
+        img = images[0].resize((int(images[0].width * 0.5), int(images[0].height * 0.5)), Image.Resampling.LANCZOS)
         output_file = generate_filename("thumbnail", "png")
+        img.save(output_file)
         output_files.append(output_file)
     elif mode == "twopage":
+        for i in range(min(2, len(images))):
             output_file = generate_filename(f"twopage_{i}", "png")
+            images[i].save(output_file)
             output_files.append(output_file)
     elapsed = int(time.time() - start_time)
     status.text(f"PDF Snapshot ({mode}) completed in {elapsed}s!")
     for file in output_files:
     update_gallery()
     return result
async def process_image_gen(prompt, output_file):
    """Generate one image from ``prompt`` with Stable Diffusion on CPU.

    Shows progress in a Streamlit placeholder, saves the image to
    ``output_file``, registers that path in the session's captured files
    (once), refreshes the gallery and returns the generated image.
    """
    began = time.time()
    placeholder = st.empty()
    placeholder.text("Processing Image Gen... (0s)")

    sd_pipe = StableDiffusionPipeline.from_pretrained(
        "OFA-Sys/small-stable-diffusion-v0", torch_dtype=torch.float32
    )
    sd_pipe = sd_pipe.to("cpu")
    outputs = sd_pipe(prompt, num_inference_steps=20)
    gen_image = outputs.images[0]

    seconds = int(time.time() - began)
    placeholder.text(f"Image Gen completed in {seconds}s!")

    gen_image.save(output_file)
    tracked = st.session_state['captured_files']
    if output_file not in tracked:
        tracked.append(output_file)
    update_gallery()
    return gen_image
async def process_custom_diffusion(images, output_file, model_name):
    """Train a fresh tiny diffusion model on ``images`` and save one sample.

    A new ``TinyUNet`` is trained, a sample is generated and upscaled 2x, and
    the upscaled image is written to ``output_file``. The path is registered
    in the session's captured files (once), the gallery is refreshed, and the
    upscaled image is returned. ``model_name`` is only used in status text.
    """
    t0 = time.time()
    banner = st.empty()
    banner.text(f"Training {model_name}... (0s)")

    diffusion = TinyDiffusion(TinyUNet())
    diffusion.train(images)
    sample = diffusion.generate()
    upscaled_image = diffusion.upscale(sample, scale_factor=2)

    seconds = int(time.time() - t0)
    banner.text(f"{model_name} completed in {seconds}s!")

    upscaled_image.save(output_file)
    tracked = st.session_state['captured_files']
    if output_file not in tracked:
        tracked.append(output_file)
    update_gallery()
    return upscaled_image
 # Main App
 st.title("AI Vision & SFT Titans 🚀")
 # Sidebar
 st.sidebar.header("Captured Files 📜")
 gallery_size = st.sidebar.slider("Gallery Size", 1, 10, 4)
def update_gallery():
    """Render captured PNG and TXT files in a two-column sidebar gallery.

    At most ``gallery_size * 2`` files returned by ``get_gallery_files`` are
    shown, alternating between the two columns. PNG files are displayed as
    images; TXT files are displayed as a preview of their first 50 characters
    (followed by ``...`` when the file is longer), with the path as tooltip.
    """
    media_files = get_gallery_files(["png", "txt"])
    if not media_files:
        return
    cols = st.sidebar.columns(2)
    for idx, file in enumerate(media_files[:gallery_size * 2]):
        with cols[idx % 2]:
            if file.endswith(".png"):
                # Close the file handle once Streamlit has consumed the image
                # instead of leaving it open until garbage collection.
                with Image.open(file) as img:
                    st.image(img, caption=file, use_container_width=True)
            elif file.endswith(".txt"):
                # Only 51 characters are needed to decide between the full text
                # and a truncated preview; undecodable bytes are replaced so a
                # stray file cannot break the whole sidebar.
                with open(file, "r", encoding="utf-8", errors="replace") as f:
                    preview = f.read(51)
                st.text(preview[:50] + "..." if len(preview) > 50 else preview, help=file)
 update_gallery()
 st.sidebar.subheader("Model Management 🗂️")
         st.write(entry)
 # Tabs
+tab1, tab2, tab3, tab4, tab5, tab6, tab7, tab8, tab9 = st.tabs([
     "Camera Snap 📷", "Download PDFs 📥", "Build Titan 🌱", "Fine-Tune Titan 🔧",
+    "Test Titan 🧪", "Agentic RAG Party 🌐", "Test OCR 🔍", "Test Image Gen 🎨", "Custom Diffusion 🎨🤓"
 ])
 with tab1:
     else:
         st.warning("No images captured yet. Use Camera Snap or Download PDFs first!")
with tab9:
    # Custom Diffusion tab: pick captured PNGs, choose a model, then either
    # train the in-file tiny diffusion model on them or run a pretrained
    # pipeline with a fixed prompt.
    st.header("Custom Diffusion 🎨🤓")
    st.write("Unleash your inner artist with our tiny diffusion models!")
    captured_files = get_gallery_files(["png"])
    if captured_files:
        st.subheader("Select Images to Train")
        selected_files = st.multiselect("Pick Images", captured_files, key="diffusion_select")
        images = [Image.open(file) for file in selected_files]
        # (display label, model identifier); "custom" selects the tiny in-file model.
        model_options = [
            ("PixelTickler 🎨✨", "OFA-Sys/small-stable-diffusion-v0"),
            ("DreamWeaver 🌙🖌️", "stabilityai/stable-diffusion-2-base"),
            ("TinyArtBot 🤖🖼️", "custom")
        ]
        model_choice = st.selectbox("Choose Your Diffusion Dynamo", [opt[0] for opt in model_options], key="diffusion_model")
        model_name = next(opt[1] for opt in model_options if opt[0] == model_choice)
        if st.button("Train & Generate 🚀", key="diffusion_run"):
            if model_name == "custom" and not images:
                # Training needs data; an empty selection would otherwise
                # crash inside the training DataLoader.
                st.warning("Select at least one image to train the custom model.")
            else:
                output_file = generate_filename("custom_diffusion", "png")
                st.session_state['processing']['diffusion'] = True
                try:
                    if model_name == "custom":
                        result = asyncio.run(process_custom_diffusion(images, output_file, model_choice))
                    else:
                        builder = DiffusionBuilder()
                        builder.load_model(model_name)
                        result = builder.generate("A superhero scene inspired by captured images")
                        result.save(output_file)
                        # Same duplicate guard the process_* helpers use.
                        if output_file not in st.session_state['captured_files']:
                            st.session_state['captured_files'].append(output_file)
                    st.session_state['history'].append(f"Custom Diffusion: {model_choice} -> {output_file}")
                    st.image(result, caption=f"{model_choice} Masterpiece", use_container_width=True)
                    st.success(f"Image saved to {output_file}")
                finally:
                    # Always clear the flag, even when generation raises.
                    st.session_state['processing']['diffusion'] = False
    else:
        st.warning("No images captured yet. Use Camera Snap or Download PDFs first!")
 # Initial Gallery Update
 update_gallery()