TorchTransformers-CV-SFT

Sleeping

App Files Files Community

awacke1 committed on Mar 29

Commit

ac52042

verified ·

1 Parent(s): 794b68e

Update app.py

Browse files

Files changed (1) hide show

app.py +50 -22

app.py CHANGED Viewed

@@ -44,10 +44,8 @@ from typing import Optional
 from urllib.parse import quote
 from xml.etree import ElementTree as ET
-# OpenAI client initialization
 client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'), organization=os.getenv('OPENAI_ORG_ID'))
-# Logging setup
 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
 logger = logging.getLogger(__name__)
 log_records = []
@@ -56,7 +54,6 @@ class LogCaptureHandler(logging.Handler):
         log_records.append(record)
 logger.addHandler(LogCaptureHandler())
-# Streamlit configuration
 st.set_page_config(
     page_title="AI Multimodal Titan 🚀",
     page_icon="🤖",
@@ -69,7 +66,6 @@ st.set_page_config(
     }
 )
-# Session state initialization
 for key in ['history', 'messages', 'processing', 'asset_checkboxes', 'downloaded_pdfs', 'unique_counter', 'search_queries']:
     st.session_state.setdefault(key, [] if key in ['history', 'messages', 'search_queries'] else {} if key in ['asset_checkboxes', 'downloaded_pdfs', 'processing'] else 0 if key == 'unique_counter' else None)
 st.session_state.setdefault('builder', None)
@@ -82,7 +78,37 @@ st.session_state.setdefault('cam0_file', None)
 st.session_state.setdefault('cam1_file', None)
 st.session_state.setdefault('openai_model', "gpt-4o-2024-05-13")
-# Model configurations
 @dataclass
 class ModelConfig:
     name: str
@@ -112,8 +138,8 @@ class ModelBuilder:
         self.jokes = [
             "Why did the AI go to therapy? Too many layers to unpack! 😂",
             "Training complete! Time for a binary coffee break. ☕",
-            "I told my neural network a joke; it couldn't stop dropping bits! 🤖",
-            "I asked the AI for a pun, and it said, 'I'm punning on parallel processing!' 😄",
             "Debugging my code is like a stand-up routine—always a series of exceptions! 😆"
         ]
     def load_model(self, model_path: str, config: Optional[ModelConfig] = None):
@@ -124,8 +150,9 @@ class ModelBuilder:
                 self.tokenizer.pad_token = self.tokenizer.eos_token
             if config:
                 self.config = config
-            self.model.to("cuda" if torch.cuda.is_available() else "cpu")
-        st.success(f"Model loaded! 🎉 {random.choice(self.jokes)}")
         return self
     def save_model(self, path: str):
         with st.spinner("Saving model... 💾"):
@@ -153,7 +180,6 @@ class DiffusionBuilder:
     def generate(self, prompt: str):
         return self.pipeline(prompt, num_inference_steps=20).images[0]
-# Utility functions
 def generate_filename(sequence, ext="png", prompt=None):
     central = pytz.timezone('US/Central')
     safe_date_time = datetime.now(central).strftime("%m%d_%H%M")
@@ -197,11 +223,10 @@ def download_pdf(url, output_path):
         logger.error(f"Failed to download {url}: {e}")
         return False
-# Processing functions
 async def process_pdf_snapshot(pdf_path, mode="single"):
     start_time = time.time()
     status = st.empty()
-    status.text(f"Processing PDF Snapshot ({mode})... (0s)")
     try:
         doc = fitz.open(pdf_path)
         output_files = []
@@ -249,13 +274,15 @@ async def process_ocr(image, output_file):
     status = st.empty()
     status.text("Processing GOT-OCR2_0... (0s)")
     tokenizer = AutoTokenizer.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True)
-    model = AutoModel.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True, torch_dtype=torch.float32).to("cpu").eval()
     temp_file = generate_filename("temp", "png")
     image.save(temp_file)
     result = model.chat(tokenizer, temp_file, ocr_type='ocr')
     os.remove(temp_file)
     elapsed = int(time.time() - start_time)
-    status.text(f"GOT-OCR2_0 completed in {elapsed}s!")
     async with aiofiles.open(output_file, "w") as f:
         await f.write(result)
     return result
@@ -555,8 +582,7 @@ def FileSidebar():
 FileSidebar()
-# Tabs
-tabs = st.tabs(["Camera 📷", "Download 📥", "OCR 🔍", "Build 🌱", "Image Gen 🎨", "PDF 📄", "Image 🖼️", "Audio 🎵", "Video 🎥", "Code 🧑‍💻", "Gallery 📚", "Search 🔎"])
 (tab_camera, tab_download, tab_ocr, tab_build, tab_imggen, tab_pdf, tab_image, tab_audio, tab_video, tab_code, tab_gallery, tab_search) = tabs
 with tab_camera:
@@ -597,14 +623,13 @@ with tab_ocr:
     st.header("Test OCR 🔍")
     all_files = get_gallery_files()
     if all_files:
-        # Filter for only PNG and PDF files
         ocr_files = [f for f in all_files if f.endswith(('.png', '.pdf'))]
         if st.button("OCR All Assets 🚀"):
             full_text = "# OCR Results\n\n"
             for file in ocr_files:
                 if file.endswith('.png'):
                     image = Image.open(file)
-                else:  # PDF
                     try:
                         doc = fitz.open(file)
                         pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
@@ -626,7 +651,7 @@ with tab_ocr:
         if selected_file:
             if selected_file.endswith('.png'):
                 image = Image.open(selected_file)
-            else:  # PDF
                 try:
                     doc = fitz.open(selected_file)
                     pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
@@ -776,9 +801,9 @@ with tab_search:
         result = search_arxiv(query)
         st.markdown(result)
-# Sidebar
 st.sidebar.subheader("Gallery Settings")
 st.session_state['gallery_size'] = st.sidebar.slider("Gallery Size", 1, 10, st.session_state['gallery_size'], key="gallery_size_slider")
 st.sidebar.subheader("Action Logs 📜")
 for record in log_records:
     st.sidebar.write(f"{record.asctime} - {record.levelname} - {record.message}")
@@ -812,7 +837,6 @@ def update_gallery():
 update_gallery()
-# Chatbot
 if prompt := st.chat_input("GPT-4o Multimodal ChatBot - What can I help you with?"):
     st.session_state.messages.append({"role": "user", "content": prompt})
     with st.chat_message("user"):
@@ -824,4 +848,8 @@ if prompt := st.chat_input("GPT-4o Multimodal ChatBot - What can I help you with
             if chunk.choices[0].delta.content:
                 response += chunk.choices[0].delta.content
                 st.write(response)
-        st.session_state.messages.append({"role": "assistant", "content": response})

 from urllib.parse import quote
 from xml.etree import ElementTree as ET
 client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'), organization=os.getenv('OPENAI_ORG_ID'))
 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
 logger = logging.getLogger(__name__)
 log_records = []
         log_records.append(record)
 logger.addHandler(LogCaptureHandler())
 st.set_page_config(
     page_title="AI Multimodal Titan 🚀",
     page_icon="🤖",
     }
 )
 for key in ['history', 'messages', 'processing', 'asset_checkboxes', 'downloaded_pdfs', 'unique_counter', 'search_queries']:
     st.session_state.setdefault(key, [] if key in ['history', 'messages', 'search_queries'] else {} if key in ['asset_checkboxes', 'downloaded_pdfs', 'processing'] else 0 if key == 'unique_counter' else None)
 st.session_state.setdefault('builder', None)
 st.session_state.setdefault('cam1_file', None)
 st.session_state.setdefault('openai_model', "gpt-4o-2024-05-13")
+def get_gpu_info():
+    """Return a display-ready snapshot of CUDA device 0.
+
+    Returns:
+        ``{"Status": "No GPU detected"}`` when ``torch.cuda.is_available()``
+        is false. Otherwise a dict holding the device name, the total,
+        reserved, allocated and free memory (bytes divided by 1024**3,
+        formatted as strings with two decimals) and ``"Utilization (%)"``,
+        which is whatever ``torch.cuda.utilization(0)`` returns or the
+        string ``"N/A"`` when that query cannot be performed.
+    """
+    if not torch.cuda.is_available():
+        return {"Status": "No GPU detected"}
+    bytes_per_unit = 1024 ** 3
+    gpu_name = torch.cuda.get_device_name(0)
+    total_memory = torch.cuda.get_device_properties(0).total_memory / bytes_per_unit
+    reserved_memory = torch.cuda.memory_reserved(0) / bytes_per_unit
+    allocated_memory = torch.cuda.memory_allocated(0) / bytes_per_unit
+    # "Free" is total minus allocated only; reserved memory is reported
+    # separately and is not subtracted here.
+    free_memory = total_memory - allocated_memory
+    try:
+        utilization = torch.cuda.utilization(0)
+    except (ImportError, RuntimeError):
+        # The utilization query relies on optional NVML bindings; a missing
+        # package or an unreachable driver must not take down the whole
+        # status panel, so degrade to a placeholder instead.
+        utilization = "N/A"
+    return {
+        "GPU Name": gpu_name,
+        "Total Memory (GB)": f"{total_memory:.2f}",
+        "Reserved Memory (GB)": f"{reserved_memory:.2f}",
+        "Allocated Memory (GB)": f"{allocated_memory:.2f}",
+        "Free Memory (GB)": f"{free_memory:.2f}",
+        "Utilization (%)": utilization,
+    }
+def display_gpu_info():
+    """Render the GPU status section in the Streamlit sidebar.
+
+    Shows a warning when ``get_gpu_info()`` reports that no GPU is present;
+    otherwise writes every reported field, then a progress bar and caption
+    for allocated memory as a share of total memory.
+    """
+    panel = st.sidebar
+    info = get_gpu_info()
+    panel.subheader("GPU Status 📊")
+    if info.get("Status") == "No GPU detected":
+        panel.warning("No GPU detected. Running on CPU.")
+        return
+    for label in info:
+        panel.write(f"{label}: {info[label]}")
+    # The memory figures arrive as formatted strings, so parse them back.
+    allocated = float(info["Allocated Memory (GB)"])
+    total = float(info["Total Memory (GB)"])
+    memory_usage_percent = (allocated / total) * 100
+    # Clamp the bar's fraction at 1.0.
+    panel.progress(min(memory_usage_percent / 100, 1.0))
+    panel.caption(f"Memory Usage: {memory_usage_percent:.1f}%")
 @dataclass
 class ModelConfig:
     name: str
         self.jokes = [
             "Why did the AI go to therapy? Too many layers to unpack! 😂",
             "Training complete! Time for a binary coffee break. ☕",
+            "I told my neural network a joke; it couldn’t stop dropping bits! 🤖",
+            "I asked the AI for a pun, and it said, 'I’m punning on parallel processing!' 😄",
             "Debugging my code is like a stand-up routine—always a series of exceptions! 😆"
         ]
     def load_model(self, model_path: str, config: Optional[ModelConfig] = None):
                 self.tokenizer.pad_token = self.tokenizer.eos_token
             if config:
                 self.config = config
+            device = "cuda" if torch.cuda.is_available() else "cpu"
+            self.model.to(device)
+        st.success(f"Model loaded on {device}! 🎉 {random.choice(self.jokes)}")
         return self
     def save_model(self, path: str):
         with st.spinner("Saving model... 💾"):
     def generate(self, prompt: str):
         return self.pipeline(prompt, num_inference_steps=20).images[0]
 def generate_filename(sequence, ext="png", prompt=None):
     central = pytz.timezone('US/Central')
     safe_date_time = datetime.now(central).strftime("%m%d_%H%M")
         logger.error(f"Failed to download {url}: {e}")
         return False
 async def process_pdf_snapshot(pdf_path, mode="single"):
     start_time = time.time()
     status = st.empty()
+    status.text(f"Processing PDF SnapshotK Snapshot ({mode})... (0s)")
     try:
         doc = fitz.open(pdf_path)
         output_files = []
     status = st.empty()
     status.text("Processing GOT-OCR2_0... (0s)")
     tokenizer = AutoTokenizer.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True)
+    model = AutoModel.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True, torch_dtype=torch.float32)
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    model.to(device).eval()
     temp_file = generate_filename("temp", "png")
     image.save(temp_file)
     result = model.chat(tokenizer, temp_file, ocr_type='ocr')
     os.remove(temp_file)
     elapsed = int(time.time() - start_time)
+    status.text(f"GOT-OCR2_0 completed in {elapsed}s on {device}!")
     async with aiofiles.open(output_file, "w") as f:
         await f.write(result)
     return result
 FileSidebar()
+tabs = st.tabs(["Camera 📷", "Download RFP 📥", "OCR 🔍", "Build 🌱", "Image Gen 🎨", "PDF 📄", "Image 🖼️", "Audio 🎵", "Video 🎥", "Code 🧑‍💻", "Gallery 📚", "Search 🔎"])
 (tab_camera, tab_download, tab_ocr, tab_build, tab_imggen, tab_pdf, tab_image, tab_audio, tab_video, tab_code, tab_gallery, tab_search) = tabs
 with tab_camera:
     st.header("Test OCR 🔍")
     all_files = get_gallery_files()
     if all_files:
         ocr_files = [f for f in all_files if f.endswith(('.png', '.pdf'))]
         if st.button("OCR All Assets 🚀"):
             full_text = "# OCR Results\n\n"
             for file in ocr_files:
                 if file.endswith('.png'):
                     image = Image.open(file)
+                else:
                     try:
                         doc = fitz.open(file)
                         pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
         if selected_file:
             if selected_file.endswith('.png'):
                 image = Image.open(selected_file)
+            else:
                 try:
                     doc = fitz.open(selected_file)
                     pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
         result = search_arxiv(query)
         st.markdown(result)
 st.sidebar.subheader("Gallery Settings")
 st.session_state['gallery_size'] = st.sidebar.slider("Gallery Size", 1, 10, st.session_state['gallery_size'], key="gallery_size_slider")
+display_gpu_info()
 st.sidebar.subheader("Action Logs 📜")
 for record in log_records:
     st.sidebar.write(f"{record.asctime} - {record.levelname} - {record.message}")
 update_gallery()
 if prompt := st.chat_input("GPT-4o Multimodal ChatBot - What can I help you with?"):
     st.session_state.messages.append({"role": "user", "content": prompt})
     with st.chat_message("user"):
             if chunk.choices[0].delta.content:
                 response += chunk.choices[0].delta.content
                 st.write(response)
+        st.session_state.messages.append({"role": "assistant", "content": response})
+def create_audio_file(filename, audio_input, flag):
+    """Write ``audio_input`` (bytes) to ``filename``, replacing any existing file.
+
+    ``flag`` is accepted but not used by this function.
+    """
+    with open(filename, mode="wb") as audio_out:
+        audio_out.write(audio_input)