Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -44,10 +44,8 @@ from typing import Optional
|
|
| 44 |
from urllib.parse import quote
|
| 45 |
from xml.etree import ElementTree as ET
|
| 46 |
|
| 47 |
-
# OpenAI client initialization
|
| 48 |
client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'), organization=os.getenv('OPENAI_ORG_ID'))
|
| 49 |
|
| 50 |
-
# Logging setup
|
| 51 |
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
|
| 52 |
logger = logging.getLogger(__name__)
|
| 53 |
log_records = []
|
|
@@ -56,7 +54,6 @@ class LogCaptureHandler(logging.Handler):
|
|
| 56 |
log_records.append(record)
|
| 57 |
logger.addHandler(LogCaptureHandler())
|
| 58 |
|
| 59 |
-
# Streamlit configuration
|
| 60 |
st.set_page_config(
|
| 61 |
page_title="AI Multimodal Titan 🚀",
|
| 62 |
page_icon="🤖",
|
|
@@ -69,7 +66,6 @@ st.set_page_config(
|
|
| 69 |
}
|
| 70 |
)
|
| 71 |
|
| 72 |
-
# Session state initialization
|
| 73 |
for key in ['history', 'messages', 'processing', 'asset_checkboxes', 'downloaded_pdfs', 'unique_counter', 'search_queries']:
|
| 74 |
st.session_state.setdefault(key, [] if key in ['history', 'messages', 'search_queries'] else {} if key in ['asset_checkboxes', 'downloaded_pdfs', 'processing'] else 0 if key == 'unique_counter' else None)
|
| 75 |
st.session_state.setdefault('builder', None)
|
|
@@ -82,7 +78,37 @@ st.session_state.setdefault('cam0_file', None)
|
|
| 82 |
st.session_state.setdefault('cam1_file', None)
|
| 83 |
st.session_state.setdefault('openai_model', "gpt-4o-2024-05-13")
|
| 84 |
|
| 85 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
@dataclass
|
| 87 |
class ModelConfig:
|
| 88 |
name: str
|
|
@@ -112,8 +138,8 @@ class ModelBuilder:
|
|
| 112 |
self.jokes = [
|
| 113 |
"Why did the AI go to therapy? Too many layers to unpack! 😂",
|
| 114 |
"Training complete! Time for a binary coffee break. ☕",
|
| 115 |
-
"I told my neural network a joke; it couldn
|
| 116 |
-
"I asked the AI for a pun, and it said, 'I
|
| 117 |
"Debugging my code is like a stand-up routine—always a series of exceptions! 😆"
|
| 118 |
]
|
| 119 |
def load_model(self, model_path: str, config: Optional[ModelConfig] = None):
|
|
@@ -124,8 +150,9 @@ class ModelBuilder:
|
|
| 124 |
self.tokenizer.pad_token = self.tokenizer.eos_token
|
| 125 |
if config:
|
| 126 |
self.config = config
|
| 127 |
-
|
| 128 |
-
|
|
|
|
| 129 |
return self
|
| 130 |
def save_model(self, path: str):
|
| 131 |
with st.spinner("Saving model... 💾"):
|
|
@@ -153,7 +180,6 @@ class DiffusionBuilder:
|
|
| 153 |
def generate(self, prompt: str):
|
| 154 |
return self.pipeline(prompt, num_inference_steps=20).images[0]
|
| 155 |
|
| 156 |
-
# Utility functions
|
| 157 |
def generate_filename(sequence, ext="png", prompt=None):
|
| 158 |
central = pytz.timezone('US/Central')
|
| 159 |
safe_date_time = datetime.now(central).strftime("%m%d_%H%M")
|
|
@@ -197,11 +223,10 @@ def download_pdf(url, output_path):
|
|
| 197 |
logger.error(f"Failed to download {url}: {e}")
|
| 198 |
return False
|
| 199 |
|
| 200 |
-
# Processing functions
|
| 201 |
async def process_pdf_snapshot(pdf_path, mode="single"):
|
| 202 |
start_time = time.time()
|
| 203 |
status = st.empty()
|
| 204 |
-
status.text(f"Processing PDF Snapshot ({mode})... (0s)")
|
| 205 |
try:
|
| 206 |
doc = fitz.open(pdf_path)
|
| 207 |
output_files = []
|
|
@@ -249,13 +274,15 @@ async def process_ocr(image, output_file):
|
|
| 249 |
status = st.empty()
|
| 250 |
status.text("Processing GOT-OCR2_0... (0s)")
|
| 251 |
tokenizer = AutoTokenizer.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True)
|
| 252 |
-
model = AutoModel.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True, torch_dtype=torch.float32)
|
|
|
|
|
|
|
| 253 |
temp_file = generate_filename("temp", "png")
|
| 254 |
image.save(temp_file)
|
| 255 |
result = model.chat(tokenizer, temp_file, ocr_type='ocr')
|
| 256 |
os.remove(temp_file)
|
| 257 |
elapsed = int(time.time() - start_time)
|
| 258 |
-
status.text(f"GOT-OCR2_0 completed in {elapsed}s!")
|
| 259 |
async with aiofiles.open(output_file, "w") as f:
|
| 260 |
await f.write(result)
|
| 261 |
return result
|
|
@@ -555,8 +582,7 @@ def FileSidebar():
|
|
| 555 |
|
| 556 |
FileSidebar()
|
| 557 |
|
| 558 |
-
|
| 559 |
-
tabs = st.tabs(["Camera 📷", "Download 📥", "OCR 🔍", "Build 🌱", "Image Gen 🎨", "PDF 📄", "Image 🖼️", "Audio 🎵", "Video 🎥", "Code 🧑💻", "Gallery 📚", "Search 🔎"])
|
| 560 |
(tab_camera, tab_download, tab_ocr, tab_build, tab_imggen, tab_pdf, tab_image, tab_audio, tab_video, tab_code, tab_gallery, tab_search) = tabs
|
| 561 |
|
| 562 |
with tab_camera:
|
|
@@ -597,14 +623,13 @@ with tab_ocr:
|
|
| 597 |
st.header("Test OCR 🔍")
|
| 598 |
all_files = get_gallery_files()
|
| 599 |
if all_files:
|
| 600 |
-
# Filter for only PNG and PDF files
|
| 601 |
ocr_files = [f for f in all_files if f.endswith(('.png', '.pdf'))]
|
| 602 |
if st.button("OCR All Assets 🚀"):
|
| 603 |
full_text = "# OCR Results\n\n"
|
| 604 |
for file in ocr_files:
|
| 605 |
if file.endswith('.png'):
|
| 606 |
image = Image.open(file)
|
| 607 |
-
else:
|
| 608 |
try:
|
| 609 |
doc = fitz.open(file)
|
| 610 |
pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
|
|
@@ -626,7 +651,7 @@ with tab_ocr:
|
|
| 626 |
if selected_file:
|
| 627 |
if selected_file.endswith('.png'):
|
| 628 |
image = Image.open(selected_file)
|
| 629 |
-
else:
|
| 630 |
try:
|
| 631 |
doc = fitz.open(selected_file)
|
| 632 |
pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
|
|
@@ -776,9 +801,9 @@ with tab_search:
|
|
| 776 |
result = search_arxiv(query)
|
| 777 |
st.markdown(result)
|
| 778 |
|
| 779 |
-
# Sidebar
|
| 780 |
st.sidebar.subheader("Gallery Settings")
|
| 781 |
st.session_state['gallery_size'] = st.sidebar.slider("Gallery Size", 1, 10, st.session_state['gallery_size'], key="gallery_size_slider")
|
|
|
|
| 782 |
st.sidebar.subheader("Action Logs 📜")
|
| 783 |
for record in log_records:
|
| 784 |
st.sidebar.write(f"{record.asctime} - {record.levelname} - {record.message}")
|
|
@@ -812,7 +837,6 @@ def update_gallery():
|
|
| 812 |
|
| 813 |
update_gallery()
|
| 814 |
|
| 815 |
-
# Chatbot
|
| 816 |
if prompt := st.chat_input("GPT-4o Multimodal ChatBot - What can I help you with?"):
|
| 817 |
st.session_state.messages.append({"role": "user", "content": prompt})
|
| 818 |
with st.chat_message("user"):
|
|
@@ -824,4 +848,8 @@ if prompt := st.chat_input("GPT-4o Multimodal ChatBot - What can I help you with
|
|
| 824 |
if chunk.choices[0].delta.content:
|
| 825 |
response += chunk.choices[0].delta.content
|
| 826 |
st.write(response)
|
| 827 |
-
st.session_state.messages.append({"role": "assistant", "content": response})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
from urllib.parse import quote
|
| 45 |
from xml.etree import ElementTree as ET
|
| 46 |
|
|
|
|
| 47 |
client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'), organization=os.getenv('OPENAI_ORG_ID'))
|
| 48 |
|
|
|
|
| 49 |
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
|
| 50 |
logger = logging.getLogger(__name__)
|
| 51 |
log_records = []
|
|
|
|
| 54 |
log_records.append(record)
|
| 55 |
logger.addHandler(LogCaptureHandler())
|
| 56 |
|
|
|
|
| 57 |
st.set_page_config(
|
| 58 |
page_title="AI Multimodal Titan 🚀",
|
| 59 |
page_icon="🤖",
|
|
|
|
| 66 |
}
|
| 67 |
)
|
| 68 |
|
|
|
|
| 69 |
for key in ['history', 'messages', 'processing', 'asset_checkboxes', 'downloaded_pdfs', 'unique_counter', 'search_queries']:
|
| 70 |
st.session_state.setdefault(key, [] if key in ['history', 'messages', 'search_queries'] else {} if key in ['asset_checkboxes', 'downloaded_pdfs', 'processing'] else 0 if key == 'unique_counter' else None)
|
| 71 |
st.session_state.setdefault('builder', None)
|
|
|
|
| 78 |
st.session_state.setdefault('cam1_file', None)
|
| 79 |
st.session_state.setdefault('openai_model', "gpt-4o-2024-05-13")
|
| 80 |
|
| 81 |
+
def get_gpu_info():
    """Collect a snapshot of CUDA device 0: name, memory figures and load.

    Returns:
        dict: ``{"Status": "No GPU detected"}`` when
        ``torch.cuda.is_available()`` is false. Otherwise a mapping with the
        keys ``"GPU Name"``, ``"Total Memory (GB)"``,
        ``"Reserved Memory (GB)"``, ``"Allocated Memory (GB)"``,
        ``"Free Memory (GB)"`` (memory figures are strings formatted with
        two decimals) and ``"Utilization (%)"`` (the value returned by
        ``torch.cuda.utilization`` or the string ``"N/A"`` when that query
        is not possible).
    """
    if not torch.cuda.is_available():
        return {"Status": "No GPU detected"}

    bytes_per_unit = 1024 ** 3  # byte counts are divided by 2**30
    gpu_name = torch.cuda.get_device_name(0)
    total_memory = torch.cuda.get_device_properties(0).total_memory / bytes_per_unit
    reserved_memory = torch.cuda.memory_reserved(0) / bytes_per_unit
    allocated_memory = torch.cuda.memory_allocated(0) / bytes_per_unit
    # "Free" is total minus this process's allocated tensors; memory reserved
    # by the caching allocator is not subtracted here.
    free_memory = total_memory - allocated_memory

    # torch.cuda.utilization depends on the optional pynvml package and a
    # loadable NVML driver; without them it raises instead of returning a
    # number, which would otherwise abort rendering of the whole status panel.
    try:
        utilization = torch.cuda.utilization(0)
    except (ImportError, RuntimeError):
        utilization = "N/A"

    return {
        "GPU Name": gpu_name,
        "Total Memory (GB)": f"{total_memory:.2f}",
        "Reserved Memory (GB)": f"{reserved_memory:.2f}",
        "Allocated Memory (GB)": f"{allocated_memory:.2f}",
        "Free Memory (GB)": f"{free_memory:.2f}",
        "Utilization (%)": utilization,
    }
|
| 99 |
+
|
| 100 |
+
def display_gpu_info():
    """Render the "GPU Status" section in the Streamlit sidebar.

    Fetches the mapping produced by ``get_gpu_info``. When it reports that no
    GPU was detected, a sidebar warning is shown. Otherwise every key/value
    pair is written out, followed by a progress bar and a caption giving
    allocated memory as a percentage of total memory.
    """
    info = get_gpu_info()
    st.sidebar.subheader("GPU Status 📊")

    # CPU-only hosts: nothing to list, just tell the user and stop.
    if info.get("Status") == "No GPU detected":
        st.sidebar.warning("No GPU detected. Running on CPU.")
        return

    for label, reading in info.items():
        st.sidebar.write(f"{label}: {reading}")

    # The memory figures arrive as formatted strings, so convert them back
    # to numbers before computing the ratio.
    allocated_gb = float(info["Allocated Memory (GB)"])
    total_gb = float(info["Total Memory (GB)"])
    memory_usage_percent = (allocated_gb / total_gb) * 100

    # The progress bar takes a fraction; cap it at 1.0.
    st.sidebar.progress(min(memory_usage_percent / 100, 1.0))
    st.sidebar.caption(f"Memory Usage: {memory_usage_percent:.1f}%")
|
| 111 |
+
|
| 112 |
@dataclass
|
| 113 |
class ModelConfig:
|
| 114 |
name: str
|
|
|
|
| 138 |
self.jokes = [
|
| 139 |
"Why did the AI go to therapy? Too many layers to unpack! 😂",
|
| 140 |
"Training complete! Time for a binary coffee break. ☕",
|
| 141 |
+
"I told my neural network a joke; it couldn’t stop dropping bits! 🤖",
|
| 142 |
+
"I asked the AI for a pun, and it said, 'I’m punning on parallel processing!' 😄",
|
| 143 |
"Debugging my code is like a stand-up routine—always a series of exceptions! 😆"
|
| 144 |
]
|
| 145 |
def load_model(self, model_path: str, config: Optional[ModelConfig] = None):
|
|
|
|
| 150 |
self.tokenizer.pad_token = self.tokenizer.eos_token
|
| 151 |
if config:
|
| 152 |
self.config = config
|
| 153 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 154 |
+
self.model.to(device)
|
| 155 |
+
st.success(f"Model loaded on {device}! 🎉 {random.choice(self.jokes)}")
|
| 156 |
return self
|
| 157 |
def save_model(self, path: str):
|
| 158 |
with st.spinner("Saving model... 💾"):
|
|
|
|
| 180 |
def generate(self, prompt: str):
|
| 181 |
return self.pipeline(prompt, num_inference_steps=20).images[0]
|
| 182 |
|
|
|
|
| 183 |
def generate_filename(sequence, ext="png", prompt=None):
|
| 184 |
central = pytz.timezone('US/Central')
|
| 185 |
safe_date_time = datetime.now(central).strftime("%m%d_%H%M")
|
|
|
|
| 223 |
logger.error(f"Failed to download {url}: {e}")
|
| 224 |
return False
|
| 225 |
|
|
|
|
| 226 |
async def process_pdf_snapshot(pdf_path, mode="single"):
|
| 227 |
start_time = time.time()
|
| 228 |
status = st.empty()
|
| 229 |
+
status.text(f"Processing PDF SnapshotK Snapshot ({mode})... (0s)")
|
| 230 |
try:
|
| 231 |
doc = fitz.open(pdf_path)
|
| 232 |
output_files = []
|
|
|
|
| 274 |
status = st.empty()
|
| 275 |
status.text("Processing GOT-OCR2_0... (0s)")
|
| 276 |
tokenizer = AutoTokenizer.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True)
|
| 277 |
+
model = AutoModel.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True, torch_dtype=torch.float32)
|
| 278 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 279 |
+
model.to(device).eval()
|
| 280 |
temp_file = generate_filename("temp", "png")
|
| 281 |
image.save(temp_file)
|
| 282 |
result = model.chat(tokenizer, temp_file, ocr_type='ocr')
|
| 283 |
os.remove(temp_file)
|
| 284 |
elapsed = int(time.time() - start_time)
|
| 285 |
+
status.text(f"GOT-OCR2_0 completed in {elapsed}s on {device}!")
|
| 286 |
async with aiofiles.open(output_file, "w") as f:
|
| 287 |
await f.write(result)
|
| 288 |
return result
|
|
|
|
| 582 |
|
| 583 |
FileSidebar()
|
| 584 |
|
| 585 |
+
tabs = st.tabs(["Camera 📷", "Download RFP 📥", "OCR 🔍", "Build 🌱", "Image Gen 🎨", "PDF 📄", "Image 🖼️", "Audio 🎵", "Video 🎥", "Code 🧑💻", "Gallery 📚", "Search 🔎"])
|
|
|
|
| 586 |
(tab_camera, tab_download, tab_ocr, tab_build, tab_imggen, tab_pdf, tab_image, tab_audio, tab_video, tab_code, tab_gallery, tab_search) = tabs
|
| 587 |
|
| 588 |
with tab_camera:
|
|
|
|
| 623 |
st.header("Test OCR 🔍")
|
| 624 |
all_files = get_gallery_files()
|
| 625 |
if all_files:
|
|
|
|
| 626 |
ocr_files = [f for f in all_files if f.endswith(('.png', '.pdf'))]
|
| 627 |
if st.button("OCR All Assets 🚀"):
|
| 628 |
full_text = "# OCR Results\n\n"
|
| 629 |
for file in ocr_files:
|
| 630 |
if file.endswith('.png'):
|
| 631 |
image = Image.open(file)
|
| 632 |
+
else:
|
| 633 |
try:
|
| 634 |
doc = fitz.open(file)
|
| 635 |
pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
|
|
|
|
| 651 |
if selected_file:
|
| 652 |
if selected_file.endswith('.png'):
|
| 653 |
image = Image.open(selected_file)
|
| 654 |
+
else:
|
| 655 |
try:
|
| 656 |
doc = fitz.open(selected_file)
|
| 657 |
pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
|
|
|
|
| 801 |
result = search_arxiv(query)
|
| 802 |
st.markdown(result)
|
| 803 |
|
|
|
|
| 804 |
st.sidebar.subheader("Gallery Settings")
|
| 805 |
st.session_state['gallery_size'] = st.sidebar.slider("Gallery Size", 1, 10, st.session_state['gallery_size'], key="gallery_size_slider")
|
| 806 |
+
display_gpu_info()
|
| 807 |
st.sidebar.subheader("Action Logs 📜")
|
| 808 |
for record in log_records:
|
| 809 |
st.sidebar.write(f"{record.asctime} - {record.levelname} - {record.message}")
|
|
|
|
| 837 |
|
| 838 |
update_gallery()
|
| 839 |
|
|
|
|
| 840 |
if prompt := st.chat_input("GPT-4o Multimodal ChatBot - What can I help you with?"):
|
| 841 |
st.session_state.messages.append({"role": "user", "content": prompt})
|
| 842 |
with st.chat_message("user"):
|
|
|
|
| 848 |
if chunk.choices[0].delta.content:
|
| 849 |
response += chunk.choices[0].delta.content
|
| 850 |
st.write(response)
|
| 851 |
+
st.session_state.messages.append({"role": "assistant", "content": response})
|
| 852 |
+
|
| 853 |
+
def create_audio_file(filename, audio_input, flag):
    """Write ``audio_input`` to ``filename`` in binary mode.

    Args:
        filename: Destination path; opened with mode ``"wb"``, so any
            existing file at that path is truncated.
        audio_input: Bytes-like payload handed to the file's ``write``.
        flag: Not used by this function.
    """
    with open(filename, "wb") as audio_file:
        audio_file.write(audio_input)
|