# ShubhamMhaske's picture
# Update app.py
# eeed855 verified
import os
import tempfile

import cv2
import numpy as np
import streamlit as st
import torch
from doclayout_yolo import YOLOv10
from PIL import Image
# Document layout detection demo.
#
# Flow: load the layout model, let the user upload an image, run detection
# on it, and show the annotated result followed by the credits.


@st.cache_resource
def _load_model():
    """Build the pre-trained layout model once and reuse it across reruns.

    Streamlit re-executes this script on every widget interaction; without
    caching, the weights file would be re-read on each rerun.
    """
    return YOLOv10("doclayout_yolo_docstructbench_imgsz1024.pt")


# Load the pre-trained model (cached, see _load_model)
model = _load_model()

# Automatically select device
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Streamlit UI
st.title("Document Layout Detection")
st.subheader("Upload an image to detect and annotate document layout")

uploaded_file = st.file_uploader("Upload Image", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    # Load the uploaded image; RGB conversion drops any alpha channel so the
    # image can be written out as JPEG.
    image = Image.open(uploaded_file).convert("RGB")

    # The image is handed to the model as a file path. A private temporary
    # directory keeps concurrent sessions from overwriting each other's
    # input and guarantees the file is removed afterwards.
    with tempfile.TemporaryDirectory() as work_dir:
        image_path = os.path.join(work_dir, "temp_input.jpg")
        image.save(image_path)

        # Perform prediction
        with st.spinner("Processing..."):
            det_res = model.predict(
                image_path,
                imgsz=1024,
                conf=0.2,
                device=device,
            )

    # Annotate the first (and only) result
    annotated_frame = det_res[0].plot(pil=True, line_width=5, font_size=20)

    # Convert the annotated frame to an array for display.
    # NOTE(review): depending on the library version, plot() may return
    # BGR-ordered data -- confirm the colors render correctly in st.image.
    annotated_image = np.array(annotated_frame)

    # Display the annotated image
    st.image(annotated_image, caption="Annotated Image", use_container_width=True)
    st.success("Detection completed!")

# Credits
st.markdown("**Application Created By Shubham Mhaske**")
st.write("Do have a look on Papers 📄 : - https://arxiv.org/pdf/2410.12628")
st.write("Thanks to https://github.com/opendatalab")