# Hugging Face Space: Violin & Guzheng object-detection demo (Gradio app).
from ultralytics import YOLO
from PIL import Image
import gradio as gr
from huggingface_hub import snapshot_download
from tqdm.auto import tqdm
import os
import tempfile
import cv2 # OpenCV for video processing
# Sub-directory of the downloaded snapshot that holds the exported model.
model_path = "best_int8_openvino_model"

def load_model(repo_id, model_dir=model_path):
    """Download a model repository from the Hugging Face Hub and load the
    OpenVINO-exported YOLO detection model found inside it.

    Args:
        repo_id: Hub repository id, e.g. "user/repo".
        model_dir: Snapshot sub-directory containing the exported model.
            Defaults to the module-level ``model_path`` constant.

    Returns:
        A ``YOLO`` model configured for the 'detect' task.
    """
    download_dir = snapshot_download(repo_id)
    # Reuse the module-level constant instead of re-hardcoding the folder name.
    path = os.path.join(download_dir, model_dir)
    detection_model = YOLO(path, task='detect')
    return detection_model
def process_image(pilimg):
    """Run the shared detector on a PIL image and return the annotated image.

    Args:
        pilimg: Input image as a PIL ``Image``.

    Returns:
        A PIL ``Image`` with detection boxes drawn on it (RGB order).
    """
    prediction = detection_model.predict(pilimg, conf=0.5)
    annotated_bgr = prediction[0].plot()  # Ultralytics renders in BGR order
    # Reverse the channel axis (BGR -> RGB) before handing back to PIL.
    return Image.fromarray(annotated_bgr[..., ::-1])
def process_video(video):
    """Run the shared detector frame-by-frame over a video and save an
    annotated copy.

    Args:
        video: Filesystem path to the input video (as supplied by gr.Video).

    Returns:
        Path to the annotated .mp4 written into a fresh temporary directory.
    """
    video_reader = cv2.VideoCapture(video)
    nb_frames = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_h = int(video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT))
    frame_w = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH))
    fps = video_reader.get(cv2.CAP_PROP_FPS)

    # Write the result into a throwaway directory so concurrent requests
    # never clobber each other's output file.
    temp_dir = tempfile.mkdtemp()
    output_path = os.path.join(temp_dir, "annotated_video.mp4")
    video_writer = cv2.VideoWriter(output_path,
                                   cv2.VideoWriter_fourcc(*'mp4v'),
                                   fps,
                                   (frame_w, frame_h))
    try:
        for _ in tqdm(range(nb_frames)):
            success, frame = video_reader.read()
            if not success:
                # Stop early instead of spinning through the remaining
                # (unreadable) frame indices doing nothing.
                break
            results = detection_model.predict(frame, conf=0.5)
            # plot() returns a BGR frame, which is what VideoWriter expects.
            video_writer.write(results[0].plot())
    finally:
        # Release handles even if inference raises, so the reader is freed
        # and the partially-written output file is flushed.
        video_reader.release()
        video_writer.release()
    cv2.destroyAllWindows()
    cv2.waitKey(1)
    return output_path
# Hub repository holding the exported Violin/Guzheng detection model.
REPO_ID = "2384603g/violin_GuZheng_WM"
# Loaded once at import time and shared by both processing functions above.
detection_model = load_model(REPO_ID)
# Define a message to display at the top of the app
message = "<h1>Welcome to the Image and Video Upload App For Violin & Guzheng!</h1><br>Done By Tang Wei Ming (2384603G)<br><p>Please upload an image or a video of Violin & Guzheng to get started.</p>"
# Create the interface for image upload
# (gr.Image(type="pil") hands process_image a PIL Image and displays the
#  annotated PIL Image it returns.)
image_interface = gr.Interface(fn=process_image,
                               inputs=gr.Image(type="pil"),
                               outputs=gr.Image(type="pil"))
# Create the interface for video upload
# (gr.Video passes a filesystem path; process_video returns the output path.)
video_interface = gr.Interface(fn=process_video,
                               inputs=gr.Video(label="Upload a Video"),
                               outputs="video")
# Use gr.Blocks to arrange components and launch the app
with gr.Blocks() as app:
    gr.HTML(message)  # Banner message at the top of the page
    # Two tabs: one for still images, one for videos.
    gr.TabbedInterface([image_interface, video_interface],
                       tab_names=["Image Upload", "Video Upload"])

# Launch the interface (stray scrape artifact after this call removed).
app.launch()