from ultralytics import YOLO
from PIL import Image
import gradio as gr
from huggingface_hub import snapshot_download
from tqdm.auto import tqdm
import os
import tempfile
import cv2  # OpenCV for video processing

MODEL_SUBDIR = "best_int8_openvino_model"


def load_model(repo_id):
    # snapshot_download caches the repo locally, so repeat launches reuse it.
    download_dir = snapshot_download(repo_id)
    model_path = os.path.join(download_dir, MODEL_SUBDIR)
    detection_model = YOLO(model_path, task="detect")
    return detection_model


def process_image(pilimg):
    # Run detection; predict() returns one Results object per input image.
    result = detection_model.predict(pilimg, conf=0.5)
    img_bgr = result[0].plot()  # plot() draws the detections onto a BGR numpy array
    out_pilimg = Image.fromarray(img_bgr[..., ::-1])  # reverse channels: BGR -> RGB for PIL
    return out_pilimg
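
# Quick sanity check outside Gradio (a sketch; "sample.jpg" is a hypothetical
# local file, not part of the app):
#     process_image(Image.open("sample.jpg")).save("annotated.jpg")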

def process_video(video):
    video_reader = cv2.VideoCapture(video)
    nb_frames = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_h = int(video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT))
    frame_w = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH))
    fps = video_reader.get(cv2.CAP_PROP_FPS)

    # Write the annotated frames to a temporary mp4 that Gradio can serve back.
    temp_dir = tempfile.mkdtemp()
    output_path = os.path.join(temp_dir, "annotated_video.mp4")
    video_writer = cv2.VideoWriter(output_path,
                                   cv2.VideoWriter_fourcc(*'mp4v'),
                                   fps,
                                   (frame_w, frame_h))
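    # Note: 'mp4v' output does not always play inline in browsers; if playback
    # fails, re-encoding to H.264 (e.g. with ffmpeg) is a common workaround.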

    # Loop through the video frames
    for _ in tqdm(range(nb_frames)):
        success, frame = video_reader.read()
        if not success:
            break  # stop early if the reported frame count overshoots

        # Run YOLO inference on the frame (no GPU device is selected here)
        results = detection_model.predict(frame, conf=0.5)

        # Draw the detections and write the annotated frame
        annotated_frame = results[0].plot()
        video_writer.write(annotated_frame)

    video_reader.release()
    video_writer.release()
    return output_path
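
# Possible speed-up (a sketch, not part of the original app): annotate only
# every Nth frame and pass the rest through unchanged, trading detection
# coverage for throughput. `stride` is a hypothetical knob.
#
#     stride = 3
#     for i in range(nb_frames):
#         success, frame = video_reader.read()
#         if not success:
#             break
#         if i % stride == 0:
#             frame = detection_model.predict(frame, conf=0.5)[0].plot()
#         video_writer.write(frame)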

REPO_ID = "2384603g/violin_GuZheng_WM"
detection_model = load_model(REPO_ID)

# Define a message to display at the top of the app
message = "<h1>Welcome to the Image and Video Upload App for Violin & Guzheng!</h1><br>Done by Tang Wei Ming (2384603G)<br><p>Please upload an image or a video of a violin or guzheng to get started.</p>"


# Create the interface for image upload
image_interface = gr.Interface(fn=process_image,
                               inputs=gr.Image(type="pil", label="Upload an Image"),
                               outputs=gr.Image(type="pil"))

# Create the interface for video upload
video_interface = gr.Interface(fn=process_video, 
                               inputs=gr.Video(label="Upload a Video"), 
                               outputs="video")


# Use gr.Blocks to arrange components and launch the app
with gr.Blocks() as app:
    gr.HTML(message)  # Add the message at the top
    gr.TabbedInterface([image_interface, video_interface],
                       tab_names=["Image Upload", "Video Upload"])

# Launch the interface
app.launch()
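
# Note (an assumption, not in the original app): long video jobs can outlive
# the default request handling on hosted Spaces; Gradio's built-in queue is
# one way to cope:
#     app.queue().launch()  # instead of app.launch()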