CristianLazoQuispe committed
Commit 979a0f3 · 1 Parent(s): ab2d783
.gitignore ADDED
@@ -0,0 +1 @@
+ Demo/flagged/
app.py ADDED
@@ -0,0 +1,137 @@
+ import gradio as gr
+ import torch
+ from src.predict import predict_from_video
+ from src.islr.islr_model import DummyISLRModel
+ 
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ 
+ # Model checkpoints per dataset (both currently point to the same demo model)
+ dataset_models = {
+     "PERU": {"path": "models/demo_model.pt", "num_classes": 100},
+     "WLASL": {"path": "models/demo_model.pt", "num_classes": 100},
+ }
+ 
+ # Example video paths and labels per dataset (all entries currently reuse the same demo clip)
+ dataset_examples = {
+     "PERU": [
+         {"label": "📘 **Gloss: `libro`**", "path": "videos/wlasl/book.mp4"},
+         {"label": "🏠 **Gloss: `casa`**", "path": "videos/wlasl/book.mp4"},
+         {"label": "📘 **Gloss: `libro2`**", "path": "videos/wlasl/book.mp4"},
+         {"label": "🏠 **Gloss: `casa2`**", "path": "videos/wlasl/book.mp4"},
+     ],
+     "WLASL": [
+         {"label": "📙 **Gloss: `read`**", "path": "videos/wlasl/book.mp4"},
+         {"label": "🏫 **Gloss: `school`**", "path": "videos/wlasl/book.mp4"},
+         {"label": "📙 **Gloss: `read2`**", "path": "videos/wlasl/book.mp4"},
+         {"label": "🏫 **Gloss: `school2`**", "path": "videos/wlasl/book.mp4"},
+     ],
+ }
+ 
+ # === Load the model for the selected dataset ===
+ def load_model_and_examples(dataset):
+     model_path = dataset_models[dataset]["path"]
+     num_classes = dataset_models[dataset]["num_classes"]
+     model = DummyISLRModel(num_classes=num_classes)
+     model.load_state_dict(torch.load(model_path, map_location=device))
+     model.eval()
+     print(f"Model {dataset} loaded!")
+     # The default must hold 4 entries, since all four example slots are indexed below
+     examples = dataset_examples.get(dataset, [{"label": "", "path": ""}] * 4)
+     return (
+         model,
+         gr.update(visible=True),
+         gr.update(value=examples[0]["path"]),
+         examples[0]["path"],
+         gr.update(value=examples[0]["label"]),
+         gr.update(value=examples[1]["path"]),
+         examples[1]["path"],
+         gr.update(value=examples[1]["label"]),
+         gr.update(value=examples[2]["path"]),
+         examples[2]["path"],
+         gr.update(value=examples[2]["label"]),
+         gr.update(value=examples[3]["path"]),
+         examples[3]["path"],
+         gr.update(value=examples[3]["label"]),
+         gr.update(interactive=True),  # enable the Classify button
+     )
+ 
+ # === Use the model stored in gr.State ===
+ def classify_video_with_model(video, model):
+     # the model loaded for the selected dataset is passed through to `predict_from_video`
+     top1, confidences = predict_from_video(video, model=model)
+     return f"Top-1: {top1}", confidences
+ 
+ with gr.Blocks() as demo:
+     gr.Markdown("# 🧠 ISLR Demo with Mediapipe and 100 Classes")
+     gr.Markdown("Upload a video or use the webcam. The model classifies the sign and shows the 5 most likely classes.")
+ 
+     # === Dataset selector ===
+     gr.Markdown("## 📁 Filter by Language")
+     dataset_selector = gr.Dropdown(choices=list(dataset_examples.keys()), value=None, label="Select a language")
+ 
+     # === Model and example-path state ===
+     current_model = gr.State()
+     video_path_1 = gr.State()
+     video_path_2 = gr.State()
+     video_path_3 = gr.State()
+     video_path_4 = gr.State()
+ 
+     # === Video input + outputs ===
+     with gr.Row():
+         video_input = gr.Video(sources=["upload", "webcam"], label="🎥 Input video", width=300, height=400)
+         with gr.Column():
+             output_text = gr.Text(label="Top-1 prediction")
+             output_table = gr.Label(num_top_classes=5)
+             button_classify = gr.Button("🔍 Classify", interactive=False)
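+ 
+     # The button starts disabled; load_model_and_examples re-enables it once a model is ready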
+     button_classify.click(
+         fn=classify_video_with_model,
+         inputs=[video_input, current_model],
+         outputs=[output_text, output_table],
+     )
+ 
+     # === Dynamic examples container ===
+     examples_output = gr.Column(visible=True)
+ 
+     with examples_output:
+         with gr.Row():
+             with gr.Column(scale=1, min_width=100):
+                 m1 = gr.Markdown("📘 **Gloss: **")
+                 v1 = gr.Video(interactive=False, width=160, height=120)
+                 b1 = gr.Button("Use", scale=0)
+             with gr.Column(scale=1, min_width=100):
+                 m2 = gr.Markdown("🏠 **Gloss: **")
+                 v2 = gr.Video(interactive=False, width=160, height=120)
+                 b2 = gr.Button("Use", scale=0)
+             with gr.Column(scale=1, min_width=100):
+                 m3 = gr.Markdown("🏠 **Gloss: **")
+                 v3 = gr.Video(interactive=False, width=160, height=120)
+                 b3 = gr.Button("Use", scale=0)
+             with gr.Column(scale=1, min_width=100):
+                 m4 = gr.Markdown("🏠 **Gloss: **")
+                 v4 = gr.Video(interactive=False, width=160, height=120)
+                 b4 = gr.Button("Use", scale=0)
+ 
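+     # Each "Use" button copies that example's stored path into the main video input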
+     b1.click(fn=lambda path: path, inputs=video_path_1, outputs=video_input)
+     b2.click(fn=lambda path: path, inputs=video_path_2, outputs=video_input)
+     b3.click(fn=lambda path: path, inputs=video_path_3, outputs=video_input)
+     b4.click(fn=lambda path: path, inputs=video_path_4, outputs=video_input)
+ 
+     gr.Markdown("## 📁 Example videos")
+     # === On dataset change, load the model and refresh the examples ===
+     dataset_selector.change(
+         fn=load_model_and_examples,
+         inputs=dataset_selector,
+         outputs=[current_model, examples_output,
+                  v1, video_path_1, m1, v2, video_path_2, m2,
+                  v3, video_path_3, m3, v4, video_path_4, m4,
+                  button_classify],
+     )
+ 
+ if __name__ == "__main__":
+     demo.launch(server_port=8080)
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ gradio
+ torch
+ mediapipe
+ opencv-python
+ numpy
+ pandas
src/__pycache__/islr_model.cpython-39.pyc ADDED
Binary file (807 Bytes)
src/__pycache__/keypoints_utils.cpython-39.pyc ADDED
Binary file (1.23 kB)
src/__pycache__/predict.cpython-39.pyc ADDED
Binary file (1.75 kB)
src/islr/__init__.py ADDED
@@ -0,0 +1 @@
+ from . import *
src/islr/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (181 Bytes)
src/islr/__pycache__/islr_model.cpython-39.pyc ADDED
Binary file (808 Bytes)
src/islr/islr_model.py ADDED
@@ -0,0 +1,13 @@
+ import torch.nn as nn
+ 
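+ # Placeholder classifier. input_dim=225 is the flattened Mediapipe Holistic
+ # vector: (33 pose + 21 left-hand + 21 right-hand landmarks) × 3 coordinates,
+ # matching the output of extract_keypoints_from_frame.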
+ class DummyISLRModel(nn.Module):
+     def __init__(self, input_dim=225, num_classes=100):
+         super().__init__()
+         self.fc = nn.Sequential(
+             nn.Linear(input_dim, 128),
+             nn.ReLU(),
+             nn.Linear(128, num_classes),
+         )
+ 
+     def forward(self, x):
+         return self.fc(x)
src/islr/save_dummy_model.py ADDED
@@ -0,0 +1,12 @@
+ import torch
+ from islr_model import DummyISLRModel
+ 
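+ # Random initialization only: the saved checkpoint is a stand-in so the demo
+ # can run end to end without a trained ISLR model.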
+ model = DummyISLRModel(num_classes=100)
+ for param in model.parameters():
+     if param.dim() > 1:
+         torch.nn.init.xavier_uniform_(param)
+     else:
+         torch.nn.init.zeros_(param)
+ 
+ torch.save(model.state_dict(), "demo_model.pt")
+ print("✅ Model saved as demo_model.pt")
src/pose/__init__.py ADDED
@@ -0,0 +1 @@
+ from . import *
src/pose/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (181 Bytes)
src/pose/__pycache__/keypoints_utils.cpython-39.pyc ADDED
Binary file (1.23 kB)
src/pose/keypoints_utils.py ADDED
@@ -0,0 +1,15 @@
+ import mediapipe as mp
+ import cv2
+ import numpy as np
+ 
+ mp_holistic = mp.solutions.holistic
+ 
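+ # Note: a new Holistic instance is created for every frame (simple, but slow);
+ # if any landmark set is missing, a random 225-dim vector is returned instead.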
+ def extract_keypoints_from_frame(frame):
+     with mp_holistic.Holistic(static_image_mode=True) as holistic:
+         results = holistic.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+         if results.pose_landmarks and results.left_hand_landmarks and results.right_hand_landmarks:
+             pose = np.array([[p.x, p.y, p.z] for p in results.pose_landmarks.landmark])
+             left = np.array([[p.x, p.y, p.z] for p in results.left_hand_landmarks.landmark])
+             right = np.array([[p.x, p.y, p.z] for p in results.right_hand_landmarks.landmark])
+             return np.concatenate([pose, left, right], axis=0).flatten()
+     return np.random.random(33*3 + 21*3*2)  # fallback
src/predict.py ADDED
@@ -0,0 +1,47 @@
+ import cv2
+ import torch
+ import numpy as np
+ from .pose.keypoints_utils import extract_keypoints_from_frame
+ 
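+ # Placeholder class names; a real checkpoint would ship its own gloss vocabulary.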
+ LABELS = [f"Class {i}" for i in range(100)]
+ 
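+ # Classifies a clip from the mean of its per-frame Holistic keypoint vectors.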
+ def predict_from_video(video_path, model=None):
+     cap = cv2.VideoCapture(video_path)
+     keypoints = []
+ 
+     while cap.isOpened():
+         ret, frame = cap.read()
+         if not ret:
+             break
+         keypoint = extract_keypoints_from_frame(frame)
+         keypoints.append(keypoint)
+ 
+     cap.release()
+     if not keypoints:
+         return "No keypoints detected", {}
+ 
+     # Mean-pool the keypoints over all frames into a single 225-dim feature vector
+     x = torch.tensor(np.mean(keypoints, axis=0)).float().unsqueeze(0)
+     with torch.no_grad():
+         logits = model(x)
+         probs = torch.softmax(logits, dim=1).numpy()[0]
+ 
+     top5_idx = probs.argsort()[-5:][::-1]
+     top5_labels = [LABELS[i] for i in top5_idx]
+     # {label: probability} mapping, the format gr.Label expects
+     confidences = {LABELS[i]: float(probs[i]) for i in top5_idx}
+ 
+     return top5_labels[0], confidences
src/simple_demo.py ADDED
@@ -0,0 +1,24 @@
+ import gradio as gr
+ import torch
+ from islr.islr_model import DummyISLRModel
+ from predict import predict_from_video
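+ 
+ # predict_from_video needs a model instance; load the randomly initialized demo
+ # checkpoint (assumes save_dummy_model.py's output, "demo_model.pt", is in the
+ # working directory).
+ model = DummyISLRModel(num_classes=100)
+ model.load_state_dict(torch.load("demo_model.pt", map_location="cpu"))
+ model.eval()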
+ 
+ def classify_video(video):
+     top1, confidences = predict_from_video(video, model=model)
+     return f"Top-1: {top1}", confidences
+ 
+ demo = gr.Interface(
+     fn=classify_video,
+     inputs=gr.Video(sources=["upload", "webcam"], label="🎥 Video (webcam or file)"),
+     outputs=[
+         gr.Text(label="Main prediction"),
+         gr.Label(num_top_classes=5),
+     ],
+     title="🧠 ISLR Demo with Mediapipe and 100 Classes",
+     description="Isolated sign language recognition classifier. Shows the Top-5 most likely classes.",
+ )
+ 
+ if __name__ == "__main__":
+     demo.launch(server_port=9090)
videos/wlasl/book.mp4 ADDED
Binary file (52.5 kB)