Perilon commited on
Commit
52d1010
·
1 Parent(s): 6aac4dc
Files changed (2) hide show
  1. flask_app.py +191 -721
  2. pig.txt +100 -89
flask_app.py CHANGED
@@ -1,625 +1,62 @@
1
- # from flask import Flask, render_template, jsonify, request, send_from_directory, send_file, redirect, url_for, session
2
- # import os, json, threading, time, signal, sys
3
- # from datetime import datetime
4
- # from extract_signed_segments_from_annotations import ClipExtractor, VideoClip
5
- # import logging
6
- # from dotenv import load_dotenv
7
-
8
- # # Load environment variables
9
- # load_dotenv()
10
-
11
- # # Add this near the top with other environment variables
12
- # bypass_auth = os.getenv('BYPASS_AUTH', 'false').lower() == 'true'
13
-
14
- # # Configure logging first
15
- # logging.basicConfig(
16
- # level=logging.INFO,
17
- # format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
18
- # )
19
- # logger = logging.getLogger(__name__)
20
-
21
- # # Hugging Face specific configuration
22
- # is_hf_space = os.getenv('SPACE_ID') is not None
23
- # if is_hf_space:
24
- # logger.info("Running in Hugging Face Spaces environment")
25
- # # Allow insecure transport for development in HF
26
- # os.environ['OAUTHLIB_INSECURE_TRANSPORT'] = '1'
27
- # # Ensure port is set correctly
28
- # os.environ['PORT'] = '7860'
29
-
30
-
31
-
32
- # app = Flask(__name__)
33
- # app.secret_key = os.getenv('SECRET_KEY', 'dev_key_for_testing')
34
-
35
- # # Configure session for HF
36
- # if is_hf_space:
37
- # app.config['SESSION_COOKIE_SECURE'] = False
38
- # app.config['SESSION_COOKIE_HTTPONLY'] = True
39
- # app.config['SESSION_COOKIE_SAMESITE'] = None # Add this line
40
- # app.config['PERMANENT_SESSION_LIFETIME'] = 86400 # 24 hours
41
-
42
- # # Directory paths
43
- # VIDEO_DIR = os.path.abspath("data/videos")
44
- # ANNOTATIONS_DIR = os.path.abspath("data/annotations")
45
- # TEMP_DIR = os.path.abspath("data/temp")
46
- # WORD_TIMESTAMPS_DIR = os.path.abspath("data/word_timestamps")
47
- # ALIGNMENTS_DIR = os.path.abspath("data/alignments")
48
- # TRANSCRIPTS_DIR = os.path.abspath("data/transcripts")
49
-
50
- # # Ensure all required directories exist
51
- # for directory in [VIDEO_DIR, ANNOTATIONS_DIR, TEMP_DIR, WORD_TIMESTAMPS_DIR, ALIGNMENTS_DIR, TRANSCRIPTS_DIR]:
52
- # os.makedirs(directory, exist_ok=True)
53
-
54
- # # Global dictionaries for progress tracking
55
- # clip_extraction_status = {}
56
- # transcription_progress_status = {}
57
-
58
- # # Graceful shutdown handler
59
- # def graceful_shutdown(signum, frame):
60
- # """Handle graceful shutdown on signals."""
61
- # logger.info(f"Received signal {signum}, shutting down gracefully...")
62
- # # Clean up as needed here
63
- # sys.exit(0)
64
-
65
- # # Register signal handlers
66
- # signal.signal(signal.SIGTERM, graceful_shutdown)
67
- # signal.signal(signal.SIGINT, graceful_shutdown)
68
-
69
- # # Login required decorator
70
- # def login_required(f):
71
- # from functools import wraps
72
- # @wraps(f)
73
- # def decorated_function(*args, **kwargs):
74
- # if 'user' not in session:
75
- # logger.info(f"User not in session, redirecting to login")
76
- # return redirect(url_for('login'))
77
- # return f(*args, **kwargs)
78
- # return decorated_function
79
-
80
- # # Allow specific users (for testing)
81
- # def is_allowed_user(username):
82
- # allowed_users_env = os.getenv('ALLOWED_USERS', 'Perilon') # Default to your username
83
- # allowed_users = [user.strip() for user in allowed_users_env.split(',')]
84
- # return username in allowed_users or not is_hf_space # Allow all users in local dev
85
-
86
- # def update_extraction_progress(video_id, current, total):
87
- # percent = int((current / total) * 100)
88
- # clip_extraction_status[video_id] = {"current": current, "total": total, "percent": percent}
89
-
90
- # def run_clip_extraction(video_id):
91
- # try:
92
- # base_dir = app.root_path
93
- # extractor = ClipExtractor(base_dir)
94
- # extractor.extract_clips_from_annotations(
95
- # video_id,
96
- # progress_callback=lambda current, total: update_extraction_progress(video_id, current, total)
97
- # )
98
- # if video_id in clip_extraction_status:
99
- # status = clip_extraction_status[video_id]
100
- # if status.get("percent", 0) < 100:
101
- # update_extraction_progress(video_id, status["total"], status["total"])
102
- # else:
103
- # update_extraction_progress(video_id, 1, 1)
104
- # except Exception as e:
105
- # logger.error(f"Error during clip extraction for {video_id}: {str(e)}")
106
- # clip_extraction_status[video_id] = {"error": str(e)}
107
-
108
- # def run_transcription(video_id):
109
- # try:
110
- # base_dir = app.root_path
111
- # output_path = os.path.join(WORD_TIMESTAMPS_DIR, f"{video_id}_word_timestamps.json")
112
-
113
- # # Check if transcription already exists and is valid.
114
- # if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
115
- # logger.info(f"Using cached transcription for video {video_id}.")
116
- # transcription_progress_status[video_id] = {"status": "completed", "percent": 100}
117
- # return
118
-
119
- # video_path = os.path.join(base_dir, "data", "videos", f"{video_id}.mp4")
120
- # transcription_progress_status[video_id] = {"status": "started", "percent": 10}
121
-
122
- # # Check if AWS credentials are available
123
- # if not os.environ.get('AWS_ACCESS_KEY_ID') or not os.environ.get('AWS_SECRET_ACCESS_KEY'):
124
- # logger.warning("AWS credentials not found. Transcription will not work properly.")
125
- # transcription_progress_status[video_id] = {
126
- # "status": "error",
127
- # "percent": 0,
128
- # "message": "AWS credentials missing"
129
- # }
130
- # return
131
-
132
- # # Run transcription via the imported function from get_transcription_with_amazon.py
133
- # from get_transcription_with_amazon import get_word_timestamps
134
- # word_timestamps = get_word_timestamps(video_path)
135
-
136
- # with open(output_path, "w") as f:
137
- # json.dump(word_timestamps, f, indent=4)
138
-
139
- # transcription_progress_status[video_id] = {"status": "completed", "percent": 100}
140
- # except Exception as e:
141
- # logger.error(f"Error during transcription for {video_id}: {str(e)}")
142
- # transcription_progress_status[video_id] = {"status": "error", "percent": 0, "message": str(e)}
143
-
144
- # # Authentication routes
145
- # @app.route('/login')
146
- # def login():
147
- # """Handle login for both local and HF environments."""
148
- # logger.info(f"Login route called. Headers: {dict(request.headers)}")
149
-
150
- # if is_hf_space:
151
- # username = request.headers.get('X-Spaces-Username')
152
- # logger.info(f"Username from headers in login: {username}")
153
-
154
- # if username and is_allowed_user(username):
155
- # session['user'] = {'name': username, 'is_hf': True}
156
- # return redirect(url_for('index'))
157
- # else:
158
- # # Redirect to the HF auth endpoint
159
- # return redirect('/auth')
160
- # else:
161
- # # For local development
162
- # session['user'] = {'name': 'LocalDeveloper', 'is_mock': True}
163
- # return redirect(url_for('index'))
164
-
165
- # @app.route('/auth/callback')
166
- # def auth_callback():
167
- # """This route will be called by Hugging Face after successful authentication."""
168
- # logger.info(f"Auth callback called. Headers: {dict(request.headers)}")
169
-
170
- # if is_hf_space:
171
- # # In Hugging Face Spaces, the user info is available in the request headers
172
- # username = request.headers.get('X-Spaces-Username')
173
- # if username:
174
- # session['user'] = {'name': username, 'is_hf': True}
175
- # return redirect(url_for('index'))
176
- # else:
177
- # return render_template('error.html', message="Authentication failed. No username provided.")
178
- # return redirect(url_for('login'))
179
-
180
- # # Replace the health check route with this improved version
181
- # @app.route('/health')
182
- # def health_check():
183
- # """Health check endpoint for container verification."""
184
- # # Log environment variables for debugging
185
- # env_vars = {
186
- # "FLASK_ENV": os.environ.get('FLASK_ENV', 'production'),
187
- # "DEBUG": os.environ.get('DEBUG', 'Not set'),
188
- # "SPACE_ID": os.environ.get('SPACE_ID', 'Not set'),
189
- # "BYPASS_AUTH": os.environ.get('BYPASS_AUTH', 'Not set'),
190
- # "SECRET_KEY": os.environ.get('SECRET_KEY', 'Not set')[:5] + '...' if os.environ.get('SECRET_KEY') else 'Not set'
191
- # }
192
-
193
- # logger.info(f"Health check called. Environment: {env_vars}")
194
-
195
- # # Get session information for debugging
196
- # session_info = dict(session) if session else None
197
- # session_keys = list(session.keys()) if session else []
198
-
199
- # return jsonify({
200
- # "status": "healthy",
201
- # "environment": env_vars,
202
- # "session_keys": session_keys,
203
- # "is_hf_space": is_hf_space,
204
- # "bypass_auth": bypass_auth,
205
- # "directories": {
206
- # "videos": os.path.exists(VIDEO_DIR),
207
- # "annotations": os.path.exists(ANNOTATIONS_DIR),
208
- # "temp": os.path.exists(TEMP_DIR)
209
- # }
210
- # })
211
-
212
- # @app.route('/auth')
213
- # def auth():
214
- # """This route handles HF authentication."""
215
- # logger.info(f"Auth route called. Headers: {dict(request.headers)}")
216
-
217
- # # Force bypass auth to be true for debugging
218
- # bypass_auth = True
219
-
220
- # # If bypass is enabled, authenticate immediately
221
- # if bypass_auth:
222
- # logger.info("Auth bypass enabled, setting default user")
223
- # session['user'] = {'name': 'Perilon', 'is_hf': True}
224
- # return redirect(url_for('index'))
225
-
226
- # # Normal authentication logic
227
- # username = request.headers.get('X-Spaces-Username')
228
- # logger.info(f"Username from headers in auth: {username}")
229
-
230
- # if is_hf_space and username and is_allowed_user(username):
231
- # logger.info(f"Setting user in session: {username}")
232
- # session['user'] = {'name': username, 'is_hf': True}
233
- # return redirect(url_for('index'))
234
- # elif not is_hf_space:
235
- # # For local development
236
- # session['user'] = {'name': 'LocalDeveloper', 'is_mock': True}
237
- # return redirect(url_for('index'))
238
- # else:
239
- # # For HF with no valid username yet
240
- # return render_template('error.html', message=
241
- # "Waiting for Hugging Face authentication. If you continue to see this message, "
242
- # "please make sure you're logged into Hugging Face and your username is allowed.")
243
-
244
- # @app.before_request
245
- # def check_auth():
246
- # """Check authentication before processing requests."""
247
- # # Skip authentication for certain routes and static files
248
- # if request.path in ['/login', '/logout', '/auth', '/auth/callback', '/debug', '/health'] or request.path.startswith('/static/'):
249
- # return
250
-
251
- # # Force bypass auth to be true for debugging
252
- # bypass_auth = True
253
-
254
- # # Log all request paths to help troubleshoot
255
- # logger.debug(f"Request path: {request.path}, User in session: {'user' in session}")
256
-
257
- # if bypass_auth:
258
- # # Set default user for bypass mode if not already set
259
- # if 'user' not in session:
260
- # session['user'] = {'name': 'Perilon', 'is_hf': True}
261
- # return
262
-
263
- # if is_hf_space:
264
- # # Check for HF username header
265
- # username = request.headers.get('X-Spaces-Username')
266
-
267
- # if 'user' in session:
268
- # logger.debug(f"User in session: {session['user']}")
269
- # return
270
-
271
- # if username and is_allowed_user(username):
272
- # logger.info(f"Setting user from headers: {username}")
273
- # session['user'] = {'name': username, 'is_hf': True}
274
- # return
275
-
276
- # # No valid user in session or headers
277
- # logger.info(f"No authenticated user, redirecting to /auth")
278
- # return redirect('/auth')
279
- # elif 'user' not in session:
280
- # return redirect(url_for('login'))
281
-
282
- # @app.route('/logout')
283
- # def logout():
284
- # """Clear session and redirect to login."""
285
- # session.clear() # Clear the entire session
286
- # if is_hf_space:
287
- # return redirect('/auth/logout')
288
- # return redirect(url_for('login'))
289
-
290
- # @app.route('/debug')
291
- # def debug_info():
292
- # """Return debug information."""
293
- # cookies = {key: request.cookies.get(key) for key in request.cookies.keys()}
294
-
295
- # info = {
296
- # "session": dict(session) if session else None,
297
- # "headers": dict(request.headers),
298
- # "cookies": cookies,
299
- # "is_hf_space": is_hf_space,
300
- # "allowed_users": os.getenv('ALLOWED_USERS', 'Perilon'),
301
- # "app_config": {k: str(v) for k, v in app.config.items() if k in
302
- # ['SESSION_COOKIE_SECURE', 'SESSION_COOKIE_HTTPONLY',
303
- # 'SESSION_COOKIE_SAMESITE', 'PERMANENT_SESSION_LIFETIME']},
304
- # }
305
- # return jsonify(info)
306
-
307
- # # Main application routes
308
- # @app.route('/')
309
- # @login_required
310
- # def index():
311
- # """Main entry point, redirects to video selection."""
312
- # return redirect(url_for('select_video'))
313
-
314
- # @app.route('/select_video')
315
- # @login_required
316
- # def select_video():
317
- # """Page to select a video for annotation."""
318
- # if not os.path.exists(VIDEO_DIR):
319
- # return render_template('error.html', message="Video directory not found.")
320
- # videos = [f for f in os.listdir(VIDEO_DIR) if f.endswith('.mp4')]
321
- # video_ids = [os.path.splitext(v)[0] for v in videos]
322
- # return render_template('select_video.html', video_ids=video_ids, user=session.get('user'))
323
-
324
- # @app.route('/player/<video_id>')
325
- # @login_required
326
- # def player(video_id):
327
- # """Video player page for annotation."""
328
- # return render_template('player.html', video_id=video_id, user=session.get('user'))
329
-
330
- # @app.route('/videos')
331
- # @login_required
332
- # def get_videos():
333
- # """API endpoint to get available videos."""
334
- # if not os.path.exists(VIDEO_DIR):
335
- # return jsonify({'error': 'Video directory not found'}), 404
336
- # videos = [f for f in os.listdir(VIDEO_DIR) if f.endswith(('.mp4', '.avi', '.mov'))]
337
- # if not videos:
338
- # return jsonify({'error': 'No videos found'}), 404
339
- # return jsonify(videos)
340
-
341
- # @app.route('/video/<path:filename>')
342
- # @login_required
343
- # def serve_video(filename):
344
- # """Serve a video file."""
345
- # if not os.path.exists(os.path.join(VIDEO_DIR, filename)):
346
- # return jsonify({'error': 'Video not found'}), 404
347
- # return send_from_directory(VIDEO_DIR, filename)
348
-
349
- # @app.route('/save_annotations', methods=['POST'])
350
- # @login_required
351
- # def save_annotations():
352
- # """Save annotation data."""
353
- # data = request.json
354
- # if not data or 'video' not in data or 'timestamps' not in data:
355
- # return jsonify({'success': False, 'message': 'Invalid data'}), 400
356
-
357
- # annotation_file = os.path.join(ANNOTATIONS_DIR, f"{data['video']}_annotations.json")
358
- # annotation_data = {
359
- # "video_name": data['video'] + ".mp4",
360
- # "timestamps": sorted(data['timestamps']),
361
- # "annotation_date": datetime.now().isoformat(),
362
- # "annotated_by": session.get('user', {}).get('name', 'unknown')
363
- # }
364
- # with open(annotation_file, 'w') as f:
365
- # json.dump(annotation_data, f, indent=4)
366
- # return jsonify({'success': True, 'message': 'Annotations saved successfully'})
367
-
368
- # @app.route('/get_annotations/<path:video_name>')
369
- # @login_required
370
- # def get_annotations(video_name):
371
- # """Get annotations for a video."""
372
- # annotation_file = os.path.join(ANNOTATIONS_DIR, f"{video_name}_annotations.json")
373
- # if not os.path.exists(annotation_file):
374
- # return jsonify({'error': 'No annotations found'}), 404
375
- # with open(annotation_file, 'r') as f:
376
- # annotations = json.load(f)
377
- # return jsonify(annotations)
378
-
379
- # @app.route("/alignment/<video_id>")
380
- # @login_required
381
- # def alignment_mode(video_id):
382
- # """Page for aligning sign language with transcribed text."""
383
- # annotation_file = os.path.join(ANNOTATIONS_DIR, f"{video_id}_annotations.json")
384
- # if not os.path.exists(annotation_file):
385
- # return render_template("error.html", message="No annotations found for this video. Please annotate the video first.")
386
- # with open(annotation_file, 'r') as f:
387
- # annotations = json.load(f)
388
- # return render_template(
389
- # "alignment.html",
390
- # video_id=video_id,
391
- # total_clips=len(annotations['timestamps']) - 1,
392
- # user=session.get('user')
393
- # )
394
-
395
- # @app.route("/api/transcript/<video_id>")
396
- # @login_required
397
- # def get_transcript(video_id):
398
- # """Get transcript for a video."""
399
- # timestamps_file = os.path.join(WORD_TIMESTAMPS_DIR, f"{video_id}_word_timestamps.json")
400
- # logger.info(f"Attempting to load word timestamps from: {timestamps_file}")
401
- # if not os.path.exists(timestamps_file):
402
- # logger.warning(f"Word timestamps file not found: {timestamps_file}")
403
- # return jsonify({
404
- # "status": "error",
405
- # "message": "No word timestamps found for this video"
406
- # }), 404
407
- # try:
408
- # with open(timestamps_file, 'r') as f:
409
- # word_data = json.load(f)
410
- # full_text = " ".join(item["punctuated_word"] for item in word_data)
411
- # words_with_times = [{
412
- # "word": item["punctuated_word"],
413
- # "start": float(item["start_time"]),
414
- # "end": float(item["end_time"])
415
- # } for item in word_data]
416
- # logger.info(f"Successfully created transcript ({len(full_text)} characters)")
417
- # return jsonify({
418
- # "status": "success",
419
- # "text": full_text,
420
- # "words": words_with_times
421
- # })
422
- # except Exception as e:
423
- # logger.error(f"Error processing word timestamps: {str(e)}")
424
- # return jsonify({
425
- # "status": "error",
426
- # "message": f"Error processing word timestamps: {str(e)}"
427
- # }), 500
428
-
429
- # @app.route("/api/word_timestamps/<video_id>")
430
- # @login_required
431
- # def get_word_timestamps(video_id):
432
- # """Get word-level timestamps for a video."""
433
- # timestamps_file = os.path.join(WORD_TIMESTAMPS_DIR, f"{video_id}_word_timestamps.json")
434
- # logger.info(f"Attempting to load word timestamps from: {timestamps_file}")
435
- # if not os.path.exists(timestamps_file):
436
- # logger.warning(f"Word timestamps file not found: {timestamps_file}")
437
- # return jsonify({
438
- # "status": "error",
439
- # "message": "No word timestamps found for this video"
440
- # }), 404
441
- # try:
442
- # with open(timestamps_file, 'r') as f:
443
- # word_data = json.load(f)
444
- # logger.info(f"Successfully loaded {len(word_data)} word timestamps")
445
- # return jsonify({
446
- # "status": "success",
447
- # "words": word_data
448
- # })
449
- # except Exception as e:
450
- # logger.error(f"Error processing word timestamps: {str(e)}")
451
- # return jsonify({
452
- # "status": "error",
453
- # "message": f"Error processing word timestamps: {str(e)}"
454
- # }), 500
455
-
456
- # @app.route("/api/clips/<video_id>")
457
- # @login_required
458
- # def get_video_clips(video_id):
459
- # """Get clips for a video."""
460
- # try:
461
- # annotation_file = os.path.join(ANNOTATIONS_DIR, f"{video_id}_annotations.json")
462
- # if not os.path.exists(annotation_file):
463
- # raise FileNotFoundError("Annotations not found")
464
- # with open(annotation_file, 'r') as f:
465
- # annotations = json.load(f)
466
- # timestamps = annotations['timestamps']
467
- # clips = []
468
- # for i in range(len(timestamps)-1):
469
- # clips.append({
470
- # "index": i,
471
- # "start": timestamps[i],
472
- # "end": timestamps[i+1],
473
- # "path": f"/clip/{video_id}/{i}"
474
- # })
475
- # return jsonify({
476
- # "status": "success",
477
- # "clips": clips
478
- # })
479
- # except Exception as e:
480
- # logger.error(f"Error getting clips: {str(e)}")
481
- # return jsonify({
482
- # "status": "error",
483
- # "message": str(e)
484
- # }), 500
485
-
486
- # @app.route("/clip/<video_id>/<int:clip_index>")
487
- # @login_required
488
- # def serve_clip(video_id, clip_index):
489
- # """Serve a specific clip."""
490
- # clip_path = os.path.join(
491
- # TEMP_DIR,
492
- # f"{video_id}_clip_{clip_index:03d}.mp4"
493
- # )
494
- # logger.info(f"Attempting to serve clip: {clip_path}")
495
- # if not os.path.exists(clip_path):
496
- # logger.error(f"Clip not found: {clip_path}")
497
- # return jsonify({
498
- # "status": "error",
499
- # "message": "Clip not found"
500
- # }), 404
501
- # return send_file(clip_path, mimetype="video/mp4")
502
-
503
- # @app.route("/api/save_alignments", methods=["POST"])
504
- # @login_required
505
- # def save_alignments():
506
- # """Save alignment data."""
507
- # try:
508
- # data = request.json
509
- # if not data or 'video_id' not in data or 'alignments' not in data:
510
- # return jsonify({'success': False, 'message': 'Invalid data'}), 400
511
-
512
- # # Add user information to the alignments
513
- # for alignment in data['alignments']:
514
- # if alignment:
515
- # alignment['aligned_by'] = session.get('user', {}).get('name', 'unknown')
516
-
517
- # output_path = os.path.join(ALIGNMENTS_DIR, f"{data['video_id']}.json")
518
- # with open(output_path, "w") as f:
519
- # json.dump(data['alignments'], f, indent=2)
520
- # return jsonify({
521
- # "success": True,
522
- # "message": "Alignments saved successfully"
523
- # })
524
- # except Exception as e:
525
- # logger.error(f"Error saving alignments: {str(e)}")
526
- # return jsonify({
527
- # "success": False,
528
- # "message": str(e)
529
- # }), 500
530
-
531
- # @app.route("/api/extract_clips/<video_id>")
532
- # @login_required
533
- # def extract_clips_for_video(video_id):
534
- # """Extract clips and start transcription for a video."""
535
- # status = clip_extraction_status.get(video_id, {})
536
- # if status.get("percent", 0) < 100:
537
- # thread = threading.Thread(target=run_clip_extraction, args=(video_id,))
538
- # thread.start()
539
- # if video_id not in transcription_progress_status or transcription_progress_status.get(video_id, {}).get("percent", 0) < 100:
540
- # thread_trans = threading.Thread(target=run_transcription, args=(video_id,))
541
- # thread_trans.start()
542
- # return jsonify({"status": "started"})
543
-
544
- # @app.route("/api/clip_progress/<video_id>")
545
- # @login_required
546
- # def clip_progress(video_id):
547
- # """Get clip extraction progress."""
548
- # progress = clip_extraction_status.get(video_id, {"current": 0, "total": 0, "percent": 0})
549
- # return jsonify(progress)
550
-
551
- # @app.route("/api/transcription_progress/<video_id>")
552
- # @login_required
553
- # def transcription_progress(video_id):
554
- # """Get transcription progress."""
555
- # progress = transcription_progress_status.get(video_id, {"status": "not started", "percent": 0})
556
- # return jsonify(progress)
557
-
558
- # if __name__ == '__main__':
559
- # try:
560
- # # Print diagnostic information
561
- # print("=" * 50)
562
- # print(f"Starting app with configuration:")
563
- # print(f"- Running in HF Space: {is_hf_space}")
564
- # print(f"- Auth bypass: {bypass_auth}")
565
- # print(f"- Port: {os.getenv('PORT', 5000)}")
566
- # print(f"- Available videos: {os.listdir(VIDEO_DIR) if os.path.exists(VIDEO_DIR) else 'None'}")
567
- # print("=" * 50)
568
-
569
- # port = int(os.getenv('PORT', 5000))
570
- # app.run(host='0.0.0.0', port=port, debug=True)
571
- # except Exception as e:
572
- # print(f"Error starting the application: {e}")
573
- # import traceback
574
- # traceback.print_exc()
575
-
576
-
577
- from flask import Flask, render_template, jsonify, request, send_from_directory, send_file, redirect, url_for, session
578
- import os, json, threading, time, signal, sys
579
- from datetime import datetime
580
- from extract_signed_segments_from_annotations import ClipExtractor, VideoClip
581
- import logging
582
- from dotenv import load_dotenv
583
  import boto3
584
- from botocore.exceptions import ClientError
 
 
 
 
 
 
585
  import tempfile
 
 
586
  import uuid
587
- import requests
 
 
 
 
 
588
  from urllib.parse import urlparse
589
 
590
  # Load environment variables
591
  load_dotenv()
592
 
593
  # Add this near the top with other environment variables
594
- bypass_auth = os.getenv('BYPASS_AUTH', 'false').lower() == 'true'
595
 
596
  # Configure logging first
597
  logging.basicConfig(
598
  level=logging.INFO,
599
- format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
600
  )
601
  logger = logging.getLogger(__name__)
602
 
603
  # Hugging Face specific configuration
604
- is_hf_space = os.getenv('SPACE_ID') is not None
605
  if is_hf_space:
606
  logger.info("Running in Hugging Face Spaces environment")
607
  # Allow insecure transport for development in HF
608
- os.environ['OAUTHLIB_INSECURE_TRANSPORT'] = '1'
609
  # Ensure port is set correctly
610
- os.environ['PORT'] = '7860'
611
 
612
-
 
 
 
 
 
 
613
 
614
  app = Flask(__name__)
615
- app.secret_key = os.getenv('SECRET_KEY', 'dev_key_for_testing')
616
 
617
  # Configure session for HF
618
  if is_hf_space:
619
- app.config['SESSION_COOKIE_SECURE'] = False
620
- app.config['SESSION_COOKIE_HTTPONLY'] = True
621
- app.config['SESSION_COOKIE_SAMESITE'] = None # Add this line
622
- app.config['PERMANENT_SESSION_LIFETIME'] = 86400 # 24 hours
623
 
624
  # Directory paths
625
  VIDEO_DIR = os.path.abspath("data/videos")
@@ -630,9 +67,9 @@ ALIGNMENTS_DIR = os.path.abspath("data/alignments")
630
  TRANSCRIPTS_DIR = os.path.abspath("data/transcripts")
631
 
632
  # S3 configuration
633
- S3_BUCKET = os.getenv('S3_BUCKET', "sorenson-ai-sb-scratch")
634
- S3_VIDEO_PREFIX = os.getenv('S3_VIDEO_PREFIX', "awilkinson/kylie_dataset_videos_for_alignment_webapp/")
635
- USE_S3_FOR_VIDEOS = os.getenv('USE_S3_FOR_VIDEOS', 'true').lower() == 'true'
636
 
637
  # Ensure all required directories exist
638
  for directory in [VIDEO_DIR, ANNOTATIONS_DIR, TEMP_DIR, WORD_TIMESTAMPS_DIR, ALIGNMENTS_DIR, TRANSCRIPTS_DIR]:
@@ -642,20 +79,22 @@ for directory in [VIDEO_DIR, ANNOTATIONS_DIR, TEMP_DIR, WORD_TIMESTAMPS_DIR, ALI
642
  clip_extraction_status = {}
643
  transcription_progress_status = {}
644
 
 
645
  # S3 helper functions
646
  def get_s3_client():
647
  """Get a boto3 S3 client."""
648
  return boto3.client(
649
- 's3',
650
- region_name=os.environ.get('AWS_DEFAULT_REGION', 'us-west-2'),
651
- aws_access_key_id=os.environ.get('AWS_ACCESS_KEY_ID'),
652
- aws_secret_access_key=os.environ.get('AWS_SECRET_ACCESS_KEY')
653
  )
654
 
655
- def list_s3_videos():
 
656
  """List all videos in the S3 bucket with the given prefix."""
657
- if not os.environ.get('AWS_ACCESS_KEY_ID') or not os.environ.get('AWS_SECRET_ACCESS_KEY'):
658
- logger.warning("AWS credentials not found. Returning empty video list.")
659
  return []
660
 
661
  try:
@@ -665,15 +104,15 @@ def list_s3_videos():
665
  Prefix=S3_VIDEO_PREFIX
666
  )
667
 
668
- if 'Contents' not in response:
669
  logger.warning(f"No videos found in S3 bucket {S3_BUCKET} with prefix {S3_VIDEO_PREFIX}")
670
  return []
671
 
672
  # Extract video IDs (filenames without extension) from S3 keys
673
  videos = []
674
- for item in response['Contents']:
675
- key = item['Key']
676
- if key.endswith('.mp4'):
677
  # Extract just the filename without extension
678
  filename = os.path.basename(key)
679
  video_id = os.path.splitext(filename)[0]
@@ -684,7 +123,8 @@ def list_s3_videos():
684
  logger.error(f"Error listing S3 videos: {str(e)}")
685
  return []
686
 
687
- def download_video_from_s3(video_id):
 
688
  """Download a video from S3 to the local videos directory."""
689
  video_filename = f"{video_id}.mp4"
690
  s3_key = f"{S3_VIDEO_PREFIX}{video_filename}"
@@ -705,7 +145,8 @@ def download_video_from_s3(video_id):
705
  logger.error(f"Error downloading video from S3: {str(e)}")
706
  return None
707
 
708
- def generate_presigned_url(video_id, expiration=3600):
 
709
  """Generate a presigned URL for direct access to the video in S3."""
710
  video_filename = f"{video_id}.mp4"
711
  s3_key = f"{S3_VIDEO_PREFIX}{video_filename}"
@@ -713,8 +154,8 @@ def generate_presigned_url(video_id, expiration=3600):
713
  try:
714
  s3_client = get_s3_client()
715
  url = s3_client.generate_presigned_url(
716
- 'get_object',
717
- Params={'Bucket': S3_BUCKET, 'Key': s3_key},
718
  ExpiresIn=expiration
719
  )
720
  return url
@@ -722,6 +163,7 @@ def generate_presigned_url(video_id, expiration=3600):
722
  logger.error(f"Error generating presigned URL: {str(e)}")
723
  return None
724
 
 
725
  # Graceful shutdown handler
726
  def graceful_shutdown(signum, frame):
727
  """Handle graceful shutdown on signals."""
@@ -729,32 +171,37 @@ def graceful_shutdown(signum, frame):
729
  # Clean up as needed here
730
  sys.exit(0)
731
 
 
732
  # Register signal handlers
733
  signal.signal(signal.SIGTERM, graceful_shutdown)
734
  signal.signal(signal.SIGINT, graceful_shutdown)
735
 
 
736
  # Login required decorator
737
  def login_required(f):
738
  from functools import wraps
739
  @wraps(f)
740
  def decorated_function(*args, **kwargs):
741
- if 'user' not in session:
742
  logger.info(f"User not in session, redirecting to login")
743
- return redirect(url_for('login'))
744
  return f(*args, **kwargs)
745
  return decorated_function
746
 
 
747
  # Allow specific users (for testing)
748
- def is_allowed_user(username):
749
- allowed_users_env = os.getenv('ALLOWED_USERS', 'Perilon') # Default to your username
750
- allowed_users = [user.strip() for user in allowed_users_env.split(',')]
751
  return username in allowed_users or not is_hf_space # Allow all users in local dev
752
 
753
- def update_extraction_progress(video_id, current, total):
 
754
  percent = int((current / total) * 100)
755
  clip_extraction_status[video_id] = {"current": current, "total": total, "percent": percent}
756
 
757
- def run_clip_extraction(video_id):
 
758
  try:
759
  base_dir = app.root_path
760
  extractor = ClipExtractor(base_dir)
@@ -772,7 +219,8 @@ def run_clip_extraction(video_id):
772
  logger.error(f"Error during clip extraction for {video_id}: {str(e)}")
773
  clip_extraction_status[video_id] = {"error": str(e)}
774
 
775
- def run_transcription(video_id):
 
776
  try:
777
  base_dir = app.root_path
778
  output_path = os.path.join(WORD_TIMESTAMPS_DIR, f"{video_id}_word_timestamps.json")
@@ -799,8 +247,8 @@ def run_transcription(video_id):
799
  transcription_progress_status[video_id] = {"status": "started", "percent": 10}
800
 
801
  # Check if AWS credentials are available
802
- if not os.environ.get('AWS_ACCESS_KEY_ID') or not os.environ.get('AWS_SECRET_ACCESS_KEY'):
803
- logger.warning("AWS credentials not found. Transcription will not work properly.")
804
  transcription_progress_status[video_id] = {
805
  "status": "error",
806
  "percent": 0,
@@ -820,58 +268,61 @@ def run_transcription(video_id):
820
  logger.error(f"Error during transcription for {video_id}: {str(e)}")
821
  transcription_progress_status[video_id] = {"status": "error", "percent": 0, "message": str(e)}
822
 
 
823
  # Authentication routes
824
- @app.route('/login')
825
  def login():
826
  """Handle login for both local and HF environments."""
827
- logger.info(f"Login route called. Headers: {dict(request.headers)}")
828
 
829
  if is_hf_space:
830
- username = request.headers.get('X-Spaces-Username')
831
  logger.info(f"Username from headers in login: {username}")
832
 
833
  if username and is_allowed_user(username):
834
- session['user'] = {'name': username, 'is_hf': True}
835
- return redirect(url_for('index'))
836
  else:
837
  # Redirect to the HF auth endpoint
838
- return redirect('/auth')
839
  else:
840
  # For local development
841
- session['user'] = {'name': 'LocalDeveloper', 'is_mock': True}
842
- return redirect(url_for('index'))
843
 
844
- @app.route('/auth/callback')
 
845
  def auth_callback():
846
  """This route will be called by Hugging Face after successful authentication."""
847
- logger.info(f"Auth callback called. Headers: {dict(request.headers)}")
848
 
849
  if is_hf_space:
850
  # In Hugging Face Spaces, the user info is available in the request headers
851
- username = request.headers.get('X-Spaces-Username')
852
  if username:
853
- session['user'] = {'name': username, 'is_hf': True}
854
- return redirect(url_for('index'))
855
  else:
856
- return render_template('error.html', message="Authentication failed. No username provided.")
857
- return redirect(url_for('login'))
 
858
 
859
- @app.route('/health')
860
  def health_check():
861
  """Health check endpoint for container verification."""
862
  # Log environment variables for debugging
863
  env_vars = {
864
- "FLASK_ENV": os.environ.get('FLASK_ENV', 'production'),
865
- "DEBUG": os.environ.get('DEBUG', 'Not set'),
866
- "SPACE_ID": os.environ.get('SPACE_ID', 'Not set'),
867
- "BYPASS_AUTH": os.environ.get('BYPASS_AUTH', 'Not set'),
868
- "SECRET_KEY": os.environ.get('SECRET_KEY', 'Not set')[:5] + '...' if os.environ.get('SECRET_KEY') else 'Not set',
869
- "S3_BUCKET": os.environ.get('S3_BUCKET', 'Not set'),
870
- "S3_VIDEO_PREFIX": os.environ.get('S3_VIDEO_PREFIX', 'Not set'),
871
- "USE_S3_FOR_VIDEOS": os.environ.get('USE_S3_FOR_VIDEOS', 'Not set')
872
  }
873
 
874
- logger.info(f"Health check called. Environment: {env_vars}")
875
 
876
  # Get session information for debugging
877
  session_info = dict(session) if session else None
@@ -890,10 +341,11 @@ def health_check():
890
  }
891
  })
892
 
893
- @app.route('/auth')
 
894
  def auth():
895
  """This route handles HF authentication."""
896
- logger.info(f"Auth route called. Headers: {dict(request.headers)}")
897
 
898
  # Force bypass auth to be true for debugging
899
  bypass_auth = True
@@ -901,32 +353,33 @@ def auth():
901
  # If bypass is enabled, authenticate immediately
902
  if bypass_auth:
903
  logger.info("Auth bypass enabled, setting default user")
904
- session['user'] = {'name': 'Perilon', 'is_hf': True}
905
- return redirect(url_for('index'))
906
 
907
  # Normal authentication logic
908
- username = request.headers.get('X-Spaces-Username')
909
  logger.info(f"Username from headers in auth: {username}")
910
 
911
  if is_hf_space and username and is_allowed_user(username):
912
  logger.info(f"Setting user in session: {username}")
913
- session['user'] = {'name': username, 'is_hf': True}
914
- return redirect(url_for('index'))
915
  elif not is_hf_space:
916
  # For local development
917
- session['user'] = {'name': 'LocalDeveloper', 'is_mock': True}
918
- return redirect(url_for('index'))
919
  else:
920
  # For HF with no valid username yet
921
- return render_template('error.html', message=
922
- "Waiting for Hugging Face authentication. If you continue to see this message, "
923
  "please make sure you're logged into Hugging Face and your username is allowed.")
924
 
 
925
  @app.before_request
926
  def check_auth():
927
  """Check authentication before processing requests."""
928
  # Skip authentication for certain routes and static files
929
- if request.path in ['/login', '/logout', '/auth', '/auth/callback', '/debug', '/health'] or request.path.startswith('/static/'):
930
  return
931
 
932
  # Force bypass auth to be true for debugging
@@ -937,38 +390,40 @@ def check_auth():
937
 
938
  if bypass_auth:
939
  # Set default user for bypass mode if not already set
940
- if 'user' not in session:
941
- session['user'] = {'name': 'Perilon', 'is_hf': True}
942
  return
943
 
944
  if is_hf_space:
945
  # Check for HF username header
946
- username = request.headers.get('X-Spaces-Username')
947
 
948
- if 'user' in session:
949
  logger.debug(f"User in session: {session['user']}")
950
  return
951
 
952
  if username and is_allowed_user(username):
953
  logger.info(f"Setting user from headers: {username}")
954
- session['user'] = {'name': username, 'is_hf': True}
955
  return
956
 
957
  # No valid user in session or headers
958
  logger.info(f"No authenticated user, redirecting to /auth")
959
- return redirect('/auth')
960
- elif 'user' not in session:
961
- return redirect(url_for('login'))
962
 
963
- @app.route('/logout')
 
964
  def logout():
965
  """Clear session and redirect to login."""
966
  session.clear() # Clear the entire session
967
  if is_hf_space:
968
- return redirect('/auth/logout')
969
- return redirect(url_for('login'))
 
970
 
971
- @app.route('/debug')
972
  def debug_info():
973
  """Return debug information."""
974
  cookies = {key: request.cookies.get(key) for key in request.cookies.keys()}
@@ -978,10 +433,10 @@ def debug_info():
978
  "headers": dict(request.headers),
979
  "cookies": cookies,
980
  "is_hf_space": is_hf_space,
981
- "allowed_users": os.getenv('ALLOWED_USERS', 'Perilon'),
982
  "app_config": {k: str(v) for k, v in app.config.items() if k in
983
- ['SESSION_COOKIE_SECURE', 'SESSION_COOKIE_HTTPONLY',
984
- 'SESSION_COOKIE_SAMESITE', 'PERMANENT_SESSION_LIFETIME']},
985
  "s3_config": {
986
  "S3_BUCKET": S3_BUCKET,
987
  "S3_VIDEO_PREFIX": S3_VIDEO_PREFIX,
@@ -990,14 +445,16 @@ def debug_info():
990
  }
991
  return jsonify(info)
992
 
 
993
  # Main application routes
994
- @app.route('/')
995
  @login_required
996
  def index():
997
  """Main entry point, redirects to video selection."""
998
- return redirect(url_for('select_video'))
 
999
 
1000
- @app.route('/select_video')
1001
  @login_required
1002
  def select_video():
1003
  """Page to select a video for annotation."""
@@ -1005,38 +462,41 @@ def select_video():
1005
  video_ids = list_s3_videos()
1006
  else:
1007
  if not os.path.exists(VIDEO_DIR):
1008
- return render_template('error.html', message="Video directory not found.")
1009
- videos = [f for f in os.listdir(VIDEO_DIR) if f.endswith('.mp4')]
1010
  video_ids = [os.path.splitext(v)[0] for v in videos]
1011
 
1012
- return render_template('select_video.html', video_ids=video_ids, user=session.get('user'))
 
1013
 
1014
- @app.route('/player/<video_id>')
1015
  @login_required
1016
  def player(video_id):
1017
  """Video player page for annotation."""
1018
- return render_template('player.html', video_id=video_id, user=session.get('user'))
1019
 
1020
- @app.route('/videos')
 
1021
  @login_required
1022
  def get_videos():
1023
  """API endpoint to get available videos."""
1024
  if USE_S3_FOR_VIDEOS:
1025
  videos = list_s3_videos()
1026
  if not videos:
1027
- return jsonify({'error': 'No videos found in S3'}), 404
1028
  # Return just the filenames with .mp4 extension for compatibility
1029
  return jsonify([f"{vid}.mp4" for vid in videos])
1030
  else:
1031
  # Original local file behavior
1032
  if not os.path.exists(VIDEO_DIR):
1033
- return jsonify({'error': 'Video directory not found'}), 404
1034
- videos = [f for f in os.listdir(VIDEO_DIR) if f.endswith(('.mp4', '.avi', '.mov'))]
1035
  if not videos:
1036
- return jsonify({'error': 'No videos found'}), 404
1037
  return jsonify(videos)
1038
 
1039
- @app.route('/video/<path:filename>')
 
1040
  @login_required
1041
  def serve_video(filename):
1042
  """Serve a video file from S3 or local storage."""
@@ -1053,59 +513,63 @@ def serve_video(filename):
1053
  if local_path and os.path.exists(local_path):
1054
  return send_from_directory(VIDEO_DIR, filename)
1055
 
1056
- return jsonify({'error': 'Video not found in S3'}), 404
1057
  else:
1058
  # Original local file behavior
1059
  if not os.path.exists(os.path.join(VIDEO_DIR, filename)):
1060
- return jsonify({'error': 'Video not found'}), 404
1061
  return send_from_directory(VIDEO_DIR, filename)
1062
 
1063
- @app.route('/save_annotations', methods=['POST'])
 
1064
  @login_required
1065
  def save_annotations():
1066
  """Save annotation data."""
1067
  data = request.json
1068
- if not data or 'video' not in data or 'timestamps' not in data:
1069
- return jsonify({'success': False, 'message': 'Invalid data'}), 400
1070
 
1071
  annotation_file = os.path.join(ANNOTATIONS_DIR, f"{data['video']}_annotations.json")
1072
  annotation_data = {
1073
- "video_name": data['video'] + ".mp4",
1074
- "timestamps": sorted(data['timestamps']),
1075
  "annotation_date": datetime.now().isoformat(),
1076
- "annotated_by": session.get('user', {}).get('name', 'unknown')
1077
  }
1078
- with open(annotation_file, 'w') as f:
1079
  json.dump(annotation_data, f, indent=4)
1080
- return jsonify({'success': True, 'message': 'Annotations saved successfully'})
 
1081
 
1082
- @app.route('/get_annotations/<path:video_name>')
1083
  @login_required
1084
  def get_annotations(video_name):
1085
  """Get annotations for a video."""
1086
  annotation_file = os.path.join(ANNOTATIONS_DIR, f"{video_name}_annotations.json")
1087
  if not os.path.exists(annotation_file):
1088
- return jsonify({'error': 'No annotations found'}), 404
1089
- with open(annotation_file, 'r') as f:
1090
  annotations = json.load(f)
1091
  return jsonify(annotations)
1092
 
 
1093
  @app.route("/alignment/<video_id>")
1094
  @login_required
1095
  def alignment_mode(video_id):
1096
  """Page for aligning sign language with transcribed text."""
1097
  annotation_file = os.path.join(ANNOTATIONS_DIR, f"{video_id}_annotations.json")
1098
  if not os.path.exists(annotation_file):
1099
- return render_template("error.html", message="No annotations found for this video. Please annotate the video first.")
1100
- with open(annotation_file, 'r') as f:
1101
  annotations = json.load(f)
1102
  return render_template(
1103
  "alignment.html",
1104
  video_id=video_id,
1105
- total_clips=len(annotations['timestamps']) - 1,
1106
- user=session.get('user')
1107
  )
1108
 
 
1109
  @app.route("/api/transcript/<video_id>")
1110
  @login_required
1111
  def get_transcript(video_id):
@@ -1119,7 +583,7 @@ def get_transcript(video_id):
1119
  "message": "No word timestamps found for this video"
1120
  }), 404
1121
  try:
1122
- with open(timestamps_file, 'r') as f:
1123
  word_data = json.load(f)
1124
  full_text = " ".join(item["punctuated_word"] for item in word_data)
1125
  words_with_times = [{
@@ -1140,6 +604,7 @@ def get_transcript(video_id):
1140
  "message": f"Error processing word timestamps: {str(e)}"
1141
  }), 500
1142
 
 
1143
  @app.route("/api/word_timestamps/<video_id>")
1144
  @login_required
1145
  def get_word_timestamps(video_id):
@@ -1153,7 +618,7 @@ def get_word_timestamps(video_id):
1153
  "message": "No word timestamps found for this video"
1154
  }), 404
1155
  try:
1156
- with open(timestamps_file, 'r') as f:
1157
  word_data = json.load(f)
1158
  logger.info(f"Successfully loaded {len(word_data)} word timestamps")
1159
  return jsonify({
@@ -1167,6 +632,7 @@ def get_word_timestamps(video_id):
1167
  "message": f"Error processing word timestamps: {str(e)}"
1168
  }), 500
1169
 
 
1170
  @app.route("/api/clips/<video_id>")
1171
  @login_required
1172
  def get_video_clips(video_id):
@@ -1175,9 +641,9 @@ def get_video_clips(video_id):
1175
  annotation_file = os.path.join(ANNOTATIONS_DIR, f"{video_id}_annotations.json")
1176
  if not os.path.exists(annotation_file):
1177
  raise FileNotFoundError("Annotations not found")
1178
- with open(annotation_file, 'r') as f:
1179
  annotations = json.load(f)
1180
- timestamps = annotations['timestamps']
1181
  clips = []
1182
  for i in range(len(timestamps)-1):
1183
  clips.append({
@@ -1197,6 +663,7 @@ def get_video_clips(video_id):
1197
  "message": str(e)
1198
  }), 500
1199
 
 
1200
  @app.route("/clip/<video_id>/<int:clip_index>")
1201
  @login_required
1202
  def serve_clip(video_id, clip_index):
@@ -1214,23 +681,24 @@ def serve_clip(video_id, clip_index):
1214
  }), 404
1215
  return send_file(clip_path, mimetype="video/mp4")
1216
 
 
1217
  @app.route("/api/save_alignments", methods=["POST"])
1218
  @login_required
1219
  def save_alignments():
1220
  """Save alignment data."""
1221
  try:
1222
  data = request.json
1223
- if not data or 'video_id' not in data or 'alignments' not in data:
1224
- return jsonify({'success': False, 'message': 'Invalid data'}), 400
1225
 
1226
  # Add user information to the alignments
1227
- for alignment in data['alignments']:
1228
  if alignment:
1229
- alignment['aligned_by'] = session.get('user', {}).get('name', 'unknown')
1230
 
1231
  output_path = os.path.join(ALIGNMENTS_DIR, f"{data['video_id']}.json")
1232
  with open(output_path, "w") as f:
1233
- json.dump(data['alignments'], f, indent=2)
1234
  return jsonify({
1235
  "success": True,
1236
  "message": "Alignments saved successfully"
@@ -1242,6 +710,7 @@ def save_alignments():
1242
  "message": str(e)
1243
  }), 500
1244
 
 
1245
  @app.route("/api/extract_clips/<video_id>")
1246
  @login_required
1247
  def extract_clips_for_video(video_id):
@@ -1264,6 +733,7 @@ def extract_clips_for_video(video_id):
1264
  thread_trans.start()
1265
  return jsonify({"status": "started"})
1266
 
 
1267
  @app.route("/api/clip_progress/<video_id>")
1268
  @login_required
1269
  def clip_progress(video_id):
@@ -1271,6 +741,7 @@ def clip_progress(video_id):
1271
  progress = clip_extraction_status.get(video_id, {"current": 0, "total": 0, "percent": 0})
1272
  return jsonify(progress)
1273
 
 
1274
  @app.route("/api/transcription_progress/<video_id>")
1275
  @login_required
1276
  def transcription_progress(video_id):
@@ -1278,16 +749,15 @@ def transcription_progress(video_id):
1278
  progress = transcription_progress_status.get(video_id, {"status": "not started", "percent": 0})
1279
  return jsonify(progress)
1280
 
1281
- if __name__ == '__main__':
1282
- port = int(os.getenv('PORT', 7860))
1283
- app.run(host='0.0.0.0', port=port)
1284
  try:
1285
  # Print diagnostic information
1286
  print("=" * 50)
1287
  print(f"Starting app with configuration:")
1288
  print(f"- Running in HF Space: {is_hf_space}")
1289
  print(f"- Auth bypass: {bypass_auth}")
1290
- print(f"- Port: {os.getenv('PORT', 5000)}")
1291
  print(f"- S3 for videos: {USE_S3_FOR_VIDEOS}")
1292
  print(f"- S3 bucket: {S3_BUCKET}")
1293
  print(f"- S3 prefix: {S3_VIDEO_PREFIX}")
@@ -1300,8 +770,8 @@ if __name__ == '__main__':
1300
  print(f"- Error listing S3 videos: {str(e)}")
1301
  print("=" * 50)
1302
 
1303
- port = int(os.getenv('PORT', 5000))
1304
- app.run(host='0.0.0.0', port=port, debug=True)
1305
  except Exception as e:
1306
  print(f"Error starting the application: {e}")
1307
  import traceback
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import boto3
2
+ import json
3
+ import logging
4
+ import os
5
+ import platform
6
+ import requests
7
+ import signal
8
+ import sys
9
  import tempfile
10
+ import threading
11
+ import time
12
  import uuid
13
+ from botocore.exceptions import ClientError
14
+ from datetime import datetime
15
+ from dotenv import load_dotenv
16
+ from extract_signed_segments_from_annotations import ClipExtractor, VideoClip
17
+ from flask import Flask, jsonify, redirect, render_template, request, send_file, send_from_directory, session, url_for
18
+ from typing import Any, Dict, List, Optional
19
  from urllib.parse import urlparse
20
 
21
  # Load environment variables
22
  load_dotenv()
23
 
24
  # Add this near the top with other environment variables
25
+ bypass_auth = os.getenv("BYPASS_AUTH", "false").lower() == "true"
26
 
27
  # Configure logging first
28
  logging.basicConfig(
29
  level=logging.INFO,
30
+ format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
31
  )
32
  logger = logging.getLogger(__name__)
33
 
34
  # Hugging Face specific configuration
35
+ is_hf_space = os.getenv("SPACE_ID") is not None
36
  if is_hf_space:
37
  logger.info("Running in Hugging Face Spaces environment")
38
  # Allow insecure transport for development in HF
39
+ os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1"
40
  # Ensure port is set correctly
41
+ os.environ["PORT"] = "7860"
42
 
43
+ # Debug information
44
+ print("=" * 50)
45
+ print(f"Python version: {sys.version}")
46
+ print(f"Platform: {platform.platform()}")
47
+ print(f"Current directory: {os.getcwd()}")
48
+ print(f"Directory contents: {os.listdir('.')}")
49
+ print("=" * 50)
50
 
51
  app = Flask(__name__)
52
+ app.secret_key = os.getenv("SECRET_KEY", "dev_key_for_testing")
53
 
54
  # Configure session for HF
55
  if is_hf_space:
56
+ app.config["SESSION_COOKIE_SECURE"] = False
57
+ app.config["SESSION_COOKIE_HTTPONLY"] = True
58
+ app.config["SESSION_COOKIE_SAMESITE"] = None # Add this line
59
+ app.config["PERMANENT_SESSION_LIFETIME"] = 86400 # 24 hours
60
 
61
  # Directory paths
62
  VIDEO_DIR = os.path.abspath("data/videos")
 
67
  TRANSCRIPTS_DIR = os.path.abspath("data/transcripts")
68
 
69
  # S3 configuration
70
+ S3_BUCKET = os.getenv("S3_BUCKET", "sorenson-ai-sb-scratch")
71
+ S3_VIDEO_PREFIX = os.getenv("S3_VIDEO_PREFIX", "awilkinson/kylie_dataset_videos_for_alignment_webapp/")
72
+ USE_S3_FOR_VIDEOS = os.getenv("USE_S3_FOR_VIDEOS", "true").lower() == "true"
73
 
74
  # Ensure all required directories exist
75
  for directory in [VIDEO_DIR, ANNOTATIONS_DIR, TEMP_DIR, WORD_TIMESTAMPS_DIR, ALIGNMENTS_DIR, TRANSCRIPTS_DIR]:
 
79
  clip_extraction_status = {}
80
  transcription_progress_status = {}
81
 
82
+
83
  # S3 helper functions
84
  def get_s3_client():
85
  """Get a boto3 S3 client."""
86
  return boto3.client(
87
+ "s3",
88
+ region_name=os.environ.get("AWS_DEFAULT_REGION", "us-west-2"),
89
+ aws_access_key_id=os.environ.get("AWS_ACCESS_KEY_ID"),
90
+ aws_secret_access_key=os.environ.get("AWS_SECRET_ACCESS_KEY")
91
  )
92
 
93
+
94
+ def list_s3_videos() -> List[str]:
95
  """List all videos in the S3 bucket with the given prefix."""
96
+ if not os.environ.get("AWS_ACCESS_KEY_ID") or not os.environ.get("AWS_SECRET_ACCESS_KEY"):
97
+ logger.warning("AWS credentials not found. Returning empty video list.")
98
  return []
99
 
100
  try:
 
104
  Prefix=S3_VIDEO_PREFIX
105
  )
106
 
107
+ if "Contents" not in response:
108
  logger.warning(f"No videos found in S3 bucket {S3_BUCKET} with prefix {S3_VIDEO_PREFIX}")
109
  return []
110
 
111
  # Extract video IDs (filenames without extension) from S3 keys
112
  videos = []
113
+ for item in response["Contents"]:
114
+ key = item["Key"]
115
+ if key.endswith(".mp4"):
116
  # Extract just the filename without extension
117
  filename = os.path.basename(key)
118
  video_id = os.path.splitext(filename)[0]
 
123
  logger.error(f"Error listing S3 videos: {str(e)}")
124
  return []
125
 
126
+
127
+ def download_video_from_s3(video_id: str) -> Optional[str]:
128
  """Download a video from S3 to the local videos directory."""
129
  video_filename = f"{video_id}.mp4"
130
  s3_key = f"{S3_VIDEO_PREFIX}{video_filename}"
 
145
  logger.error(f"Error downloading video from S3: {str(e)}")
146
  return None
147
 
148
+
149
+ def generate_presigned_url(video_id: str, expiration: int = 3600) -> Optional[str]:
150
  """Generate a presigned URL for direct access to the video in S3."""
151
  video_filename = f"{video_id}.mp4"
152
  s3_key = f"{S3_VIDEO_PREFIX}{video_filename}"
 
154
  try:
155
  s3_client = get_s3_client()
156
  url = s3_client.generate_presigned_url(
157
+ "get_object",
158
+ Params={"Bucket": S3_BUCKET, "Key": s3_key},
159
  ExpiresIn=expiration
160
  )
161
  return url
 
163
  logger.error(f"Error generating presigned URL: {str(e)}")
164
  return None
165
 
166
+
167
  # Graceful shutdown handler
168
  def graceful_shutdown(signum, frame):
169
  """Handle graceful shutdown on signals."""
 
171
  # Clean up as needed here
172
  sys.exit(0)
173
 
174
+
175
  # Register signal handlers
176
  signal.signal(signal.SIGTERM, graceful_shutdown)
177
  signal.signal(signal.SIGINT, graceful_shutdown)
178
 
179
+
180
  # Login required decorator
181
  def login_required(f):
182
  from functools import wraps
183
  @wraps(f)
184
  def decorated_function(*args, **kwargs):
185
+ if "user" not in session:
186
  logger.info(f"User not in session, redirecting to login")
187
+ return redirect(url_for("login"))
188
  return f(*args, **kwargs)
189
  return decorated_function
190
 
191
+
192
  # Allow specific users (for testing)
193
+ def is_allowed_user(username: str) -> bool:
194
+ allowed_users_env = os.getenv("ALLOWED_USERS", "Perilon") # Default to your username
195
+ allowed_users = [user.strip() for user in allowed_users_env.split(",")]
196
  return username in allowed_users or not is_hf_space # Allow all users in local dev
197
 
198
+
199
+ def update_extraction_progress(video_id: str, current: int, total: int) -> None:
200
  percent = int((current / total) * 100)
201
  clip_extraction_status[video_id] = {"current": current, "total": total, "percent": percent}
202
 
203
+
204
+ def run_clip_extraction(video_id: str) -> None:
205
  try:
206
  base_dir = app.root_path
207
  extractor = ClipExtractor(base_dir)
 
219
  logger.error(f"Error during clip extraction for {video_id}: {str(e)}")
220
  clip_extraction_status[video_id] = {"error": str(e)}
221
 
222
+
223
+ def run_transcription(video_id: str) -> None:
224
  try:
225
  base_dir = app.root_path
226
  output_path = os.path.join(WORD_TIMESTAMPS_DIR, f"{video_id}_word_timestamps.json")
 
247
  transcription_progress_status[video_id] = {"status": "started", "percent": 10}
248
 
249
  # Check if AWS credentials are available
250
+ if not os.environ.get("AWS_ACCESS_KEY_ID") or not os.environ.get("AWS_SECRET_ACCESS_KEY"):
251
+ logger.warning("AWS credentials not found. Transcription will not work properly.")
252
  transcription_progress_status[video_id] = {
253
  "status": "error",
254
  "percent": 0,
 
268
  logger.error(f"Error during transcription for {video_id}: {str(e)}")
269
  transcription_progress_status[video_id] = {"status": "error", "percent": 0, "message": str(e)}
270
 
271
+
272
  # Authentication routes
273
+ @app.route("/login")
274
  def login():
275
  """Handle login for both local and HF environments."""
276
+ logger.info(f"Login route called. Headers: {dict(request.headers)}")
277
 
278
  if is_hf_space:
279
+ username = request.headers.get("X-Spaces-Username")
280
  logger.info(f"Username from headers in login: {username}")
281
 
282
  if username and is_allowed_user(username):
283
+ session["user"] = {"name": username, "is_hf": True}
284
+ return redirect(url_for("index"))
285
  else:
286
  # Redirect to the HF auth endpoint
287
+ return redirect("/auth")
288
  else:
289
  # For local development
290
+ session["user"] = {"name": "LocalDeveloper", "is_mock": True}
291
+ return redirect(url_for("index"))
292
 
293
+
294
+ @app.route("/auth/callback")
295
  def auth_callback():
296
  """This route will be called by Hugging Face after successful authentication."""
297
+ logger.info(f"Auth callback called. Headers: {dict(request.headers)}")
298
 
299
  if is_hf_space:
300
  # In Hugging Face Spaces, the user info is available in the request headers
301
+ username = request.headers.get("X-Spaces-Username")
302
  if username:
303
+ session["user"] = {"name": username, "is_hf": True}
304
+ return redirect(url_for("index"))
305
  else:
306
+ return render_template("error.html", message="Authentication failed. No username provided.")
307
+ return redirect(url_for("login"))
308
+
309
 
310
+ @app.route("/health")
311
  def health_check():
312
  """Health check endpoint for container verification."""
313
  # Log environment variables for debugging
314
  env_vars = {
315
+ "FLASK_ENV": os.environ.get("FLASK_ENV", "production"),
316
+ "DEBUG": os.environ.get("DEBUG", "Not set"),
317
+ "SPACE_ID": os.environ.get("SPACE_ID", "Not set"),
318
+ "BYPASS_AUTH": os.environ.get("BYPASS_AUTH", "Not set"),
319
+ "SECRET_KEY": os.environ.get("SECRET_KEY", "Not set")[:5] + "..." if os.environ.get("SECRET_KEY") else "Not set",
320
+ "S3_BUCKET": os.environ.get("S3_BUCKET", "Not set"),
321
+ "S3_VIDEO_PREFIX": os.environ.get("S3_VIDEO_PREFIX", "Not set"),
322
+ "USE_S3_FOR_VIDEOS": os.environ.get("USE_S3_FOR_VIDEOS", "Not set")
323
  }
324
 
325
+ logger.info(f"Health check called. Environment: {env_vars}")
326
 
327
  # Get session information for debugging
328
  session_info = dict(session) if session else None
 
341
  }
342
  })
343
 
344
+
345
+ @app.route("/auth")
346
  def auth():
347
  """This route handles HF authentication."""
348
+ logger.info(f"Auth route called. Headers: {dict(request.headers)}")
349
 
350
  # Force bypass auth to be true for debugging
351
  bypass_auth = True
 
353
  # If bypass is enabled, authenticate immediately
354
  if bypass_auth:
355
  logger.info("Auth bypass enabled, setting default user")
356
+ session["user"] = {"name": "Perilon", "is_hf": True}
357
+ return redirect(url_for("index"))
358
 
359
  # Normal authentication logic
360
+ username = request.headers.get("X-Spaces-Username")
361
  logger.info(f"Username from headers in auth: {username}")
362
 
363
  if is_hf_space and username and is_allowed_user(username):
364
  logger.info(f"Setting user in session: {username}")
365
+ session["user"] = {"name": username, "is_hf": True}
366
+ return redirect(url_for("index"))
367
  elif not is_hf_space:
368
  # For local development
369
+ session["user"] = {"name": "LocalDeveloper", "is_mock": True}
370
+ return redirect(url_for("index"))
371
  else:
372
  # For HF with no valid username yet
373
+ return render_template("error.html", message=
374
+ "Waiting for Hugging Face authentication. If you continue to see this message, "
375
  "please make sure you're logged into Hugging Face and your username is allowed.")
376
 
377
+
378
  @app.before_request
379
  def check_auth():
380
  """Check authentication before processing requests."""
381
  # Skip authentication for certain routes and static files
382
+ if request.path in ["/login", "/logout", "/auth", "/auth/callback", "/debug", "/health"] or request.path.startswith("/static/"):
383
  return
384
 
385
  # Force bypass auth to be true for debugging
 
390
 
391
  if bypass_auth:
392
  # Set default user for bypass mode if not already set
393
+ if "user" not in session:
394
+ session["user"] = {"name": "Perilon", "is_hf": True}
395
  return
396
 
397
  if is_hf_space:
398
  # Check for HF username header
399
+ username = request.headers.get("X-Spaces-Username")
400
 
401
+ if "user" in session:
402
  logger.debug(f"User in session: {session['user']}")
403
  return
404
 
405
  if username and is_allowed_user(username):
406
  logger.info(f"Setting user from headers: {username}")
407
+ session["user"] = {"name": username, "is_hf": True}
408
  return
409
 
410
  # No valid user in session or headers
411
  logger.info(f"No authenticated user, redirecting to /auth")
412
+ return redirect("/auth")
413
+ elif "user" not in session:
414
+ return redirect(url_for("login"))
415
 
416
+
417
+ @app.route("/logout")
418
  def logout():
419
  """Clear session and redirect to login."""
420
  session.clear() # Clear the entire session
421
  if is_hf_space:
422
+ return redirect("/auth/logout")
423
+ return redirect(url_for("login"))
424
+
425
 
426
+ @app.route("/debug")
427
  def debug_info():
428
  """Return debug information."""
429
  cookies = {key: request.cookies.get(key) for key in request.cookies.keys()}
 
433
  "headers": dict(request.headers),
434
  "cookies": cookies,
435
  "is_hf_space": is_hf_space,
436
+ "allowed_users": os.getenv("ALLOWED_USERS", "Perilon"),
437
  "app_config": {k: str(v) for k, v in app.config.items() if k in
438
+ ["SESSION_COOKIE_SECURE", "SESSION_COOKIE_HTTPONLY",
439
+ "SESSION_COOKIE_SAMESITE", "PERMANENT_SESSION_LIFETIME"]},
440
  "s3_config": {
441
  "S3_BUCKET": S3_BUCKET,
442
  "S3_VIDEO_PREFIX": S3_VIDEO_PREFIX,
 
445
  }
446
  return jsonify(info)
447
 
448
+
449
  # Main application routes
450
+ @app.route("/")
451
  @login_required
452
  def index():
453
  """Main entry point, redirects to video selection."""
454
+ return redirect(url_for("select_video"))
455
+
456
 
457
+ @app.route("/select_video")
458
  @login_required
459
  def select_video():
460
  """Page to select a video for annotation."""
 
462
  video_ids = list_s3_videos()
463
  else:
464
  if not os.path.exists(VIDEO_DIR):
465
+ return render_template("error.html", message="Video directory not found.")
466
+ videos = [f for f in os.listdir(VIDEO_DIR) if f.endswith(".mp4")]
467
  video_ids = [os.path.splitext(v)[0] for v in videos]
468
 
469
+ return render_template("select_video.html", video_ids=video_ids, user=session.get("user"))
470
+
471
 
472
+ @app.route("/player/<video_id>")
473
  @login_required
474
  def player(video_id):
475
  """Video player page for annotation."""
476
+ return render_template("player.html", video_id=video_id, user=session.get("user"))
477
 
478
+
479
+ @app.route("/videos")
480
  @login_required
481
  def get_videos():
482
  """API endpoint to get available videos."""
483
  if USE_S3_FOR_VIDEOS:
484
  videos = list_s3_videos()
485
  if not videos:
486
+ return jsonify({"error": "No videos found in S3"}), 404
487
  # Return just the filenames with .mp4 extension for compatibility
488
  return jsonify([f"{vid}.mp4" for vid in videos])
489
  else:
490
  # Original local file behavior
491
  if not os.path.exists(VIDEO_DIR):
492
+ return jsonify({"error": "Video directory not found"}), 404
493
+ videos = [f for f in os.listdir(VIDEO_DIR) if f.endswith((".mp4", ".avi", ".mov"))]
494
  if not videos:
495
+ return jsonify({"error": "No videos found"}), 404
496
  return jsonify(videos)
497
 
498
+
499
+ @app.route("/video/<path:filename>")
500
  @login_required
501
  def serve_video(filename):
502
  """Serve a video file from S3 or local storage."""
 
513
  if local_path and os.path.exists(local_path):
514
  return send_from_directory(VIDEO_DIR, filename)
515
 
516
+ return jsonify({"error": "Video not found in S3"}), 404
517
  else:
518
  # Original local file behavior
519
  if not os.path.exists(os.path.join(VIDEO_DIR, filename)):
520
+ return jsonify({"error": "Video not found"}), 404
521
  return send_from_directory(VIDEO_DIR, filename)
522
 
523
+
524
+ @app.route("/save_annotations", methods=["POST"])
525
  @login_required
526
  def save_annotations():
527
  """Save annotation data."""
528
  data = request.json
529
+ if not data or "video" not in data or "timestamps" not in data:
530
+ return jsonify({"success": False, "message": "Invalid data"}), 400
531
 
532
  annotation_file = os.path.join(ANNOTATIONS_DIR, f"{data['video']}_annotations.json")
533
  annotation_data = {
534
+ "video_name": data["video"] + ".mp4",
535
+ "timestamps": sorted(data["timestamps"]),
536
  "annotation_date": datetime.now().isoformat(),
537
+ "annotated_by": session.get("user", {}).get("name", "unknown")
538
  }
539
+ with open(annotation_file, "w") as f:
540
  json.dump(annotation_data, f, indent=4)
541
+ return jsonify({"success": True, "message": "Annotations saved successfully"})
542
+
543
 
544
+ @app.route("/get_annotations/<path:video_name>")
545
  @login_required
546
  def get_annotations(video_name):
547
  """Get annotations for a video."""
548
  annotation_file = os.path.join(ANNOTATIONS_DIR, f"{video_name}_annotations.json")
549
  if not os.path.exists(annotation_file):
550
+ return jsonify({"error": "No annotations found"}), 404
551
+ with open(annotation_file, "r") as f:
552
  annotations = json.load(f)
553
  return jsonify(annotations)
554
 
555
+
556
  @app.route("/alignment/<video_id>")
557
  @login_required
558
  def alignment_mode(video_id):
559
  """Page for aligning sign language with transcribed text."""
560
  annotation_file = os.path.join(ANNOTATIONS_DIR, f"{video_id}_annotations.json")
561
  if not os.path.exists(annotation_file):
562
+ return render_template("error.html", message="No annotations found for this video. Please annotate the video first.")
563
+ with open(annotation_file, "r") as f:
564
  annotations = json.load(f)
565
  return render_template(
566
  "alignment.html",
567
  video_id=video_id,
568
+ total_clips=len(annotations["timestamps"]) - 1,
569
+ user=session.get("user")
570
  )
571
 
572
+
573
  @app.route("/api/transcript/<video_id>")
574
  @login_required
575
  def get_transcript(video_id):
 
583
  "message": "No word timestamps found for this video"
584
  }), 404
585
  try:
586
+ with open(timestamps_file, "r") as f:
587
  word_data = json.load(f)
588
  full_text = " ".join(item["punctuated_word"] for item in word_data)
589
  words_with_times = [{
 
604
  "message": f"Error processing word timestamps: {str(e)}"
605
  }), 500
606
 
607
+
608
  @app.route("/api/word_timestamps/<video_id>")
609
  @login_required
610
  def get_word_timestamps(video_id):
 
618
  "message": "No word timestamps found for this video"
619
  }), 404
620
  try:
621
+ with open(timestamps_file, "r") as f:
622
  word_data = json.load(f)
623
  logger.info(f"Successfully loaded {len(word_data)} word timestamps")
624
  return jsonify({
 
632
  "message": f"Error processing word timestamps: {str(e)}"
633
  }), 500
634
 
635
+
636
  @app.route("/api/clips/<video_id>")
637
  @login_required
638
  def get_video_clips(video_id):
 
641
  annotation_file = os.path.join(ANNOTATIONS_DIR, f"{video_id}_annotations.json")
642
  if not os.path.exists(annotation_file):
643
  raise FileNotFoundError("Annotations not found")
644
+ with open(annotation_file, "r") as f:
645
  annotations = json.load(f)
646
+ timestamps = annotations["timestamps"]
647
  clips = []
648
  for i in range(len(timestamps)-1):
649
  clips.append({
 
663
  "message": str(e)
664
  }), 500
665
 
666
+
667
  @app.route("/clip/<video_id>/<int:clip_index>")
668
  @login_required
669
  def serve_clip(video_id, clip_index):
 
681
  }), 404
682
  return send_file(clip_path, mimetype="video/mp4")
683
 
684
+
685
  @app.route("/api/save_alignments", methods=["POST"])
686
  @login_required
687
  def save_alignments():
688
  """Save alignment data."""
689
  try:
690
  data = request.json
691
+ if not data or "video_id" not in data or "alignments" not in data:
692
+ return jsonify({"success": False, "message": "Invalid data"}), 400
693
 
694
  # Add user information to the alignments
695
+ for alignment in data["alignments"]:
696
  if alignment:
697
+ alignment["aligned_by"] = session.get("user", {}).get("name", "unknown")
698
 
699
  output_path = os.path.join(ALIGNMENTS_DIR, f"{data['video_id']}.json")
700
  with open(output_path, "w") as f:
701
+ json.dump(data["alignments"], f, indent=2)
702
  return jsonify({
703
  "success": True,
704
  "message": "Alignments saved successfully"
 
710
  "message": str(e)
711
  }), 500
712
 
713
+
714
  @app.route("/api/extract_clips/<video_id>")
715
  @login_required
716
  def extract_clips_for_video(video_id):
 
733
  thread_trans.start()
734
  return jsonify({"status": "started"})
735
 
736
+
737
  @app.route("/api/clip_progress/<video_id>")
738
  @login_required
739
  def clip_progress(video_id):
 
741
  progress = clip_extraction_status.get(video_id, {"current": 0, "total": 0, "percent": 0})
742
  return jsonify(progress)
743
 
744
+
745
  @app.route("/api/transcription_progress/<video_id>")
746
  @login_required
747
  def transcription_progress(video_id):
 
749
  progress = transcription_progress_status.get(video_id, {"status": "not started", "percent": 0})
750
  return jsonify(progress)
751
 
752
+
753
+ if __name__ == "__main__":
 
754
  try:
755
  # Print diagnostic information
756
  print("=" * 50)
757
  print(f"Starting app with configuration:")
758
  print(f"- Running in HF Space: {is_hf_space}")
759
  print(f"- Auth bypass: {bypass_auth}")
760
+ print(f"- Port: {os.getenv('PORT', 7860)}")
761
  print(f"- S3 for videos: {USE_S3_FOR_VIDEOS}")
762
  print(f"- S3 bucket: {S3_BUCKET}")
763
  print(f"- S3 prefix: {S3_VIDEO_PREFIX}")
 
770
  print(f"- Error listing S3 videos: {str(e)}")
771
  print("=" * 50)
772
 
773
+ port = int(os.getenv("PORT", 7860))
774
+ app.run(host="0.0.0.0", port=port, debug=True)
775
  except Exception as e:
776
  print(f"Error starting the application: {e}")
777
  import traceback
pig.txt CHANGED
@@ -2,120 +2,131 @@ Building webapp
2
  #0 building with "default" instance using docker driver
3
 
4
  #1 [internal] load build definition from Dockerfile
5
- #1 transferring dockerfile: 1.19kB done
6
  #1 DONE 0.0s
7
 
8
  #2 [internal] load metadata for docker.io/library/python:3.9-slim
9
- #2 DONE 0.3s
10
 
11
  #3 [internal] load .dockerignore
12
  #3 transferring context: 2B done
13
  #3 DONE 0.0s
14
 
15
- #4 [1/9] FROM docker.io/library/python:3.9-slim@sha256:d1fd807555208707ec95b284afd10048d0737e84b5f2d6fdcbed2922b9284b56
16
  #4 DONE 0.0s
17
 
18
  #5 [internal] load build context
19
- #5 transferring context: 19.78kB 0.0s done
20
  #5 DONE 0.0s
21
 
22
- #6 [2/9] WORKDIR /app
23
  #6 CACHED
24
 
25
- #7 [3/9] COPY requirements.txt .
26
  #7 CACHED
27
 
28
- #8 [4/9] RUN pip install --no-cache-dir -r requirements.txt
29
  #8 CACHED
30
 
31
- #9 [5/9] RUN apt-get update && apt-get install -y ffmpeg wget git && apt-get clean
32
  #9 CACHED
33
 
34
- #10 [6/9] RUN mkdir -p /home/user && chown -R 1000:1000 /home/user
35
  #10 CACHED
36
 
37
- #11 [7/9] COPY . .
38
- #11 DONE 0.3s
39
-
40
- #12 [8/9] RUN mkdir -p data/videos data/annotations data/temp data/word_timestamps data/alignments data/transcripts
41
- #12 DONE 0.1s
42
-
43
- #13 [9/9] RUN ls -la /app && ls -la /app/flask_app.py && which gunicorn && pip list
44
- #13 0.170 total 148
45
- #13 0.170 drwxr-xr-x 1 root root 4096 Feb 27 21:59 .
46
- #13 0.170 drwxr-xr-x 1 root root 4096 Feb 27 21:59 ..
47
- #13 0.170 -rw-rw-r-- 1 root root 202 Feb 27 21:42 .env
48
- #13 0.170 drwxrwxr-x 8 root root 4096 Feb 27 21:59 .git
49
- #13 0.170 -rw-rw-r-- 1 root root 1519 Feb 26 22:49 .gitattributes
50
- #13 0.170 -rw-rw-r-- 1 root root 72 Feb 26 23:20 .gitignore
51
- #13 0.170 -rw-rw-r-- 1 root root 403 Feb 27 21:50 .hf-space
52
- #13 0.170 drwxrwxr-x 2 root root 4096 Feb 26 22:51 .space
53
- #13 0.170 -rw-rw-r-- 1 root root 1151 Feb 27 21:58 Dockerfile
54
- #13 0.170 -rw-rw-r-- 1 root root 804 Feb 26 22:54 README.md
55
- #13 0.170 -rw-rw-r-- 1 root root 904 Feb 26 22:50 check_large_files_linux.sh
56
- #13 0.170 drwxrwxr-x 9 root root 4096 Feb 26 22:50 data
57
- #13 0.170 -rw-rw-r-- 1 root root 571 Feb 27 21:42 docker-compose.yml
58
- #13 0.170 -rw-rw-r-- 1 root root 10867 Feb 27 21:42 extract_signed_segments_from_annotations.py
59
- #13 0.170 -rw-rw-r-- 1 root root 51619 Feb 27 21:51 flask_app.py
60
- #13 0.170 -rw-rw-r-- 1 root root 6704 Feb 27 21:42 get_transcription_with_amazon.py
61
- #13 0.170 -rw-rw-r-- 1 root root 258 Feb 27 21:59 pig.txt
62
- #13 0.170 -rw-rw-r-- 1 root root 66 Feb 27 17:46 requirements.txt
63
- #13 0.170 drwxrwxr-x 2 root root 4096 Feb 26 22:50 static
64
- #13 0.170 drwxrwxr-x 2 root root 4096 Feb 27 21:56 templates
65
- #13 0.170 -rw-rw-r-- 1 root root 5569 Feb 26 22:50 tree.txt
66
- #13 0.171 -rw-rw-r-- 1 root root 51619 Feb 27 21:51 /app/flask_app.py
67
- #13 0.172 /usr/local/bin/gunicorn
68
- #13 0.499 WARNING: The directory '/home/user/.cache/pip' or its parent directory is not owned or is not writable by the current user. The cache has been disabled. Check the permissions and owner of that directory. If executing pip with sudo, you should use sudo's -H flag.
69
- #13 0.544 Package Version
70
- #13 0.544 ------------------ -----------
71
- #13 0.544 Authlib 1.5.0
72
- #13 0.544 blinker 1.9.0
73
- #13 0.544 boto3 1.37.2
74
- #13 0.545 botocore 1.37.2
75
- #13 0.545 certifi 2025.1.31
76
- #13 0.545 cffi 1.17.1
77
- #13 0.545 charset-normalizer 3.4.1
78
- #13 0.545 click 8.1.8
79
- #13 0.545 cryptography 44.0.1
80
- #13 0.546 ffmpeg-python 0.2.0
81
- #13 0.546 Flask 3.1.0
82
- #13 0.546 future 1.0.0
83
- #13 0.546 gunicorn 23.0.0
84
- #13 0.546 idna 3.10
85
- #13 0.546 importlib_metadata 8.6.1
86
- #13 0.547 itsdangerous 2.2.0
87
- #13 0.547 Jinja2 3.1.5
88
- #13 0.547 jmespath 1.0.1
89
- #13 0.547 MarkupSafe 3.0.2
90
- #13 0.547 packaging 24.2
91
- #13 0.547 pip 23.0.1
92
- #13 0.547 pycparser 2.22
93
- #13 0.548 python-dateutil 2.9.0.post0
94
- #13 0.548 python-dotenv 1.0.1
95
- #13 0.548 requests 2.32.3
96
- #13 0.548 s3transfer 0.11.3
97
- #13 0.548 setuptools 58.1.0
98
- #13 0.548 six 1.17.0
99
- #13 0.549 urllib3 1.26.20
100
- #13 0.549 Werkzeug 3.1.3
101
- #13 0.549 wheel 0.45.1
102
- #13 0.549 zipp 3.21.0
103
- #13 0.719
104
- #13 0.719 [notice] A new release of pip is available: 23.0.1 -> 25.0.1
105
- #13 0.719 [notice] To update, run: pip install --upgrade pip
106
- #13 DONE 0.8s
107
-
108
- #14 exporting to image
109
- #14 exporting layers
110
- #14 exporting layers 0.5s done
111
- #14 writing image sha256:f71a97e216e67b0ebb92ed1a322fb6e703b2be41f2f0e9c2a51e21a67d0f3a96 done
112
- #14 naming to docker.io/library/vsl_boundary_annotation_and_alignment_tool_webapp done
113
- #14 DONE 0.5s
 
 
 
 
 
 
 
 
 
 
 
114
 
115
  2 warnings found (use docker --debug to expand):
116
-  - SecretsUsedInArgOrEnv: Do not use ARG or ENV instructions for sensitive data (ENV "SECRET_KEY") (line 25)
117
- - SecretsUsedInArgOrEnv: Do not use ARG or ENV instructions for sensitive data (ENV "BYPASS_AUTH") (line 24)
118
- Recreating 3aed3e7bb19b_vsl_boundary_annotation_and_alignment_tool_webapp_1 ...
119
 
120
  ERROR: for 3aed3e7bb19b_vsl_boundary_annotation_and_alignment_tool_webapp_1 'ContainerConfig'
121
 
 
2
  #0 building with "default" instance using docker driver
3
 
4
  #1 [internal] load build definition from Dockerfile
5
+ #1 transferring dockerfile: 1.30kB done
6
  #1 DONE 0.0s
7
 
8
  #2 [internal] load metadata for docker.io/library/python:3.9-slim
9
+ #2 DONE 0.5s
10
 
11
  #3 [internal] load .dockerignore
12
  #3 transferring context: 2B done
13
  #3 DONE 0.0s
14
 
15
+ #4 [ 1/12] FROM docker.io/library/python:3.9-slim@sha256:d1fd807555208707ec95b284afd10048d0737e84b5f2d6fdcbed2922b9284b56
16
  #4 DONE 0.0s
17
 
18
  #5 [internal] load build context
19
+ #5 transferring context: 170.74kB 0.0s done
20
  #5 DONE 0.0s
21
 
22
+ #6 [ 4/12] RUN pip install --no-cache-dir -r requirements.txt
23
  #6 CACHED
24
 
25
+ #7 [ 2/12] WORKDIR /app
26
  #7 CACHED
27
 
28
+ #8 [ 3/12] COPY requirements.txt .
29
  #8 CACHED
30
 
31
+ #9 [ 5/12] RUN apt-get update && apt-get install -y ffmpeg wget git && apt-get clean
32
  #9 CACHED
33
 
34
+ #10 [ 6/12] RUN mkdir -p /home/user && chown -R 1000:1000 /home/user
35
  #10 CACHED
36
 
37
+ #11 [ 7/12] RUN mkdir -p data/videos data/annotations data/temp data/word_timestamps data/alignments data/transcripts
38
+ #11 DONE 0.2s
39
+
40
+ #12 [ 8/12] COPY . .
41
+ #12 DONE 0.3s
42
+
43
+ #13 [ 9/12] COPY entrypoint.sh /app/
44
+ #13 DONE 0.0s
45
+
46
+ #14 [10/12] RUN chmod +x /app/entrypoint.sh
47
+ #14 DONE 0.1s
48
+
49
+ #15 [11/12] RUN chmod -R 755 /app && chmod -R 777 /app/data
50
+ #15 DONE 0.9s
51
+
52
+ #16 [12/12] RUN ls -la /app && ls -la /app/flask_app.py && which gunicorn && pip list
53
+ #16 0.179 total 172
54
+ #16 0.179 drwxr-xr-x 1 root root 4096 Feb 27 23:30 .
55
+ #16 0.179 drwxr-xr-x 1 root root 4096 Feb 27 23:30 ..
56
+ #16 0.179 -rwxr-xr-x 1 root root 202 Feb 27 23:14 .env
57
+ #16 0.179 drwxr-xr-x 1 root root 4096 Feb 27 23:30 .git
58
+ #16 0.179 -rwxr-xr-x 1 root root 1519 Feb 26 22:49 .gitattributes
59
+ #16 0.179 -rwxr-xr-x 1 root root 72 Feb 26 23:20 .gitignore
60
+ #16 0.179 -rwxr-xr-x 1 root root 403 Feb 27 23:14 .hf-space
61
+ #16 0.179 drwxr-xr-x 1 root root 4096 Feb 26 22:51 .space
62
+ #16 0.179 -rwxr-xr-x 1 root root 1257 Feb 27 23:22 Dockerfile
63
+ #16 0.179 -rwxr-xr-x 1 root root 804 Feb 26 22:54 README.md
64
+ #16 0.179 -rwxr-xr-x 1 root root 0 Feb 27 23:21 __init__.py
65
+ #16 0.179 -rwxr-xr-x 1 root root 904 Feb 26 22:50 check_large_files_linux.sh
66
+ #16 0.179 drwxrwxrwx 1 root root 4096 Feb 26 22:50 data
67
+ #16 0.179 -rwxr-xr-x 1 root root 571 Feb 27 23:14 docker-compose.yml
68
+ #16 0.179 -rwxr-xr-x 1 root root 312 Feb 27 23:22 entrypoint.sh
69
+ #16 0.179 -rwxr-xr-x 1 root root 10867 Feb 27 23:14 extract_signed_segments_from_annotations.py
70
+ #16 0.179 -rwxr-xr-x 1 root root 51698 Feb 27 23:21 flask_app.py
71
+ #16 0.179 -rwxr-xr-x 1 root root 6815 Feb 27 23:14 get_transcription_with_amazon.py
72
+ #16 0.179 -rwxr-xr-x 1 root root 258 Feb 27 23:30 pig.txt
73
+ #16 0.179 -rwxr-xr-x 1 root root 66 Feb 27 23:14 requirements.txt
74
+ #16 0.179 drwxr-xr-x 1 root root 4096 Feb 26 22:50 static
75
+ #16 0.179 drwxr-xr-x 1 root root 4096 Feb 27 23:30 templates
76
+ #16 0.179 -rwxr-xr-x 1 root root 5569 Feb 26 22:50 tree.txt
77
+ #16 0.180 -rwxr-xr-x 1 root root 51698 Feb 27 23:21 /app/flask_app.py
78
+ #16 0.181 /usr/local/bin/gunicorn
79
+ #16 0.509 WARNING: The directory '/home/user/.cache/pip' or its parent directory is not owned or is not writable by the current user. The cache has been disabled. Check the permissions and owner of that directory. If executing pip with sudo, you should use sudo's -H flag.
80
+ #16 0.554 Package Version
81
+ #16 0.554 ------------------ -----------
82
+ #16 0.554 Authlib 1.5.0
83
+ #16 0.554 blinker 1.9.0
84
+ #16 0.555 boto3 1.37.2
85
+ #16 0.555 botocore 1.37.2
86
+ #16 0.555 certifi 2025.1.31
87
+ #16 0.555 cffi 1.17.1
88
+ #16 0.555 charset-normalizer 3.4.1
89
+ #16 0.555 click 8.1.8
90
+ #16 0.556 cryptography 44.0.1
91
+ #16 0.556 ffmpeg-python 0.2.0
92
+ #16 0.556 Flask 3.1.0
93
+ #16 0.556 future 1.0.0
94
+ #16 0.556 gunicorn 23.0.0
95
+ #16 0.556 idna 3.10
96
+ #16 0.556 importlib_metadata 8.6.1
97
+ #16 0.557 itsdangerous 2.2.0
98
+ #16 0.557 Jinja2 3.1.5
99
+ #16 0.557 jmespath 1.0.1
100
+ #16 0.557 MarkupSafe 3.0.2
101
+ #16 0.557 packaging 24.2
102
+ #16 0.557 pip 23.0.1
103
+ #16 0.558 pycparser 2.22
104
+ #16 0.558 python-dateutil 2.9.0.post0
105
+ #16 0.558 python-dotenv 1.0.1
106
+ #16 0.558 requests 2.32.3
107
+ #16 0.558 s3transfer 0.11.3
108
+ #16 0.558 setuptools 58.1.0
109
+ #16 0.559 six 1.17.0
110
+ #16 0.559 urllib3 1.26.20
111
+ #16 0.559 Werkzeug 3.1.3
112
+ #16 0.559 wheel 0.45.1
113
+ #16 0.559 zipp 3.21.0
114
+ #16 0.730
115
+ #16 0.730 [notice] A new release of pip is available: 23.0.1 -> 25.0.1
116
+ #16 0.730 [notice] To update, run: pip install --upgrade pip
117
+ #16 DONE 0.8s
118
+
119
+ #17 exporting to image
120
+ #17 exporting layers
121
+ #17 exporting layers 0.6s done
122
+ #17 writing image sha256:7783d4fd70349562f507d3734fb83e77d9df5ec0145f4eb87d9ec8ccf30dbda3 done
123
+ #17 naming to docker.io/library/vsl_boundary_annotation_and_alignment_tool_webapp done
124
+ #17 DONE 0.6s
125
 
126
  2 warnings found (use docker --debug to expand):
127
+  - SecretsUsedInArgOrEnv: Do not use ARG or ENV instructions for sensitive data (ENV "SECRET_KEY") (line 36)
128
+ - SecretsUsedInArgOrEnv: Do not use ARG or ENV instructions for sensitive data (ENV "BYPASS_AUTH") (line 35)
129
+ Recreating 3aed3e7bb19b_vsl_boundary_annotation_and_alignment_tool_webapp_1 ...
130
 
131
  ERROR: for 3aed3e7bb19b_vsl_boundary_annotation_and_alignment_tool_webapp_1 'ContainerConfig'
132