Spaces:

VSL-Data-Collection
/

VSL_Boundary_Annotation_and_Alignment_Tool

Sleeping

App Files Files Community

VSL_Boundary_Annotation_and_Alignment_Tool / flask_app.py

Perilon

Bug fixes

52d1010 3 days ago

raw

history blame contribute delete

28.8 kB

	import boto3
	import json
	import logging
	import os
	import platform
	import requests
	import signal
	import sys
	import tempfile
	import threading
	import time
	import uuid
	from botocore.exceptions import ClientError
	from datetime import datetime
	from dotenv import load_dotenv
	from extract_signed_segments_from_annotations import ClipExtractor, VideoClip
	from flask import Flask, jsonify, redirect, render_template, request, send_file, send_from_directory, session, url_for
	from typing import Any, Dict, List, Optional
	from urllib.parse import urlparse

	# Read settings from a .env file (if present) before any value below is looked up.
	load_dotenv()

	# Module-level auth-bypass flag: true only when BYPASS_AUTH is "true" (any letter case).
	bypass_auth = os.environ.get("BYPASS_AUTH", "false").lower() == "true"

	# Configure root logging before the module logger is created and used.
	logging.basicConfig(
	    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
	    level=logging.INFO,
	)
	logger = logging.getLogger(__name__)

	# The presence of SPACE_ID is used as the marker for a Hugging Face Spaces deployment.
	is_hf_space = "SPACE_ID" in os.environ
	if is_hf_space:
	    logger.info("Running in Hugging Face Spaces environment")
	    os.environ.update(
	        {
	            # Allow insecure (non-HTTPS) OAuth transport for development in HF.
	            "OAUTHLIB_INSECURE_TRANSPORT": "1",
	            # Pin the port that the startup code later reads from PORT.
	            "PORT": "7860",
	        }
	    )

	# Startup diagnostics, written to stdout once when the module is imported.
	print(
	    "\n".join(
	        (
	            "=" * 50,
	            f"Python version: {sys.version}",
	            f"Platform: {platform.platform()}",
	            f"Current directory: {os.getcwd()}",
	            f"Directory contents: {os.listdir('.')}",
	            "=" * 50,
	        )
	    )
	)

	app = Flask(__name__)

	# Sessions are signed with this key. An unset OR empty SECRET_KEY falls back to a
	# fixed development key; because that key is hard-coded in this file, log a
	# warning whenever it is in use instead of falling back silently.
	app.secret_key = os.getenv("SECRET_KEY") or "dev_key_for_testing"
	if app.secret_key == "dev_key_for_testing":
	    logger.warning(
	        "SECRET_KEY is not set; using the built-in development key. "
	        "Configure a real SECRET_KEY before exposing this app."
	    )

	# Session cookie settings applied only when running in a Hugging Face Space.
	if is_hf_space:
	    app.config.update(
	        SESSION_COOKIE_SECURE=False,
	        SESSION_COOKIE_HTTPONLY=True,
	        # Python None, not the string "None".
	        # NOTE(review): Flask then omits the SameSite attribute — confirm this is intended.
	        SESSION_COOKIE_SAMESITE=None,
	        PERMANENT_SESSION_LIFETIME=86400,  # 24 hours, in seconds
	    )

	# Data directories, as absolute paths resolved against the current working directory.
	VIDEO_DIR = os.path.abspath("data/videos")
	ANNOTATIONS_DIR = os.path.abspath("data/annotations")
	TEMP_DIR = os.path.abspath("data/temp")
	WORD_TIMESTAMPS_DIR = os.path.abspath("data/word_timestamps")
	ALIGNMENTS_DIR = os.path.abspath("data/alignments")
	TRANSCRIPTS_DIR = os.path.abspath("data/transcripts")

	# S3 settings; each one can be overridden by the environment variable of the same name.
	S3_BUCKET = os.environ.get("S3_BUCKET", "sorenson-ai-sb-scratch")
	S3_VIDEO_PREFIX = os.environ.get("S3_VIDEO_PREFIX", "awilkinson/kylie_dataset_videos_for_alignment_webapp/")
	USE_S3_FOR_VIDEOS = os.environ.get("USE_S3_FOR_VIDEOS", "true").lower() == "true"

	# Create every data directory that does not exist yet.
	for directory in (
	    VIDEO_DIR,
	    ANNOTATIONS_DIR,
	    TEMP_DIR,
	    WORD_TIMESTAMPS_DIR,
	    ALIGNMENTS_DIR,
	    TRANSCRIPTS_DIR,
	):
	    os.makedirs(directory, exist_ok=True)

	# In-memory progress records keyed by video id; written by the background
	# extraction/transcription workers and read by the progress endpoints.
	clip_extraction_status = {}
	transcription_progress_status = {}


	# S3 helper functions
	def get_s3_client():
	    """Build a boto3 S3 client from the ``AWS_*`` environment variables.

	    The region falls back to ``us-west-2`` when ``AWS_DEFAULT_REGION`` is unset.
	    The access key pair is passed through as read from the environment
	    (``None`` for a variable that is missing).
	    """
	    env = os.environ
	    client_options = {
	        "region_name": env.get("AWS_DEFAULT_REGION", "us-west-2"),
	        "aws_access_key_id": env.get("AWS_ACCESS_KEY_ID"),
	        "aws_secret_access_key": env.get("AWS_SECRET_ACCESS_KEY"),
	    }
	    return boto3.client("s3", **client_options)


	def list_s3_videos() -> List[str]:
	    """Return the ids of all ``.mp4`` objects under the configured S3 prefix.

	    A video id is the object's file name without directory or extension.
	    Listing is paginated so that prefixes holding more objects than one
	    ``list_objects_v2`` response can carry are returned in full.

	    Returns:
	        The list of video ids. An empty list is returned when AWS credentials
	        are missing from the environment, when nothing matches, or when any
	        error occurs while talking to S3 (the error is logged, not raised).
	    """
	    if not os.environ.get("AWS_ACCESS_KEY_ID") or not os.environ.get("AWS_SECRET_ACCESS_KEY"):
	        logger.warning("AWS credentials not found. Returning empty video list.")
	        return []

	    try:
	        paginator = get_s3_client().get_paginator("list_objects_v2")
	        videos = []
	        for page in paginator.paginate(Bucket=S3_BUCKET, Prefix=S3_VIDEO_PREFIX):
	            # A page without matches has no "Contents" key at all.
	            for item in page.get("Contents", []):
	                key = item["Key"]
	                if key.endswith(".mp4"):
	                    # Keep only the file name, without its extension.
	                    videos.append(os.path.splitext(os.path.basename(key))[0])

	        if not videos:
	            logger.warning(f"No videos found in S3 bucket {S3_BUCKET} with prefix {S3_VIDEO_PREFIX}")
	        return videos
	    except Exception as e:
	        # Best effort: callers treat "no videos" and "listing failed" the same way.
	        logger.error(f"Error listing S3 videos: {str(e)}")
	        return []


	def download_video_from_s3(video_id: str) -> Optional[str]:
	    """Make sure ``<video_id>.mp4`` exists in ``VIDEO_DIR``, fetching it from S3 if needed.

	    Args:
	        video_id: Video name without the ``.mp4`` extension. It must be a bare
	            file name (no directory components).

	    Returns:
	        The local path of the video, or ``None`` when the id is invalid or the
	        S3 download raised ``ClientError``.
	    """
	    # video_id can originate from a URL path; refuse anything that is not a bare
	    # file name so the local path can never point outside VIDEO_DIR.
	    if (
	        not video_id
	        or video_id in (".", "..")
	        or "\\" in video_id
	        or os.path.basename(video_id) != video_id
	    ):
	        logger.error(f"Refusing to download video with invalid id: {video_id!r}")
	        return None

	    video_filename = f"{video_id}.mp4"
	    s3_key = f"{S3_VIDEO_PREFIX}{video_filename}"
	    local_path = os.path.join(VIDEO_DIR, video_filename)

	    # A local copy doubles as the cache: skip the download when it is present.
	    if os.path.exists(local_path):
	        logger.info(f"Video {video_id} already exists locally.")
	        return local_path

	    try:
	        logger.info(f"Downloading video {video_id} from S3...")
	        s3_client = get_s3_client()
	        s3_client.download_file(S3_BUCKET, s3_key, local_path)
	        logger.info(f"Video {video_id} downloaded successfully to {local_path}")
	        return local_path
	    except ClientError as e:
	        logger.error(f"Error downloading video from S3: {str(e)}")
	        return None


	def generate_presigned_url(video_id: str, expiration: int = 3600) -> Optional[str]:
	    """Return a presigned S3 GET URL for ``<video_id>.mp4``, or ``None`` on error.

	    Args:
	        video_id: Video name without the ``.mp4`` extension.
	        expiration: Value handed to S3 as ``ExpiresIn`` (default 3600).

	    Returns:
	        The presigned URL, or ``None`` if boto3 raised ``ClientError``.
	    """
	    object_key = f"{S3_VIDEO_PREFIX}{video_id}.mp4"
	    request_params = {"Bucket": S3_BUCKET, "Key": object_key}

	    try:
	        return get_s3_client().generate_presigned_url(
	            "get_object",
	            Params=request_params,
	            ExpiresIn=expiration,
	        )
	    except ClientError as e:
	        logger.error(f"Error generating presigned URL: {str(e)}")
	        return None


	# Graceful shutdown handler
	def graceful_shutdown(signum, frame):
	    """Signal handler: log the received signal number, then exit with status 0.

	    Args:
	        signum: Number of the signal that triggered the handler.
	        frame: Current stack frame supplied by the ``signal`` module (unused).
	    """
	    logger.info(f"Received signal {signum}, shutting down gracefully...")
	    # Nothing needs explicit cleanup yet; add it here when that changes.
	    raise SystemExit(0)


	# Route both SIGINT and SIGTERM through the same handler.
	signal.signal(signal.SIGINT, graceful_shutdown)
	signal.signal(signal.SIGTERM, graceful_shutdown)


	# Login required decorator
	def login_required(f):
	    """Decorator that redirects to the login view unless a user is in the session.

	    Args:
	        f: The view function to protect.

	    Returns:
	        A wrapper with ``f``'s metadata that calls ``f`` with the original
	        positional and keyword arguments when ``"user"`` is in the session,
	        and otherwise returns a redirect to the ``login`` endpoint.
	    """
	    from functools import wraps

	    @wraps(f)
	    def decorated_function(*args, **kwargs):
	        # Flask passes URL parameters as keyword arguments, so the wrapper must
	        # accept and forward arbitrary *args/**kwargs.
	        if "user" not in session:
	            logger.info("User not in session, redirecting to login")
	            return redirect(url_for("login"))
	        return f(*args, **kwargs)

	    return decorated_function


	# Allow specific users (for testing)
	def is_allowed_user(username: str) -> bool:
	    """Tell whether ``username`` may use the app.

	    The allow-list is the comma-separated ``ALLOWED_USERS`` environment
	    variable (default ``"Perilon"``), with surrounding whitespace stripped from
	    each entry. Outside a Hugging Face Space every user is allowed.
	    """
	    raw_allow_list = os.getenv("ALLOWED_USERS", "Perilon")
	    allow_list = [entry.strip() for entry in raw_allow_list.split(",")]
	    if username in allow_list:
	        return True
	    # Local development: no allow-list enforcement.
	    return not is_hf_space


	def update_extraction_progress(video_id: str, current: int, total: int) -> None:
	    """Record clip-extraction progress for ``video_id`` in ``clip_extraction_status``.

	    Args:
	        video_id: Key under which the progress record is stored.
	        current: Number of items processed so far.
	        total: Number of items to process.

	    The stored record has the keys ``current``, ``total`` and ``percent``
	    (integer, truncated). A non-positive ``total`` means there is nothing to
	    extract, so it is reported as 100 percent instead of dividing by zero.
	    """
	    percent = int((current / total) * 100) if total > 0 else 100
	    clip_extraction_status[video_id] = {"current": current, "total": total, "percent": percent}


	def run_clip_extraction(video_id: str) -> None:
	    """Extract all annotated clips for ``video_id`` and track progress (thread target).

	    Progress reported by ``ClipExtractor`` is stored through
	    ``update_extraction_progress``. When extraction returns without the status
	    having reached 100 percent, the status is forced to "complete". Any
	    exception is logged and recorded as ``{"error": <message>}`` in
	    ``clip_extraction_status`` rather than propagated.
	    """
	    try:
	        extractor = ClipExtractor(app.root_path)
	        extractor.extract_clips_from_annotations(
	            video_id,
	            progress_callback=lambda current, total: update_extraction_progress(video_id, current, total),
	        )

	        status = clip_extraction_status.get(video_id, {})
	        if status.get("percent", 0) < 100:
	            # The record may lack a usable total (no progress was ever reported,
	            # a stale error record from an earlier run, or a zero total); fall
	            # back to 1/1 so a successful run is never turned into an error here.
	            total = status.get("total") or 1
	            update_extraction_progress(video_id, total, total)
	    except Exception as e:
	        logger.error(f"Error during clip extraction for {video_id}: {str(e)}")
	        clip_extraction_status[video_id] = {"error": str(e)}


	def run_transcription(video_id: str) -> None:
	    """Produce the word-timestamp file for ``video_id`` and track progress (thread target).

	    Steps:
	        1. Reuse ``<video_id>_word_timestamps.json`` when it exists and is non-empty.
	        2. Locate the video (download from S3 when ``USE_S3_FOR_VIDEOS`` is set,
	           otherwise use the local ``data/videos`` copy).
	        3. Require AWS credentials in the environment.
	        4. Run ``get_transcription_with_amazon.get_word_timestamps`` and save
	           the result as JSON.

	    The outcome is recorded in ``transcription_progress_status[video_id]``;
	    exceptions are logged and stored there as an ``"error"`` status instead of
	    being raised.
	    """
	    try:
	        base_dir = app.root_path
	        output_path = os.path.join(WORD_TIMESTAMPS_DIR, f"{video_id}_word_timestamps.json")

	        # A non-empty output file is treated as a valid cached transcription.
	        if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
	            logger.info(f"Using cached transcription for video {video_id}.")
	            transcription_progress_status[video_id] = {"status": "completed", "percent": 100}
	            return

	        # Download video from S3 if needed
	        if USE_S3_FOR_VIDEOS:
	            video_path = download_video_from_s3(video_id)
	            if not video_path:
	                transcription_progress_status[video_id] = {
	                    "status": "error",
	                    "percent": 0,
	                    "message": f"Failed to download video {video_id} from S3",
	                }
	                return
	        else:
	            video_path = os.path.join(base_dir, "data", "videos", f"{video_id}.mp4")

	        transcription_progress_status[video_id] = {"status": "started", "percent": 10}

	        # Check if AWS credentials are available
	        if not os.environ.get("AWS_ACCESS_KEY_ID") or not os.environ.get("AWS_SECRET_ACCESS_KEY"):
	            logger.warning("AWS credentials not found. Transcription will not work properly.")
	            transcription_progress_status[video_id] = {
	                "status": "error",
	                "percent": 0,
	                "message": "AWS credentials missing",
	            }
	            return

	        # Imported lazily so the module is only needed when a transcription runs.
	        from get_transcription_with_amazon import get_word_timestamps
	        word_timestamps = get_word_timestamps(video_path)

	        # Write to a temporary file in the same directory and rename it into place.
	        # A failure part-way through serialization therefore never leaves a
	        # truncated, non-empty file that the cache check above would accept.
	        fd, tmp_path = tempfile.mkstemp(dir=WORD_TIMESTAMPS_DIR, suffix=".tmp")
	        try:
	            with os.fdopen(fd, "w") as f:
	                json.dump(word_timestamps, f, indent=4)
	            os.replace(tmp_path, output_path)
	        except Exception:
	            if os.path.exists(tmp_path):
	                os.remove(tmp_path)
	            raise

	        transcription_progress_status[video_id] = {"status": "completed", "percent": 100}
	    except Exception as e:
	        logger.error(f"Error during transcription for {video_id}: {str(e)}")
	        transcription_progress_status[video_id] = {"status": "error", "percent": 0, "message": str(e)}


	# Authentication routes
	@app.route("/login")
	def login():
	    """Log the user in, for both local development and Hugging Face Spaces.

	    Locally a mock ``LocalDeveloper`` user is stored in the session. In a
	    Space, the ``X-Spaces-Username`` header must name an allowed user;
	    otherwise the request is redirected to ``/auth``.
	    """
	    logger.info(f"Login route called. Headers: {dict(request.headers)}")

	    if not is_hf_space:
	        # For local development
	        session["user"] = {"name": "LocalDeveloper", "is_mock": True}
	        return redirect(url_for("index"))

	    username = request.headers.get("X-Spaces-Username")
	    logger.info(f"Username from headers in login: {username}")

	    if not (username and is_allowed_user(username)):
	        # Hand over to the HF auth endpoint
	        return redirect("/auth")

	    session["user"] = {"name": username, "is_hf": True}
	    return redirect(url_for("index"))


	@app.route("/auth/callback")
	def auth_callback():
	    """Complete a Hugging Face login using the ``X-Spaces-Username`` header.

	    In a Space, an allowed username is stored in the session and the user is
	    redirected to the index; a missing or disallowed username renders the
	    error page. Outside a Space the request is redirected to the login view.
	    """
	    logger.info(f"Auth callback called. Headers: {dict(request.headers)}")

	    if is_hf_space:
	        # In Hugging Face Spaces, the user info is read from the request headers
	        username = request.headers.get("X-Spaces-Username")
	        if not username:
	            return render_template("error.html", message="Authentication failed. No username provided.")
	        # Apply the same allow-list as the other authentication paths so this
	        # route cannot be used to obtain a session for a disallowed user.
	        if not is_allowed_user(username):
	            return render_template("error.html", message="Authentication failed. User is not allowed.")
	        session["user"] = {"name": username, "is_hf": True}
	        return redirect(url_for("index"))
	    return redirect(url_for("login"))


	@app.route("/health")
	def health_check():
	    """Health check endpoint for container verification.

	    Returns:
	        JSON with a fixed ``"healthy"`` status, selected environment settings,
	        the keys of the current session, the HF/bypass flags, and whether the
	        video, annotation and temp directories exist.
	    """
	    env_vars = {
	        "FLASK_ENV": os.environ.get("FLASK_ENV", "production"),
	        "DEBUG": os.environ.get("DEBUG", "Not set"),
	        "SPACE_ID": os.environ.get("SPACE_ID", "Not set"),
	        "BYPASS_AUTH": os.environ.get("BYPASS_AUTH", "Not set"),
	        # This endpoint is exempt from the authentication check and its payload
	        # is also logged, so report only WHETHER a secret is configured —
	        # never any part of its value.
	        "SECRET_KEY": "Set" if os.environ.get("SECRET_KEY") else "Not set",
	        "S3_BUCKET": os.environ.get("S3_BUCKET", "Not set"),
	        "S3_VIDEO_PREFIX": os.environ.get("S3_VIDEO_PREFIX", "Not set"),
	        "USE_S3_FOR_VIDEOS": os.environ.get("USE_S3_FOR_VIDEOS", "Not set"),
	    }

	    logger.info(f"Health check called. Environment: {env_vars}")

	    # Only the session's key names are exposed, not its values.
	    session_keys = list(session.keys()) if session else []

	    return jsonify({
	        "status": "healthy",
	        "environment": env_vars,
	        "session_keys": session_keys,
	        "is_hf_space": is_hf_space,
	        "bypass_auth": bypass_auth,
	        "directories": {
	            "videos": os.path.exists(VIDEO_DIR),
	            "annotations": os.path.exists(ANNOTATIONS_DIR),
	            "temp": os.path.exists(TEMP_DIR),
	        },
	    })


	@app.route("/auth")
	def auth():
	    """Authenticate the current request and redirect to the index on success.

	    Order of checks:
	        1. If the module-level ``bypass_auth`` flag (``BYPASS_AUTH=true``) is
	           set, a default user is stored in the session.
	        2. In a Hugging Face Space, the ``X-Spaces-Username`` header must name
	           an allowed user.
	        3. Outside a Space, a mock ``LocalDeveloper`` user is stored.
	        4. Otherwise the error page asks the user to authenticate.
	    """
	    logger.info(f"Auth route called. Headers: {dict(request.headers)}")

	    # Honor the configured BYPASS_AUTH flag. A hard-coded ``bypass_auth = True``
	    # here would authenticate every visitor as the default user and make all of
	    # the checks below unreachable.
	    if bypass_auth:
	        logger.info("Auth bypass enabled, setting default user")
	        session["user"] = {"name": "Perilon", "is_hf": True}
	        return redirect(url_for("index"))

	    # Normal authentication logic
	    username = request.headers.get("X-Spaces-Username")
	    logger.info(f"Username from headers in auth: {username}")

	    if is_hf_space and username and is_allowed_user(username):
	        logger.info(f"Setting user in session: {username}")
	        session["user"] = {"name": username, "is_hf": True}
	        return redirect(url_for("index"))
	    elif not is_hf_space:
	        # For local development
	        session["user"] = {"name": "LocalDeveloper", "is_mock": True}
	        return redirect(url_for("index"))
	    else:
	        # For HF with no valid username yet
	        return render_template(
	            "error.html",
	            message=(
	                "Waiting for Hugging Face authentication. If you continue to see this message, "
	                "please make sure you're logged into Hugging Face and your username is allowed."
	            ),
	        )


	@app.before_request
	def check_auth():
	    """Check authentication before every request is dispatched.

	    Returns ``None`` to let the request proceed, or a redirect response to an
	    authentication route when no user can be established.
	    """
	    # Skip authentication for certain routes and static files
	    if request.path in ["/login", "/logout", "/auth", "/auth/callback", "/debug", "/health"] or request.path.startswith("/static/"):
	        return

	    # Log all request paths to help troubleshoot
	    logger.debug(f"Request path: {request.path}, User in session: {'user' in session}")

	    # Honor the configured BYPASS_AUTH flag. A hard-coded ``bypass_auth = True``
	    # here would disable authentication for every request and make the checks
	    # below unreachable.
	    if bypass_auth:
	        # Set default user for bypass mode if not already set
	        if "user" not in session:
	            session["user"] = {"name": "Perilon", "is_hf": True}
	        return

	    if is_hf_space:
	        # Check for HF username header
	        username = request.headers.get("X-Spaces-Username")

	        if "user" in session:
	            logger.debug(f"User in session: {session['user']}")
	            return

	        if username and is_allowed_user(username):
	            logger.info(f"Setting user from headers: {username}")
	            session["user"] = {"name": username, "is_hf": True}
	            return

	        # No valid user in session or headers
	        logger.info("No authenticated user, redirecting to /auth")
	        return redirect("/auth")
	    elif "user" not in session:
	        return redirect(url_for("login"))


	@app.route("/logout")
	def logout():
	    """Wipe the whole session, then redirect to the matching login entry point."""
	    session.clear()
	    # Spaces use the "/auth/logout" path; local runs go back to the login view.
	    destination = "/auth/logout" if is_hf_space else url_for("login")
	    return redirect(destination)


	@app.route("/debug")
	def debug_info():
	    """Return a JSON dump of session, request and configuration state for debugging."""
	    session_cookie_keys = [
	        "SESSION_COOKIE_SECURE",
	        "SESSION_COOKIE_HTTPONLY",
	        "SESSION_COOKIE_SAMESITE",
	        "PERMANENT_SESSION_LIFETIME",
	    ]
	    cookie_values = {name: request.cookies.get(name) for name in request.cookies.keys()}
	    # Only the session-related settings are reported, stringified for JSON.
	    session_settings = {
	        key: str(value)
	        for key, value in app.config.items()
	        if key in session_cookie_keys
	    }
	    s3_settings = {
	        "S3_BUCKET": S3_BUCKET,
	        "S3_VIDEO_PREFIX": S3_VIDEO_PREFIX,
	        "USE_S3_FOR_VIDEOS": USE_S3_FOR_VIDEOS,
	    }

	    return jsonify({
	        "session": dict(session) if session else None,
	        "headers": dict(request.headers),
	        "cookies": cookie_values,
	        "is_hf_space": is_hf_space,
	        "allowed_users": os.getenv("ALLOWED_USERS", "Perilon"),
	        "app_config": session_settings,
	        "s3_config": s3_settings,
	    })


	# Main application routes
	@app.route("/")
	@login_required
	def index():
	    """Landing route: forward the user to the video selection page."""
	    selection_url = url_for("select_video")
	    return redirect(selection_url)


	@app.route("/select_video")
	@login_required
	def select_video():
	    """Render the page on which a video is chosen for annotation.

	    Video ids come from S3 when ``USE_S3_FOR_VIDEOS`` is set, otherwise from
	    the ``.mp4`` files in ``VIDEO_DIR`` (extension removed).
	    """
	    if USE_S3_FOR_VIDEOS:
	        video_ids = list_s3_videos()
	    elif not os.path.exists(VIDEO_DIR):
	        return render_template("error.html", message="Video directory not found.")
	    else:
	        video_ids = [
	            os.path.splitext(name)[0]
	            for name in os.listdir(VIDEO_DIR)
	            if name.endswith(".mp4")
	        ]

	    return render_template("select_video.html", video_ids=video_ids, user=session.get("user"))


	@app.route("/player/<video_id>")
	@login_required
	def player(video_id):
	    """Render the annotation player for ``video_id``."""
	    current_user = session.get("user")
	    return render_template("player.html", video_id=video_id, user=current_user)


	@app.route("/videos")
	@login_required
	def get_videos():
	    """API endpoint listing the available video file names as JSON.

	    Responds with 404 and an ``error`` message when the source (S3 or the local
	    video directory) is missing or holds no videos.
	    """
	    if not USE_S3_FOR_VIDEOS:
	        # Local storage: list video files straight from VIDEO_DIR.
	        if not os.path.exists(VIDEO_DIR):
	            return jsonify({"error": "Video directory not found"}), 404
	        local_files = [
	            name for name in os.listdir(VIDEO_DIR)
	            if name.endswith((".mp4", ".avi", ".mov"))
	        ]
	        if not local_files:
	            return jsonify({"error": "No videos found"}), 404
	        return jsonify(local_files)

	    s3_ids = list_s3_videos()
	    if not s3_ids:
	        return jsonify({"error": "No videos found in S3"}), 404
	    # S3 ids carry no extension; add ".mp4" back for compatibility.
	    return jsonify([f"{vid}.mp4" for vid in s3_ids])


	@app.route("/video/<path:filename>")
	@login_required
	def serve_video(filename):
	    """Serve a video file from S3 or local storage.

	    With S3 enabled the client is redirected to a presigned URL; if none can be
	    generated, the video is downloaded into ``VIDEO_DIR`` and served from
	    there. Without S3 the file is served directly from ``VIDEO_DIR``.
	    Responds with 404 JSON when the video cannot be found.
	    """
	    video_id = os.path.splitext(filename)[0]  # Remove extension

	    if USE_S3_FOR_VIDEOS:
	        # Option 1: Generate a presigned URL and redirect
	        presigned_url = generate_presigned_url(video_id)
	        if presigned_url:
	            return redirect(presigned_url)

	        # Option 2 (fallback): Download from S3 to local storage and serve
	        local_path = download_video_from_s3(video_id)
	        if local_path and os.path.exists(local_path):
	            # The download is always stored as "<video_id>.mp4"; serve that file
	            # rather than the requested name, whose extension may differ.
	            return send_from_directory(VIDEO_DIR, f"{video_id}.mp4")

	        return jsonify({"error": "Video not found in S3"}), 404
	    else:
	        # Original local file behavior
	        if not os.path.exists(os.path.join(VIDEO_DIR, filename)):
	            return jsonify({"error": "Video not found"}), 404
	        return send_from_directory(VIDEO_DIR, filename)


	@app.route("/save_annotations", methods=["POST"])
	@login_required
	def save_annotations():
	    """Save the boundary timestamps posted for a video.

	    Expects a JSON object with ``video`` (video id, a bare name) and
	    ``timestamps`` (a sortable collection). The annotation is written to
	    ``<video>_annotations.json`` in ``ANNOTATIONS_DIR`` with the timestamps
	    sorted, the current time, and the session user's name.

	    Returns:
	        ``{"success": true, ...}`` on success, or a 400 response with
	        ``{"success": false, "message": "Invalid data"}`` for malformed input.
	    """
	    invalid = (jsonify({"success": False, "message": "Invalid data"}), 400)

	    # silent=True yields None for a missing/invalid JSON body, so every
	    # malformed request ends in this endpoint's own 400 response.
	    data = request.get_json(silent=True)
	    if not isinstance(data, dict) or "video" not in data or "timestamps" not in data:
	        return invalid

	    # The id becomes part of a file name: accept bare names only so the write
	    # can never land outside ANNOTATIONS_DIR.
	    video = data["video"]
	    if (
	        not isinstance(video, str)
	        or not video
	        or video in (".", "..")
	        or "\\" in video
	        or os.path.basename(video) != video
	    ):
	        return invalid

	    try:
	        timestamps = sorted(data["timestamps"])
	    except TypeError:
	        # Not iterable, or elements that cannot be ordered against each other.
	        return invalid

	    annotation_file = os.path.join(ANNOTATIONS_DIR, f"{video}_annotations.json")
	    annotation_data = {
	        "video_name": video + ".mp4",
	        "timestamps": timestamps,
	        "annotation_date": datetime.now().isoformat(),
	        "annotated_by": session.get("user", {}).get("name", "unknown"),
	    }
	    with open(annotation_file, "w") as f:
	        json.dump(annotation_data, f, indent=4)
	    return jsonify({"success": True, "message": "Annotations saved successfully"})


	@app.route("/get_annotations/<path:video_name>")
	@login_required
	def get_annotations(video_name):
	    """Return the stored annotation JSON for ``video_name`` (404 if there is none)."""
	    annotation_path = os.path.join(ANNOTATIONS_DIR, f"{video_name}_annotations.json")
	    if os.path.exists(annotation_path):
	        with open(annotation_path, "r") as handle:
	            stored = json.load(handle)
	        return jsonify(stored)
	    return jsonify({"error": "No annotations found"}), 404


	@app.route("/alignment/<video_id>")
	@login_required
	def alignment_mode(video_id):
	    """Render the page for aligning sign-language clips with transcribed text.

	    Requires an existing annotation file for ``video_id``; the clip count passed
	    to the template is the number of stored timestamps minus one.
	    """
	    annotation_path = os.path.join(ANNOTATIONS_DIR, f"{video_id}_annotations.json")
	    if not os.path.exists(annotation_path):
	        return render_template("error.html", message="No annotations found for this video. Please annotate the video first.")

	    with open(annotation_path, "r") as handle:
	        stored = json.load(handle)

	    # N boundary timestamps delimit N - 1 clips.
	    clip_count = len(stored["timestamps"]) - 1
	    return render_template(
	        "alignment.html",
	        video_id=video_id,
	        total_clips=clip_count,
	        user=session.get("user"),
	    )


	@app.route("/api/transcript/<video_id>")
	@login_required
	def get_transcript(video_id):
	    """Return the transcript text and per-word timings for ``video_id``.

	    Reads ``<video_id>_word_timestamps.json``; each entry must provide
	    ``punctuated_word``, ``start_time`` and ``end_time``. Responds with 404 when
	    the file is missing and 500 when it cannot be processed.
	    """
	    source_path = os.path.join(WORD_TIMESTAMPS_DIR, f"{video_id}_word_timestamps.json")
	    logger.info(f"Attempting to load word timestamps from: {source_path}")

	    if not os.path.exists(source_path):
	        logger.warning(f"Word timestamps file not found: {source_path}")
	        not_found = {
	            "status": "error",
	            "message": "No word timestamps found for this video"
	        }
	        return jsonify(not_found), 404

	    try:
	        with open(source_path, "r") as handle:
	            entries = json.load(handle)

	        # Full transcript: the punctuated words joined by single spaces.
	        full_text = " ".join(entry["punctuated_word"] for entry in entries)

	        # Per-word timings, with start/end converted to float.
	        timed_words = []
	        for entry in entries:
	            timed_words.append({
	                "word": entry["punctuated_word"],
	                "start": float(entry["start_time"]),
	                "end": float(entry["end_time"])
	            })

	        logger.info(f"Successfully created transcript ({len(full_text)} characters)")
	        return jsonify({
	            "status": "success",
	            "text": full_text,
	            "words": timed_words
	        })
	    except Exception as e:
	        logger.error(f"Error processing word timestamps: {str(e)}")
	        failure = {
	            "status": "error",
	            "message": f"Error processing word timestamps: {str(e)}"
	        }
	        return jsonify(failure), 500


	@app.route("/api/word_timestamps/<video_id>")
	@login_required
	def get_word_timestamps(video_id):
	    """Return the raw word-level timestamp entries stored for ``video_id``.

	    Responds with 404 when ``<video_id>_word_timestamps.json`` is missing and
	    500 when it cannot be read or parsed.
	    """
	    source_path = os.path.join(WORD_TIMESTAMPS_DIR, f"{video_id}_word_timestamps.json")
	    logger.info(f"Attempting to load word timestamps from: {source_path}")

	    if not os.path.exists(source_path):
	        logger.warning(f"Word timestamps file not found: {source_path}")
	        not_found = {
	            "status": "error",
	            "message": "No word timestamps found for this video"
	        }
	        return jsonify(not_found), 404

	    try:
	        with open(source_path, "r") as handle:
	            entries = json.load(handle)
	        logger.info(f"Successfully loaded {len(entries)} word timestamps")
	        return jsonify({
	            "status": "success",
	            "words": entries
	        })
	    except Exception as e:
	        logger.error(f"Error processing word timestamps: {str(e)}")
	        failure = {
	            "status": "error",
	            "message": f"Error processing word timestamps: {str(e)}"
	        }
	        return jsonify(failure), 500


	@app.route("/api/clips/<video_id>")
	@login_required
	def get_video_clips(video_id):
	    """Return the clip list derived from the annotations of ``video_id``.

	    Each pair of consecutive timestamps becomes one clip with its ``index``,
	    ``start``, ``end`` and the ``/clip/<video_id>/<index>`` path.

	    Returns:
	        ``{"status": "success", "clips": [...]}``; a 404 error response when no
	        annotation file exists; a 500 error response for any other failure.
	    """
	    annotation_file = os.path.join(ANNOTATIONS_DIR, f"{video_id}_annotations.json")
	    # A missing annotation file is a "not found" condition, not a server error:
	    # answer 404 like the other annotation/timestamp endpoints do.
	    if not os.path.exists(annotation_file):
	        logger.warning(f"Error getting clips: Annotations not found for {video_id}")
	        return jsonify({
	            "status": "error",
	            "message": "Annotations not found"
	        }), 404

	    try:
	        with open(annotation_file, "r") as f:
	            annotations = json.load(f)
	        timestamps = annotations["timestamps"]
	        clips = [
	            {
	                "index": i,
	                "start": start,
	                "end": end,
	                "path": f"/clip/{video_id}/{i}"
	            }
	            for i, (start, end) in enumerate(zip(timestamps, timestamps[1:]))
	        ]
	        return jsonify({
	            "status": "success",
	            "clips": clips
	        })
	    except Exception as e:
	        logger.error(f"Error getting clips: {str(e)}")
	        return jsonify({
	            "status": "error",
	            "message": str(e)
	        }), 500


	@app.route("/clip/<video_id>/<int:clip_index>")
	@login_required
	def serve_clip(video_id, clip_index):
	    """Send the extracted clip ``clip_index`` of ``video_id`` as ``video/mp4``.

	    Clips are looked up in ``TEMP_DIR`` under
	    ``<video_id>_clip_<index, zero-padded to 3 digits>.mp4``; a missing file
	    yields a 404 JSON error.
	    """
	    clip_name = f"{video_id}_clip_{clip_index:03d}.mp4"
	    clip_path = os.path.join(TEMP_DIR, clip_name)
	    logger.info(f"Attempting to serve clip: {clip_path}")

	    if os.path.exists(clip_path):
	        return send_file(clip_path, mimetype="video/mp4")

	    logger.error(f"Clip not found: {clip_path}")
	    missing = {
	        "status": "error",
	        "message": "Clip not found"
	    }
	    return jsonify(missing), 404


	@app.route("/api/save_alignments", methods=["POST"])
	@login_required
	def save_alignments():
	    """Save the alignment list posted for a video.

	    Expects a JSON object with ``video_id`` (a bare name) and ``alignments``
	    (a list). Every non-empty alignment object is stamped with the session
	    user's name under ``aligned_by`` and the list is written to
	    ``<video_id>.json`` in ``ALIGNMENTS_DIR``.

	    Returns:
	        ``{"success": true, ...}`` on success, a 400 response for malformed
	        input, or a 500 response carrying the error message on failure.
	    """
	    try:
	        # silent=True yields None for a missing/invalid JSON body, so malformed
	        # requests get this endpoint's own 400 response.
	        data = request.get_json(silent=True)
	        if not isinstance(data, dict) or "video_id" not in data or "alignments" not in data:
	            return jsonify({"success": False, "message": "Invalid data"}), 400

	        # The id becomes part of a file name: accept bare names only so the
	        # write can never land outside ALIGNMENTS_DIR.
	        video_id = data["video_id"]
	        alignments = data["alignments"]
	        if (
	            not isinstance(video_id, str)
	            or not video_id
	            or video_id in (".", "..")
	            or "\\" in video_id
	            or os.path.basename(video_id) != video_id
	            or not isinstance(alignments, list)
	        ):
	            return jsonify({"success": False, "message": "Invalid data"}), 400

	        # Add user information to the alignments. Empty placeholders and entries
	        # that are not JSON objects are stored untouched.
	        aligned_by = session.get("user", {}).get("name", "unknown")
	        for alignment in alignments:
	            if alignment and isinstance(alignment, dict):
	                alignment["aligned_by"] = aligned_by

	        output_path = os.path.join(ALIGNMENTS_DIR, f"{video_id}.json")
	        with open(output_path, "w") as f:
	            json.dump(alignments, f, indent=2)
	        return jsonify({
	            "success": True,
	            "message": "Alignments saved successfully"
	        })
	    except Exception as e:
	        logger.error(f"Error saving alignments: {str(e)}")
	        return jsonify({
	            "success": False,
	            "message": str(e)
	        }), 500


	@app.route("/api/extract_clips/<video_id>")
	@login_required
	def extract_clips_for_video(video_id):
	    """Kick off clip extraction and transcription for ``video_id`` in background threads.

	    With S3 enabled the video is downloaded first (404 JSON on failure). Each
	    job is started only while its recorded progress is below 100 percent.
	    Always answers ``{"status": "started"}`` otherwise.
	    """
	    # If using S3, ensure the video is downloaded first
	    if USE_S3_FOR_VIDEOS and not download_video_from_s3(video_id):
	        failure = {
	            "status": "error",
	            "message": f"Failed to download video {video_id} from S3"
	        }
	        return jsonify(failure), 404

	    extraction_percent = clip_extraction_status.get(video_id, {}).get("percent", 0)
	    if extraction_percent < 100:
	        threading.Thread(target=run_clip_extraction, args=(video_id,)).start()

	    # A missing record counts as 0 percent, i.e. "not done yet".
	    transcription_percent = transcription_progress_status.get(video_id, {}).get("percent", 0)
	    if transcription_percent < 100:
	        threading.Thread(target=run_transcription, args=(video_id,)).start()

	    return jsonify({"status": "started"})


	@app.route("/api/clip_progress/<video_id>")
	@login_required
	def clip_progress(video_id):
	    """Return the recorded clip-extraction progress for ``video_id`` as JSON.

	    An all-zero progress record is returned when nothing has been recorded yet.
	    """
	    not_started = {"current": 0, "total": 0, "percent": 0}
	    return jsonify(clip_extraction_status.get(video_id, not_started))


	@app.route("/api/transcription_progress/<video_id>")
	@login_required
	def transcription_progress(video_id):
	    """Return the recorded transcription progress for ``video_id`` as JSON.

	    A ``"not started"`` record at 0 percent is returned when nothing has been
	    recorded yet.
	    """
	    not_started = {"status": "not started", "percent": 0}
	    return jsonify(transcription_progress_status.get(video_id, not_started))


	if __name__ == "__main__":
	    try:
	        # Print diagnostic information
	        print("=" * 50)
	        print("Starting app with configuration:")
	        print(f"- Running in HF Space: {is_hf_space}")
	        print(f"- Auth bypass: {bypass_auth}")
	        print(f"- Port: {os.getenv('PORT', 7860)}")
	        print(f"- S3 for videos: {USE_S3_FOR_VIDEOS}")
	        print(f"- S3 bucket: {S3_BUCKET}")
	        print(f"- S3 prefix: {S3_VIDEO_PREFIX}")
	        print(f"- Available videos: {os.listdir(VIDEO_DIR) if os.path.exists(VIDEO_DIR) else 'None'}")
	        if USE_S3_FOR_VIDEOS:
	            try:
	                s3_videos = list_s3_videos()
	                print(f"- Available S3 videos: {s3_videos if s3_videos else 'None'}")
	            except Exception as e:
	                print(f"- Error listing S3 videos: {str(e)}")
	        print("=" * 50)

	        port = int(os.getenv("PORT", 7860))
	        # The server binds to all interfaces, so debug mode (which enables the
	        # interactive debugger) must be an explicit opt-in via DEBUG=true
	        # rather than always on.
	        debug_mode = os.getenv("DEBUG", "false").lower() == "true"
	        app.run(host="0.0.0.0", port=port, debug=debug_mode)
	    except Exception as e:
	        print(f"Error starting the application: {e}")
	        import traceback
	        traceback.print_exc()