Gresekxnol committed
Commit 04536c6 · verified · 1 Parent(s): fbf187a

Create app.py

Files changed (1)
  1. app.py +457 -0
app.py ADDED
@@ -0,0 +1,457 @@
import gradio as gr
import whisper
import cv2
import numpy as np
import moviepy.editor as mp
from transformers import pipeline
import re
import os
import shutil
import tempfile
from typing import List, Dict
import librosa

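# Note: whisper and moviepy both shell out to the ffmpeg binary at runtime,
# and mp.TextClip(method='caption') below relies on ImageMagick; neither is
# installed by pip, so both must be provided by the host system.
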
class AIVideoClipper:
    def __init__(self):
        # Initialize models
        print("Loading models...")
        self.whisper_model = whisper.load_model("base")  # base model for free tier
        self.sentiment_analyzer = pipeline(
            "sentiment-analysis",
            model="cardiffnlp/twitter-roberta-base-sentiment-latest",
        )
        self.emotion_analyzer = pipeline(
            "text-classification",
            model="j-hartmann/emotion-english-distilroberta-base",
        )

        # Viral keywords and patterns
        self.viral_keywords = [
            "wow", "amazing", "incredible", "unbelievable", "shocking", "surprise",
            "secret", "trick", "hack", "tip", "mistake", "fail", "success",
            "breakthrough", "discovery", "reveal", "expose", "truth", "lie",
            "before", "after", "transformation", "change", "upgrade", "improve",
            "money", "rich", "poor", "expensive", "cheap", "free", "save",
            "love", "hate", "angry", "happy", "sad", "funny", "laugh", "cry",
            "first time", "last time", "never", "always", "everyone", "nobody",
            "finally", "suddenly", "immediately", "instantly", "quickly"
        ]

        self.hook_patterns = [
            r"you won't believe",
            r"this will change",
            r"nobody talks about",
            r"the truth about",
            r"what happens when",
            r"here's what",
            r"this is why",
            r"the secret",
            r"watch this",
            r"wait for it"
        ]

    def extract_audio_features(self, audio_path: str) -> Dict:
        """Extract audio features for engagement analysis"""
        y, sr = librosa.load(audio_path)

        # Extract features
        tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
        spectral_centroids = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
        spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)[0]
        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)

        return {
            'tempo': float(tempo),
            'spectral_centroid_mean': float(np.mean(spectral_centroids)),
            'spectral_rolloff_mean': float(np.mean(spectral_rolloff)),
            'mfcc_mean': float(np.mean(mfccs)),
            'energy_variance': float(np.var(librosa.feature.rms(y=y)[0]))
        }

    def transcribe_video(self, video_path: str) -> List[Dict]:
        """Transcribe video and return segments with timestamps"""
        print("Transcribing video...")
        result = self.whisper_model.transcribe(video_path, word_timestamps=True)

        segments = []
        for segment in result["segments"]:
            segments.append({
                'start': segment['start'],
                'end': segment['end'],
                'text': segment['text'].strip(),
                'words': segment.get('words', [])
            })

        return segments

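    # Each returned segment has the shape (values illustrative):
    #   {'start': 12.4, 'end': 17.9, 'text': "so here's the trick",
    #    'words': [{'word': 'so', 'start': 12.4, 'end': 12.6}, ...]}
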
    def calculate_virality_score(self, text: str, audio_features: Dict,
                                 segment_duration: float) -> float:
        """Calculate virality score for a text segment"""
        score = 0.0
        text_lower = text.lower()

        # Sentiment analysis (this model emits lowercase labels:
        # 'positive' / 'neutral' / 'negative'); truncate long clips to the
        # model's 512-token limit
        sentiment = self.sentiment_analyzer(text, truncation=True)[0]
        if sentiment['label'].lower() == 'positive' and sentiment['score'] > 0.8:
            score += 2.0
        elif sentiment['label'].lower() == 'negative' and sentiment['score'] > 0.8:
            score += 1.5

        # Emotion analysis (this model's label set is: anger, disgust, fear,
        # joy, neutral, sadness, surprise; it has no 'excitement' class)
        emotion = self.emotion_analyzer(text, truncation=True)[0]
        high_engagement_emotions = ['surprise', 'anger', 'joy']
        if emotion['label'].lower() in high_engagement_emotions and emotion['score'] > 0.7:
            score += 2.0

        # Viral keywords
        for keyword in self.viral_keywords:
            if keyword in text_lower:
                score += 1.0

        # Hook patterns
        for pattern in self.hook_patterns:
            if re.search(pattern, text_lower):
                score += 3.0

        # Audio engagement features
        if audio_features['tempo'] > 120:  # higher tempo = more engaging
            score += 1.0
        if audio_features['energy_variance'] > 0.01:  # energy variation
            score += 1.0

        # Segment duration (30-60 seconds ideal for clips)
        if 25 <= segment_duration <= 65:
            score += 2.0
        elif 15 <= segment_duration <= 90:
            score += 1.0

        # Text length (not too short, not too long)
        word_count = len(text.split())
        if 20 <= word_count <= 100:
            score += 1.0

        return min(score, 10.0)  # cap at 10

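    # Worked example (illustrative): a 30 s candidate whose text contains
    # "you won't believe" (+3.0 hook) plus "amazing" and "trick" (+1.0 each),
    # sits in the 25-65 s band (+2.0) and has 20-100 words (+1.0) scores
    # 8.0 before any sentiment, emotion, or audio bonuses, capped at 10.0.
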
    def find_best_moments(self, segments: List[Dict], audio_features: Dict,
                          clip_duration: int = 30) -> List[Dict]:
        """Find the best moments for short clips"""
        print("Analyzing segments for viral potential...")

        scored_segments = []

        for i, segment in enumerate(segments):
            # Group segments into potential clips
            clip_segments = [segment]
            current_duration = segment['end'] - segment['start']

            # Extend clip to reach desired duration
            j = i + 1
            while j < len(segments) and current_duration < clip_duration:
                next_segment = segments[j]
                if next_segment['end'] - segment['start'] <= clip_duration * 1.5:
                    clip_segments.append(next_segment)
                    current_duration = next_segment['end'] - segment['start']
                    j += 1
                else:
                    break

            # Calculate combined text and virality score
            combined_text = " ".join([s['text'] for s in clip_segments])
            virality_score = self.calculate_virality_score(
                combined_text, audio_features, current_duration
            )

            scored_segments.append({
                'start': segment['start'],
                'end': clip_segments[-1]['end'],
                'text': combined_text,
                'duration': current_duration,
                'virality_score': virality_score,
                'segments': clip_segments
            })

        # Sort by virality score and remove overlaps
        scored_segments.sort(key=lambda x: x['virality_score'], reverse=True)

        # Remove overlapping segments
        final_segments = []
        for segment in scored_segments:
            overlap = False
            for existing in final_segments:
                if (segment['start'] < existing['end'] and
                        segment['end'] > existing['start']):
                    overlap = True
                    break
            if not overlap:
                final_segments.append(segment)
                if len(final_segments) >= 5:  # limit to top 5 clips
                    break

        return final_segments

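    # The overlap removal above is a greedy pass: candidates are visited in
    # descending score order and kept only if they intersect no already-kept
    # clip, so the highest-scoring moment in any overlapping cluster wins.
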
    def add_emojis_to_text(self, text: str) -> str:
        """Add relevant emojis to text based on content"""
        emoji_map = {
            'money': '💰', 'rich': '💰', 'dollar': '💵',
            'love': '❤️', 'heart': '❤️', 'like': '👍',
            'fire': '🔥', 'hot': '🔥', 'amazing': '🔥',
            'laugh': '😂', 'funny': '😂', 'lol': '😂',
            'wow': '😱', 'omg': '😱', 'shocking': '😱',
            'cool': '😎', 'awesome': '😎', 'great': '😎',
            'think': '🤔', 'question': '❓', 'why': '🤔',
            'warning': '⚠️', 'careful': '⚠️', 'danger': '⚠️',
            'success': '✅', 'win': '🏆', 'winner': '🏆',
            'music': '🎵', 'song': '🎵', 'sound': '🔊'
        }

        # Substitute each unique word once; \g<0> keeps the original casing
        # and avoids stacking duplicate emojis when a word repeats.
        for word in set(text.lower().split()):
            clean_word = re.sub(r'[^\w]', '', word)
            if clean_word in emoji_map:
                text = re.sub(rf"\b{re.escape(clean_word)}\b",
                              rf"\g<0> {emoji_map[clean_word]}",
                              text, flags=re.IGNORECASE)

        return text

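    # Example (illustrative): "I love this amazing hack"
    #   -> "I love ❤️ this amazing 🔥 hack"
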
    def create_clip(self, video_path: str, start_time: float, end_time: float,
                    text: str, output_path: str, add_subtitles: bool = True) -> str:
        """Create a short clip from the video"""
        print(f"Creating clip: {start_time:.1f}s - {end_time:.1f}s")

        # Load video
        video = mp.VideoFileClip(video_path).subclip(start_time, end_time)

        # Resize to 9:16 aspect ratio (1080x1920)
        target_width = 1080
        target_height = 1920

        # Calculate scaling to fit the video in the frame
        scale_w = target_width / video.w
        scale_h = target_height / video.h
        scale = min(scale_w, scale_h)

        # Resize video
        video_resized = video.resize(scale)

        # Create background (blur or solid color)
        if video_resized.h < target_height or video_resized.w < target_width:
            # Create blurred background
            background = video.resize((target_width, target_height))
            background = background.fl_image(lambda frame: cv2.GaussianBlur(frame, (21, 21), 0))

            # Overlay the main video in center
            final_video = mp.CompositeVideoClip([
                background,
                video_resized.set_position('center')
            ], size=(target_width, target_height))
        else:
            final_video = video_resized

        # Add subtitles if requested
        if add_subtitles and text:
            # Add emojis to text
            text_with_emojis = self.add_emojis_to_text(text)

            # Create text clip
            txt_clip = mp.TextClip(
                text_with_emojis,
                fontsize=60,
                color='white',
                stroke_color='black',
                stroke_width=3,
                size=(target_width - 100, None),
                method='caption'
            ).set_position(('center', 0.8), relative=True).set_duration(final_video.duration)

            final_video = mp.CompositeVideoClip([final_video, txt_clip])

        # Write the final video
        final_video.write_videofile(
            output_path,
            codec='libx264',
            audio_codec='aac',
            temp_audiofile='temp-audio.m4a',
            remove_temp=True,
            fps=30,
            preset='ultrafast'  # faster encoding for free tier
        )

        # Clean up
        video.close()
        final_video.close()

        return output_path

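    # Sizing example (illustrative): a 1920x1080 landscape source gives
    # scale = min(1080/1920, 1920/1080) = 0.5625, i.e. a roughly 1080x608
    # clip centered over the blurred, stretched 1080x1920 background.
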
def process_video(video_file, clip_duration, num_clips, add_subtitles):
    """Main function to process video and create clips"""
    if video_file is None:
        return "Please upload a video file.", [], []

    clipper = AIVideoClipper()

    try:
        # Create temporary directory
        with tempfile.TemporaryDirectory() as temp_dir:
            # gr.File(type="filepath") passes a plain path string; fall back
            # to .name for file-object style values from older Gradio versions
            video_path = video_file if isinstance(video_file, str) else video_file.name

            # Extract audio features
            print("Extracting audio features...")
            audio_features = clipper.extract_audio_features(video_path)

            # Transcribe video
            segments = clipper.transcribe_video(video_path)
            if not segments:
                return "Could not transcribe video. Please check the audio quality.", [], []

            # Find best moments
            best_moments = clipper.find_best_moments(segments, audio_features, clip_duration)
            best_moments = best_moments[:num_clips]  # limit to requested number

            if not best_moments:
                return "No suitable clips found. Try adjusting parameters.", [], []

            # Create clips
            output_videos = []
            clip_info = []

            for i, moment in enumerate(best_moments):
                output_path = os.path.join(temp_dir, f"clip_{i+1}.mp4")

                try:
                    clipper.create_clip(
                        video_path,
                        moment['start'],
                        moment['end'],
                        moment['text'],
                        output_path,
                        add_subtitles
                    )

                    # Move out of the temp dir before it is deleted;
                    # shutil.move also works across filesystems, where
                    # os.rename would fail
                    permanent_path = f"clip_{i+1}_{hash(video_path)}_{i}.mp4"
                    shutil.move(output_path, permanent_path)

                    output_videos.append(permanent_path)
                    clip_info.append({
                        'clip_number': i + 1,
                        'start_time': f"{moment['start']:.1f}s",
                        'end_time': f"{moment['end']:.1f}s",
                        'duration': f"{moment['duration']:.1f}s",
                        'virality_score': f"{moment['virality_score']:.2f}/10",
                        'text_preview': moment['text'][:100] + "..." if len(moment['text']) > 100 else moment['text']
                    })

                except Exception as e:
                    print(f"Error creating clip {i+1}: {str(e)}")
                    continue

            success_msg = f"Successfully created {len(output_videos)} clips!"
            return success_msg, output_videos, clip_info

    except Exception as e:
        return f"Error processing video: {str(e)}", [], []

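# The three return values line up with the three outputs wired to the button
# below: a status string, a list of clip file paths for the gallery, and the
# per-clip metadata for the JSON panel.
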
# Create Gradio interface
def create_interface():
    with gr.Blocks(title="AI Video Clipper", theme=gr.themes.Soft()) as demo:
        gr.Markdown(
            """
            # 🎬 AI Video Clipper

            Transform your long videos into viral short clips automatically!
            Upload your video and let AI find the most engaging moments.

            **Features:**
            - 🤖 AI-powered moment detection
            - 📱 Auto 9:16 aspect ratio conversion
            - 📝 Automatic subtitles with emojis
            - 📊 Virality scoring
            - 🎯 Multi-language support
            """
        )

        with gr.Row():
            with gr.Column():
                video_input = gr.File(
                    label="Upload Video",
                    file_types=[".mp4", ".avi", ".mov", ".mkv", ".webm"],
                    type="filepath"
                )

                with gr.Row():
                    clip_duration = gr.Slider(
                        minimum=15,
                        maximum=90,
                        value=30,
                        step=5,
                        label="Target Clip Duration (seconds)"
                    )

                    num_clips = gr.Slider(
                        minimum=1,
                        maximum=5,
                        value=3,
                        step=1,
                        label="Number of Clips to Generate"
                    )

                add_subtitles = gr.Checkbox(
                    label="Add Subtitles with Emojis",
                    value=True
                )

                process_btn = gr.Button(
                    "🚀 Create Clips",
                    variant="primary",
                    size="lg"
                )

            with gr.Column():
                status_output = gr.Textbox(
                    label="Status",
                    interactive=False,
                    lines=2
                )

                clips_output = gr.Gallery(
                    label="Generated Clips",
                    show_label=True,
                    elem_id="gallery",
                    columns=1,
                    rows=3,
                    height="auto",
                    allow_preview=True,
                    show_download_button=True
                )

        with gr.Row():
            info_output = gr.JSON(
                label="Clip Analysis",
                visible=True
            )

        # Example videos section
        gr.Markdown("### 📺 Tips for Best Results:")
        gr.Markdown("""
        - Upload videos with clear speech (podcasts, interviews, tutorials work great!)
        - Longer videos (5+ minutes) provide more clip opportunities
        - Videos with engaging content and emotional moments score higher
        - Good audio quality improves transcription accuracy
        """)

        process_btn.click(
            process_video,
            inputs=[video_input, clip_duration, num_clips, add_subtitles],
            outputs=[status_output, clips_output, info_output]
        )

    return demo

# Launch the app
if __name__ == "__main__":
    demo = create_interface()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False
    )
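
# Headless usage sketch (illustrative; assumes a local video at the
# hypothetical path below and the same defaults wired into the UI):
#
#   status, clips, info = process_video(
#       "/path/to/talk.mp4",
#       clip_duration=30,
#       num_clips=3,
#       add_subtitles=True,
#   )
#   print(status)
#   for meta in info:
#       print(meta["clip_number"], meta["virality_score"], meta["text_preview"])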