Gresekxnol committed
Commit 04536c6 · verified · 1 Parent(s): fbf187a

Create app.py

Files changed (1)
  1. app.py +457 -0
app.py ADDED
@@ -0,0 +1,457 @@
import gradio as gr
import whisper
import cv2
import numpy as np
import moviepy.editor as mp
from transformers import pipeline
import re
import os
import shutil
import tempfile
from typing import List, Dict
import librosa

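# Note: whisper and moviepy both shell out to the ffmpeg binary at runtime,
# and mp.TextClip(method='caption') below relies on ImageMagick; neither is
# installed by pip, so both must be provided by the host system.
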
class AIVideoClipper:
    def __init__(self):
        # Initialize models
        print("Loading models...")
        self.whisper_model = whisper.load_model("base")  # base model for free tier
        self.sentiment_analyzer = pipeline(
            "sentiment-analysis",
            model="cardiffnlp/twitter-roberta-base-sentiment-latest",
        )
        self.emotion_analyzer = pipeline(
            "text-classification",
            model="j-hartmann/emotion-english-distilroberta-base",
        )

        # Viral keywords and patterns
        self.viral_keywords = [
            "wow", "amazing", "incredible", "unbelievable", "shocking", "surprise",
            "secret", "trick", "hack", "tip", "mistake", "fail", "success",
            "breakthrough", "discovery", "reveal", "expose", "truth", "lie",
            "before", "after", "transformation", "change", "upgrade", "improve",
            "money", "rich", "poor", "expensive", "cheap", "free", "save",
            "love", "hate", "angry", "happy", "sad", "funny", "laugh", "cry",
            "first time", "last time", "never", "always", "everyone", "nobody",
            "finally", "suddenly", "immediately", "instantly", "quickly"
        ]

        self.hook_patterns = [
            r"you won't believe",
            r"this will change",
            r"nobody talks about",
            r"the truth about",
            r"what happens when",
            r"here's what",
            r"this is why",
            r"the secret",
            r"watch this",
            r"wait for it"
        ]

    def extract_audio_features(self, audio_path: str) -> Dict:
        """Extract audio features for engagement analysis"""
        y, sr = librosa.load(audio_path)

        # Extract features
        tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
        spectral_centroids = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
        spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)[0]
        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)

        return {
            'tempo': float(tempo),
            'spectral_centroid_mean': float(np.mean(spectral_centroids)),
            'spectral_rolloff_mean': float(np.mean(spectral_rolloff)),
            'mfcc_mean': float(np.mean(mfccs)),
            'energy_variance': float(np.var(librosa.feature.rms(y=y)[0]))
        }

    def transcribe_video(self, video_path: str) -> List[Dict]:
        """Transcribe video and return segments with timestamps"""
        print("Transcribing video...")
        result = self.whisper_model.transcribe(video_path, word_timestamps=True)

        segments = []
        for segment in result["segments"]:
            segments.append({
                'start': segment['start'],
                'end': segment['end'],
                'text': segment['text'].strip(),
                'words': segment.get('words', [])
            })

        return segments

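    # Each returned segment has the shape (values illustrative):
    #   {'start': 12.4, 'end': 17.9, 'text': "so here's the trick",
    #    'words': [{'word': 'so', 'start': 12.4, 'end': 12.6}, ...]}
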
    def calculate_virality_score(self, text: str, audio_features: Dict,
                                 segment_duration: float) -> float:
        """Calculate virality score for a text segment"""
        score = 0.0
        text_lower = text.lower()

        # Sentiment analysis (this model emits lowercase labels:
        # 'positive' / 'neutral' / 'negative'); truncate long clips to the
        # model's 512-token limit
        sentiment = self.sentiment_analyzer(text, truncation=True)[0]
        if sentiment['label'].lower() == 'positive' and sentiment['score'] > 0.8:
            score += 2.0
        elif sentiment['label'].lower() == 'negative' and sentiment['score'] > 0.8:
            score += 1.5

        # Emotion analysis (this model's label set is: anger, disgust, fear,
        # joy, neutral, sadness, surprise; it has no 'excitement' class)
        emotion = self.emotion_analyzer(text, truncation=True)[0]
        high_engagement_emotions = ['surprise', 'anger', 'joy']
        if emotion['label'].lower() in high_engagement_emotions and emotion['score'] > 0.7:
            score += 2.0

        # Viral keywords
        for keyword in self.viral_keywords:
            if keyword in text_lower:
                score += 1.0

        # Hook patterns
        for pattern in self.hook_patterns:
            if re.search(pattern, text_lower):
                score += 3.0

        # Audio engagement features
        if audio_features['tempo'] > 120:  # higher tempo = more engaging
            score += 1.0
        if audio_features['energy_variance'] > 0.01:  # energy variation
            score += 1.0

        # Segment duration (30-60 seconds ideal for clips)
        if 25 <= segment_duration <= 65:
            score += 2.0
        elif 15 <= segment_duration <= 90:
            score += 1.0

        # Text length (not too short, not too long)
        word_count = len(text.split())
        if 20 <= word_count <= 100:
            score += 1.0

        return min(score, 10.0)  # cap at 10

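    # Worked example (illustrative): a 30 s candidate whose text contains
    # "you won't believe" (+3.0 hook) plus "amazing" and "trick" (+1.0 each),
    # sits in the 25-65 s band (+2.0) and has 20-100 words (+1.0) scores
    # 8.0 before any sentiment, emotion, or audio bonuses, capped at 10.0.
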
    def find_best_moments(self, segments: List[Dict], audio_features: Dict,
                          clip_duration: int = 30) -> List[Dict]:
        """Find the best moments for short clips"""
        print("Analyzing segments for viral potential...")

        scored_segments = []

        for i, segment in enumerate(segments):
            # Group segments into potential clips
            clip_segments = [segment]
            current_duration = segment['end'] - segment['start']

            # Extend clip to reach desired duration
            j = i + 1
            while j < len(segments) and current_duration < clip_duration:
                next_segment = segments[j]
                if next_segment['end'] - segment['start'] <= clip_duration * 1.5:
                    clip_segments.append(next_segment)
                    current_duration = next_segment['end'] - segment['start']
                    j += 1
                else:
                    break

            # Calculate combined text and virality score
            combined_text = " ".join([s['text'] for s in clip_segments])
            virality_score = self.calculate_virality_score(
                combined_text, audio_features, current_duration
            )

            scored_segments.append({
                'start': segment['start'],
                'end': clip_segments[-1]['end'],
                'text': combined_text,
                'duration': current_duration,
                'virality_score': virality_score,
                'segments': clip_segments
            })

        # Sort by virality score and remove overlaps
        scored_segments.sort(key=lambda x: x['virality_score'], reverse=True)

        # Remove overlapping segments
        final_segments = []
        for segment in scored_segments:
            overlap = False
            for existing in final_segments:
                if (segment['start'] < existing['end'] and
                        segment['end'] > existing['start']):
                    overlap = True
                    break
            if not overlap:
                final_segments.append(segment)
                if len(final_segments) >= 5:  # limit to top 5 clips
                    break

        return final_segments

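    # The overlap removal above is a greedy pass: candidates are visited in
    # descending score order and kept only if they intersect no already-kept
    # clip, so the highest-scoring moment in any overlapping cluster wins.
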
    def add_emojis_to_text(self, text: str) -> str:
        """Add relevant emojis to text based on content"""
        emoji_map = {
            'money': '💰', 'rich': '💰', 'dollar': '💵',
            'love': '❤️', 'heart': '❤️', 'like': '👍',
            'fire': '🔥', 'hot': '🔥', 'amazing': '🔥',
            'laugh': '😂', 'funny': '😂', 'lol': '😂',
            'wow': '😱', 'omg': '😱', 'shocking': '😱',
            'cool': '😎', 'awesome': '😎', 'great': '😎',
            'think': '🤔', 'question': '❓', 'why': '🤔',
            'warning': '⚠️', 'careful': '⚠️', 'danger': '⚠️',
            'success': '✅', 'win': '🏆', 'winner': '🏆',
            'music': '🎵', 'song': '🎵', 'sound': '🔊'
        }

        # Substitute each unique word once; \g<0> keeps the original casing
        # and avoids stacking duplicate emojis when a word repeats.
        for word in set(text.lower().split()):
            clean_word = re.sub(r'[^\w]', '', word)
            if clean_word in emoji_map:
                text = re.sub(rf"\b{re.escape(clean_word)}\b",
                              rf"\g<0> {emoji_map[clean_word]}",
                              text, flags=re.IGNORECASE)

        return text

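    # Example (illustrative): "I love this amazing hack"
    #   -> "I love ❤️ this amazing 🔥 hack"
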
    def create_clip(self, video_path: str, start_time: float, end_time: float,
                    text: str, output_path: str, add_subtitles: bool = True) -> str:
        """Create a short clip from the video"""
        print(f"Creating clip: {start_time:.1f}s - {end_time:.1f}s")

        # Load video
        video = mp.VideoFileClip(video_path).subclip(start_time, end_time)

        # Resize to 9:16 aspect ratio (1080x1920)
        target_width = 1080
        target_height = 1920

        # Calculate scaling to fit the video in the frame
        scale_w = target_width / video.w
        scale_h = target_height / video.h
        scale = min(scale_w, scale_h)

        # Resize video
        video_resized = video.resize(scale)

        # Create background (blur or solid color)
        if video_resized.h < target_height or video_resized.w < target_width:
            # Create blurred background
            background = video.resize((target_width, target_height))
            background = background.fl_image(lambda frame: cv2.GaussianBlur(frame, (21, 21), 0))

            # Overlay the main video in center
            final_video = mp.CompositeVideoClip([
                background,
                video_resized.set_position('center')
            ], size=(target_width, target_height))
        else:
            final_video = video_resized

        # Add subtitles if requested
        if add_subtitles and text:
            # Add emojis to text
            text_with_emojis = self.add_emojis_to_text(text)

            # Create text clip
            txt_clip = mp.TextClip(
                text_with_emojis,
                fontsize=60,
                color='white',
                stroke_color='black',
                stroke_width=3,
                size=(target_width - 100, None),
                method='caption'
            ).set_position(('center', 0.8), relative=True).set_duration(final_video.duration)

            final_video = mp.CompositeVideoClip([final_video, txt_clip])

        # Write the final video
        final_video.write_videofile(
            output_path,
            codec='libx264',
            audio_codec='aac',
            temp_audiofile='temp-audio.m4a',
            remove_temp=True,
            fps=30,
            preset='ultrafast'  # faster encoding for free tier
        )

        # Clean up
        video.close()
        final_video.close()

        return output_path

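    # Sizing example (illustrative): a 1920x1080 landscape source gives
    # scale = min(1080/1920, 1920/1080) = 0.5625, i.e. a roughly 1080x608
    # clip centered over the blurred, stretched 1080x1920 background.
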
def process_video(video_file, clip_duration, num_clips, add_subtitles):
    """Main function to process video and create clips"""
    if video_file is None:
        return "Please upload a video file.", [], []

    clipper = AIVideoClipper()

    try:
        # Create temporary directory
        with tempfile.TemporaryDirectory() as temp_dir:
            # gr.File(type="filepath") passes a plain path string; fall back
            # to .name for file-object style values from older Gradio versions
            video_path = video_file if isinstance(video_file, str) else video_file.name

            # Extract audio features
            print("Extracting audio features...")
            audio_features = clipper.extract_audio_features(video_path)

            # Transcribe video
            segments = clipper.transcribe_video(video_path)
            if not segments:
                return "Could not transcribe video. Please check the audio quality.", [], []

            # Find best moments
            best_moments = clipper.find_best_moments(segments, audio_features, clip_duration)
            best_moments = best_moments[:num_clips]  # limit to requested number

            if not best_moments:
                return "No suitable clips found. Try adjusting parameters.", [], []

            # Create clips
            output_videos = []
            clip_info = []

            for i, moment in enumerate(best_moments):
                output_path = os.path.join(temp_dir, f"clip_{i+1}.mp4")

                try:
                    clipper.create_clip(
                        video_path,
                        moment['start'],
                        moment['end'],
                        moment['text'],
                        output_path,
                        add_subtitles
                    )

                    # Move out of the temp dir before it is deleted;
                    # shutil.move also works across filesystems, where
                    # os.rename would fail
                    permanent_path = f"clip_{i+1}_{hash(video_path)}_{i}.mp4"
                    shutil.move(output_path, permanent_path)

                    output_videos.append(permanent_path)
                    clip_info.append({
                        'clip_number': i + 1,
                        'start_time': f"{moment['start']:.1f}s",
                        'end_time': f"{moment['end']:.1f}s",
                        'duration': f"{moment['duration']:.1f}s",
                        'virality_score': f"{moment['virality_score']:.2f}/10",
                        'text_preview': moment['text'][:100] + "..." if len(moment['text']) > 100 else moment['text']
                    })

                except Exception as e:
                    print(f"Error creating clip {i+1}: {str(e)}")
                    continue

            success_msg = f"Successfully created {len(output_videos)} clips!"
            return success_msg, output_videos, clip_info

    except Exception as e:
        return f"Error processing video: {str(e)}", [], []

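# The three return values line up with the three outputs wired to the button
# below: a status string, a list of clip file paths for the gallery, and the
# per-clip metadata for the JSON panel.
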
# Create Gradio interface
def create_interface():
    with gr.Blocks(title="AI Video Clipper", theme=gr.themes.Soft()) as demo:
        gr.Markdown(
            """
            # 🎬 AI Video Clipper

            Transform your long videos into viral short clips automatically!
            Upload your video and let AI find the most engaging moments.

            **Features:**
            - 🤖 AI-powered moment detection
            - 📱 Auto 9:16 aspect ratio conversion
            - 📝 Automatic subtitles with emojis
            - 📊 Virality scoring
            - 🎯 Multi-language support
            """
        )

        with gr.Row():
            with gr.Column():
                video_input = gr.File(
                    label="Upload Video",
                    file_types=[".mp4", ".avi", ".mov", ".mkv", ".webm"],
                    type="filepath"
                )

                with gr.Row():
                    clip_duration = gr.Slider(
                        minimum=15,
                        maximum=90,
                        value=30,
                        step=5,
                        label="Target Clip Duration (seconds)"
                    )

                    num_clips = gr.Slider(
                        minimum=1,
                        maximum=5,
                        value=3,
                        step=1,
                        label="Number of Clips to Generate"
                    )

                add_subtitles = gr.Checkbox(
                    label="Add Subtitles with Emojis",
                    value=True
                )

                process_btn = gr.Button(
                    "🚀 Create Clips",
                    variant="primary",
                    size="lg"
                )

            with gr.Column():
                status_output = gr.Textbox(
                    label="Status",
                    interactive=False,
                    lines=2
                )

                clips_output = gr.Gallery(
                    label="Generated Clips",
                    show_label=True,
                    elem_id="gallery",
                    columns=1,
                    rows=3,
                    height="auto",
                    allow_preview=True,
                    show_download_button=True
                )

        with gr.Row():
            info_output = gr.JSON(
                label="Clip Analysis",
                visible=True
            )

        # Example videos section
        gr.Markdown("### 📺 Tips for Best Results:")
        gr.Markdown("""
        - Upload videos with clear speech (podcasts, interviews, tutorials work great!)
        - Longer videos (5+ minutes) provide more clip opportunities
        - Videos with engaging content and emotional moments score higher
        - Good audio quality improves transcription accuracy
        """)

        process_btn.click(
            process_video,
            inputs=[video_input, clip_duration, num_clips, add_subtitles],
            outputs=[status_output, clips_output, info_output]
        )

    return demo

# Launch the app
if __name__ == "__main__":
    demo = create_interface()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False
    )
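
# Headless usage sketch (illustrative; assumes a local video at the
# hypothetical path below and the same defaults wired into the UI):
#
#   status, clips, info = process_video(
#       "/path/to/talk.mp4",
#       clip_duration=30,
#       num_clips=3,
#       add_subtitles=True,
#   )
#   print(status)
#   for meta in info:
#       print(meta["clip_number"], meta["virality_score"], meta["text_preview"])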