angeloqq committed
Commit 806c931 · 1 Parent(s): e15d477
Files changed (1)
  1. app.py +451 -0
app.py ADDED
@@ -0,0 +1,451 @@
+ from flask import Flask, request, jsonify, render_template, send_from_directory
+ from transformers import (
+     AutoModelForSequenceClassification,
+     AutoTokenizer,
+     TFCLIPModel,
+     CLIPProcessor,
+     pipeline,
+     BertTokenizer,
+     BertForSequenceClassification
+ )
+ from werkzeug.utils import secure_filename  # sanitize client-supplied filenames
+ import cv2
+ import os
+ import subprocess
+ import torch
+ from PIL import Image
+ import numpy as np
+ import base64
+ import uuid
+ from ultralytics import YOLO
+ import tensorflow as tf
+ import logging
+
+ # Configure logging
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+ app = Flask(__name__)
+
+ # Create working directories
+ os.makedirs('save', exist_ok=True)
+ os.makedirs('temp', exist_ok=True)
+ os.makedirs('unsafe_frames', exist_ok=True)
+ os.makedirs('audio', exist_ok=True)
+ os.makedirs('logs', exist_ok=True)
+ os.makedirs('text_output', exist_ok=True)
+
+ print("Loading models...")
+ try:
+     # Load models
+     nudity_model = YOLO("Models/nudenet/320n.pt")
+
+     bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+     bert_model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
+
+     profanity_model = AutoModelForSequenceClassification.from_pretrained("unitary/toxic-bert")
+     profanity_tokenizer = AutoTokenizer.from_pretrained("unitary/toxic-bert")
+
+     hate_speech_model = AutoModelForSequenceClassification.from_pretrained("Hate-speech-CNERG/dehatebert-mono-english")
+     hate_speech_tokenizer = AutoTokenizer.from_pretrained("Hate-speech-CNERG/dehatebert-mono-english")
+
+     clip_model = TFCLIPModel.from_pretrained("openai/clip-vit-base-patch32")
+     clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
+
+     whisper_model = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
+
+     print("All models loaded successfully")
+ except Exception as e:
+     logger.error(f"Error loading models: {str(e)}")
+     raise
+
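+ # Editor's note: the Hugging Face checkpoints above are fetched on first run;
+ # "Models/nudenet/320n.pt" is a local file that must ship with the repo,
+ # otherwise YOLO(...) raises here and the app refuses to start.
+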
+ @app.route("/")
+ def home():
+     return render_template('index.html')
+
+ @app.route("/extract_text", methods=["POST"])
+ def extract_text():
+     try:
+         audio_file = request.form.get('audio_file')
+         if not audio_file:
+             return jsonify({"error": "No audio file specified"}), 400
+
+         # secure_filename keeps the lookup inside the audio/ directory
+         audio_path = os.path.join('audio', secure_filename(audio_file))
+         if not os.path.exists(audio_path):
+             return jsonify({"error": "Audio file not found"}), 404
+
+         # Process audio and get text
+         audio_result = process_audio(audio_path)
+
+         if not audio_result['success']:
+             return jsonify({"error": audio_result['error']}), 500
+
+         # Save extracted text
+         text_filename = f"text_{uuid.uuid4().hex}.txt"
+         text_path = os.path.join('text_output', text_filename)
+
+         with open(text_path, 'w', encoding='utf-8') as f:
+             f.write(audio_result['text'])
+
+         # Analyze text content
+         text_analysis = analyze_text_content(audio_result['text'])
+
+         return jsonify({
+             "success": True,
+             "text": audio_result['text'],
+             "text_file": text_filename,
+             "confidence": audio_result['confidence'],
+             "analysis": text_analysis
+         })
+
+     except Exception as e:
+         logger.error(f"Error extracting text: {str(e)}")
+         return jsonify({"error": str(e)}), 500
+
+ @app.route('/audio/<path:filename>')
+ def serve_audio(filename):
+     return send_from_directory('audio', filename)
+
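+ # Editor's note (usage sketch, not part of the committed file): assuming the
+ # server runs on localhost:5000, the endpoints can be exercised with curl.
+ # "clip.mp4" is a placeholder; /extract_text expects a file that /upload has
+ # already written into the audio/ directory.
+ #
+ #   curl -F "file=@clip.mp4" http://localhost:5000/upload
+ #   curl -d "audio_file=audio_<hex>.wav" http://localhost:5000/extract_text
+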
+ @app.route("/upload", methods=["POST"])
+ def upload_file():
+     try:
+         if 'file' not in request.files:
+             return jsonify({"error": "No file uploaded"}), 400
+
+         video = request.files['file']
+         if video.filename == '':
+             return jsonify({"error": "No file selected"}), 400
+
+         # secure_filename prevents path traversal via the uploaded filename
+         video_path = os.path.join('save', secure_filename(video.filename))
+         video.save(video_path)
+
+         try:
+             frames = extract_frames(video_path)
+             results = []
+
+             audio_filename = f"audio_{uuid.uuid4().hex}.wav"
+             audio_path = os.path.join('audio', audio_filename)
+             audio_result = extract_audio(video_path, audio_path)
+
+             if audio_result:
+                 audio_text = process_audio(audio_path)
+                 text_content = audio_text.get('text', '')
+
+                 # Save extracted text
+                 if text_content:
+                     text_filename = f"text_{uuid.uuid4().hex}.txt"
+                     text_path = os.path.join('text_output', text_filename)
+
+                     with open(text_path, 'w', encoding='utf-8') as f:
+                         f.write(text_content)
+
+                     text_analysis = analyze_text_content(text_content)
+                 else:
+                     text_filename = None
+                     text_analysis = None
+             else:
+                 text_content = ''
+                 text_filename = None
+                 text_analysis = None
+
+             batch_size = 15
+             for i in range(0, len(frames), batch_size):
+                 batch_frames = frames[i:i + batch_size]
+                 result = analyze_batch(batch_frames, text_content)
+
+                 if result is None:
+                     continue
+
+                 results.extend(result)
+
+                 # Cleanup frames; the per-frame flags live in the analysis
+                 # results, not in the frame dicts from extract_frames()
+                 for frame_data, frame_result in zip(batch_frames, result):
+                     if frame_result['nudity']['is_inappropriate'] or frame_result['harm']['is_harmful']:
+                         unique_filename = f'unsafe_{uuid.uuid4().hex}.png'
+                         unsafe_frame_path = os.path.join('unsafe_frames', unique_filename)
+                         os.rename(frame_data['frame'], unsafe_frame_path)
+                     else:
+                         os.remove(frame_data['frame'])
+                     os.remove(frame_data['thumbnail'])
+
+             if os.path.exists(video_path):
+                 os.remove(video_path)
+
+             if results:
+                 total_meta_score = sum(r['meta_standards']['score'] for r in results) / len(results)
+                 overall_assessment = {
+                     "total_score": total_meta_score,
+                     "risk_level": "High" if total_meta_score > 35 else "Medium" if total_meta_score > 30 else "Low",
+                     "recommendation": get_recommendation(total_meta_score)
+                 }
+             else:
+                 overall_assessment = {
+                     "total_score": 0,
+                     "risk_level": "Low",
+                     "recommendation": "No issues detected"
+                 }
+
+             return jsonify({
+                 "success": True,
+                 "results": results,
+                 "audio_path": audio_filename,
+                 "audio_text": text_content,
+                 "text_file": text_filename,
+                 "text_analysis": text_analysis,
+                 "overall_assessment": overall_assessment
+             })
+
+         except Exception as e:
+             if os.path.exists(video_path):
+                 os.remove(video_path)
+             logger.error(f"Error in content analysis: {str(e)}")
+             return jsonify({"error": str(e)}), 500
+
+     except Exception as e:
+         logger.error(f"Error in upload: {str(e)}")
+         return jsonify({"error": str(e)}), 500
+
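+ # Editor's note: a successful /upload response looks roughly like this
+ # (field names taken from the handler above; values are illustrative):
+ #
+ #   {
+ #     "success": true,
+ #     "results": [ { "nudity": {...}, "profanity": {...}, "hate_speech": {...},
+ #                    "harm": {...}, "meta_standards": {...},
+ #                    "thumbnail": "<base64>", "timestamp": 0 }, ... ],
+ #     "audio_path": "audio_<hex>.wav",
+ #     "audio_text": "...",
+ #     "text_file": "text_<hex>.txt or null",
+ #     "text_analysis": {...} or null,
+ #     "overall_assessment": {"total_score": 12.3, "risk_level": "Low",
+ #                            "recommendation": "..."}
+ #   }
+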
+ def extract_frames(video_path):
+     cap = cv2.VideoCapture(video_path)
+     if not cap.isOpened():
+         raise Exception("Error opening video file")
+
+     frames = []
+     frame_count = 0
+     # Guard against containers that report FPS as 0, which would make the
+     # modulo below divide by zero
+     fps = max(int(cap.get(cv2.CAP_PROP_FPS)), 1)
+
+     while cap.isOpened():
+         ret, frame = cap.read()
+         if not ret:
+             break
+
+         # Sample roughly one frame per second of video
+         if frame_count % fps == 0:
+             frame_path = os.path.join('temp', f'frame_{frame_count}.jpg')
+             thumbnail_path = os.path.join('temp', f'thumb_{frame_count}.jpg')
+
+             cv2.imwrite(frame_path, frame)
+             thumbnail = cv2.resize(frame, (648, 648))
+             cv2.imwrite(thumbnail_path, thumbnail)
+
+             frames.append({
+                 'frame': frame_path,
+                 'thumbnail': thumbnail_path,
+                 'timestamp': frame_count // fps
+             })
+         frame_count += 1
+
+     cap.release()
+     return frames
+
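+ # Editor's note (sketch): for a 30 fps, 10-second clip the sampling above
+ # yields about ten frames, one per second:
+ #
+ #   frames = extract_frames('save/clip.mp4')   # hypothetical path
+ #   [f['timestamp'] for f in frames]           # -> [0, 1, 2, ..., 9]
+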
+ def extract_audio(video_path, output_path):
+     try:
+         command = [
+             'ffmpeg',
+             '-i', video_path,
+             '-vn',
+             '-acodec', 'pcm_s16le',
+             '-ar', '16000',
+             '-ac', '1',
+             '-y',
+             output_path
+         ]
+
+         result = subprocess.run(
+             command,
+             check=True,
+             stderr=subprocess.PIPE,
+             stdout=subprocess.PIPE
+         )
+
+         if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
+             logger.info(f"Audio extracted successfully: {output_path}")
+             return output_path
+         else:
+             raise Exception("Audio extraction failed - empty or missing file")
+
+     except Exception as e:
+         logger.error(f"Audio extraction error: {str(e)}")
+         return None
+
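+ # Editor's note: the ffmpeg flags above drop the video stream (-vn) and write
+ # 16-bit little-endian PCM (-acodec pcm_s16le) at 16 kHz (-ar 16000), mono
+ # (-ac 1), overwriting any existing file (-y); 16 kHz mono is the input format
+ # Whisper expects. The equivalent shell command is roughly:
+ #
+ #   ffmpeg -i input.mp4 -vn -acodec pcm_s16le -ar 16000 -ac 1 -y out.wav
+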
+ def process_audio(audio_path):
+     try:
+         if not os.path.exists(audio_path):
+             logger.error(f"Audio file not found: {audio_path}")
+             return {
+                 'success': False,
+                 'text': "Audio file not found",
+                 'error': "File not found"
+             }
+
+         logger.info(f"Processing audio file: {audio_path}")
+
+         # First pass with Whisper
+         whisper_result = whisper_model(audio_path)
+
+         logger.info(f"Whisper result: {whisper_result}")
+
+         if not whisper_result.get('text'):
+             logger.error("Whisper failed to extract text")
+             return {
+                 'success': False,
+                 'text': "Whisper failed to extract text",
+                 'error': "No text found in Whisper output"
+             }
+
+         text = whisper_result['text']
+
+         # Second pass with BERT. Note the chunking is by characters, not
+         # tokens, so 512 here only approximates the model's token limit.
+         chunks = [text[i:i+512] for i in range(0, len(text), 512)]
+         processed_chunks = []
+
+         for chunk in chunks:
+             inputs = bert_tokenizer(chunk, return_tensors="pt", truncation=True, max_length=512)
+             with torch.no_grad():
+                 # The model outputs are never used; only the tokenizer
+                 # round-trip below (tokenize, then decode) affects the text
+                 outputs = bert_model(**inputs)
+
+             processed_chunk = bert_tokenizer.decode(
+                 inputs['input_ids'][0],
+                 skip_special_tokens=True
+             )
+             processed_chunks.append(processed_chunk)
+
+         final_text = " ".join(processed_chunks)
+
+         return {
+             'success': True,
+             'text': final_text,
+             # The ASR pipeline returns only {"text": ...}; it emits no
+             # confidence field, so this always falls back to 0
+             'confidence': whisper_result.get('confidence', 0)
+         }
+
+     except Exception as e:
+         logger.error(f"Audio processing error: {str(e)}")
+         return {
+             'success': False,
+             'text': "Audio processing failed",
+             'error': str(e)
+         }
+
+ def analyze_text_content(text):
+     try:
+         # Analyze profanity
+         profanity_inputs = profanity_tokenizer(text, return_tensors="pt", padding=True, truncation=True)
+         with torch.no_grad():
+             profanity_outputs = profanity_model(**profanity_inputs)
+             profanity_scores = torch.nn.functional.softmax(profanity_outputs.logits, dim=-1)
+
+         # Analyze hate speech
+         hate_speech_inputs = hate_speech_tokenizer(text, return_tensors="pt", padding=True, truncation=True)
+         with torch.no_grad():
+             hate_speech_outputs = hate_speech_model(**hate_speech_inputs)
+             hate_speech_scores = torch.nn.functional.softmax(hate_speech_outputs.logits, dim=-1)
+
+         # Index [0][1] assumes a binary classification head; see the note
+         # after this function regarding toxic-bert's multi-label head
+         return {
+             "profanity": {
+                 "score": float(profanity_scores[0][1]) * 100,
+                 "is_offensive": float(profanity_scores[0][1]) > 0.5
+             },
+             "hate_speech": {
+                 "score": float(hate_speech_scores[0][1]) * 100,
+                 "is_hateful": float(hate_speech_scores[0][1]) > 0.5
+             }
+         }
+     except Exception as e:
+         logger.error(f"Error analyzing text: {str(e)}")
+         return None
+
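+ # Editor's note (hedged sketch): unitary/toxic-bert is trained as a
+ # multi-label classifier over several toxicity labels, so a per-label
+ # sigmoid may be a better reading of its logits than a softmax over the
+ # first two positions:
+ #
+ #   probs = torch.sigmoid(profanity_outputs.logits)[0]  # one prob per label
+ #   toxic_score = float(probs.max()) * 100              # worst offending label
+ #
+ # This is an assumption about the checkpoint's head, not a verified fix.
+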
+ def analyze_batch(batch_frames, text):
+     try:
+         results = []
+         images = []
+         timestamps = []
+
+         for frame_data in batch_frames:
+             # Force RGB so grayscale/RGBA frames stack into a uniform array
+             image = Image.open(frame_data['frame']).convert('RGB')
+             image = image.resize((128, 128))
+             images.append(image)
+             timestamps.append(frame_data['timestamp'])
+
+         # Prepare image data as a float BCHW batch in [0, 1]
+         image_arrays = np.array([np.array(img) / 255.0 for img in images])
+         image_tensors = torch.tensor(image_arrays).permute(0, 3, 1, 2).float()
+
+         # Run analyses
+         with torch.no_grad():
+             nudity_results = nudity_model(image_tensors)
+             nudity_predictions = [result.boxes for result in nudity_results]
+
+         if text:
+             profanity_inputs = profanity_tokenizer(text, return_tensors="pt", padding=True, truncation=True)
+             with torch.no_grad():
+                 profanity_outputs = profanity_model(**profanity_inputs)
+                 profanity_scores = torch.nn.functional.softmax(profanity_outputs.logits, dim=-1)
+
+             hate_speech_inputs = hate_speech_tokenizer(text, return_tensors="pt", padding=True, truncation=True)
+             with torch.no_grad():
+                 hate_speech_outputs = hate_speech_model(**hate_speech_inputs)
+                 hate_speech_scores = torch.nn.functional.softmax(hate_speech_outputs.logits, dim=-1)
+         else:
+             profanity_scores = torch.zeros(1, 2)
+             hate_speech_scores = torch.zeros(1, 2)
+
+         # Every image is paired with the same caption, so this softmax is
+         # nearly uniform across candidates; see the note after this function
+         clip_inputs = clip_processor(text=[text if text else ""] * len(images), images=images, return_tensors="tf", padding=True)
+         clip_outputs = clip_model(**clip_inputs)
+         clip_scores = tf.nn.softmax(clip_outputs.logits_per_image, axis=-1).numpy()
+
+         for i in range(len(batch_frames)):
+             with open(batch_frames[i]['thumbnail'], 'rb') as img_file:
+                 thumbnail_base64 = base64.b64encode(img_file.read()).decode('utf-8')
+
+             # Scores are scaled to a 0-99 range
+             nudity_score = float(nudity_predictions[i].conf[0]) * 99 if nudity_predictions[i] else 0.0
+             profanity_score = float(profanity_scores[0][1]) * 99
+             hate_speech_score = float(hate_speech_scores[0][1]) * 99
+             harm_score = float(clip_scores[i][1]) * 99 if clip_scores[i].size > 1 else 0.0
+
+             # Weighted blend; the weights sum to 0.9, so the maximum
+             # attainable score is about 89, with nudity dominating
+             meta_standards_score = (
+                 (nudity_score * 0.6) +
+                 (hate_speech_score * 0.1) +
+                 (harm_score * 0.1) +
+                 (profanity_score * 0.1)
+             )
+
+             results.append({
+                 "nudity": {
+                     "score": nudity_score,
+                     "is_inappropriate": nudity_score > 65
+                 },
+                 "profanity": {
+                     "score": profanity_score,
+                     "is_offensive": profanity_score > 65
+                 },
+                 "hate_speech": {
+                     "score": hate_speech_score,
+                     "is_hateful": hate_speech_score > 40
+                 },
+                 "harm": {
+                     "score": harm_score,
+                     "is_harmful": harm_score > 40
+                 },
+                 "meta_standards": {
+                     "score": meta_standards_score,
+                     "is_violating": meta_standards_score > 30,
+                     "risk_level": "High" if meta_standards_score > 60 else "Medium" if meta_standards_score > 25 else "Low",
+                     "recommendation": get_recommendation(meta_standards_score)
+                 },
+                 "thumbnail": thumbnail_base64,
+                 "timestamp": timestamps[i]
+             })
+
+         return results
+     except Exception as e:
+         logger.error(f"Error in batch analysis: {str(e)}")
+         return None
+
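+ # Editor's note (hedged sketch): because analyze_batch pairs every image with
+ # the same text, logits_per_image is softmaxed over identical candidates and
+ # clip_scores[i][1] is not a meaningful harm probability. A common CLIP
+ # zero-shot pattern contrasts candidate labels instead; the label wording
+ # here is an assumption:
+ #
+ #   texts = ["safe everyday content", "violent or harmful content"]
+ #   inputs = clip_processor(text=texts, images=images, return_tensors="tf", padding=True)
+ #   probs = tf.nn.softmax(clip_model(**inputs).logits_per_image, axis=-1)  # (n_images, 2)
+ #   harm_prob = probs[:, 1]  # probability mass on the "harmful" label
+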
+ def get_recommendation(score):
+     if score > 70:
+         return "Content likely violates Meta Community Standards. Major modifications needed."
+     elif score > 30:
+         return "Content may need modifications to comply with Meta Community Standards."
+     else:
+         return "Content likely complies with Meta Community Standards."
+
+ if __name__ == "__main__":
+     # debug=True enables Werkzeug's debugger and reloader; do not expose it
+     # on 0.0.0.0 outside development
+     app.run(host="0.0.0.0", port=5000, debug=True)
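+ # Editor's note (deployment sketch, assuming gunicorn is installed): a more
+ # production-oriented launch would be
+ #
+ #   gunicorn -w 1 -b 0.0.0.0:5000 app:app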