Chanlefe committed on
Commit
abb7e37
·
verified ·
1 Parent(s): 406906e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +192 -0
app.py ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import torch.nn as nn
4
+ from transformers import pipeline, BertTokenizer, CLIPProcessor
5
+ from PIL import Image
6
+ import pytesseract
7
+ import cv2
8
+ import numpy as np
9
+
10
+ # Initialize OCR
11
+ # Note: You need to install tesseract-ocr on your system
12
+ # For Hugging Face Spaces, add the apt package name tesseract-ocr (just the name, not an apt-get command)
13
+ # to a file called packages.txt
14
+
15
class MemeAnalyzerWithOCR:
    """Analyze a meme from its text (typed and/or OCR-extracted) and image.

    Three Hugging Face pipelines are loaded once in ``__init__``:
    a sentiment classifier, a toxicity classifier and a generic image
    classifier. ``analyze_meme`` combines them into one Markdown report.
    """

    # Minimum toxicity score required before the text is reported as hateful.
    HATE_THRESHOLD = 0.7

    def __init__(self):
        # Sentiment Analysis for text (Positive, Negative, Neutral)
        self.text_classifier = pipeline(
            "sentiment-analysis",
            model="cardiffnlp/twitter-roberta-base-sentiment-latest"
        )

        # Hate Speech Detection for the complete meme
        self.hate_detector = pipeline(
            "text-classification",
            model="unitary/toxic-bert"
        )

        # Image understanding (not specifically for hate, but for context)
        self.image_classifier = pipeline(
            "image-classification",
            model="google/vit-base-patch16-224"
        )

    def extract_text_from_image(self, image):
        """Extract text from a meme image using Tesseract OCR.

        Args:
            image: A PIL image (or an RGB array accepted by ``np.array``).

        Returns:
            The recognised text with surrounding whitespace stripped, or
            ``""`` when OCR fails. Failure is best-effort by design so the
            caller can still analyze manually typed text.
        """
        import logging

        try:
            # RGBA / palette / grayscale images would make the RGB->gray
            # conversion below raise, so normalise PIL inputs to RGB first.
            if isinstance(image, Image.Image):
                image = image.convert("RGB")
            pixels = np.array(image)

            # Preprocess image for better OCR: grayscale, then boost contrast
            gray = cv2.cvtColor(pixels, cv2.COLOR_RGB2GRAY)
            enhanced = cv2.convertScaleAbs(gray, alpha=1.5, beta=0)

            return pytesseract.image_to_string(enhanced).strip()
        except Exception:
            # Keep the best-effort contract, but leave a trace in the logs
            # instead of hiding the failure (e.g. tesseract not installed).
            logging.getLogger(__name__).exception(
                "OCR failed; continuing without extracted text"
            )
            return ""

    def analyze_meme(self, text_input, image):
        """Run sentiment and hate-speech analysis on a meme.

        Args:
            text_input: Optional manually entered text (may be ``None`` or
                empty).
            image: Optional PIL image; when given, its text is extracted
                with OCR and its content is classified.

        Returns:
            A Markdown report, or a short plain message when neither the
            manual input nor the image yields any text.
        """
        # Step 1: Extract text from image if provided
        extracted_text = ""
        if image is not None:
            extracted_text = self.extract_text_from_image(image)

        # Step 2: Combine manual text input with OCR text.
        # Whitespace-only manual input counts as "no text".
        manual_text = (text_input or "").strip()
        combined_text = " ".join(
            part for part in (manual_text, extracted_text) if part
        )

        if not combined_text:
            return "No text found! Please provide text or an image with text."

        # Classify the image only once we know a report will be produced.
        image_content = None
        if image is not None:
            image_results = self.image_classifier(image)
            if image_results:
                image_content = image_results[0]['label']

        # Step 3: Sentiment Analysis (Positive, Negative, Neutral)
        sentiment_result = self.text_classifier(combined_text)[0]
        sentiment_mapping = {
            'positive': 'Positive',
            'negative': 'Negative',
            'neutral': 'Neutral',
        }
        sentiment_label = sentiment_mapping.get(
            sentiment_result['label'].lower(), 'Neutral'
        )
        sentiment_score = sentiment_result['score']

        # Step 4: Hate Speech Detection
        hate_result = self.hate_detector(combined_text)[0]

        # The toxicity model reports lowercase labels ("toxic", "insult", ...),
        # so compare case-insensitively; an exact match against 'TOXIC' would
        # never succeed and nothing would ever be flagged.
        is_hateful = (
            hate_result['label'].lower() == 'toxic'
            and hate_result['score'] > self.HATE_THRESHOLD
        )
        hate_label = 'Hateful' if is_hateful else 'Non-hateful'
        hate_score = hate_result['score'] if is_hateful else 1 - hate_result['score']

        # Step 5: Format results
        sections = ["## 📊 Meme Analysis Results\n\n"]

        if extracted_text:
            sections.append(
                f"### 🔍 Text Extracted from Image (OCR):\n`{extracted_text}`\n\n"
            )

        sections.append(
            "### 😊 Sentiment Analysis (BERT):\n"
            f"**{sentiment_label}** "
            f"(Confidence: {sentiment_score:.1%})\n\n"
        )

        sections.append(
            "### 🚫 Hate Speech Detection:\n"
            f"**{hate_label}** "
            f"(Confidence: {hate_score:.1%})\n\n"
        )

        if image_content:
            sections.append(f"### 🖼️ Image Content:\n{image_content}\n\n")

        sections.append(f"### 📝 Analyzed Text:\n`{combined_text}`\n\n")

        if is_hateful:
            sections.append(
                "⚠️ **Warning**: This content may contain hateful or offensive material.\n"
            )

        return "".join(sections)
135
+
136
# Initialize analyzer (loads all three models once, at startup)
analyzer = MemeAnalyzerWithOCR()

# Create Gradio interface: one optional textbox plus one optional image in,
# a Markdown report out.
demo = gr.Interface(
    fn=analyzer.analyze_meme,
    inputs=[
        gr.Textbox(
            label="📝 Manual Text Input (Optional)",
            placeholder="Enter text if not in image...",
            info="Leave empty if text is in the image"
        ),
        # gr.Image does not accept `info` in Gradio 4+ (passing it raises
        # TypeError at startup), so the hint lives in the label instead.
        gr.Image(
            label="📸 Upload Meme Image (the AI will extract text from the image)",
            type="pil"
        )
    ],
    outputs=gr.Markdown(label="Analysis Results"),
    title="🎭 Meme Analyzer with OCR",
    description="""
This tool analyzes memes by:
1. **Extracting text** from images using OCR
2. **Sentiment analysis** (Positive/Negative/Neutral) using BERT
3. **Hate speech detection** (Hateful/Non-hateful)
4. **Image content analysis**

Upload a meme image and/or provide text to analyze!
""",
    # NOTE(review): the first example expects examples/meme1.jpg to exist in
    # the repository — confirm it is committed alongside app.py.
    examples=[
        ["", "examples/meme1.jpg"],
        ["This is hilarious!", None],
        ["I hate everyone", None]
    ],
    theme=gr.themes.Soft()
)

# Launch
demo.launch()
175
+
176
+ # For Hugging Face Spaces, create these additional files:
177
+
178
+ # requirements.txt:
179
+ """
180
+ gradio
181
+ torch
182
+ transformers
183
+ pillow
184
+ opencv-python
185
+ pytesseract
186
+ numpy
187
+ """
188
+
189
+ # packages.txt (for system dependencies):
190
+ """
191
+ tesseract-ocr
192
+ """