Upload 6 files
Browse files- DVCarFraudDetection.csv +0 -0
- Dockerfile +9 -0
- RFModel.pkl +3 -0
- X_train.csv +0 -0
- app.py +690 -0
- requirements.txt +16 -0
DVCarFraudDetection.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Dockerfile
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Container image for the Flask application started by app.py.
FROM python:3.9

WORKDIR /app

# Install dependencies before copying the rest of the sources so this layer
# stays cached until requirements.txt itself changes.
COPY requirements.txt /app/
RUN python3 -m pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r requirements.txt

# COPY rather than ADD: only a plain copy of the build context is needed.
COPY . /app

CMD [ "python", "app.py" ]
|
RFModel.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9ca585f9b657db88ecff3b835aeb36c63e36615ef8e5989069450a3c65bde044
|
3 |
+
size 41657465
|
X_train.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
app.py
ADDED
@@ -0,0 +1,690 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from flask import Flask, render_template, request
|
2 |
+
import matplotlib.pyplot as plt
|
3 |
+
import pandas as pd
|
4 |
+
from joblib import load
|
5 |
+
import seaborn as sns
|
6 |
+
import io
|
7 |
+
from wordcloud import WordCloud
|
8 |
+
import base64
|
9 |
+
import string
|
10 |
+
import nltk
|
11 |
+
from nltk.corpus import stopwords
|
12 |
+
from nltk.tokenize import word_tokenize
|
13 |
+
from nltk.stem import WordNetLemmatizer
|
14 |
+
from google_play_scraper import app, Sort, reviews_all
|
15 |
+
from nltk.sentiment.vader import SentimentIntensityAnalyzer
|
16 |
+
from nltk.corpus import stopwords
|
17 |
+
from collections import Counter
|
18 |
+
from matplotlib.sankey import Sankey
|
19 |
+
import networkx as nx
|
20 |
+
|
21 |
+
# Flask application object; the @app.route decorators below register on it.
# NOTE(review): this rebinds the name `app` that was imported from
# google_play_scraper above, so that import is not reachable by this name
# afterwards.
app = Flask(__name__)
|
22 |
+
|
23 |
+
def preprocess_text(text):
    """Normalise a piece of text into a space-separated string of lemmas.

    The text is lower-cased, stripped of the characters in
    ``string.punctuation``, tokenised with NLTK, filtered against the English
    stop-word list and lemmatised with WordNet. ``None`` yields ``''``.
    """
    if text is None:
        return ''

    # Lower-case first, then delete every punctuation character in one pass.
    cleaned = text.lower().translate(str.maketrans('', '', string.punctuation))
    words = word_tokenize(cleaned)

    english_stopwords = set(stopwords.words('english'))
    kept = [word for word in words if word not in english_stopwords]

    wordnet = WordNetLemmatizer()
    return ' '.join(wordnet.lemmatize(word) for word in kept)
|
42 |
+
|
43 |
+
def preprocess_dataframe(df):
    """Reduce a scraped-reviews frame to the columns used by the analysis.

    Args:
        df: DataFrame with at least the columns ``content``, ``score`` and
            ``replyContent``. It is not modified.

    Returns:
        A new, independent DataFrame with exactly the columns ``content``,
        ``score`` and ``IsReplied``. ``IsReplied`` is ``'Yes'`` when
        ``replyContent`` is a string containing non-whitespace characters and
        ``'No'`` otherwise (including ``None`` and NaN).

    Raises:
        KeyError: if one of the required columns is missing.
    """
    def _has_reply(value):
        # Missing replies may arrive as None or NaN; only real text counts.
        return isinstance(value, str) and value.strip() != ''

    # Only these three columns are returned, so nothing else needs to be
    # derived or dropped. Copying gives the caller a frame it can add columns
    # to without touching (or being warned about) the input.
    result = df[['content', 'score']].copy()
    result['IsReplied'] = ['Yes' if _has_reply(value) else 'No' for value in df['replyContent']]
    return result
|
70 |
+
|
71 |
+
# Shared VADER analyzer, created on first use (see _get_sentiment_analyzer).
_SENTIMENT_ANALYZER = None


def _get_sentiment_analyzer():
    """Return the process-wide VADER analyzer, creating it on first use."""
    global _SENTIMENT_ANALYZER
    if _SENTIMENT_ANALYZER is None:
        _SENTIMENT_ANALYZER = SentimentIntensityAnalyzer()
    return _SENTIMENT_ANALYZER


def analyze_sentiment(text, score):
    """Classify one review as 'positive', 'negative' or 'neutral'.

    The VADER compound score of ``text`` and the star rating must agree:
    compound >= 0.05 together with ``score >= 3`` is 'positive', compound
    <= -0.05 together with ``score < 3`` is 'negative', and every other
    combination is 'neutral'.

    Args:
        text: Review text.
        score: Numeric star rating of the review.

    Returns:
        One of the strings 'positive', 'negative', 'neutral'.
    """
    # This function is applied once per review row, so the analyzer is reused
    # instead of being constructed on every call.
    sentiment_score = _get_sentiment_analyzer().polarity_scores(text)['compound']

    if sentiment_score >= 0.05 and score >= 3:
        return 'positive'
    if sentiment_score <= -0.05 and score < 3:
        return 'negative'
    return 'neutral'
|
83 |
+
|
84 |
+
@app.route('/predict/app', methods=['POST'])
def predict_appFraud():
    """Scrape an app's Play Store reviews and render a fraud verdict page.

    Reads ``app-id`` and ``app-name`` from the submitted form, downloads the
    reviews, labels each one with ``analyze_sentiment`` and reports the app as
    "Not Fraud" only when positive reviews strictly outnumber negative ones.
    Summary statistics and six base64-encoded PNG charts are passed to the
    ``app_result.html`` template.
    """
    # One fixed colour per label. value_counts() orders labels by frequency,
    # so positional colour lists would paint a label differently from one app
    # to the next.
    sentiment_colors = {'positive': 'green', 'negative': 'red', 'neutral': 'blue'}
    # NOTE(review): 'Yes' -> lightgreen is assumed to be the intended pairing.
    reply_colors = {'Yes': 'lightgreen', 'No': 'lightcoral'}

    # Get the app ID and other necessary data from the form
    app_id = request.form['app-id']
    app_name = request.form['app-name']

    # Scrape reviews for the specified app
    reviews = reviews_all(app_id, sleep_milliseconds=0, lang="Eng", country="in", sort=Sort.NEWEST)
    df = preprocess_dataframe(pd.json_normalize(reviews))

    # Perform sentiment analysis
    df['sentiment'] = df.apply(lambda row: analyze_sentiment(row['content'], row['score']), axis=1)

    # Summary statistics
    total_reviews = len(df)
    positive_reviews = (df['sentiment'] == 'positive').sum()
    negative_reviews = (df['sentiment'] == 'negative').sum()
    neutral_reviews = (df['sentiment'] == 'neutral').sum()

    if positive_reviews > negative_reviews:
        result = "The App is Not Fraud"
    else:
        result = "The App is Fraud"

    average_rating = round(df['score'].mean(), 2)
    positive_percentage = round((positive_reviews / total_reviews) * 100, 2)
    negative_percentage = round((negative_reviews / total_reviews) * 100, 2)
    neutral_percentage = round((neutral_reviews / total_reviews) * 100, 2)
    replied_percentage = round((df['IsReplied'] == 'Yes').mean() * 100, 2)

    # 1. Percentage pie chart of reviews
    fig1, ax1 = plt.subplots(figsize=(6, 4))
    reviews_counts = df['sentiment'].value_counts()
    ax1.pie(reviews_counts, labels=reviews_counts.index,
            colors=[sentiment_colors[label] for label in reviews_counts.index],
            autopct='%1.1f%%', startangle=140)
    ax1.set_title('Percentage of Reviews in Fraud App')
    buffer_data1 = save_plot_to_buffer(fig1)

    # 2. Count plot of each type of review
    fig2, ax2 = plt.subplots(figsize=(6, 4))
    sns.countplot(x='sentiment', data=df, palette=sentiment_colors, ax=ax2)
    ax2.set_title('Count of Each Review Type in Fraud App')
    ax2.set_xlabel('Sentiment')
    ax2.set_ylabel('Count')
    buffer_data2 = save_plot_to_buffer(fig2)

    # 3. Histogram for each type of score
    fig3, ax3 = plt.subplots(figsize=(6, 4))
    sns.histplot(data=df, x='score', hue='sentiment', multiple='stack', bins=20, ax=ax3)
    ax3.set_title('Histogram of Rating for Each Review Type in Fraud App')
    ax3.set_xlabel('Score')
    ax3.set_ylabel('Count')
    buffer_data3 = save_plot_to_buffer(fig3)

    # 4. Pie chart of isreplied (Yes vs No)
    fig4, ax4 = plt.subplots(figsize=(6, 4))
    replied_counts = df['IsReplied'].value_counts()
    ax4.pie(replied_counts, labels=replied_counts.index, autopct='%1.1f%%', startangle=140,
            colors=[reply_colors[label] for label in replied_counts.index])
    ax4.set_title('Percentage of Replies in Fraud App Reviews')
    buffer_data4 = save_plot_to_buffer(fig4)

    # 5. Violin plot of review vs score
    fig5, ax5 = plt.subplots(figsize=(6, 4))
    sns.violinplot(x='sentiment', y='score', data=df, palette=sentiment_colors, ax=ax5)
    ax5.set_title('Violin Plot of Review vs Rating in Fraud App')
    ax5.set_xlabel('Sentiment')
    ax5.set_ylabel('Score')
    buffer_data5 = save_plot_to_buffer(fig5)

    # 6. Joint count plot for positive, negative, and neutral reviews based on
    #    isreplied (Yes or No). catplot creates its own figure, so no figure is
    #    opened beforehand; an extra one would never be closed.
    grid = sns.catplot(x='sentiment', kind='count', hue='IsReplied', data=df,
                       palette='Set1', height=4, aspect=1)
    grid.ax.set_title('Sentiments vs Review Reply Status')
    grid.ax.set_xlabel('Sentiment')
    grid.ax.set_ylabel('Count')
    grid.fig.tight_layout()
    buffer_data6 = save_plot_to_buffer(grid.fig)

    # Render template with result and any other data you want to display
    return render_template(
        'app_result.html', result=result, app_name=app_name,
        total_reviews=total_reviews, positive_reviews=positive_reviews,
        negative_reviews=negative_reviews, neutral_reviews=neutral_reviews,
        average_rating=average_rating, positive_percentage=positive_percentage,
        negative_percentage=negative_percentage, neutral_percentage=neutral_percentage,
        replied_percentage=replied_percentage,
        plot1=buffer_data1, plot2=buffer_data2, plot3=buffer_data3,
        plot4=buffer_data4, plot5=buffer_data5, plot6=buffer_data6,
    )
|
200 |
+
|
201 |
+
# Artifacts used by the insurance routes, read once when the module is
# imported. The paths are relative, so they resolve against the working
# directory the process was started in.

# Pre-trained classifier, deserialised with joblib
best_rf_classifier = load('RFModel.pkl')

# Training feature table
X_train = pd.read_csv('X_train.csv')

# Insurance dataset
df = pd.read_csv('DVCarFraudDetection.csv')
|
209 |
+
|
210 |
+
@app.route('/')
def index():
    """Serve the landing page."""
    return render_template('index.html')
|
213 |
+
|
214 |
+
@app.route('/vehicle_insurance')
def vehicle_insurance():
    """Serve the vehicle-insurance page (vehicle.html)."""
    return render_template('vehicle.html')
|
217 |
+
|
218 |
+
@app.route('/predict/insurance')
def predict_insurance():
    """Serve vehicle.html for GET requests to the prediction URL."""
    return render_template('vehicle.html')
|
221 |
+
|
222 |
+
@app.route('/dataset')
def dataset_display():
    """Render the dataset page with ten overview charts.

    Every chart is drawn from the module-level insurance DataFrame ``df`` and
    handed to ``dataset.html`` as a base64-encoded PNG (``plot1`` ..
    ``plot10``), together with the DataFrame itself (``df``).
    """
    fraud_palette = {0: 'green', 1: 'red'}

    # 1. Records per car company
    fig1, ax1 = plt.subplots(figsize=(6, 4))
    sns.countplot(y='CarCompany', data=df, ax=ax1)
    buffer_data1 = save_plot_to_buffer(fig1)

    # 2. Base policy split by fraud flag
    fig2, ax2 = plt.subplots(figsize=(6, 4))
    sns.countplot(x='BasePolicy', hue='IsFraud', data=df, palette=fraud_palette, ax=ax2)
    buffer_data2 = save_plot_to_buffer(fig2)

    # 3. Share of each past-claims count
    fig3, ax3 = plt.subplots(figsize=(6, 4))
    past_claims_counts = df['PastNumberOfClaims'].value_counts()
    ax3.pie(past_claims_counts, labels=past_claims_counts.index, autopct='%1.1f%%')
    ax3.set_title('Past Number of Claims Count')
    buffer_data3 = save_plot_to_buffer(fig3)

    # 4. Address change split by fraud flag
    fig4, ax4 = plt.subplots(figsize=(6, 4))
    sns.countplot(x='IsAddressChanged', hue='IsFraud', data=df, palette=fraud_palette, ax=ax4)
    ax4.set_title('Address Change and Fraud Distribution')
    ax4.set_xlabel('Is Address Changed?')
    ax4.set_ylabel('Count')
    ax4.legend(title='Is Fraud')
    buffer_data4 = save_plot_to_buffer(fig4)

    # 5. Car company vs owner gender (record counts)
    fig5, ax5 = plt.subplots(figsize=(6, 4))
    heatmap_data = df.groupby(['CarCompany', 'OwnerGender']).size().unstack()
    sns.heatmap(heatmap_data, annot=True, cmap='coolwarm', fmt='.2f', ax=ax5)
    ax5.set_title('Car Company vs Owner Gender')
    ax5.set_xlabel('Owner Gender')
    ax5.set_ylabel('Car Company')
    plt.yticks(rotation=0)  # keep the company names horizontal
    fig5.tight_layout()
    buffer_data5 = save_plot_to_buffer(fig5)

    # 6. Share of each supplements count
    fig6, ax6 = plt.subplots(figsize=(6, 4))
    num_supplements_counts = df['NumberOfSuppliments'].value_counts()
    ax6.pie(num_supplements_counts, labels=num_supplements_counts.index, autopct='%1.1f%%')
    ax6.set_title('NUmber of Suplements Count')
    buffer_data6 = save_plot_to_buffer(fig6)

    # 7. Police report filed split by fraud flag
    fig7, ax7 = plt.subplots(figsize=(6, 4))
    sns.countplot(x='PoliceReportFiled', hue='IsFraud', data=df, ax=ax7)
    buffer_data7 = save_plot_to_buffer(fig7)

    # 8. Owner age distribution per gender
    fig8, ax8 = plt.subplots(figsize=(6, 4))
    sns.violinplot(x='OwnerGender', y='OwnerAge', data=df,
                   palette={'Male': 'blue', 'Female': 'pink'}, ax=ax8)
    buffer_data8 = save_plot_to_buffer(fig8)

    # 9. Owner age vs number of supplements
    fig9, ax9 = plt.subplots(figsize=(6, 4))
    sns.scatterplot(x='OwnerAge', y='NumberOfSuppliments', data=df, ax=ax9)
    ax9.set_title('Scatter Plot of OwnerAge vs NumberOfSuppliments')
    fig9.tight_layout()
    buffer_data9 = save_plot_to_buffer(fig9)

    # 10. Car price per car category
    fig10, ax10 = plt.subplots(figsize=(6, 4))
    sns.boxplot(x='CarCategory', y='CarPrice', data=df, ax=ax10)
    buffer_data10 = save_plot_to_buffer(fig10)

    # The table shown on the page is the DataFrame already loaded at import
    # time. It is not re-read from 'env\DVCarFraudDetection.csv': that path
    # uses a Windows separator and only exists in a local 'env' folder.
    return render_template(
        'dataset.html', df=df,
        plot1=buffer_data1, plot2=buffer_data2, plot3=buffer_data3,
        plot4=buffer_data4, plot5=buffer_data5, plot6=buffer_data6,
        plot7=buffer_data7, plot8=buffer_data8, plot9=buffer_data9,
        plot10=buffer_data10,
    )
|
328 |
+
|
329 |
+
|
330 |
+
@app.route('/predict/insurance', methods=['POST'])
def make_prediction():
    """Predict insurance fraud for the submitted claim and render the result.

    The form fields are assembled into a one-row DataFrame, one-hot encoded
    and aligned to the columns of ``X_train`` before being passed to
    ``best_rf_classifier``. The result text and eight base64-encoded PNG
    charts of the module-level dataset ``df`` are passed to
    ``prediction_result.html``.

    Raises:
        ValueError: if a numeric form field cannot be parsed as an integer.
    """
    form = request.form

    user_input = {
        'CarCompany': [form['CarCompany']],
        'AccidentArea': [form['AccidentArea']],
        'OwnerGender': [form['OwnerGender']],
        'OwnerAge': [int(form['OwnerAge'])],
        'Fault': [form['Fault']],
        'CarCategory': [form['CarCategory']],
        # The price is divided by 10 before prediction.
        # NOTE(review): presumably mirrors the scaling used at training time - confirm.
        'CarPrice': [int(form['CarPrice']) / 10],
        'PoliceReportFiled': [form['PoliceReportFiled']],
        'WitnessPresent': [form['WitnessPresent']],
        'AgentType': [form['AgentType']],
        'NumberOfSuppliments': [int(form['NumberOfSuppliments'])],
        'BasePolicy': [form['BasePolicy']],
        'IsAddressChanged': [form['IsAddressChanged']],
        'PastNumberOfClaims': [int(form['PastNumberOfClaims'])],
    }

    # One-hot encode, then align to the training columns: dummy columns that
    # this single row does not produce are filled with 0.
    processed_user_input = pd.get_dummies(pd.DataFrame(user_input))
    processed_user_input = processed_user_input.reindex(columns=X_train.columns, fill_value=0)

    # Make prediction
    prediction = best_rf_classifier.predict(processed_user_input)
    result = "Fraud in Insurance" if prediction[0] == 1 else "No Fraud in Insurance"

    # 1. Owner gender split by fraud flag
    fig1, ax1 = plt.subplots(figsize=(6, 4))
    sns.countplot(x='OwnerGender', hue='IsFraud', data=df, ax=ax1)
    buffer_data1 = save_plot_to_buffer(fig1)

    # 2. Car price distribution per car category
    fig2, ax2 = plt.subplots(figsize=(6, 4))
    sns.violinplot(x='CarCategory', y='CarPrice', data=df, ax=ax2)
    buffer_data2 = save_plot_to_buffer(fig2)

    # 3. Agent type split by fraud flag
    fig3, ax3 = plt.subplots(figsize=(6, 4))
    sns.countplot(x='AgentType', hue='IsFraud', data=df, ax=ax3)
    buffer_data3 = save_plot_to_buffer(fig3)

    fraud_rows = df[df['IsFraud'] == 1]

    # 4. Base policy share among fraud cases
    fig4, ax4 = plt.subplots(figsize=(6, 4))
    policy_fraud_counts = fraud_rows['BasePolicy'].value_counts()
    ax4.pie(policy_fraud_counts, labels=policy_fraud_counts.index, autopct='%1.1f%%')
    buffer_data4 = save_plot_to_buffer(fig4)

    # 5. Car price for fraud vs non-fraud cases. A single call over the whole
    #    dataset places the two groups side by side; drawing each subset
    #    separately would put both boxes on the same x position.
    fig5, ax5 = plt.subplots(figsize=(6, 4))
    sns.boxplot(x='IsFraud', y='CarPrice', data=df, ax=ax5)
    ax5.set_xlabel('Fraud Status')
    ax5.set_ylabel('Car Price')
    ax5.set_title('Box Plot of Car Price for Fraud and Non-Fraud Cases')
    buffer_data5 = save_plot_to_buffer(fig5)

    # 6. Histogram of past number of claims (one bin per integer value)
    fig6, ax6 = plt.subplots(figsize=(6, 4))
    sns.histplot(data=df, x='PastNumberOfClaims',
                 bins=range(max(df['PastNumberOfClaims']) + 2), kde=False, ax=ax6)
    ax6.set_ylabel('Fraud cases count')
    buffer_data6 = save_plot_to_buffer(fig6)

    # 7. Car category share among fraud cases
    fig7, ax7 = plt.subplots(figsize=(6, 4))
    category_fraud_counts = fraud_rows['CarCategory'].value_counts()
    ax7.pie(category_fraud_counts, labels=category_fraud_counts.index, autopct='%1.1f%%')
    buffer_data7 = save_plot_to_buffer(fig7)

    # 8. Past number of claims split by fraud flag
    fig8, ax8 = plt.subplots(figsize=(6, 4))
    sns.countplot(x='PastNumberOfClaims', hue='IsFraud', data=df, ax=ax8)
    buffer_data8 = save_plot_to_buffer(fig8)

    # Return prediction result and base64 encoded images
    return render_template(
        'prediction_result.html', result=result,
        plot1=buffer_data1, plot2=buffer_data2, plot3=buffer_data3,
        plot4=buffer_data4, plot5=buffer_data5, plot6=buffer_data6,
        plot7=buffer_data7, plot8=buffer_data8,
    )
|
460 |
+
|
461 |
+
@app.route("/predict/app")
def predict_app():
    """Serve fraudapp.html for GET requests to the app-prediction URL."""
    return render_template('fraudapp.html')
|
464 |
+
|
465 |
+
|
466 |
+
@app.route("/mobile_app")
def mobile_app():
    """Serve the mobile-app page (fraudapp.html)."""
    return render_template('fraudapp.html')
|
469 |
+
|
470 |
+
|
471 |
+
@app.route('/analysis/app')
def analysis_app():
    """Serve app_analysis.html for GET requests to the app-analysis URL."""
    return render_template('app_analysis.html')
|
474 |
+
|
475 |
+
@app.route('/analysis/app', methods=['POST'])
def analysisresult_app():
    """Scrape an app's reviews and render the detailed analysis page.

    Reads ``app-id`` and ``app-name`` from the submitted form, downloads and
    labels the reviews, and passes a word cloud plus five charts (all
    base64-encoded PNGs, ``buffer1`` .. ``buffer6``) and the review DataFrame
    to ``app_analysis_final.html``.
    """
    sentiment_colors = {'positive': 'green', 'negative': 'red', 'neutral': 'blue'}

    app_id = request.form['app-id']
    app_name = request.form['app-name']

    # Scrape reviews for the specified app
    reviews = reviews_all(app_id, sleep_milliseconds=0, lang="Eng", country="in", sort=Sort.NEWEST)
    df = preprocess_dataframe(pd.json_normalize(reviews))

    # Everything below tokenises the review text, so a missing review is
    # treated as empty text instead of failing on .split().
    df['content'] = df['content'].fillna('').astype(str)

    # Perform sentiment analysis
    df['sentiment'] = df.apply(lambda row: analyze_sentiment(row['content'], row['score']), axis=1)

    # Word Cloud
    text = ' '.join(df['content'].tolist())
    wordcloud = WordCloud(width=600, height=400, background_color='white').generate(text)
    img_buffer1 = save_wordcloud_to_buffer(wordcloud)

    stop_words = set(stopwords.words('english'))

    # Add more words if necessary
    additional_stopwords = {'i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', 'her', 'hers', 'herself', 'it', 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 's', 't', 'can', 'will', 'just', 'don', 'should', 'now', 'd', 'll', 'm', 'o', 're', 've', 'y', 'ain', 'aren', 'couldn', 'didn', 'doesn', 'hadn', 'hasn', 'haven', 'isn', 'ma', 'mightn', 'mustn', 'needn', 'shan', 'shouldn', 'wasn', 'weren', 'won', 'wouldn'}
    stop_words.update(additional_stopwords)

    # Count Plot of 10 Most Repeated Proper Nouns. A "proper noun" here is any
    # purely alphabetic, title-cased word that is not a stop word.
    proper_nouns = [
        word
        for review in df['content']
        for word in review.split()
        if word.istitle() and word.isalpha() and word.lower() not in stop_words
    ]
    top_proper_nouns = Counter(proper_nouns).most_common(10)
    fig2, ax2 = plt.subplots(figsize=(6, 4))
    sns.countplot(y=proper_nouns, order=[word for word, _ in top_proper_nouns],
                  palette='viridis', ax=ax2)
    ax2.set_title('Count Plot of 10 Most Repeated Proper Nouns')
    ax2.set_xlabel('Count')
    buffer2 = save_plot_to_buffer(fig2)

    # Sentiment share among reviews that received no reply. Colours are looked
    # up per label because value_counts() orders labels by frequency.
    fig3, ax3 = plt.subplots(figsize=(6, 4))
    sentiment_counts = df[df['IsReplied'] == 'No']['sentiment'].value_counts()
    ax3.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90,
            colors=[sentiment_colors[label] for label in sentiment_counts.index])
    ax3.set_title('Pie Chart of Sentiment Distribution for IsReplied NO')
    buffer3 = save_plot_to_buffer(fig3)

    # Calculate Review Length (in whitespace-separated words)
    df['review_length'] = df['content'].apply(lambda x: len(x.split()))

    # Mean rating per (review length, sentiment) cell
    sentiment_distribution = df.pivot_table(index='review_length', columns='sentiment',
                                            values='score', aggfunc='mean')
    fig4, ax4 = plt.subplots(figsize=(6, 4))
    sns.heatmap(sentiment_distribution, cmap='YlGnBu', linewidths=0.5, ax=ax4)
    ax4.set_title('Sentiment Distribution Heatmap')
    ax4.set_xlabel('Sentiment')
    ax4.set_ylabel('Review Length')
    buffer4 = save_plot_to_buffer(fig4)

    # Correlation between review length and rating
    word_freq = pd.DataFrame({'Word Length': df['review_length'], 'Rating': df['score']})
    fig5, ax5 = plt.subplots(figsize=(6, 4))
    sns.heatmap(word_freq.corr(), annot=True, cmap='coolwarm', ax=ax5)
    ax5.set_title('Heatmap of Word Length vs Rating')
    buffer5 = save_plot_to_buffer(fig5)

    # Joint Count Plot of Score for Positive, Negative, and Neutral
    fig6, ax6 = plt.subplots(figsize=(6, 4))
    sns.histplot(data=df, x='score', hue='sentiment', multiple='stack', palette='husl', ax=ax6)
    ax6.set_title('Joint Count Plot of Score for Positive, Negative, and Neutral')
    ax6.set_xlabel('Score')
    ax6.set_ylabel('Count')
    buffer6 = save_plot_to_buffer(fig6)

    return render_template(
        'app_analysis_final.html', df=df, app_name=app_name,
        buffer1=img_buffer1, buffer2=buffer2, buffer3=buffer3,
        buffer4=buffer4, buffer5=buffer5, buffer6=buffer6,
    )
|
558 |
+
|
559 |
+
|
560 |
+
def save_plot_to_buffer(fig):
    """Render a Matplotlib figure to PNG and return it base64-encoded.

    Args:
        fig: The figure to render. It is closed before this function returns,
            whether or not rendering succeeded.

    Returns:
        The PNG image as a base64 ``str``.
    """
    try:
        with io.BytesIO() as buffer:
            fig.savefig(buffer, format='png')
            return base64.b64encode(buffer.getvalue()).decode()
    finally:
        # Close in every case so a failed render does not leave the figure
        # open in pyplot.
        plt.close(fig)
|
568 |
+
|
569 |
+
def save_wordcloud_to_buffer(wordcloud):
    """Render a word cloud to PNG and return it base64-encoded.

    Args:
        wordcloud: Object whose ``to_image()`` returns an image exposing
            ``save(file, format=...)``.

    Returns:
        The PNG image as a base64 ``str``.
    """
    img = wordcloud.to_image()
    # The with-block releases the in-memory buffer even if saving fails.
    with io.BytesIO() as img_buffer:
        img.save(img_buffer, format='PNG')
        return base64.b64encode(img_buffer.getvalue()).decode()
|
578 |
+
|
579 |
+
|
580 |
+
@app.route('/analysis/insurance')
def analysis_insurance():
    """Draw the twelve insurance charts and render the analysis page.

    Every chart is drawn on its own figure from ``df`` (a name resolved
    outside this function), converted to a base64 PNG string by
    ``save_plot_to_buffer``, and passed to ``insurance_analysis.html`` as
    the template variables ``plot1`` through ``plot12``.
    """
    def fresh_axes(size=(6, 4)):
        # One new figure/axes pair per chart.
        return plt.subplots(figsize=size)

    def annotate(axes, title, xlabel=None, ylabel=None):
        # Apply the title and only those axis labels the chart defines.
        axes.set_title(title)
        if xlabel is not None:
            axes.set_xlabel(xlabel)
        if ylabel is not None:
            axes.set_ylabel(ylabel)

    def histogram(column, color, title, xlabel):
        # Histogram with a KDE overlay for a single column of df.
        figure, axes = fresh_axes()
        sns.histplot(df[column], kde=True, color=color, ax=axes)
        annotate(axes, title, xlabel, 'Frequency')
        return save_plot_to_buffer(figure)

    def fraud_countplot(column, palette, title, xlabel):
        # Per-category claim counts, split by the IsFraud column.
        figure, axes = fresh_axes()
        sns.countplot(x=column, hue='IsFraud', data=df, palette=palette, ax=axes)
        annotate(axes, title, xlabel, 'Count')
        return save_plot_to_buffer(figure)

    charts = {}

    # 1-2: distributions of car price and owner age.
    charts['plot1'] = histogram(
        'CarPrice', 'skyblue', 'Distribution of Car Prices', 'Car Price')
    charts['plot2'] = histogram(
        'OwnerAge', 'salmon', 'Distribution of Owner Ages', 'Owner Age')

    # 3: claim counts per car category.
    charts['plot3'] = fraud_countplot(
        'CarCategory', 'coolwarm', 'Count of Claims by Car category', 'Car category')

    # 4: car price spread for each fraud status.
    price_fig, price_ax = fresh_axes()
    sns.boxplot(x='IsFraud', y='CarPrice', data=df, palette='Set2', ax=price_ax)
    annotate(price_ax, 'Distribution of Car Prices by Fraud Status',
             'Fraud Status', 'Car Price')
    charts['plot4'] = save_plot_to_buffer(price_fig)

    # 5: claim counts per accident area.
    charts['plot5'] = fraud_countplot(
        'AccidentArea', 'husl', 'Count of Claims by Accident Area', 'Accident Area')

    # 6: distribution of the NumberOfSuppliments column.
    charts['plot6'] = histogram(
        'NumberOfSuppliments', 'orange',
        'Distribution of Number of Supplements', 'Number of Supplements')

    # 7: claim counts by witness presence.
    charts['plot7'] = fraud_countplot(
        'WitnessPresent', 'viridis', 'Count of Claims by Witness Presence',
        'Witness Presence')

    # 8: distribution of past claim counts.
    charts['plot8'] = histogram(
        'PastNumberOfClaims', 'purple',
        'Distribution of Past Number of Claims', 'Past Number of Claims')

    # 9: correlation heatmap over the numeric columns only.
    correlations = df.select_dtypes(include='number').corr()
    heat_fig, heat_ax = fresh_axes((6.5, 4.5))
    sns.heatmap(correlations, annot=True, cmap='coolwarm', fmt=".2f", ax=heat_ax)
    annotate(heat_ax, 'Heatmap of Correlation Matrix')
    charts['plot9'] = save_plot_to_buffer(heat_fig)

    # 10: graph with an edge for each (CarCompany, IsFraud) pairing in df.
    graph_fig, graph_ax = fresh_axes()
    brand_graph = nx.from_pandas_edgelist(df, 'CarCompany', 'IsFraud')
    nx.draw(brand_graph, with_labels=True, node_color='skyblue',
            node_size=2000, font_size=10, ax=graph_ax)
    annotate(graph_ax, 'Network Graph of Car Brands and Fraud Status')
    charts['plot10'] = save_plot_to_buffer(graph_fig)

    # 11: split violin of car price per accident area, halves keyed by IsFraud.
    violin_fig, violin_ax = fresh_axes()
    sns.violinplot(x='AccidentArea', y='CarPrice', data=df, hue='IsFraud',
                   split=True, palette='husl', ax=violin_ax)
    annotate(violin_ax, 'Violin Plot of Accident Area and Car Price')
    charts['plot11'] = save_plot_to_buffer(violin_fig)

    # 12: hexbin density of car price against owner age, with a colour bar.
    hex_fig, hex_ax = fresh_axes()
    hexes = hex_ax.hexbin(df['CarPrice'], df['OwnerAge'], gridsize=50, cmap='inferno')
    annotate(hex_ax, 'Hexbin Plot of Car Prices and Owner Ages',
             'Car Price', 'Owner Age')
    colour_bar = hex_fig.colorbar(hexes, ax=hex_ax)
    colour_bar.set_label('Frequency')
    charts['plot12'] = save_plot_to_buffer(hex_fig)

    # The dict keys are exactly the template variable names plot1..plot12.
    return render_template('insurance_analysis.html', **charts)
|
685 |
+
|
686 |
+
|
687 |
+
if __name__ == "__main__":
    # Debug mode turns on Werkzeug's interactive debugger, which lets anyone
    # who can reach the server execute arbitrary Python. The Dockerfile starts
    # the app through this entry point, so debug is opt-in via FLASK_DEBUG
    # instead of being hard-coded on.
    import os

    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {
        "1", "true", "yes", "on",
    }
    app.run(debug=debug_enabled)
|
689 |
+
|
690 |
+
|
requirements.txt
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Flask
|
2 |
+
matplotlib
|
3 |
+
gunicorn
|
4 |
+
pandas
|
5 |
+
joblib
|
6 |
+
seaborn
|
7 |
+
wordcloud
|
8 |
+
nltk
|
9 |
+
google-play-scraper
|
10 |
+
networkx
|
11 |
+
gunicorn
|
12 |
+
plotly
|
13 |
+
scikit-learn==1.2.2
|
14 |
+
numpy==1.25.2
|
15 |
+
beautifulsoup4
|
16 |
+
requests
|