Spaces:

loxzdigital
/

Model_SA_Space

Runtime error

App Files Files Community

Duy-Anh Dang committed on Mar 7, 2023

Commit

0641616

1 Parent(s): fe86c4e

load_data()

Browse files

Files changed (1) hide show

FunctionsModelSA_V1.py +132 -56

FunctionsModelSA_V1.py CHANGED Viewed

@@ -1,10 +1,11 @@
 import pandas as pd
 import numpy as np
 from numpy import arange
-# from colour import Color
 import plotly.graph_objects as go
 from nltk import tokenize
-# from IPython.display import Markdown
 from PIL import ImageColor
 from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
 import nltk
@@ -17,13 +18,18 @@ from scipy import spatial
 import re
 import pytorch_lightning as pl
 from bs4 import BeautifulSoup
 from transformers import BertTokenizerFast as BertTokenizer, BertModel, BertConfig
 import torch.nn as nn
 import torch
 import boto3
 from scipy import spatial
-import streamlit as st
-import utils
 PARAMS={
@@ -33,11 +39,11 @@ PARAMS={
 'N_EPOCHS': 10,
 'n_classes':8,
 'LABEL_COLUMNS': ['label_analytical', 'label_casual', 'label_confident', 'label_friendly',
-       'label_joyful', 'label_opstimistic', 'label_respectful',
        'label_urgent'],
 'TEXTCOL': 'text',
 'rf_labels':['label_analytical', 'label_casual', 'label_confident',
-       'label_friendly', 'label_joyful', 'label_opstimistic',
        'label_respectful', 'label_urgent',
        'industry_Academic and Education', 'industry_Energy',
        'industry_Entertainment', 'industry_Finance and Banking',
@@ -50,7 +56,7 @@ PARAMS={
        'campaign_type_Usage_and_Consumption', 'campaign_type_Webinar']
 }
-CI_rates = utils.get_files_from_aws('emailcampaignmodeldata','ModelSADataSets/CI_RATES.csv')
 ### create file uploading widget
 def email_upload():
@@ -174,6 +180,7 @@ def convert_text_to_tone(text,model=model,params=PARAMS):
     data.append([plain_text,sentiment_dict,predictions])
     final=pd.DataFrame(data,columns=['text','sentiment','sentencetone'])
     agg_tones=final['sentencetone'].apply(np.mean,axis=0)
     tones=pd.DataFrame(agg_tones.tolist(),columns=params['LABEL_COLUMNS'])
     return final,tones
@@ -204,15 +211,15 @@ model_dict={'Open_Rate':ORM,
 'Revenue_Per_Email':RVM}
-def plot_CI(pred,lower,upper,scale_factor=0.5,streamlit=False):
     """This function plots the confidence intervals of your prediction
     pred- The prediction varaible given from the Random Forest for the target variable
     lower- The lower half of the prediction confidence interval
     upper- The upper half of the confidence interval
     scale_factor- This will modify the size of the graph """
     title=f'The Predicted Value is {pred}'
     fig = go.Figure()
     fig.update_xaxes(showgrid=False)
@@ -229,59 +236,88 @@ def plot_CI(pred,lower,upper,scale_factor=0.5,streamlit=False):
     fig.add_vline(x=upper,annotation_text=f"{upper}",annotation_position="top")
     fig.add_vrect(lower,upper,fillcolor='red',opacity=0.25,annotation_text='95% CI',annotation_position="outside top")
     fig.update_layout(title_text=title, title_x=0.5)
-    if streamlit:
-        st.plotly_chart(fig)
-    else:
-        fig.show()
 def find_max_cat(df,target,industry,campaign):
     d=df[(df[campaign]==1) & (df[industry]==1)]
     if(len(d)>0):
-        rec=df.loc[d[target].idxmax()][3:11]
-        return round(d[target].max(),3),rec
     else:
-        return 0,0
-def recommend(tones,recommend_changes,change,target,streamlit=False):
     ''' This function creates the recomended changes plots it takes it the tones, the changes and '''
     fig = go.Figure()
     fig.add_trace(go.Bar(
-        y=tones.columns,
-        x=tones.values[0],
-        name='Current Tones',
-        orientation='h',
-       # text=np.round(tones.values[0],3),
-        width=.5,
-        marker=dict(
-            color='#00e6b1',
-            line=dict(color='rgba(58, 71, 80, 1.0)', width=3)
-        )
-    ))
-    fig.add_trace(go.Bar(
-        y=tones.columns,
         x=recommend_changes,
         name='Recommend changes',
         orientation='h',
         text=np.round(recommend_changes,3),
-        width=0.3,
         marker=dict(
             color='#e60f00',
-            line=dict(color='rgba(58, 71, 80, 1.0)', width=3)
         )
     ))
     fig.update_traces(textfont_size=18, textposition="outside", cliponaxis=False)
-    fig.update_layout(height=700, plot_bgcolor='white')
-    fig.update_layout(barmode='stack', yaxis={'categoryorder':'array','categoryarray': recommend_changes.sort_values(key=abs,ascending=True).index})
-    fig.update_layout(title_text=f'The following Changes will yield a {round(change,3)} increase in {target}')
-    if streamlit:
-        st.plotly_chart(fig)
     else:
-        fig.show()
 def prediction(tones,campaign_val,industry_val,target):
     model_val=pd.DataFrame(tones,columns=PARAMS['rf_labels']).fillna(0)
@@ -289,15 +325,58 @@ def prediction(tones,campaign_val,industry_val,target):
     model_val.loc[0,industry_val]=1
     model=model_dict[target]
     pred=model.predict(model_val)[0]
     CI=CI_rates[CI_rates['model']==target]
     lower=pred+CI['2_5'].values[0]
     higher=pred+CI['97_5'].values[0]
-    return round(pred,3),round(lower,3),round(higher,3),model
-def load_data(buckets,key):
-    # data_location='Tone_and_target.csv'
-    # data=pd.read_csv(data_location)
-    df=utils.get_files_from_aws(buckets,key)
     df_unique = df.drop_duplicates()
     df_unique = pd.get_dummies(df_unique, columns=['industry','campaign_type'])
     df_data=df_unique.drop(columns=['Unnamed: 0','body'])
@@ -305,7 +384,7 @@ def load_data(buckets,key):
     return df_data
-def plot_table(sorted_setence_tuple,streamlit=True):
     """ Plots the bottom most table, takes in a list of tuples where the tuple is the sentence  the sentiment distance
     from the best values """
     sentences=list(zip(*sorted_setence_tuple))[0]
@@ -327,13 +406,10 @@ def plot_table(sorted_setence_tuple,streamlit=True):
                align=['left','center'],
                font=dict(family="Arial",size=12)))
     ])
-    if streamlit:
-        st.plotly_chart(fig)
-    else:
-        fig.show()
-def corrections(best,df,streamlit=False):
     """This function finds the the difference between the tone of each sentence and the  best tone for the desired metric
     best- tone values of the best email for the current categories
     df- dataframe of the sentences of the uploaded email and the """
@@ -350,7 +426,7 @@ def corrections(best,df,streamlit=False):
         rbg=ImageColor.getcolor(f'{col}', "RGB")
         sentence_order.append((text,new_value,rbg))
         sorted_sentences=sorted(sentence_order,key=lambda x: x[1],reverse=True)
-    plot_table(sorted_sentences,streamlit)
 def read_file(fc):
     with open(fc.selected) as file: # Use file to refer to the file object

+import s3fs
 import pandas as pd
 import numpy as np
 from numpy import arange
+from colour import Color
 import plotly.graph_objects as go
 from nltk import tokenize
+from IPython.display import Markdown
 from PIL import ImageColor
 from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
 import nltk
 import re
 import pytorch_lightning as pl
 from bs4 import BeautifulSoup
+import ipywidgets as widgets
+from ipywidgets import FileUpload
+from urlextract import URLExtract
 from transformers import BertTokenizerFast as BertTokenizer, BertModel, BertConfig
 import torch.nn as nn
 import torch
+from ipywidgets import interact, Dropdown
 import boto3
+from sagemaker import get_execution_role
 from scipy import spatial
+from ipyfilechooser import FileChooser
+import random
 PARAMS={
 'N_EPOCHS': 10,
 'n_classes':8,
 'LABEL_COLUMNS': ['label_analytical', 'label_casual', 'label_confident', 'label_friendly',
+       'label_joyful', 'label_optimistic', 'label_respectful',
        'label_urgent'],
 'TEXTCOL': 'text',
 'rf_labels':['label_analytical', 'label_casual', 'label_confident',
+       'label_friendly', 'label_joyful', 'label_optimistic',
        'label_respectful', 'label_urgent',
        'industry_Academic and Education', 'industry_Energy',
        'industry_Entertainment', 'industry_Finance and Banking',
        'campaign_type_Usage_and_Consumption', 'campaign_type_Webinar']
 }
+CI_rates=pd.read_csv('CI_RATES.csv')
 ### create file uploading widget
 def email_upload():
     data.append([plain_text,sentiment_dict,predictions])
     final=pd.DataFrame(data,columns=['text','sentiment','sentencetone'])
+#     print(final)
     agg_tones=final['sentencetone'].apply(np.mean,axis=0)
     tones=pd.DataFrame(agg_tones.tolist(),columns=params['LABEL_COLUMNS'])
     return final,tones
 'Revenue_Per_Email':RVM}
+## Plot confidence interval
+def plot_CI(pred,lower,upper,scale_factor=0.5):
     """This function plots the confidence intervals of your prediction
     pred- The prediction varaible given from the Random Forest for the target variable
     lower- The lower half of the prediction confidence interval
     upper- The upper half of the confidence interval
     scale_factor- This will modify the size of the graph """
     title=f'The Predicted Value is {pred}'
     fig = go.Figure()
     fig.update_xaxes(showgrid=False)
     fig.add_vline(x=upper,annotation_text=f"{upper}",annotation_position="top")
     fig.add_vrect(lower,upper,fillcolor='red',opacity=0.25,annotation_text='95% CI',annotation_position="outside top")
     fig.update_layout(title_text=title, title_x=0.5)
+    fig.show()
 def find_max_cat(df,target,industry,campaign):
+    """Return the lowest and highest `target` values, plus a slice of the best row,
+    for the rows of `df` flagged with both `industry` and `campaign`.
+
+    df- dataframe holding the target column and the two indicator columns
+    target- name of the column whose minimum and maximum are reported
+    industry- name of the industry indicator column (a row matches when it equals 1)
+    campaign- name of the campaign indicator column (a row matches when it equals 1)
+
+    Returns (min, max, rec): min and max are rounded to 3 decimals and rec is
+    positions 3 to 10 of the row holding the highest target value.
+    Returns (0, 0, 0) when no row matches. """
+    is_match=(df[campaign]==1) & (df[industry]==1)
+    matches=df[is_match]
+    if len(matches)==0:
+        return 0,0,0
+    target_values=matches[target]
+    best_row=df.loc[target_values.idxmax()]
+    rec=best_row[3:11]  ## the tone values of the best entry, per the original comment
+    return round(target_values.min(),3),round(target_values.max(),3),rec
+def scale_values(val, tn):  ## val = slider value, tn = current tone value
+    """Return the current tone value multiplied by 100.
+
+    val- slider value; accepted but never read, the result depends only on tn
+    tn- current tone value """
+    return tn*100
+## Labels passed as the y values of the bar charts drawn by recommend and intensity_changes
+## NOTE(review): presumably listed in the same order as PARAMS['LABEL_COLUMNS'] - confirm
+tone_labels = ['Analytical', 'Casual', 'Confident', 'Friendly', 'Joyful', 'Optimistic', 'Respectful', 'Urgent']
+# ## Plot recommendations - ORIGINAL FROM V1.0
+# def recommend(tones,recommend_changes,change,target):
+#     ''' This function creates the recomended changes plots it takes it the tones, the changes and '''
+#     fig = go.Figure()
+#     fig.add_trace(go.Bar(
+#         y=tones.columns,
+#         x=tones.values[0],
+#         name='Current Tones',
+#         orientation='h',
+#        # text=np.round(tones.values[0],3),
+#         width=.9,
+#         marker=dict(
+#             color='#00e6b1',
+#             line=dict(color='rgba(58, 71, 80, 1.0)', width=3)
+#         )
+#     ))
+#     fig.add_trace(go.Bar(
+#         y=tones.columns,
+#         x=recommend_changes,
+#         name='Recommend changes',
+#         orientation='h',
+#         text=np.round(recommend_changes,3),
+#         width=.5,
+#         marker=dict(
+#             color='#e60f00',
+#             line=dict(color='rgba(58, 71, 80, 1.0)', width=3)
+#         )
+#     ))
+#     fig.update_traces(textfont_size=18, textposition="outside", cliponaxis=False)
+#     fig.update_layout(height=1000, plot_bgcolor='white')
+#     fig.update_layout(barmode='stack', yaxis={'categoryorder':'array','categoryarray': recommend_changes.sort_values(key=abs,ascending=True).index})
+#     fig.update_layout(title_text=f'The following Changes will yield a {round(change,3)} increase in {target}')
+#     fig.show()
+## Plot recommendations - MODIFIED
+def recommend(tones,recommend_changes,change,target):
+    ''' Plot the recommended tone changes as a horizontal bar chart and show it.
+
+    tones- not read by this version (only the commented-out y= line used it); kept so existing calls still work
+    recommend_changes- values plotted against tone_labels; each bar is labelled with its value rounded to 3 decimals
+    change- expected increase in the target, reported in the chart title
+    target- name of the target metric; 'Revenue_Per_Email' is titled in dollars, any other target as a percentage '''
     fig = go.Figure()
     fig.add_trace(go.Bar(
+#         y=tones.columns,
+        y=tone_labels,
         x=recommend_changes,
         name='Recommend changes',
         orientation='h',
         text=np.round(recommend_changes,3),
+        width=.5,
         marker=dict(
             color='#e60f00',
+            line=dict(color='rgba(58, 71, 80, 1.0)', width=1)
         )
     ))
     fig.update_traces(textfont_size=18, textposition="outside", cliponaxis=False)
+#     fig.update_layout(height=1000, plot_bgcolor='white')
+#     fig.update_layout(barmode='stack', yaxis={'categoryorder':'array','categoryarray': recommend_changes.sort_values(key=abs,ascending=True).index})
+#     fig.update_layout(title_text=f'The following Changes will yield a {round(change,3)} increase in {target}')
+    if target == 'Revenue_Per_Email':
+        out = f"${round(change,2)}"
     else:
+        ## round(change,2)*100 can carry float noise (0.07 gives 7.000000000000001),
+        ## so the product is formatted with zero decimals instead of printed raw
+        out = f"{round(change,2)*100:.0f}%"
+    fig.update_layout(title_text=f'The following Changes will yield a {out} increase in {target}')
+    fig.show()
 def prediction(tones,campaign_val,industry_val,target):
     model_val=pd.DataFrame(tones,columns=PARAMS['rf_labels']).fillna(0)
     model_val.loc[0,industry_val]=1
     model=model_dict[target]
     pred=model.predict(model_val)[0]
+#     y_pred = regr.predict(X_test)
+#     r2_test = r2_score(y_test, y_pred)
     CI=CI_rates[CI_rates['model']==target]
     lower=pred+CI['2_5'].values[0]
     higher=pred+CI['97_5'].values[0]
+    return pred,round(lower,3),round(higher,3),model
+## Plot recommendations for intensity changes
+def intensity_changes(tones,recommend_changes,change,target):
+    ''' Plot the changes made to the tone intensities as a horizontal bar chart and show it,
+    with the resulting change in the target reported in the title.
+
+    tones- not read (only the commented-out y= line used it); kept so existing calls still work
+    recommend_changes- values plotted against tone_labels; each bar is labelled with its value rounded to 3 decimals
+    change- resulting change in the target; a negative value is titled as a decrease, anything else as an increase
+    target- name of the target metric; 'Revenue_Per_Email' is titled in dollars, any other target as a percentage '''
+    fig = go.Figure()
+    fig.add_trace(go.Bar(
+#         y=tones.columns,
+        y=tone_labels,
+        x=recommend_changes,
+        name='Recommend changes',
+        orientation='h',
+        text=np.round(recommend_changes,3),
+        width=.5,
+        marker=dict(
+            color='#00e6b1',
+            line=dict(color='rgba(58, 71, 80, 1.0)', width=1)
+        )
+    ))
+    fig.update_traces(textfont_size=18, textposition="outside", cliponaxis=False)
+    ## The title always reports a positive amount; the wording carries the direction
+    direction = 'decrease' if change < 0 else 'increase'
+    amount = round(abs(change),2)
+    if target == 'Revenue_Per_Email':
+        out = f"${amount}"
+    else:
+        ## Scale to a percentage in both directions (the decrease branch used to skip the *100);
+        ## zero decimals hides float noise such as 0.07*100 == 7.000000000000001
+        out = f"{amount*100:.0f}%"
+    fig.update_layout(title_text=f'The following Changes will {direction} the {target} by {out}')
+#     fig.update_layout(title_text=f'The changes made to the tone intensities')
+    fig.show()
+def load_data():
+    data_location='Tone_and_target.csv'
+    df=pd.read_csv(data_location)
     df_unique = df.drop_duplicates()
     df_unique = pd.get_dummies(df_unique, columns=['industry','campaign_type'])
     df_data=df_unique.drop(columns=['Unnamed: 0','body'])
     return df_data
+def plot_table(sorted_setence_tuple):
     """ Plots the bottom most table, takes in a list of tuples where the tuple is the sentence  the sentiment distance
     from the best values """
     sentences=list(zip(*sorted_setence_tuple))[0]
                align=['left','center'],
                font=dict(family="Arial",size=12)))
     ])
+    #fig.show()
+def corrections(best,df):
     """This function finds the the difference between the tone of each sentence and the  best tone for the desired metric
     best- tone values of the best email for the current categories
     df- dataframe of the sentences of the uploaded email and the """
         rbg=ImageColor.getcolor(f'{col}', "RGB")
         sentence_order.append((text,new_value,rbg))
         sorted_sentences=sorted(sentence_order,key=lambda x: x[1],reverse=True)
+    plot_table(sorted_sentences)
 def read_file(fc):
     with open(fc.selected) as file: # Use file to refer to the file object