File size: 2,742 Bytes
af1cbd8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# import s3fs

import pandas as pd
import numpy as np
from numpy import arange
from colour import Color
import plotly.graph_objects as go
from nltk import tokenize
from IPython.display import Markdown
from PIL import ImageColor
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import nltk
nltk.download('punkt')
from io import StringIO
from scipy import spatial
import re
import pytorch_lightning as pl
from bs4 import BeautifulSoup
import ipywidgets as widgets
from ipywidgets import FileUpload
from urlextract import URLExtract
from transformers import BertTokenizerFast as BertTokenizer, BertModel, BertConfig
import torch.nn as nn
import torch
from ipywidgets import interact, Dropdown
import boto3
# from sagemaker import get_execution_role
from scipy import spatial
from bokeh.models.widgets import Div
import streamlit as st


def get_files_from_aws(bucket,prefix):
    """
        download a csv object from an aws s3 bucket into a DataFrame

    bucket (STRING): bucket name
    prefix (STRING): key of the object inside the bucket

    The access key id and secret are read from the Streamlit secrets
    "aws_id" and "aws_key". The object body is read in full, decoded as
    UTF-8 and parsed with pandas.read_csv; the resulting DataFrame is
    returned.
    """
    credentials = {
        'aws_access_key_id': st.secrets["aws_id"],
        'aws_secret_access_key': st.secrets["aws_key"],
    }
    client = boto3.client('s3', **credentials)

    response = client.get_object(Bucket=bucket, Key=prefix)
    csv_text = response['Body'].read().decode('utf-8')

    # read_csv is handed an in-memory text buffer holding the decoded body
    return pd.read_csv(StringIO(csv_text))

def url_button(button_name,url):
    """
        draw a Streamlit button that asks the browser to open a url

    button_name (STRING): label passed to st.button
    url (STRING): address passed to the browser's window.open

    When st.button reports a click, a bokeh Div is rendered through
    st.bokeh_chart. The Div's markup is an <img> tag whose onerror
    attribute carries the window.open call. Nothing is returned.
    """
    # Function-scope imports so this block does not depend on the
    # module's import section providing json / html.escape.
    import json
    from html import escape

    if st.button(button_name):
        # json.dumps produces a quoted, escaped JavaScript string literal,
        # so a quote or backslash inside the url cannot end the literal
        # early. str() keeps the old .format() tolerance for non-str input.
        js = "window.open({})".format(json.dumps(str(url))) # New tab or window
        # The script is embedded in a double-quoted HTML attribute; escaping
        # it prevents a quote in the script from closing the attribute.
        html = '<img src onerror="{}">'.format(escape(js, quote=True))
        div = Div(text=html)
        st.bokeh_chart(div)



# Label column names shared by 'LABEL_COLUMNS' and the head of 'rf_labels'.
# NOTE(review): 'label_opstimistic' is kept exactly as spelled in the
# original; presumably it matches a column name elsewhere - confirm before
# renaming.
_TONE_LABEL_COLUMNS = [
    'label_analytical',
    'label_casual',
    'label_confident',
    'label_friendly',
    'label_joyful',
    'label_opstimistic',
    'label_respectful',
    'label_urgent',
]

# Industry indicator names that follow the tone labels in 'rf_labels'.
_INDUSTRY_COLUMNS = [
    'industry_Academic and Education',
    'industry_Energy',
    'industry_Entertainment',
    'industry_Finance and Banking',
    'industry_Healthcare',
    'industry_Hospitality',
    'industry_Real Estate',
    'industry_Retail',
    'industry_Software and Technology',
]

# Campaign-type indicator names that close out 'rf_labels'.
_CAMPAIGN_TYPE_COLUMNS = [
    'campaign_type_Abandoned_Cart',
    'campaign_type_Engagement',
    'campaign_type_Newsletter',
    'campaign_type_Product_Announcement',
    'campaign_type_Promotional',
    'campaign_type_Review_Request',
    'campaign_type_Survey',
    'campaign_type_Transactional',
    'campaign_type_Usage_and_Consumption',
    'campaign_type_Webinar',
]

# Module-wide settings dictionary. Each list value is a fresh list object,
# so mutating one entry never affects another.
PARAMS = {
    'BATCH_SIZE': 8,
    'MAX_TOKEN_COUNT': 100,
    'BERT_MODEL_NAME': 'google/bert_uncased_L-2_H-128_A-2',
    'N_EPOCHS': 10,
    'n_classes': 8,
    'LABEL_COLUMNS': list(_TONE_LABEL_COLUMNS),
    'TEXTCOL': 'text',
    'rf_labels': [
        *_TONE_LABEL_COLUMNS,
        *_INDUSTRY_COLUMNS,
        *_CAMPAIGN_TYPE_COLUMNS,
    ],
}