File size: 2,981 Bytes
a39564a
76afe9c
 
c1fa42a
c534649
 
a39564a
 
c534649
0322c01
 
 
6c88aec
 
 
 
 
260dfd0
a3f716c
c534649
 
 
 
 
 
 
0322c01
 
c985a1d
beccb55
0322c01
b0ed50b
c534649
ca1549a
486fd87
b0ed50b
e526537
 
 
b0ed50b
e526537
 
95c8add
13a1ba7
e526537
 
b0ed50b
 
 
486fd87
 
0322c01
0a69740
 
 
 
 
 
 
 
a3f716c
0322c01
 
a3f716c
0322c01
a3f716c
0a69740
a3f716c
 
 
0322c01
a3f716c
fcf1637
 
 
 
 
 
 
3e34378
05227d7
260dfd0
05227d7
6ee2072
f84a377
3e34378
 
0a69740
0322c01
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import streamlit as st
from PIL import Image

from transformers import pipeline, BertTokenizer
import numpy as np


# Page-wide Streamlit settings: wide layout and the page title.
st.set_page_config(
    layout="wide",
    page_title="Twitter Hashtag Recommender",
)


def read_md(file_path):
    """Return the full text content of the file at ``file_path``.

    The file is decoded explicitly as UTF-8 instead of with the platform's
    locale encoding, so markdown containing non-ASCII characters is read
    the same way on every host.

    Args:
        file_path: Path of the file to read (anything ``open`` accepts).

    Returns:
        The entire file content as a single string.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        return f.read()

def get_hashtags(text, candidates, tokenizer):
    """Convert fill-mask candidates into hashtag strings.

    Args:
        text: Unused; kept so existing callers keep working.
        candidates: Iterable of mappings, each holding a ``'token'`` entry
            that ``tokenizer.decode`` accepts. Any iterable works (it does
            not have to support ``len``).
        tokenizer: Object exposing ``decode(token)`` that returns a string.

    Returns:
        A list with one hashtag per candidate, in input order. All
        whitespace is stripped from each decoded token so the pieces are
        joined into a single word.
    """
    return [
        # split() + join drops every whitespace run inside the decoded text.
        ''.join(tokenizer.decode(candidate['token']).split())
        for candidate in candidates
    ]



def main():
    """Render the Twitter Hashtag Recommender page.

    The page shows a banner image, the overview / solution / critical
    analysis markdown sections and two Twitter screenshots, then an
    interactive demo: the user picks a sample text containing a ``[MASK]``
    placeholder, the fill-mask model ranks the trending hashtags for that
    position, and the text is displayed with the chosen hashtag substituted
    for ``[MASK]``.
    """
    banner = Image.open('markdown/hashtag.png')
    st.image(banner, caption='Resource: https://www.resourceaholic.com/p/twitter-hashtags.html')
    st.title('Twitter Hashtag Recommender')

    st.header('Overview')
    st.markdown(read_md('markdown/overview.md'))

    screenshots = [
        Image.open('markdown/twitter_webpage.png'),
        Image.open('markdown/twitter_phone.jpg'),
    ]
    st.image(
        screenshots,
        caption=['Screenshots from Twitter.com', 'Screenshots from Twitter APP'],
        width=400,
    )

    st.markdown(read_md('markdown/solution.md'))
    st.markdown(read_md('markdown/critical.md'))

    # Hashtags offered as fill-mask targets and registered with the tokenizer.
    trending_topics = [
        '#mondaythoughts',
        '#mondaymotivation',
        '#bostonmarathon',
        '#thebatman',
        '#thefirstlady',
        '#kandiandthegang',
        '#katg',
        '#easter',
    ]

    st.header("Try it out!")

    # Each sample is a separate prompt. A comma was previously missing after
    # the second entry, which silently concatenated it with the third into a
    # single prompt holding two [MASK] tokens; the flat candidate handling
    # below expects a single mask per prompt.
    texts = [
        'Bruce has an electric guitar set in [MASK]. ',
        'The Batman: Genesis special feature is a must watch. [MASK] ',
        'I don’t understand the need to exaggerate [MASK] Michelle Obama’s facial expressions. ',
        'Phillip, we are seeing on a consistent basis that Brandon isn’t doing his job! Give him the energy you gave Brian, Shawndreca and Torin! [MASK] ',
        "Evans Chebet ran mile 22 in 4 minutes and 27 seconds to take the men's [MASK] crown 💨",
    ]

    selected_text = st.selectbox('Select a text', texts)

    # NOTE(review): the pipeline and tokenizer are rebuilt on every call of
    # main(); if this proves slow, consider caching them — confirm which
    # caching API the deployed Streamlit version provides.
    MODEL = "vivianhuang88/bert_twitter_hashtag"
    print(MODEL)
    fill_mask = pipeline("fill-mask", model=MODEL, tokenizer=MODEL)
    print(fill_mask)
    tokenizer = BertTokenizer.from_pretrained(MODEL, additional_special_tokens=trending_topics)
    print(tokenizer)

    # Restrict the predictions to the trending hashtags only.
    candidates = fill_mask(selected_text, targets=trending_topics)
    print(candidates)
    hashtags = get_hashtags(selected_text, candidates, tokenizer)
    print(hashtags)

    if hashtags:
        selected_topic = st.selectbox("Select the hashtag you like", hashtags)
        finaltext = selected_text.replace("[MASK]", selected_topic)
        st.write(finaltext)

# Build the page only when this file is run as a script, not when imported.
if __name__ == '__main__':
    main()