"""Streamlit demo page for the Twitter hashtag recommender.

The page renders the static write-up stored under ``markdown/`` and then
lets the user pick an example tweet containing a ``[MASK]`` placeholder.
A fill-mask model ranks the trending hashtags for that placeholder and the
chosen hashtag is substituted back into the tweet.
"""
import logging

import numpy as np
import streamlit as st
from PIL import Image
from transformers import BertTokenizer, pipeline

logger = logging.getLogger(__name__)

st.set_page_config(layout='wide', page_title='Twitter Hashtag Recommender')

# Hugging Face model id used for both the fill-mask pipeline and the tokenizer.
MODEL = "vivianhuang88/bert_twitter_hashtag"

# Hashtags the model may propose for the masked position (no duplicates).
TRENDING_TOPICS = [
    '#mondaythoughts',
    '#mondaymotivation',
    '#bostonmarathon',
    '#thebatman',
    '#thefirstlady',
    '#kandiandthegang',
    '#katg',
    '#easter',
]

# Example tweets offered in the UI. Each entry must contain exactly one
# [MASK]: get_hashtags() indexes every candidate as a dict, which is the
# result shape the fill-mask pipeline produces for single-mask input.
EXAMPLE_TEXTS = [
    'Bruce has an electric guitar set in [MASK]. ',
    'The Batman: Genesis special feature is a must watch. [MASK] ',
    'I don’t understand the need to exaggerate [MASK] Michelle Obama’s facial expressions. ',
    'Phillip, we are seeing on a consistent basis that Brandon isn’t doing his job! Give him the energy you gave Brian, Shawndreca and Torin! [MASK] ',
    "Evans Chebet ran mile 22 in 4 minutes and 27 seconds to take the men's [MASK] crown 💨",
]


def read_md(file_path: str) -> str:
    """Return the full text content of the file at *file_path*.

    The file is decoded as UTF-8 explicitly so the result does not depend
    on the platform's default encoding.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        return f.read()


def get_hashtags(text, candidates, tokenizer) -> list:
    """Decode fill-mask candidates into hashtag strings.

    Args:
        text: The masked input text. Currently unused; kept so existing
            callers keep working.
        candidates: Sequence of mappings, each holding the predicted token
            id under the ``'token'`` key.
        tokenizer: Object whose ``decode`` method turns a token id into text.

    Returns:
        One string per candidate, in the same order, with all whitespace
        removed from the decoded token.
    """
    # Decoding may yield pieces separated by whitespace; glue them together.
    return [
        ''.join(tokenizer.decode(candidate['token']).split())
        for candidate in candidates
    ]


def main():
    """Render the page: write-up, screenshots and the interactive demo."""
    banner = Image.open('markdown/hashtag.png')
    st.image(banner, caption='Resource: https://www.resourceaholic.com/p/twitter-hashtags.html')
    st.title('Twitter Hashtag Recommender')

    st.header('Overview')
    st.markdown(read_md('markdown/overview.md'))

    screenshots = [
        Image.open('markdown/twitter_webpage.png'),
        Image.open('markdown/twitter_phone.jpg'),
    ]
    st.image(
        screenshots,
        caption=['Screenshots from Twitter.com', 'Screenshots from Twitter APP'],
        width=400,
    )

    st.markdown(read_md('markdown/solution.md'))
    st.markdown(read_md('markdown/critical.md'))

    st.header("Try it out!")
    selected_text = st.selectbox('Select a text', EXAMPLE_TEXTS)

    # NOTE(review): the pipeline and tokenizer are rebuilt every time main()
    # runs; consider Streamlit's resource caching if that becomes too slow.
    fill_mask = pipeline("fill-mask", model=MODEL, tokenizer=MODEL)
    tokenizer = BertTokenizer.from_pretrained(
        MODEL, additional_special_tokens=TRENDING_TOPICS
    )

    # Restrict the predictions to the trending hashtags.
    candidates = fill_mask(selected_text, targets=TRENDING_TOPICS)
    logger.debug("fill-mask candidates: %s", candidates)

    hashtags = get_hashtags(selected_text, candidates, tokenizer)
    logger.debug("decoded hashtags: %s", hashtags)

    if hashtags:
        selected_topic = st.selectbox("Select the hashtag you like", hashtags)
        st.write(selected_text.replace("[MASK]", selected_topic))


if __name__ == '__main__':
    main()