import random
import re

import streamlit as st
from PIL import Image
import numpy as np
import pandas as pd
import easyocr
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import AutoTokenizer, ViTFeatureExtractor, VisionEncoderDecoderModel

# Download the NLTK data needed for tokenization and stopword filtering
nltk.download('stopwords')
nltk.download('punkt')
# Load the pretrained image-captioning model from the Hugging Face Hub
model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
# Load the feature extractor and tokenizer
feature_extractor = ViTFeatureExtractor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
tokenizer = AutoTokenizer.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
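
# How the captioning pipeline fits together: the feature extractor turns a PIL image
# into normalized pixel_values, the ViT encoder / GPT-2 decoder generates token IDs,
# and the tokenizer decodes those IDs back into text. A minimal sketch (not the exact
# settings used below; max_length and num_beams are illustrative values only):
#
#     pixel_values = feature_extractor(pil_image, return_tensors="pt").pixel_values
#     output_ids = model.generate(pixel_values, max_length=16, num_beams=4)
#     caption = tokenizer.decode(output_ids[0], skip_special_tokens=True)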
def generate_captions(image):
    # Accept an already-opened PIL image (avoids re-reading the uploaded file stream)
    image = image.convert("RGB")
    pixel_values = feature_extractor(image, return_tensors="pt").pixel_values.to("cpu")
    output_ids = model.generate(pixel_values)
    generated_caption = tokenizer.decode(output_ids[0])
    # Strip the GPT-2 end-of-text marker from the decoded caption
    generated_caption = generated_caption.replace("<|endoftext|>", "")
    return generated_caption.strip()
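
# Illustrative example only: for a beach photo, generate_captions(img) might return
# something like "a group of people standing on top of a sandy beach".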
# Use easyocr to extract any text visible in the image
def image_text(image):
    img_np = np.array(image)
    reader = easyocr.Reader(['en'])
    results = reader.readtext(img_np)
    # Turn each detected string into a hashtag: lowercase, no spaces, "#" prefix
    detected_text = ['#' + entry[1].strip().lower().replace(" ", "") for entry in results]
    return detected_text
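
# Note: easyocr's readtext() returns a list of (bounding_box, text, confidence) tuples,
# which is why entry[1] above picks out just the recognised string. For example, a sign
# reading "Grand Canyon" would become the hashtag "#grandcanyon".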
# Load NLTK stopwords for filtering
stop_words = set(stopwords.words('english'))
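
# stop_words is used below to keep only the content words of a caption. For example,
# word_tokenize("a dog on the beach") gives ["a", "dog", "on", "the", "beach"], and
# filtering against stop_words leaves ["dog", "beach"].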
# Turn keywords extracted from the generated caption into hashtags
def add_hashtags(keywords):
    hashtags = []
    for keyword in keywords:
        # Prefix each keyword with "#" (adjust this step as needed)
        hashtag = '#' + keyword.lower()
        hashtags.append(hashtag)
    return hashtags
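
# Example: add_hashtags(["dog", "beach"]) returns ["#dog", "#beach"].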
def trending_hashtags(caption):
    # Read trending hashtags from a comma-separated file
    with open("hashies.txt", "r") as file:
        hashtags_string = file.read()

    # Split the hashtags on commas and strip leading/trailing spaces
    trending = [hashtag.strip() for hashtag in hashtags_string.split(',')]

    # Create a DataFrame from the hashtags
    df = pd.DataFrame(trending, columns=["Hashtags"])

    # Extract keywords (non-stopword tokens) from a given text
    def extract_keywords(text):
        tokens = word_tokenize(text)
        keywords = [token.lower() for token in tokens if token.lower() not in stop_words]
        return keywords

    # Extract keywords from the caption and from each trending hashtag
    caption_keywords = extract_keywords(caption)
    hashtag_keywords = [extract_keywords(hashtag) for hashtag in df["Hashtags"]]

    # Cosine similarity between two strings in TF-IDF space
    def calculate_similarity(text1, text2):
        tfidf_vectorizer = TfidfVectorizer()
        tfidf_matrix = tfidf_vectorizer.fit_transform([text1, text2])
        similarity_matrix = cosine_similarity(tfidf_matrix[0], tfidf_matrix[1])
        return similarity_matrix[0][0]

    # Score every trending hashtag against the caption
    similarities = [calculate_similarity(' '.join(caption_keywords), ' '.join(keywords)) for keywords in hashtag_keywords]

    # Sort trending hashtags by similarity, most relevant first
    sorted_hashtags = [hashtag for _, hashtag in sorted(zip(similarities, df["Hashtags"]), reverse=True)]

    # Keep the top 5 most relevant hashtags, dropping duplicates and stray quotes
    selected_hashtags = list(set(sorted_hashtags[:5]))
    selected_hashtags = [word.strip("'") for word in selected_hashtags]
    return selected_hashtags
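
# How the ranking above works: the caption and each trending hashtag are reduced to
# their keywords, the two keyword strings are embedded as TF-IDF vectors over their
# shared vocabulary, and the cosine of the angle between the vectors is the relevance
# score. With only two documents this mostly measures word overlap: caption keywords
# "dog beach" score higher against a hashtag containing "beach" than against one that
# shares no words at all (which scores 0).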
# Create the Streamlit app
def app():
    st.title('Image from your Side, Trending Hashtags from our Side')
    st.write('Upload an image to see what we have in store.')

    # Create the file uploader
    uploaded_file = st.file_uploader("Got you covered, upload your wish! Magic on the way!", type=["jpg", "jpeg", "png"])

    # Check whether a file has been uploaded
    if uploaded_file is not None:
        # Load the image
        image = Image.open(uploaded_file).convert("RGB")

        # Caption the image, then turn the caption's keywords into hashtags
        caption = generate_captions(image)
        tokens = word_tokenize(caption)
        keywords = [token.lower() for token in tokens if token.lower() not in stop_words]
        hashtags = add_hashtags(keywords)

        # Hashtags from any text detected inside the image
        extracted_text = image_text(image)

        # Trending hashtags most similar to the caption
        web_hashtags = trending_hashtags(caption)
        combined_hashtags = hashtags + extracted_text + web_hashtags

        # Shuffle, cap at 15, de-duplicate, and drop hashtags ending in a digit
        # (those are usually OCR noise such as "#offer50")
        random.shuffle(combined_hashtags)
        combined_hashtags = list(set(item for item in combined_hashtags[:15] if not re.search(r'\d$', item)))

        # Display the image, the caption, and the final hashtags
        st.image(image, caption='The Uploaded File')
        st.write("First things first, a caption for your photo: ", caption)
        st.write("Magical hashies have arrived: ", combined_hashtags)

# Run the app
if __name__ == '__main__':
    app()
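
# To try this outside the Space, run it with Streamlit (assuming this file is saved
# as app.py): streamlit run app.py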