Spaces:
Runtime error
Runtime error
import streamlit as st | |
from PIL import Image | |
import numpy as np | |
import nltk | |
nltk.download('stopwords') | |
nltk.download('punkt') | |
import pandas as pd | |
import random | |
import easyocr | |
import re | |
from nltk.corpus import stopwords | |
from nltk.tokenize import word_tokenize | |
from sklearn.feature_extraction.text import TfidfVectorizer | |
from sklearn.metrics.pairwise import cosine_similarity | |
from transformers import AutoTokenizer, ViTFeatureExtractor, VisionEncoderDecoderModel | |
# Directory path to the saved model on Google Drive | |
model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning") | |
# Load the feature extractor and tokenizer | |
feature_extractor = ViTFeatureExtractor.from_pretrained("nlpconnect/vit-gpt2-image-captioning") | |
tokenizer = AutoTokenizer.from_pretrained("nlpconnect/vit-gpt2-image-captioning") | |
def generate_captions(image): | |
image = Image.open(image).convert("RGB") | |
generated_caption = tokenizer.decode(model.generate(feature_extractor(image, return_tensors="pt").pixel_values.to("cpu"))[0]) | |
sentence = generated_caption | |
text_to_remove = "<|endoftext|>" | |
generated_caption = sentence.replace(text_to_remove, "") | |
return generated_caption | |
# use easyocr to extract text from the image | |
def image_text(image): | |
img_np = np.array(image) | |
reader = easyocr.Reader(['en']) | |
text = reader.readtext(img_np) | |
detected_text = " ".join([item[1] for item in text]) | |
# Extract individual words, convert to lowercase, and add "#" symbol | |
detected_text= ['#' + entry[1].strip().lower().replace(" ", "") for entry in text] | |
return detected_text | |
# Load NLTK stopwords for filtering | |
stop_words = set(stopwords.words('english')) | |
# Add hashtags to keywords, which have been generated from image captioing | |
def add_hashtags(keywords): | |
hashtags = [] | |
for keyword in keywords: | |
# Generate hashtag from the keyword (you can modify this part as per your requirements) | |
hashtag = '#' + keyword.lower() | |
hashtags.append(hashtag) | |
return hashtags | |
def trending_hashtags(caption): | |
# Read trending hashtags from a file separated by commas | |
with open("hashies.txt", "r") as file: | |
hashtags_string = file.read() | |
# Split the hashtags by commas and remove any leading/trailing spaces | |
trending_hashtags = [hashtag.strip() for hashtag in hashtags_string.split(',')] | |
# Create a DataFrame from the hashtags | |
df = pd.DataFrame(trending_hashtags, columns=["Hashtags"]) | |
# Function to extract keywords from a given text | |
def extract_keywords(caption): | |
tokens = word_tokenize(caption) | |
keywords = [token.lower() for token in tokens if token.lower() not in stop_words] | |
return keywords | |
# Extract keywords from caption and trending hashtags | |
caption_keywords = extract_keywords(caption) | |
hashtag_keywords = [extract_keywords(hashtag) for hashtag in df["Hashtags"]] | |
# Function to calculate cosine similarity between two strings | |
def calculate_similarity(text1, text2): | |
tfidf_vectorizer = TfidfVectorizer() | |
tfidf_matrix = tfidf_vectorizer.fit_transform([text1, text2]) | |
similarity_matrix = cosine_similarity(tfidf_matrix[0], tfidf_matrix[1]) | |
return similarity_matrix[0][0] | |
# Calculate similarity between caption and each trending hashtag | |
similarities = [calculate_similarity(' '.join(caption_keywords), ' '.join(keywords)) for keywords in hashtag_keywords] | |
# Sort trending hashtags based on similarity in descending order | |
sorted_hashtags = [hashtag for _, hashtag in sorted(zip(similarities, df["Hashtags"]), reverse=True)] | |
# Select top k relevant hashtags (e.g., top 5) without duplicates | |
selected_hashtags = list(set(sorted_hashtags[:5])) | |
selected_hashtag = [word.strip("'") for word in selected_hashtags] | |
return selected_hashtag | |
# create the Streamlit app | |
def app(): | |
st.title('Image from your Side, Trending Hashtags from our Side') | |
st.write('Upload an image to see what we have in store.') | |
# create file uploader | |
uploaded_file = st.file_uploader("Got You Covered, Upload your wish!, magic on the Way! ", type=["jpg", "jpeg", "png"]) | |
# check if file has been uploaded | |
if uploaded_file is not None: | |
# load the image | |
image = Image.open(uploaded_file).convert("RGB") | |
# Image Captions | |
string = generate_captions(uploaded_file) | |
tokens = word_tokenize(string) | |
keywords = [token.lower() for token in tokens if token.lower() not in stop_words] | |
hashtags = add_hashtags(keywords) | |
# Text Captions from image | |
extracted_text = image_text(image) | |
#Final Hashtags Generation | |
web_hashtags = trending_hashtags(string) | |
combined_hashtags = hashtags + extracted_text + web_hashtags | |
# Shuffle the list randomly | |
random.shuffle(combined_hashtags) | |
combined_hashtags = list(set(item for item in combined_hashtags[:15] if not re.search(r'\d$', item))) | |
# display the image | |
st.image(image, caption='The Uploaded File') | |
st.write("First is first captions for your Photo : ", string) | |
st.write("Magical hashies have arrived : ", combined_hashtags) | |
# run the app | |
if __name__ == '__main__': | |
app() | |