|
import streamlit as st
|
|
from sentence_transformers import SentenceTransformer
|
|
from sklearn.metrics.pairwise import cosine_similarity
|
|
import openai
|
|
import numpy as np
|
|
import pandas as pd
|
|
|
|
|
|
# Page title followed by the introductory help text (purpose, usage steps,
# and a description of the output), rendered as Markdown.
st.title("Keyword Cosine Similarity Tool")

app_description = """
**Purpose:**
Quickly remove irrelevant keywords from your keyword research and move to the next step in your optimization!

Have you ever had to review a long list of queries to determine whether they were relevant to your target keyword? This Space aims to automate that process by entering your primary keyword and a list of related queries from any source you might do keyword research.
The resulting table is an ordered list of your comparison keywords based on the cosine similarity of each query's embeddings.

**Instructions:**
1. Enter your **Primary Keyword** in the input field.
2. Provide a list of **Keywords to Compare** (separated by new lines or commas).
3. Select an **Embedding Model** to compute keyword embeddings.
4. If using OpenAI embeddings, input your **API Key**.
5. Click **Calculate Similarities** to compute and rank your keywords by relevance.

**Output:**
- A sorted table of your comparison keywords based on their cosine similarity to your primary keyword.
- Option to download the results as a CSV file.
"""
st.markdown(app_description)
|
|
|
|
|
|
# Input widgets. The names bound here (primary_keyword, keywords, model_name,
# openai_api_key) are read by the calculation step further down the script.
st.header("Input Parameters")

primary_keyword = st.text_input(
    "Primary Keyword",
    placeholder="Enter your primary keyword",
)

# The text area is registered under the "keywords" session-state key and its
# current value is read back from session state rather than the return value.
st.text_area(
    "Keywords to Compare",
    key="keywords",
    help="You can input keywords on separate lines or separated by commas. Any keywords with commas will be treated as separate queries.",
    placeholder="Enter keywords separated by new lines or commas",
)
keywords = st.session_state["keywords"]

embedding_model_choices = [
    "sentence-transformers/LaBSE",
    "sentence-transformers/all-MiniLM-L6-v2",
    "OpenAI Embeddings",
]
model_name = st.selectbox("Select Embedding Model", embedding_model_choices)

openai_api_key = st.text_input("OpenAI API Key (optional)", type="password")
|
|
|
|
|
|
# Handle the "Calculate Similarities" click: embed the primary keyword and every
# comparison keyword with the selected model, rank the comparison keywords by
# cosine similarity to the primary keyword, then render the results table, a
# CSV download button, and the raw embeddings.
if st.button("Calculate Similarities"):
    # Commas and newlines are both treated as separators; blank entries are dropped.
    keyword_list = [
        kw.strip()
        for kw in (keywords or "").replace(",", "\n").split("\n")
        if kw.strip()
    ]

    # Both stay None unless a branch below actually produces embeddings, so
    # every error path skips the similarity step instead of raising NameError.
    primary_embedding = None
    keyword_embeddings = None

    if not (primary_keyword or "").strip() or not keyword_list:
        # Also covers input made up solely of separators/whitespace (e.g. ", ,"),
        # which would otherwise reach the model with an empty keyword list.
        st.error("Please provide both the primary keyword and keywords to compare.")
    elif model_name in ["sentence-transformers/LaBSE", "sentence-transformers/all-MiniLM-L6-v2"]:
        st.info(f"Loading model: {model_name}")
        model = SentenceTransformer(model_name)

        st.info("Generating embeddings...")
        # Request NumPy output: scikit-learn's cosine_similarity works on
        # array-likes, and a torch tensor held on a GPU cannot be converted
        # to an array implicitly.
        primary_embedding = model.encode(primary_keyword, convert_to_numpy=True)
        keyword_embeddings = model.encode(keyword_list, convert_to_numpy=True)
    elif model_name == "OpenAI Embeddings":
        if not openai_api_key:
            # Report the problem and fall through with no embeddings set.
            st.error("Please provide your OpenAI API key for this model.")
        else:
            openai.api_key = openai_api_key
            st.info("Generating OpenAI embeddings...")

            def get_openai_embedding(text):
                """Return the text-embedding-ada-002 embedding of *text* as a NumPy array."""
                # NOTE(review): openai.Embedding.create looks like the pre-1.0
                # openai package interface - confirm the pinned openai version.
                response = openai.Embedding.create(
                    model="text-embedding-ada-002",
                    input=text,
                )
                return np.array(response['data'][0]['embedding'])

            primary_embedding = get_openai_embedding(primary_keyword)
            keyword_embeddings = np.array([get_openai_embedding(kw) for kw in keyword_list])
    else:
        # No st.stop() needed: the guard below already skips the computation,
        # and the remainder of the script is still allowed to run.
        st.error("Invalid model selection.")

    if primary_embedding is not None and keyword_embeddings is not None:
        st.info("Calculating cosine similarities...")
        # Wrapping the primary embedding in a list makes it a single-row 2-D
        # input; row 0 of the result holds one score per comparison keyword.
        similarities = cosine_similarity([primary_embedding], keyword_embeddings)[0]

        st.info("Sorting results...")
        results = [
            {"Keyword": kw, "Cosine Similarity": sim}
            for kw, sim in zip(keyword_list, similarities)
        ]
        # Most similar keywords first.
        sorted_results = sorted(results, key=lambda x: x["Cosine Similarity"], reverse=True)

        st.header("Results")
        df_results = pd.DataFrame(sorted_results)
        st.table(df_results)

        st.download_button(
            label="Download Results as CSV",
            data=df_results.to_csv(index=False),
            file_name="cosine_similarity_results.csv",
            mime="text/csv",
        )

        st.header("Debugging Info")
        st.write("Primary Embedding:", primary_embedding)
        st.write("Keyword Embeddings:", keyword_embeddings)
|
|
|
|
|
|
# Footer: a horizontal rule followed by the author credit and contact link,
# each emitted as its own Markdown element.
footer_lines = (
    "---",
    "Created by [Ryland Bacorn](https://huggingface.co/ReithBjarkan). Report a [bug or make a suggestion](mailto:rybacorn@gmail.com)",
)
for footer_line in footer_lines:
    st.markdown(footer_line)
|
|
|