# Streamlit demo: embed two short lists of sentences with a
# SentenceTransformer model and show, for each position i, the cosine
# similarity between sentences1[i] and sentences2[i].
import streamlit as st
from sentence_transformers import SentenceTransformer, util

st.title("Paraphrase Mining Example")

model = SentenceTransformer('all-MiniLM-L6-v2')

# Two lists of sentences; they are compared pairwise by position.
sentences1 = [
    'A man is playing guitar',
    'The cat sits outside',
    'The new movie is awesome',
]
sentences2 = [
    'The dog plays in the garden',
    'A woman watches TV',
    'The new movie is so great',
]

st.text("When you have two arrays of sentences, you can compare them. Inspect these two unlabeled arrays")
st.text(sentences1)
st.text(sentences2)

# Compute embeddings for both lists.
embeddings1 = model.encode(sentences1, convert_to_tensor=True)
embeddings2 = model.encode(sentences2, convert_to_tensor=True)

# Compute cosine similarities between the two sets of embeddings; the result
# is indexed below as cosine_scores[i][i] for the i-th sentence of each list.
cosine_scores = util.cos_sim(embeddings1, embeddings2)

st.text("Computing which pairs are most similar")

col1, col2, score_col = st.columns(3)
col1.header("Left Token")
col2.header("Right Token")
score_col.header("Score")

# Output each positional pair with its score. zip() stops at the shorter list,
# so mismatched list lengths cannot raise an IndexError.
for i, (left, right) in enumerate(zip(sentences1, sentences2)):
    col1.write(left)
    col2.write(right)
    # Convert the single-element tensor to a plain float so the column shows
    # a four-decimal number rather than the tensor object itself.
    score_col.write(f"{cosine_scores[i][i].item():.4f}")