File size: 1,283 Bytes
2df3395
 
 
 
e16d5a9
 
 
 
5afef2f
 
f4ffc35
e16d5a9
 
 
 
 
5afef2f
 
 
 
 
e16d5a9
 
 
 
 
 
 
5afef2f
 
359508c
5afef2f
 
 
e16d5a9
 
359508c
 
 
 
2df3395
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
"""Streamlit demo that scores positionally aligned sentence pairs.

The script encodes two lists of sentences with a SentenceTransformer model,
computes the cosine-similarity matrix between the two sets of embeddings, and
renders the i-th sentence of each list side by side with their similarity
score in a three-column layout.
"""
import streamlit as st
from sentence_transformers import SentenceTransformer, util

st.title("Paraphrase Mining Example")

model = SentenceTransformer('all-MiniLM-L6-v2')

# Two lists of sentences; entries at the same index are compared as a pair.
sentences1 = [
    'A man is playing guitar',
    'The cat sits outside',
    'The new movie is awesome',
]

sentences2 = [
    'The dog plays in the garden',
    'A woman watches TV',
    'The new movie is so great',
]

st.text("When you have two arrays of sentences, you can compare them. Inspect these two unlabeled arrays")

st.text(sentences1)
st.text(sentences2)

# Compute embeddings for both lists.
embeddings1 = model.encode(sentences1, convert_to_tensor=True)
embeddings2 = model.encode(sentences2, convert_to_tensor=True)

# Pairwise cosine similarities between the two embedding sets; only the
# diagonal entries [i][i] (same-index pairs) are displayed below.
cosine_scores = util.cos_sim(embeddings1, embeddings2)

st.text("Computing which pairs are most similar")

col1, col2, score_col = st.columns(3)
col1.header("Left Token")
col2.header("Right Token")
score_col.header("Score")

# Output the pairs with their score. zip() stops at the shorter list, so a
# length mismatch between the two lists cannot cause an out-of-range index.
for i, (left_sentence, right_sentence) in enumerate(zip(sentences1, sentences2)):
    col1.write(left_sentence)
    col2.write(right_sentence)
    # Convert the 0-dim tensor to a plain float and show 4 decimals, so the
    # column displays a number rather than a tensor repr.
    score_col.write("{:.4f}".format(float(cosine_scores[i][i])))