Spaces:
Runtime error
Runtime error
Trent
committed on
Commit
·
f7a5664
1
Parent(s):
5cd1ac6
Asymmetric QA
Browse files
- app.py +17 -10
- backend/config.py +1 -1
app.py
CHANGED
@@ -13,16 +13,15 @@ st.markdown('''
|
|
13 |
|
14 |
Hi! This is the demo for the [flax sentence embeddings](https://huggingface.co/flax-sentence-embeddings) created for the **Flax/JAX community week 🤗**. We are going to use three flax-sentence-embeddings models: a **distilroberta base**, a **mpnet base** and a **minilm-l6**. All were trained on all the dataset of the 1B+ train corpus with the v3 setup.
|
15 |
|
16 |
-
|
17 |
|
18 |
-
|
|
|
|
|
|
|
19 |
|
20 |
For more cool information on sentence embeddings, see the [sBert project](https://www.sbert.net/examples/applications/computing-embeddings/README.html).
|
21 |
-
|
22 |
-
Please enjoy!!
|
23 |
''')
|
24 |
-
|
25 |
-
if menu == "Sentence Similarity":
|
26 |
select_models = st.multiselect("Choose models", options=list(MODELS_ID), default=list(MODELS_ID)[0])
|
27 |
|
28 |
anchor = st.text_input(
|
@@ -45,7 +44,7 @@ if menu == "Sentence Similarity":
|
|
45 |
results = {model: inference.text_similarity(anchor, inputs, model, MODELS_ID) for model in select_models}
|
46 |
df_results = {model: results[model] for model in results}
|
47 |
|
48 |
-
index = [f"{idx}:{input[:min(15, len(input))]}..." for idx, input in enumerate(inputs)]
|
49 |
df_total = pd.DataFrame(index=index)
|
50 |
for key, value in df_results.items():
|
51 |
df_total[key] = list(value['score'].values)
|
@@ -55,11 +54,19 @@ if menu == "Sentence Similarity":
|
|
55 |
st.write('Visualize the results of each model:')
|
56 |
st.line_chart(df_total)
|
57 |
elif menu == "Asymmetric QA":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
select_models = st.multiselect("Choose models", options=list(QA_MODELS_ID), default=list(QA_MODELS_ID)[0])
|
59 |
|
60 |
anchor = st.text_input(
|
61 |
'Please enter here the query you want to compare with given answers:',
|
62 |
-
value="
|
63 |
)
|
64 |
|
65 |
n_texts = st.number_input(
|
@@ -69,7 +76,7 @@ elif menu == "Asymmetric QA":
|
|
69 |
|
70 |
inputs = []
|
71 |
|
72 |
-
defaults = ["
|
73 |
for i in range(int(n_texts)):
|
74 |
input = st.text_input(f'Answer {i + 1}:', value=defaults[i] if i < len(defaults) else "")
|
75 |
|
@@ -79,7 +86,7 @@ elif menu == "Asymmetric QA":
|
|
79 |
results = {model: inference.text_similarity(anchor, inputs, model, QA_MODELS_ID) for model in select_models}
|
80 |
df_results = {model: results[model] for model in results}
|
81 |
|
82 |
-
index = [f"{idx}:{input[:min(15, len(input))]}..." for idx, input in enumerate(inputs)]
|
83 |
df_total = pd.DataFrame(index=index)
|
84 |
for key, value in df_results.items():
|
85 |
df_total[key] = list(value['score'].values)
|
|
|
13 |
|
14 |
Hi! This is the demo for the [flax sentence embeddings](https://huggingface.co/flax-sentence-embeddings) created for the **Flax/JAX community week 🤗**. We are going to use three flax-sentence-embeddings models: a **distilroberta base**, a **mpnet base** and a **minilm-l6**. All were trained on all the dataset of the 1B+ train corpus with the v3 setup.
|
15 |
|
16 |
+
''')
|
17 |
|
18 |
+
if menu == "Sentence Similarity":
|
19 |
+
st.header('Sentence Similarity')
|
20 |
+
st.markdown('''
|
21 |
+
**Instructions**: You can compare the similarity of a main text with other texts of your choice. In the background, we'll create an embedding for each text, and then we'll use the cosine similarity function to calculate a similarity metric between our main sentence and the others.
|
22 |
|
23 |
For more cool information on sentence embeddings, see the [sBert project](https://www.sbert.net/examples/applications/computing-embeddings/README.html).
|
|
|
|
|
24 |
''')
|
|
|
|
|
25 |
select_models = st.multiselect("Choose models", options=list(MODELS_ID), default=list(MODELS_ID)[0])
|
26 |
|
27 |
anchor = st.text_input(
|
|
|
44 |
results = {model: inference.text_similarity(anchor, inputs, model, MODELS_ID) for model in select_models}
|
45 |
df_results = {model: results[model] for model in results}
|
46 |
|
47 |
+
index = [f"{idx + 1}:{input[:min(15, len(input))]}..." for idx, input in enumerate(inputs)]
|
48 |
df_total = pd.DataFrame(index=index)
|
49 |
for key, value in df_results.items():
|
50 |
df_total[key] = list(value['score'].values)
|
|
|
54 |
st.write('Visualize the results of each model:')
|
55 |
st.line_chart(df_total)
|
56 |
elif menu == "Asymmetric QA":
|
57 |
+
st.header('Asymmetric QA')
|
58 |
+
st.markdown('''
|
59 |
+
**Instructions**: You can compare the Answer likeliness of a given Query with answer candidates of your choice. In the background, we'll create an embedding for each answers, and then we'll use the cosine similarity function to calculate a similarity metric between our query sentence and the others.
|
60 |
+
`mpnet_asymmetric_qa` model works best for hard negative answers or distinguishing similar queries due to separate models applied for encoding questions and answers.
|
61 |
+
|
62 |
+
For more cool information on sentence embeddings, see the [sBert project](https://www.sbert.net/examples/applications/computing-embeddings/README.html).
|
63 |
+
''')
|
64 |
+
|
65 |
select_models = st.multiselect("Choose models", options=list(QA_MODELS_ID), default=list(QA_MODELS_ID)[0])
|
66 |
|
67 |
anchor = st.text_input(
|
68 |
'Please enter here the query you want to compare with given answers:',
|
69 |
+
value="What is the weather in Paris?"
|
70 |
)
|
71 |
|
72 |
n_texts = st.number_input(
|
|
|
76 |
|
77 |
inputs = []
|
78 |
|
79 |
+
defaults = ["It is raining in Paris right now with 70 F temperature.", "What is the weather in Berlin?", "I have 3 brothers."]
|
80 |
for i in range(int(n_texts)):
|
81 |
input = st.text_input(f'Answer {i + 1}:', value=defaults[i] if i < len(defaults) else "")
|
82 |
|
|
|
86 |
results = {model: inference.text_similarity(anchor, inputs, model, QA_MODELS_ID) for model in select_models}
|
87 |
df_results = {model: results[model] for model in results}
|
88 |
|
89 |
+
index = [f"{idx + 1}:{input[:min(15, len(input))]}..." for idx, input in enumerate(inputs)]
|
90 |
df_total = pd.DataFrame(index=index)
|
91 |
for key, value in df_results.items():
|
92 |
df_total[key] = list(value['score'].values)
|
backend/config.py
CHANGED
@@ -3,8 +3,8 @@ MODELS_ID = dict(distilroberta = 'flax-sentence-embeddings/st-codesearch-distilr
|
|
3 |
minilm_l6 = 'flax-sentence-embeddings/all_datasets_v3_MiniLM-L6')
|
4 |
|
5 |
QA_MODELS_ID = dict(
|
6 |
-
mpnet_qa = 'flax-sentence-embeddings/mpnet_stackexchange_v1',
|
7 |
mpnet_asymmetric_qa = ['flax-sentence-embeddings/multi-QA_v1-mpnet-asymmetric-Q',
|
8 |
'flax-sentence-embeddings/multi-QA_v1-mpnet-asymmetric-A'],
|
|
|
9 |
distilbert_qa = 'flax-sentence-embeddings/multi-qa_v1-distilbert-cls_dot'
|
10 |
)
|
|
|
3 |
minilm_l6 = 'flax-sentence-embeddings/all_datasets_v3_MiniLM-L6')
|
4 |
|
5 |
QA_MODELS_ID = dict(
|
|
|
6 |
mpnet_asymmetric_qa = ['flax-sentence-embeddings/multi-QA_v1-mpnet-asymmetric-Q',
|
7 |
'flax-sentence-embeddings/multi-QA_v1-mpnet-asymmetric-A'],
|
8 |
+
mpnet_qa='flax-sentence-embeddings/mpnet_stackexchange_v1',
|
9 |
distilbert_qa = 'flax-sentence-embeddings/multi-qa_v1-distilbert-cls_dot'
|
10 |
)
|