File size: 3,066 Bytes
9cc70aa
6f20762
 
 
c9f624f
0b8554b
af9ef3e
 
 
 
6f20762
af9ef3e
 
f620641
9cc70aa
f620641
2242573
130dafb
f620641
2242573
f620641
e6df2a5
f620641
985c25d
f620641
bd294cd
1c23950
ef53ab4
bd294cd
69127db
9e106c3
69127db
9e106c3
31c3f47
4aac2a1
31c3f47
bd294cd
69127db
4e90045
 
1c23950
ae66724
5bf8c88
3ac03fe
ef6e056
c9f624f
 
 
59b4c5f
 
5bf8c88
1c23950
c9f624f
0e52996
 
 
 
 
c9f624f
 
 
933a0c8
c9f624f
 
78e02cf
 
7603b72
 
78e02cf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import gradio as gr
import pickle
import pandas as pd


# Load the (pickled) training split used to tune the model so the app can
# report whether user-supplied words were part of the training distribution.
# NOTE: pickle.load is acceptable only because this file ships with the app;
# never unpickle untrusted input.
# Fix: use a context manager so the file handle is closed deterministically
# (the original `pickle.load(open(...))` leaked the handle).
with open("OOV_Train_2.pkl", "rb") as fh:
    data = pickle.load(fh)
data = pd.DataFrame(
    data,
    columns=["Input_Seq", "Label", "Adj_Class", "Adj", "Nn", "Hypr", "Adj_NN"]
)

# Vocabulary seen during training: adjectives, nouns + hypernyms, and the
# union of all three (used by test_input for membership checks).
adjs = set(data["Adj"])
Nns = set(data["Nn"]) | set(data["Hypr"])
all_set = adjs | Nns

def test_input(words):
    word_dict = ""
    for w in words.split(","):
        if w in all_set:
            word_dict += "{} : in-distribution\n".format(w)
        else:
            word_dict += "{} : out-of-distribution\n".format(w)
    return word_dict

    
# UI copy for the model-inference tab: page title, the markdown description
# shown above the widget, and clickable example inputs.
title = "Phrase-Entailment Detection with BERT"

description = """
Did you know that logically speaking **A small cat is not a small animal**, and that **A fake smile is not a smile**? Learn more by testing our BERT model tuned to perform phrase-level adjective-noun entailment. The proposed model was tuned with a section of the PLANE (**P**hrase-**L**evel **A**djective-**N**oun **E**ntailment) dataset, introduced in COLING 2022 [Bertolini et al.,](https://aclanthology.org/2022.coling-1.359/). Please note that the scope of the model is not to run lexical-entailment or hypernym detection (e.g., *"A dog is an animal*"), but to perform a very specific subset of phrase-level compositional entailment over adjective-noun phrases. The type of question you can ask the model are limited, and should have one of three forms:

- An *Adjective-Noun* is a *Noun* (e.g. A red car is a car)

- An *Adjective-Noun* is a *Hypernym(Noun)* (e.g. A red car is a vehicle)

- An *Adjective-Noun* is a *Adjective-Hypernym(Noun)* (e.g. A red car is a red vehicle)

As in the examples above, the **adjective should be the same for both phrases**, and the **Hypernym(Noun) should be a true hypernym of the selected noun**.

The current model achieves an accuracy of 90% on out-of-distribution evaluation.
Use the next page to check if your test-items (i.e. adjective, noun and hypernyms) were part of the training data!"""

# Example sentences, one per inner list (Gradio's examples format).
examples = [["A red car is a vehicle"], ["A fake smile is a smile"], ["A small cat is a small animal"]]

# Inference tab: a Gradio interface backed by the fine-tuned BERT model
# hosted on the Hugging Face Hub.
# NOTE(review): the `model=` keyword relies on a legacy gradio API for
# loading Hub-hosted models; recent gradio versions use gr.load(...) /
# gr.Interface.load(...) instead — confirm against the pinned gradio version
# before upgrading.
interface_model = gr.Interface(
            model="lorenzoscottb/bert-base-cased-PLANE-ood-2",
            description=description,
            examples=examples,
            title=title,
            inputs="text", 
            outputs="label",
)

# UI copy and sample inputs for the vocabulary-check tab (see test_input).
description_w = """
You can use this page to test if a set of words was included in the training data used to tune the model. As in the samples below, use as input a series of words separated solely by a comma (e.g. *red,car,vehicle*).
"""

# Example comma-separated word lists, one per inner list (Gradio's format).
examples_w = [["red,car,vehicle"], ["fake,smile"], ["small,cat,animal"]]

# Vocabulary-check tab: lets users test whether their words appeared in the
# training data. Components are built up-front and named for readability.
word_box_in = gr.Textbox(label="Input:word_1,word2,...,word_n")
word_box_out = gr.Textbox(label="In training-distribution?")

interface_words = gr.Interface(
    fn=test_input,
    inputs=word_box_in,
    outputs=word_box_out,
    examples=examples_w,
    description=description_w,
)

# Assemble both tabs (model inference + vocabulary check) and start the app.
tab_interfaces = [interface_model, interface_words]
tab_titles = ["Test Model", "Check if words in/out-distribution"]
demo = gr.TabbedInterface(tab_interfaces, tab_titles)
demo.launch()