# coding:utf-8
import gradio as gr
import rebiber
import os
import uuid

# Load Bib Database
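# rebiber.__file__ points at the package's __init__.py; stripping the file
# name yields the package directory, which ships bib_list.txt and the
# conference bib files it references.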
filepath = os.path.abspath(rebiber.__file__).replace("__init__.py", "")
bib_list_path = os.path.join(filepath, "bib_list.txt")
abbr_tsv_path = "abbr.tsv"

bib_db = rebiber.construct_bib_db(bib_list_path, start_dir=filepath)
abbr_dict = rebiber.normalize.load_abbr_tsv(abbr_tsv_path)
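# bib_db maps normalized paper titles to their official bib entries;
# abbr_dict holds the (full venue name, abbreviation) pairs from abbr.tsv.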


def process(input_bib, shorten, remove_keys, deduplicate, sort):
    # The click handler binds three outputs, so every return path must
    # yield three values.
    if "@" not in input_bib:
        return "N/A", "", gr.update(visible=False)
    random_id = uuid.uuid4().hex
    with open(f"input_{random_id}.bib", "w") as f:
        f.write(input_bib.replace("\t", "    "))
    all_bib_entries = rebiber.load_bib_file(f"input_{random_id}.bib")
    # print("# Input Bib Entries:", len(all_bib_entries))
    abbr_dict_pass = []
    if shorten:
        abbr_dict_pass = abbr_dict
    rebiber.normalize_bib(
        bib_db,
        all_bib_entries,
        f"output_{random_id}.bib",
        abbr_dict=abbr_dict_pass,
        deduplicate=deduplicate,
        sort=sort,
        removed_value_names=remove_keys,
    )
    with open(f"output_{random_id}.bib") as f:
        output_bib = f.read().replace("\n ", "\n    ")
    # delete both files
    return output_bib, random_id, gr.update(visible=True)


def download_file(ex_uuid):
    # Point the File component at the bib file written by process() and
    # reveal it in a single update.
    file_path = f"output_{ex_uuid}.bib"
    return gr.update(value=file_path, visible=True)
    

example_input = """
    @article{lin2020birds,
        title={Birds have four legs?! NumerSense: Probing Numerical Commonsense Knowledge of Pre-trained Language Models},
        author={Lin, Bill Yuchen and Lee, Seyeon and Khanna, Rahul and Ren, Xiang},
        journal={arXiv preprint arXiv:2005.00683},
        year={2020}
    }
    @inproceedings{Lin2020CommonGenAC,
        title={CommonGen: A Constrained Text Generation Challenge for Generative Commonsense Reasoning},
        author={Bill Yuchen Lin and Minghan Shen and Wangchunshu Zhou and Pei Zhou and Chandra Bhagavatula and Yejin Choi and Xiang Ren},
        booktitle={Findings},
        year={2020}
    }
"""

examples = [[example_input]]
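
# A minimal sketch of calling process() directly (hypothetical usage outside
# the UI): it returns the normalized bib text, the UUID naming the temporary
# output file, and a Gradio update that reveals the download button.
#
#   bib_text, file_id, btn_update = process(
#       example_input, shorten=True, remove_keys=["url", "doi"],
#       deduplicate=True, sort=True,
#   )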

with gr.Blocks() as demo:

    gr.Markdown(
        """# Rebiber: A tool for normalizing bibtex with official info.
                <table> 
                <tr>
                <td>
                <a href="https://yuchenlin.xyz/">
                    <img src="https://img.shields.io/badge/Yuchen%20Lin-🐼-blue?style=social">
                </a>
                </td>
                <td>
                <a href="https://github.com/yuchenlin/rebiber">
                    <img src="https://img.shields.io/badge/Github--blue?style=social&logo=github">
                </a>
                </td>
                <td>
                <a href="https://twitter.com/billyuchenlin/status/1353850378438070272?s=20">
                    <img src="https://img.shields.io/badge/Tweet--blue?style=social&logo=twitter">
                </a>
                </td>
                </tr>
                </table>
                <span style="font-size:13pt">
                
                We often cite papers by their arXiv versions without noting that they have already been __PUBLISHED__ at a conference. Such unofficial bib entries may violate submission or camera-ready rules at some venues.
                We introduce __Rebiber__, a simple Python tool that fixes them automatically, using official conference information from [DBLP](https://dblp.org/) and [the ACL Anthology](https://www.aclweb.org/anthology/) (for NLP conferences)!
                Besides updating outdated arXiv citations, __Rebiber__ also normalizes entries into a unified (DBLP-style) format, with options to abbreviate venue names and to drop selected fields.
                
                </span>
            """
    )

    with gr.Row():
        with gr.Column(scale=3):
            input_bib = gr.Textbox(
                lines=15, label="Input BIB", value=example_input, interactive=True
            )
            removekeys = gr.CheckboxGroup(
                [
                    "url",
                    "biburl",
                    "address",
                    "publisher",
                    "pages",
                    "doi",
                    "volume",
                    "bibsource",
                ],
                label="Remove Keys",
                info="Which keys to remove?",
            )
            shorten = gr.Checkbox(
                label="Abbreviation",
                info="Shorten the conference/journal names (e.g., `Proceedings of the 2020 International Conference of ...` --> `Proc. of ICML')",
                value=False,
            )
            dedup = gr.Checkbox(label="Deduplicate entries.", value=False)
            sort = gr.Checkbox(label="Sort alphabetically by ID.", value=False)
            with gr.Row():
                clr_button = gr.Button("Clear")
                button = gr.Button("Submit")
            # Hidden field that carries the output-file UUID from process()
            # to download_file().
            ex_uuid = gr.Text(label="UUID", visible=False)
            
        with gr.Column(scale=3):
            output = gr.Textbox(
                label="Output BIB (Note that you can copy the output bib file by clicking the top-right button.)",
                show_copy_button=True,
                interactive=False,
            )
            # Both stay hidden until process() has produced an output file.
            download_btn = gr.Button("Generate Bib File", visible=False)
            download_content = gr.File(visible=False)

    download_btn.click(
        download_file, inputs=ex_uuid, outputs=download_content
    )
    
    button.click(
        process,
        inputs=[input_bib, shorten, removekeys, dedup, sort],
        outputs=[output, ex_uuid, download_btn],
        api_name="process",
    )

    def clean(text):
        return ""

    clr_button.click(clean, input_bib, input_bib)
    # gr.Interface(fn=process,
    # outputs=gr.outputs.Textbox(label="Output BIB").style(show_copy_button=True),
    # examples=examples,
    # allow_flagging="never",
    # scroll_to_output=True,
    # show_progress=True,
    # )


if __name__ == "__main__":
    demo.launch()
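
# For reference, the docstring below shows one entry before (arXiv preprint)
# and after (the official EMNLP 2020 version) normalization.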


"""
@article{lin2020birds,
    title={Birds have four legs?! NumerSense: Probing Numerical Commonsense Knowledge of Pre-trained Language Models},
    author={Lin, Bill Yuchen and Lee, Seyeon and Khanna, Rahul and Ren, Xiang},
    journal={arXiv preprint arXiv:2005.00683},
    year={2020}
} 

@inproceedings{lin2020birds,
 address = {Online},
 author = {Lin, Bill Yuchen  and
Lee, Seyeon  and
Khanna, Rahul  and
Ren, Xiang},
 booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)},
 doi = {10.18653/v1/2020.emnlp-main.557},
 pages = {6862--6868},
 publisher = {Association for Computational Linguistics},
 title = {{B}irds have four legs?! {N}umer{S}ense: {P}robing {N}umerical {C}ommonsense {K}nowledge of {P}re-{T}rained {L}anguage {M}odels},
 url = {https://aclanthology.org/2020.emnlp-main.557},
 year = {2020}
}   
"""