# coding:utf-8
import gradio as gr
import rebiber
import os
import uuid
# Load Bib Database
# Locate the installed rebiber package directory; bib_list.txt ships inside
# it, while abbr.tsv sits next to this app file. os.path.dirname replaces
# the original str.replace("__init__.py", "") hack, which silently returns
# the wrong path if the module file name ever differs; the trailing
# separator the replace() left behind is preserved via join(..., "").
filepath = os.path.join(os.path.dirname(os.path.abspath(rebiber.__file__)), "")
bib_list_path = os.path.join(filepath, "bib_list.txt")
abbr_tsv_path = "abbr.tsv"  # resolved relative to the current working directory
bib_db = rebiber.construct_bib_db(bib_list_path, start_dir=filepath)
abbr_dict = rebiber.normalize.load_abbr_tsv(abbr_tsv_path)
def process(input_bib, shorten, remove_keys, deduplicate, sort):
    """Normalize the pasted bibtex via rebiber.

    Args mirror the Gradio inputs: the raw bib text, the abbreviation flag,
    the list of field names to strip, and the dedup/sort flags.

    Returns a 3-tuple matching the click handler's declared outputs:
    the normalized bib text, the per-request UUID (used by download_file()
    to locate the output file), and a visibility update for the
    "Generate Bib File" button.
    """
    if "@" not in input_bib:
        # Nothing that looks like a bib entry. Still return the full
        # 3-tuple: the click handler declares three outputs, so a bare
        # single value here would make Gradio fail at dispatch time.
        return "N/A", "", gr.update(visible=False)
    random_id = uuid.uuid4().hex
    in_path = f"input_{random_id}.bib"
    out_path = f"output_{random_id}.bib"
    with open(in_path, "w") as f:
        # Tabs confuse the bib parser; flatten them to spaces.
        f.write(input_bib.replace("\t", " "))
    all_bib_entries = rebiber.load_bib_file(in_path)
    # Pass the abbreviation table only when shortening was requested;
    # an empty list disables abbreviation inside normalize_bib.
    abbr_dict_pass = abbr_dict if shorten else []
    rebiber.normalize_bib(
        bib_db,
        all_bib_entries,
        out_path,
        abbr_dict=abbr_dict_pass,
        deduplicate=deduplicate,
        sort=sort,
        removed_value_names=remove_keys,
    )
    with open(out_path) as f:
        # NOTE(review): both replace() arguments look identical here (a
        # no-op) — the indentation widths were likely mangled in this copy;
        # confirm the intended re-indentation against the upstream source.
        output_bib = f.read().replace("\n ", "\n ")
    # The input file is no longer needed; the output file must stay on
    # disk so download_file() can serve it by UUID later.
    os.remove(in_path)
    return output_bib, random_id, gr.update(visible=True)
def download_file(ex_uuid):
    """Serve the normalized bib file produced earlier for this UUID.

    Args:
        ex_uuid: the hex UUID that process() returned into the hidden textbox.

    Returns a 2-tuple for the click outputs: the path of the output bib file
    and an update making the File component visible.
    """
    # BUG (fixed): the original called download_content.update(visible=False)
    # and discarded the result — in Gradio, update(...) only constructs an
    # update payload, so that call had no effect; it and the associated
    # `global` statement have been removed.
    file_path = f"output_{ex_uuid}.bib"
    return file_path, gr.update(visible=True)
# Example bib entries pre-filled into the input textbox: an outdated arXiv
# @article plus an @inproceedings entry.
example_input = """
@article{lin2020birds,
title={Birds have four legs?! NumerSense: Probing Numerical Commonsense Knowledge of Pre-trained Language Models},
author={Lin, Bill Yuchen and Lee, Seyeon and Khanna, Rahul and Ren, Xiang},
journal={arXiv preprint arXiv:2005.00683},
year={2020}
}
@inproceedings{Lin2020CommonGenAC,
title={CommonGen: A Constrained Text Generation Challenge for Generative Commonsense Reasoning},
author={Bill Yuchen Lin and Minghan Shen and Wangchunshu Zhou and Pei Zhou and Chandra Bhagavatula and Yejin Choi and Xiang Ren},
booktitle={Findings},
year={2020}
}
"""
examples = [[example_input]]  # only consumed by the commented-out gr.Interface below
with gr.Blocks() as demo:
    # Page header: title, badge links (homepage / GitHub / Twitter), and a
    # short description of what Rebiber does.
    gr.Markdown(
        """# Rebiber: A tool for normalizing bibtex with official info.
<table>
<tr>
<td>
<a href="https://yuchenlin.xyz/">
<img src="https://img.shields.io/badge/Yuchen%20Lin-🐼-blue?style=social">
</a>
</td>
<td>
<a href="https://github.com/yuchenlin/rebiber">
<img src="https://img.shields.io/badge/Github--blue?style=social&logo=github">
</a>
</td>
<td>
<a href="https://twitter.com/billyuchenlin/status/1353850378438070272?s=20">
<img src="https://img.shields.io/badge/Tweet--blue?style=social&logo=twitter">
</a>
</td>
</tr>
</table>
<span style="font-size:13pt">
We often cite papers using their arXiv versions without noting that they are already __PUBLISHED__ in some conferences. These unofficial bib entries might violate rules about submissions or camera-ready versions for some conferences.
We introduce __Rebiber__, a simple tool in Python to fix them automatically. It is based on the official conference information from the [DBLP](https://dblp.org/) or [the ACL anthology](https://www.aclweb.org/anthology/) (for NLP conferences)!
Apart from handling outdated arXiv citations, __Rebiber__ also normalizes citations in a unified way (DBLP-style), supporting abbreviation and value selection.
</span>
"""
    )
    with gr.Row():
        with gr.Column(scale=3):
            # Left column: raw bib input plus the normalization options
            # that are forwarded to process().
            input_bib = gr.Textbox(
                lines=15, label="Input BIB", value=example_input, interactive=True
            )
            removekeys = gr.CheckboxGroup(
                [
                    "url",
                    "biburl",
                    "address",
                    "publisher",
                    "pages",
                    "doi",
                    "volume",
                    "bibsource",
                ],
                # value=[False, False, False, False, False, False, False, False],
                label="Remove Keys",
                info="Which keys to remove?",
            )
            shorten = gr.Checkbox(
                label="Abbreviation",
                info="Shorten the conference/journal names (e.g., `Proceedings of the 2020 International Conference of ...` --> `Proc. of ICML')",
                value=False,
            )
            dedup = gr.Checkbox(label="Deduplicate entries.", value=False)
            sort = gr.Checkbox(label="Sort alphabetically by ID.", value=False)
            with gr.Row():
                clr_button = gr.Button("Clear")
                button = gr.Button("Submit")
            # Hidden textbox that carries the per-request UUID from
            # process() to download_file(); never shown to the user.
            ex_uuid = gr.Text(label="UUID")
            ex_uuid.visible = False
        with gr.Column(scale=3):
            # Right column: the normalized output plus the download
            # controls, which start hidden until process() succeeds.
            output = gr.Textbox(
                label="Output BIB (Note that you can copy the output bib file by clicking the top-right button.)",
                show_copy_button=True,
                interactive=False,
            )
            download_btn = gr.Button("Generate Bib File")
            download_btn.visible = False
            download_content = gr.File()
            download_content.visible = False
            # NOTE(review): both outputs target the same File component —
            # the file path and the visibility update are both routed to
            # download_content; confirm this is intended rather than
            # [download_content, download_btn].
            download_btn.click(
                download_file, inputs=ex_uuid, outputs=[download_content, download_content]
            )
    # Main action: normalize the input; populates the output textbox, the
    # hidden UUID field, and reveals the download button.
    button.click(
        process,
        inputs=[input_bib, shorten, removekeys, dedup, sort],
        outputs=[output, ex_uuid, download_btn],
        api_name="process",
    )
    # Clear-button callback: ignores the current text and empties the box.
    def clean(text):
        return ""
    clr_button.click(clean, input_bib, input_bib)
# Legacy gr.Interface wiring kept for reference; superseded by the Blocks UI above.
# gr.Interface(fn=process,
# outputs=gr.outputs.Textbox(label="Output BIB").style(show_copy_button=True),
# examples=examples,
# allow_flagging="never",
# scroll_to_output=True,
# show_progress=True,
# )
if __name__ == "__main__":
    # Launch the Gradio server when run as a script (Spaces imports this too).
    demo.launch()
"""
@article{lin2020birds,
title={Birds have four legs?! NumerSense: Probing Numerical Commonsense Knowledge of Pre-trained Language Models},
author={Lin, Bill Yuchen and Lee, Seyeon and Khanna, Rahul and Ren, Xiang},
journal={arXiv preprint arXiv:2005.00683},
year={2020}
}
@inproceedings{lin2020birds,
address = {Online},
author = {Lin, Bill Yuchen and
Lee, Seyeon and
Khanna, Rahul and
Ren, Xiang},
booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)},
doi = {10.18653/v1/2020.emnlp-main.557},
pages = {6862--6868},
publisher = {Association for Computational Linguistics},
title = {{B}irds have four legs?! {N}umer{S}ense: {P}robing {N}umerical {C}ommonsense {K}nowledge of {P}re-{T}rained {L}anguage {M}odels},
url = {https://aclanthology.org/2020.emnlp-main.557},
year = {2020}
}
"""