Spaces:
Running
Running
| import gradio as gr | |
| import rebiber | |
| import os | |
| import uuid | |
# Load Bib Database
# The bib list file is looked up in the directory that holds rebiber's
# ``__init__.py``. Stripping the file name from the absolute module path leaves
# that directory *with* its trailing separator.
# NOTE(review): ``start_dir`` presumably relies on that trailing separator --
# confirm against rebiber before switching to os.path.dirname().
filepath = os.path.abspath(
    rebiber.__file__
).replace("__init__.py", "")
bib_list_path = os.path.join(filepath, "bib_list.txt")
bib_db = rebiber.construct_bib_db(bib_list_path, start_dir=filepath)

# The abbreviation table is read from a path relative to the working directory.
abbr_tsv_path = "abbr.tsv"
abbr_dict = rebiber.normalize.load_abbr_tsv(abbr_tsv_path)
def process(input_bib, shorten, remove_keys, deduplicate, sort):
    """Normalize BibTeX text with rebiber and return the result.

    The text is written to a uniquely named scratch file, loaded with
    ``rebiber.load_bib_file`` and normalized against the module-level
    ``bib_db`` by ``rebiber.normalize_bib``, which writes
    ``output_<id>.bib`` in the working directory.

    Args:
        input_bib: Raw BibTeX text.
        shorten: When true, pass the module-level ``abbr_dict`` to rebiber;
            otherwise pass an empty list.
        remove_keys: Forwarded to rebiber as ``removed_value_names``.
        deduplicate: Forwarded to rebiber as ``deduplicate``.
        sort: Forwarded to rebiber as ``sort``.

    Returns:
        A ``(output_bib, random_id)`` pair. ``random_id`` is the hex id used
        in the scratch file names, so ``output_<random_id>.bib`` can be found
        again later. When the input contains no ``@`` the pair is
        ``("N/A", "")``.
    """
    if not input_bib or "@" not in input_bib:
        # Callers wire this function to two outputs, so the early exit must
        # also return a pair rather than a single string.
        return "N/A", ""

    random_id = uuid.uuid4().hex
    input_path = f"input_{random_id}.bib"
    output_path = f"output_{random_id}.bib"

    # NOTE(review): assumes rebiber reads and writes its .bib files as UTF-8 --
    # confirm; the explicit encoding keeps both sides independent of the locale.
    with open(input_path, "w", encoding="utf-8") as f:
        # NOTE(review): the replacement text may have had its whitespace
        # collapsed in this copy of the file -- confirm the intended width.
        f.write(input_bib.replace("\t", " "))

    try:
        all_bib_entries = rebiber.load_bib_file(input_path)
        print("# Input Bib Entries:", len(all_bib_entries))
        rebiber.normalize_bib(
            bib_db,
            all_bib_entries,
            output_path,
            abbr_dict=abbr_dict if shorten else [],
            deduplicate=deduplicate,
            sort=sort,
            removed_value_names=remove_keys,
        )
    finally:
        # The input scratch file is never needed again; the output file is
        # kept because it is looked up later by its id.
        os.remove(input_path)

    with open(output_path, encoding="utf-8") as f:
        # NOTE(review): as written this replace is an identity operation;
        # the indent width was probably lost in this copy -- confirm.
        output_bib = f.read().replace("\n ", "\n ")
    return output_bib, random_id
# Sample arXiv-style entry used as the initial content of the input box.
example_input = """
@article{lin2020birds,
title={Birds have four legs?! NumerSense: Probing Numerical Commonsense Knowledge of Pre-trained Language Models},
author={Lin, Bill Yuchen and Lee, Seyeon and Khanna, Rahul and Ren, Xiang},
journal={arXiv preprint arXiv:2005.00683},
year={2020}
}
"""

# One row holding one example input.
examples = [[example_input]]
| # iface = gr.Interface(fn=process, | |
| # inputs=gr.inputs.Textbox(lines=30, label="Input BIB"), | |
| # outputs=gr.outputs.Textbox(label="Output BIB").style(show_copy_button=True), | |
| # examples=examples, | |
| # allow_flagging="never" | |
| # ) | |
# Build the web UI: an input column (BibTeX text plus options) and an output
# column (normalized text plus a download control), then wire the buttons.
# NOTE(review): the Row/Column nesting below was reconstructed from a copy of
# this file that had lost its indentation -- confirm the intended layout.
with gr.Blocks() as demo:
    gr.Markdown(
        '''# Rebiber: A tool for normalizing bibtex with official info.
<table>
<tr>
<td>
<a href="https://yuchenlin.xyz/">
<img src="https://img.shields.io/badge/Yuchen%20Lin-๐ผ-blue?style=social">
</a>
</td>
<td>
<a href="https://github.com/yuchenlin/rebiber">
<img src="https://img.shields.io/badge/Github--blue?style=social&logo=github">
</a>
</td>
<td>
<a href="https://twitter.com/billyuchenlin/status/1353850378438070272?s=20">
<img src="https://img.shields.io/badge/Tweet--blue?style=social&logo=twitter">
</a>
</td>
</tr>
</table>
We often cite papers using their arXiv versions without noting that they are already __PUBLISHED__ in some conferences. These unofficial bib entries might violate rules about submissions or camera-ready versions for some conferences.
We introduce __Rebiber__, a simple tool in Python to fix them automatically. It is based on the official conference information from the [DBLP](https://dblp.org/) or [the ACL anthology](https://www.aclweb.org/anthology/) (for NLP conferences)!
Apart from handling outdated arXiv citations, __Rebiber__ also normalizes citations in a unified way (DBLP-style), supporting abbreviation and value selection.
'''
    )
    with gr.Row():
        with gr.Column(scale=3):
            input_bib = gr.Textbox(lines=20, label="Input BIB", value=example_input, interactive=True)
            # A CheckboxGroup's value is the list of *selected choices*, so
            # "nothing selected" is an empty list, not one boolean per choice.
            removekeys = gr.CheckboxGroup(
                ["url", "biburl", "address", "publisher", "pages", "doi", "volume", "bibsource"],
                value=[],
                label="Remove Keys", info="Which keys to remove?")
            shorten = gr.Checkbox(label="Abbreviation", info="Shorten the conference/journal names (e.g., `Proceedings of the 2020 International Conference of ...` --> `Proc. of ICML')", value=False)
            dedup = gr.Checkbox(label="Deduplicate entries.", value=False)
            sort = gr.Checkbox(label="Sort alphabetically by ID.", value=False)
            with gr.Row():
                clr_button = gr.Button("Clear")
                button = gr.Button("Submit")
            # Hidden field that carries the id returned by ``process`` from
            # the Submit handler to the Download handler.
            ex_uuid = gr.Text(label="UUID", visible=False)
        with gr.Column(scale=3):
            output = gr.Textbox(label="Output BIB (Note that you can copy the output bib file by clicking the top-right button.)").style(show_copy_button=True, interactive=False)
            download_btn = gr.Button("Download")
            # Stays hidden until a file is available to download.
            download_content = gr.File(visible=False)

    def download_file(ex_uuid):
        """Reveal the file component with the output file for ``ex_uuid``.

        Args:
            ex_uuid: Text of the hidden UUID field, i.e. the id returned by
                the last ``process`` call (empty if nothing was processed).

        Returns:
            A Gradio update for the file component: the file path plus
            ``visible=True`` when ``output_<ex_uuid>.bib`` exists, otherwise
            an update that keeps the component hidden.
        """
        # The id arrives from the client, so only accept the shape that
        # ``uuid.uuid4().hex`` produces before using it in a file name.
        looks_like_id = (
            isinstance(ex_uuid, str)
            and len(ex_uuid) == 32
            and all(ch in "0123456789abcdef" for ch in ex_uuid)
        )
        if not looks_like_id:
            return gr.update(visible=False)
        file_path = f"output_{ex_uuid}.bib"
        if not os.path.isfile(file_path):
            return gr.update(visible=False)
        return gr.update(value=file_path, visible=True)

    download_btn.click(download_file, inputs=ex_uuid, outputs=download_content)
    button.click(process, inputs=[input_bib, shorten, removekeys, dedup, sort], outputs=[output, ex_uuid], api_name = "process")

    def clean(text):
        """Return an empty string; used to clear the input box."""
        return ""

    clr_button.click(clean, input_bib, input_bib)
| # gr.Interface(fn=process, | |
| # outputs=gr.outputs.Textbox(label="Output BIB").style(show_copy_button=True), | |
| # examples=examples, | |
| # allow_flagging="never", | |
| # scroll_to_output=True, | |
| # show_progress=True, | |
| # ) | |
# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
| """ | |
| @article{lin2020birds, | |
| title={Birds have four legs?! NumerSense: Probing Numerical Commonsense Knowledge of Pre-trained Language Models}, | |
| author={Lin, Bill Yuchen and Lee, Seyeon and Khanna, Rahul and Ren, Xiang}, | |
| journal={arXiv preprint arXiv:2005.00683}, | |
| year={2020} | |
| } | |
| @inproceedings{lin2020birds, | |
| address = {Online}, | |
| author = {Lin, Bill Yuchen and | |
| Lee, Seyeon and | |
| Khanna, Rahul and | |
| Ren, Xiang}, | |
| booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)}, | |
| doi = {10.18653/v1/2020.emnlp-main.557}, | |
| pages = {6862--6868}, | |
| publisher = {Association for Computational Linguistics}, | |
| title = {{B}irds have four legs?! {N}umer{S}ense: {P}robing {N}umerical {C}ommonsense {K}nowledge of {P}re-{T}rained {L}anguage {M}odels}, | |
| url = {https://aclanthology.org/2020.emnlp-main.557}, | |
| year = {2020} | |
| } | |
| """ |