# coding:utf-8
import gradio as gr
import rebiber
import os
import uuid

# Load Bib Database
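# rebiber.__file__ points at the package's __init__.py; stripping the file
# name yields the package directory, which ships bib_list.txt and the
# conference bib files it references.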
filepath = os.path.abspath(rebiber.__file__).replace("__init__.py", "")
bib_list_path = os.path.join(filepath, "bib_list.txt")
abbr_tsv_path = "abbr.tsv"

bib_db = rebiber.construct_bib_db(bib_list_path, start_dir=filepath)
abbr_dict = rebiber.normalize.load_abbr_tsv(abbr_tsv_path)
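# bib_db maps normalized paper titles to their official bib entries;
# abbr_dict holds the (full venue name, abbreviation) pairs from abbr.tsv.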


def process(input_bib, shorten, remove_keys, deduplicate, sort):
    # The click handler binds three outputs, so every return path must
    # yield three values.
    if "@" not in input_bib:
        return "N/A", "", gr.update(visible=False)
    random_id = uuid.uuid4().hex
    with open(f"input_{random_id}.bib", "w") as f:
        f.write(input_bib.replace("\t", "    "))
    all_bib_entries = rebiber.load_bib_file(f"input_{random_id}.bib")
    # print("# Input Bib Entries:", len(all_bib_entries))
    abbr_dict_pass = []
    if shorten:
        abbr_dict_pass = abbr_dict
    rebiber.normalize_bib(
        bib_db,
        all_bib_entries,
        f"output_{random_id}.bib",
        abbr_dict=abbr_dict_pass,
        deduplicate=deduplicate,
        sort=sort,
        removed_value_names=remove_keys,
    )
    with open(f"output_{random_id}.bib") as f:
        output_bib = f.read().replace("\n ", "\n    ")
    # delete both files
    return output_bib, random_id, gr.update(visible=True)


def download_file(ex_uuid):
    # Point the File component at the bib file written by process() and
    # reveal it in a single update.
    file_path = f"output_{ex_uuid}.bib"
    return gr.update(value=file_path, visible=True)
    

example_input = """
    @article{lin2020birds,
        title={Birds have four legs?! NumerSense: Probing Numerical Commonsense Knowledge of Pre-trained Language Models},
        author={Lin, Bill Yuchen and Lee, Seyeon and Khanna, Rahul and Ren, Xiang},
        journal={arXiv preprint arXiv:2005.00683},
        year={2020}
    }
    @inproceedings{Lin2020CommonGenAC,
        title={CommonGen: A Constrained Text Generation Challenge for Generative Commonsense Reasoning},
        author={Bill Yuchen Lin and Minghan Shen and Wangchunshu Zhou and Pei Zhou and Chandra Bhagavatula and Yejin Choi and Xiang Ren},
        booktitle={Findings},
        year={2020}
    }
"""

examples = [[example_input]]
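
# A minimal sketch of calling process() directly (hypothetical usage outside
# the UI): it returns the normalized bib text, the UUID naming the temporary
# output file, and a Gradio update that reveals the download button.
#
#   bib_text, file_id, btn_update = process(
#       example_input, shorten=True, remove_keys=["url", "doi"],
#       deduplicate=True, sort=True,
#   )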

with gr.Blocks() as demo:

    gr.Markdown(
        """# Rebiber: A tool for normalizing bibtex with official info.
                <table> 
                <tr>
                <td>
                <a href="https://yuchenlin.xyz/">
                    <img src="https://img.shields.io/badge/Yuchen%20Lin-🐼-blue?style=social">
                </a>
                </td>
                <td>
                <a href="https://github.com/yuchenlin/rebiber">
                    <img src="https://img.shields.io/badge/Github--blue?style=social&logo=github">
                </a>
                </td>
                <td>
                <a href="https://twitter.com/billyuchenlin/status/1353850378438070272?s=20">
                    <img src="https://img.shields.io/badge/Tweet--blue?style=social&logo=twitter">
                </a>
                </td>
                </tr>
                </table>
                <span style="font-size:13pt">
                
                We often cite papers by their arXiv versions without noting that they have already been __PUBLISHED__ at a conference. Such unofficial bib entries may violate submission or camera-ready rules at some venues.
                We introduce __Rebiber__, a simple Python tool that fixes them automatically, using official conference information from [DBLP](https://dblp.org/) and [the ACL Anthology](https://www.aclweb.org/anthology/) (for NLP conferences)!
                Besides updating outdated arXiv citations, __Rebiber__ also normalizes entries into a unified (DBLP-style) format, with options to abbreviate venue names and to drop selected fields.
                
                </span>
            """
    )

    with gr.Row():
        with gr.Column(scale=3):
            input_bib = gr.Textbox(
                lines=15, label="Input BIB", value=example_input, interactive=True
            )
            removekeys = gr.CheckboxGroup(
                [
                    "url",
                    "biburl",
                    "address",
                    "publisher",
                    "pages",
                    "doi",
                    "volume",
                    "bibsource",
                ],
                label="Remove Keys",
                info="Which keys to remove?",
            )
            shorten = gr.Checkbox(
                label="Abbreviation",
                info="Shorten the conference/journal names (e.g., `Proceedings of the 2020 International Conference of ...` --> `Proc. of ICML')",
                value=False,
            )
            dedup = gr.Checkbox(label="Deduplicate entries.", value=False)
            sort = gr.Checkbox(label="Sort alphabetically by ID.", value=False)
            with gr.Row():
                clr_button = gr.Button("Clear")
                button = gr.Button("Submit")
            # Hidden field that carries the output-file UUID from process()
            # to download_file().
            ex_uuid = gr.Text(label="UUID", visible=False)
            
        with gr.Column(scale=3):
            output = gr.Textbox(
                label="Output BIB (Note that you can copy the output bib file by clicking the top-right button.)",
                show_copy_button=True,
                interactive=False,
            )
            # Both stay hidden until process() has produced an output file.
            download_btn = gr.Button("Generate Bib File", visible=False)
            download_content = gr.File(visible=False)

    download_btn.click(
        download_file, inputs=ex_uuid, outputs=download_content
    )
    
    button.click(
        process,
        inputs=[input_bib, shorten, removekeys, dedup, sort],
        outputs=[output, ex_uuid, download_btn],
        api_name="process",
    )

    def clean(text):
        return ""

    clr_button.click(clean, input_bib, input_bib)
    # gr.Interface(fn=process,
    # outputs=gr.outputs.Textbox(label="Output BIB").style(show_copy_button=True),
    # examples=examples,
    # allow_flagging="never",
    # scroll_to_output=True,
    # show_progress=True,
    # )


if __name__ == "__main__":
    demo.launch()
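
# For reference, the docstring below shows one entry before (arXiv preprint)
# and after (the official EMNLP 2020 version) normalization.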


"""
@article{lin2020birds,
    title={Birds have four legs?! NumerSense: Probing Numerical Commonsense Knowledge of Pre-trained Language Models},
    author={Lin, Bill Yuchen and Lee, Seyeon and Khanna, Rahul and Ren, Xiang},
    journal={arXiv preprint arXiv:2005.00683},
    year={2020}
} 

@inproceedings{lin2020birds,
 address = {Online},
 author = {Lin, Bill Yuchen  and
Lee, Seyeon  and
Khanna, Rahul  and
Ren, Xiang},
 booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)},
 doi = {10.18653/v1/2020.emnlp-main.557},
 pages = {6862--6868},
 publisher = {Association for Computational Linguistics},
 title = {{B}irds have four legs?! {N}umer{S}ense: {P}robing {N}umerical {C}ommonsense {K}nowledge of {P}re-{T}rained {L}anguage {M}odels},
 url = {https://aclanthology.org/2020.emnlp-main.557},
 year = {2020}
}   
"""