In [1]:
# from langchain.text_splitter import LatexTextSplitter
from langchain.text_splitter import RecursiveCharacterTextSplitter
from typing import Any
import requests
import logging
import json
import tiktoken
import gradio as gr
from langchain.document_loaders import UnstructuredPDFLoader

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
turbo_encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
with open("sample.tex", "r") as f:
    content = f.read()

In [3]:
class LatexTextSplitter(RecursiveCharacterTextSplitter):
    """Attempts to split the text along Latex-formatted layout elements."""

    def __init__(self, **kwargs: Any):
        """Initialize a LatexTextSplitter."""
        separators = [
            # First, try to split along Latex sections
            "\chapter{",
            "\section{",
            "\subsection{",
            "\subsubsection{",

            # Now split by environments
            "\begin{"
            # "\n\\begin{enumerate}",
            # "\n\\begin{itemize}",
            # "\n\\begin{description}",
            # "\n\\begin{list}",
            # "\n\\begin{quote}",
            # "\n\\begin{quotation}",
            # "\n\\begin{verse}",
            # "\n\\begin{verbatim}",

            ## Now split by math environments
            # "\n\\begin{align}",
            # "$$",
            # "$",

            # Now split by the normal type of lines
            " ",
            "",
        ]
        super().__init__(separators=separators, **kwargs)


def json_validator(text: str, openai_key: str, retry: int = 3):
    for _ in range(retry):
        try:
            return json.loads(text)
        except Exception:
            
            try:
                prompt = f"Modify the following into a valid json format:\n{text}"
                prompt_token_length = len(turbo_encoding.encode(prompt))

                data = {
                    "model": "text-davinci-003",
                    "prompt": prompt,
                    "max_tokens": 4097 - prompt_token_length - 64
                }
                headers = {
                    "Content-Type": "application/json",
                    "Authorization": f"Bearer {openai_key}"
                }
                for _ in range(retry):
                    response = requests.post(
                        'https://api.openai.com/v1/completions',
                        json=data,
                        headers=headers,
                        timeout=300
                    )
                    if response.status_code != 200:
                        logging.warning(f'fetch openai chat retry: {response.text}')
                        continue
                    text = response.json()['choices'][0]['text']
                    break
            except:
                return response.json()['error']
            
    return text

In [9]:
def analyze(latex_whole_document: str, openai_key: str, progress):
    
    logging.info("start analysis")
    
    output_format = """

    ```json
    [
        \\ Potential point for improvement 1
        {{
            "title": string \\ What this modification is about
            "thought": string \\ The reason why this should be improved
            "action": string \\ how to make improvement
            "original": string \\ the original latex snippet that can be improved
            "improved": string \\ the improved latex snippet which address your point
        }},
        {{}}
    ]
    ```
    """
    
    chunk_size = 1000
    # for _ in range(5):
    #     try:
    #         latex_splitter = LatexTextSplitter(
    #             chunk_size=min(chunk_size, len(latex_whole_document)),
    #             chunk_overlap=0,
    #         )
    #         docs = latex_splitter.create_documents([latex_whole_document])
    #         break
    #     except:
    #         chunk_size // 2

    latex_splitter = LatexTextSplitter(
        chunk_size=min(chunk_size, len(latex_whole_document)),
        chunk_overlap=0,
    )
    docs = latex_splitter.create_documents([latex_whole_document])
    
    progress(0.05)
    ideas = []
    for doc in progress.tqdm(docs):

        prompt = f"""
        I'm a computer science student.
        You are my editor.
        Your goal is to improve my paper quality at your best.
        
        
        ```
        {doc.page_content}
        ```
        The above is a segment of my research paper. If the end of the segment is not complete, just ignore it.
        Point out the parts that can be improved.
        Focus on grammar, writing, content, section structure.
        Ignore comments and those that are outside the document environment.
        List out all the points with a latex snippet which is the improved version addressing your point.
        Same paragraph should be only address once.
        Output the response in the following valid json format:
        {output_format}

        """
        
        idea = fetch_chat(prompt, openai_key)
        if isinstance(idea, list):
            ideas += idea
            break
        else:
            raise gr.Error(idea)

    logging.info('complete analysis')
    return ideas


def fetch_chat(prompt: str, openai_key: str, retry: int = 3):
    json = {
        "model": "gpt-3.5-turbo-16k",
        "messages": [{"role": "user", "content": prompt}]
    }
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {openai_key}"
    }
    for _ in range(retry):
        response = requests.post(
            'https://api.openai.com/v1/chat/completions',
            json=json,
            headers=headers,
            timeout=300
        )
        if response.status_code != 200:
            logging.warning(f'fetch openai chat retry: {response.text}')
            continue
        result = response.json()['choices'][0]['message']['content']
        return json_validator(result, openai_key)
    
    return response.json()["error"]
    
    
def read_file(f: str):
    if f is None:
        return ""
    elif f.name.endswith('pdf'):
        loader = UnstructuredPDFLoader(f.name)
        pages = loader.load_and_split()
        return "\n".join([p.page_content for p in pages])
    elif f.name.endswith('tex'):
        with open(f.name, "r") as f:
            return f.read()
    else:
        return "Only support .tex & .pdf"

In [11]:
idea_list = []
max_ideas = 20


with gr.Blocks() as demo:
    
    def generate(txt: str, openai_key: str, progress=gr.Progress()):
        
        if not openai_key:
            raise gr.Error("Please provide openai key !")
        
        try:
            global idea_list
            idea_list = analyze(txt, openai_key, progress)
            k = min(len(idea_list), max_ideas)

            idea_buttons = [
                gr.Button.update(visible=True, value=i['title'])
                for e, i in enumerate(idea_list[:max_ideas])
            ]
            idea_buttons += [
                gr.Button.update(visible=False)
            ]*(max_ideas-len(idea_buttons))

            idea_details = [
                gr.Textbox.update(value="", label="thought", visible=True),
                gr.Textbox.update(value="", label="action", visible=True),
                gr.Textbox.update(value="", label="original", visible=True, max_lines=5, lines=5),
                gr.Textbox.update(value="", label="improved", visible=True, max_lines=5, lines=5)
            ]

            return [
                gr.Textbox.update("Suggestions", interactive=False, show_label=False),
                gr.Button.update(visible=True, value="Analyze")
            ] + idea_details + idea_buttons
        except Exception as e:
            raise gr.Error(str(e))

    def select(name: str):
        global idea_list
        for i in idea_list:
            if i['title'] == name:
                return [
                    gr.Textbox.update(value=i["thought"], label="thought", visible=True),
                    gr.Textbox.update(value=i["action"], label="action", visible=True),
                    gr.Textbox.update(value=i["original"], label="original", visible=True, max_lines=5, lines=5),
                    gr.Textbox.update(value=i["improved"], label="improved", visible=True, max_lines=5, lines=5)
                ]
            
    title = gr.Button("PaperGPT", interactive=False).style(size=10)
    key = gr.Textbox(label="openai_key")
    with gr.Row().style(equal_height=True):
        with gr.Column(scale=0.95):
            txt_in = gr.Textbox(label="Input", lines=11, max_lines=11, value=content[2048+2048+256-45:])
        with gr.Column(scale=0.05):
            upload = gr.File(file_count="single", file_types=["tex", ".pdf"])
            btn = gr.Button("Analyze")
            upload.change(read_file, inputs=upload, outputs=txt_in)

    textboxes = []
    sug = gr.Textbox("Suggestions", interactive=False, show_label=False).style(text_align="center")
    with gr.Row():
        with gr.Column(scale=0.4):
            for i in range(max_ideas):
                t = gr.Button("", visible=False)
                textboxes.append(t)
        with gr.Column(scale=0.6):
            thought = gr.Textbox(label="thought", visible=False, interactive=False)
            action = gr.Textbox(label="action", visible=False, interactive=False)
            original = gr.Textbox(label="original", visible=False, max_lines=5, lines=5, interactive=False)
            improved = gr.Textbox(label="improved", visible=False, max_lines=5, lines=5, interactive=False)

    btn.click(generate, inputs=[txt_in, key], outputs=[sug, btn, thought, action, original, improved] + textboxes)
    for i in textboxes:
        i.click(select, inputs=[i], outputs=[thought, action, original, improved])
    demo.launch(server_name="0.0.0.0", server_port=7653, share=True, enable_queue=True)



Running on local URL:  http://0.0.0.0:7653
Running on public URL: https://73992a9ff20adf33a3.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


    "error": {
        "message": "",
        "type": "invalid_request_error",
        "param": null,
        "code": "invalid_api_key"
    }
}

    "error": {
        "message": "",
        "type": "invalid_request_error",
        "param": null,
        "code": "invalid_api_key"
    }
}

    "error": {
        "message": "",
        "type": "invalid_request_error",
        "param": null,
        "code": "invalid_api_key"
    }
}

Traceback (most recent call last):
  File "/tmp/ipykernel_22031/279099274.py", line 14, in generate
    idea_list = analyze(txt, openai_key, progress)
  File "/tmp/ipykernel_22031/3345783910.py", line 69, in analyze
    raise gr.Error(idea)
gradio.exceptions.Error: {'message': '', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/hsiao1229/.local/share/virtualenvs/chatGPT-yp18Rznv/lib/python3.8/site-packages/gradio/ro

In [10]:
demo.close()

Closing server running on port: 7653
