import gradio as gr
import spaces
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch.nn.functional as F
import torch.nn as nn
import re
# Identifier handed to `from_pretrained` for both the model and the tokenizer.
model_path = r'ssocean/NAIP'
# Device the tokenized inputs are moved to before inference.
device = 'cuda:0'

# Lazily-initialised cache: both stay None until the first call to predict(),
# which loads them and rebinds these module-level names.
model = None
tokenizer = None


@spaces.GPU(duration=60, enable_queue=True)
def predict(title, abstract):
    """Predict the normalized academic impact score of a paper.

    The model and tokenizer are loaded on the first call and cached in the
    module-level ``model`` / ``tokenizer`` globals for subsequent calls.

    Args:
        title: Paper title, inserted verbatim into the prompt.
        abstract: Paper abstract, inserted verbatim into the prompt.

    Returns:
        float: Sigmoid of the model's single output logit plus the 0.05
        web-demo compensation, capped at 1.0 and rounded to 4 decimals.
    """
    global model, tokenizer
    if model is None or tokenizer is None:
        # Load into locals first and publish both together, so a failure in
        # either load cannot leave the cache half-initialised (model set,
        # tokenizer still None) and break every later call.
        loaded_model = AutoModelForSequenceClassification.from_pretrained(
            model_path,
            num_labels=1,
            load_in_8bit=True,
        )
        loaded_tokenizer = AutoTokenizer.from_pretrained(model_path)
        loaded_model.eval()
        model, tokenizer = loaded_model, loaded_tokenizer

    text = f'''Given a certain paper, Title: {title}\n Abstract: {abstract}. \n Predict its normalized academic impact (between 0 and 1):'''
    # The UI and the input validator both advertise a 1024-token limit with
    # truncation beyond it, so enforce that limit here explicitly.
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=1024,
    ).to(device)
    with torch.no_grad():
        outputs = model(**inputs)
    probability = torch.sigmoid(outputs.logits).item()

    # Reason for +0.05: the predicted values in the web demo were observed to
    # be generally around 0.05 lower than those in the local deployment (due
    # to differences in software/hardware environments), so the web demo
    # compensates for it. Do not use this offset in a local deployment.
    # min() keeps the result in range and always yields a float.
    return round(min(probability + 0.05, 1.0), 4)


# Example data
# Each entry is a [title, abstract] pair; the list is handed to gr.Examples
# below to pre-fill the title and abstract text boxes.
# The abstract literals keep their embedded line breaks and the single leading
# space on each continuation line exactly as written.
examples = [
    [
        "LoRA-IR: Taming Low-Rank Experts for Efficient All-in-One Image Restoration",
        ('''Prompt-based all-in-one image restoration (IR) frameworks have achieved
 remarkable performance by incorporating degradation-specific information into
 prompt modules. Nevertheless, handling the complex and diverse degradations
 encountered in real-world scenarios remains a significant challenge. To address
 this challenge, we propose LoRA-IR, a flexible framework that dynamically
 leverages compact low-rank experts to facilitate efficient all-in-one image
 restoration. Specifically, LoRA-IR consists of two training stages:
 degradation-guided pre-training and parameter-efficient fine-tuning. In the
 pre-training stage, we enhance the pre-trained CLIP model by introducing a
 simple mechanism that scales it to higher resolutions, allowing us to extract
 robust degradation representations that adaptively guide the IR network. In the
 fine-tuning stage, we refine the pre-trained IR network using low-rank
 adaptation (LoRA). Built upon a Mixture-of-Experts (MoE) architecture, LoRA-IR
 dynamically integrates multiple low-rank restoration experts through a
 degradation-guided router. This dynamic integration mechanism significantly
 enhances our model's adaptability to diverse and unknown degradations in
 complex real-world scenarios. Extensive experiments demonstrate that LoRA-IR
 achieves state-of-the-art performance across 14 image restoration tasks and 29
 benchmarks. Code and pre-trained models will be available at:
 https://github.com/shallowdream204/LoRA-IR.''')
    ],
    [
        "ConsistentAvatar: Learning to Diffuse Fully Consistent Talking Head Avatar with Temporal Guidance",
        ('''Diffusion models have shown impressive potential on talking head generation.
 While plausible appearance and talking effect are achieved, these methods still
 suffer from temporal, 3D or expression inconsistency due to the error
 accumulation and inherent limitation of single-image generation ability. In
 this paper, we propose ConsistentAvatar, a novel framework for fully consistent
 and high-fidelity talking avatar generation. Instead of directly employing
 multi-modal conditions to the diffusion process, our method learns to first
 model the temporal representation for stability between adjacent frames.
 Specifically, we propose a Temporally-Sensitive Detail (TSD) map containing
 high-frequency feature and contours that vary significantly along the time
 axis. Using a temporal consistent diffusion module, we learn to align TSD of
 the initial result to that of the video frame ground truth. The final avatar is
 generated by a fully consistent diffusion module, conditioned on the aligned
 TSD, rough head normal, and emotion prompt embedding. We find that the aligned
 TSD, which represents the temporal patterns, constrains the diffusion process
 to generate temporally stable talking head. Further, its reliable guidance
 complements the inaccuracy of other conditions, suppressing the accumulated
 error while improving the consistency on various aspects. Extensive experiments
 demonstrate that ConsistentAvatar outperforms the state-of-the-art methods on
 the generated appearance, 3D, expression and temporal consistency. Project
 page: https://njust-yang.github.io/ConsistentAvatar.github.io/''')
    ]
]

def validate_input(title, abstract):
    """Check whether the title and abstract are acceptable for prediction.

    Checks run in this order: minimum title length, minimum abstract length,
    ASCII-only title, ASCII-only abstract, then the combined-length warning.

    Args:
        title: Paper title text.
        abstract: Paper abstract text.

    Returns:
        tuple[bool, str]: ``(ok, message)``. ``ok`` is False when a check
        fails and ``message`` explains why. ``ok`` is True for acceptable
        input, with either a success message or a length warning.
    """
    # Split on any run of whitespace so repeated spaces, tabs, line breaks or
    # whitespace-only input cannot inflate the word count.
    title_words = title.split()
    abstract_words = abstract.split()

    if len(title_words) < 3:
        return False, "The title must be at least 3 words long."
    if len(abstract_words) < 50:
        return False, "The abstract must be at least 50 words long."

    # Blacklist: reject any character outside the ASCII range. These checks
    # must run before the length warning below, otherwise over-long input
    # would return early and skip them.
    if not title.isascii():
        return False, "The title contains invalid characters. Only English letters and special symbols are allowed."
    if not abstract.isascii():
        return False, "The abstract contains invalid characters. Only English letters and special symbols are allowed."

    # Word count is only a rough proxy for the token count, hence a warning
    # rather than a rejection.
    if len(title_words) + len(abstract_words) > 1024:
        return True, "Warning, The input length is approaching tokenization limits (1024) and may be truncated without further warning!"

    return True, "Inputs are valid! Good to go!"

def update_button_status(title, abstract):
    """Refresh the validation text and the button state from the inputs.

    Runs ``validate_input`` on the current title and abstract and returns two
    ``gr.update`` objects: the first carries the status text (prefixed with
    "Error: " when validation fails), the second sets ``interactive`` to the
    validation result.
    """
    is_valid, status_text = validate_input(title, abstract)
    shown_text = status_text if is_valid else "Error: " + status_text
    return gr.update(value=shown_text), gr.update(interactive=is_valid)

# Build the Gradio interface
with gr.Blocks() as iface:
    gr.Markdown("""
    # 🧠 Predict Academic Impact of Newly Published Paper!
    ### Estimate the future academic impact of a paper using LLM
    [Read the full paper](https://arxiv.org/abs/2408.03934)
    """)
    with gr.Row():
        # Left column: inputs, validation feedback and the submit button.
        with gr.Column():
            title_input = gr.Textbox(
                label="Paper Title",
                placeholder="Enter Paper Title Here...",
                lines=2,
            )
            abstract_input = gr.Textbox(
                label="Paper Abstract",
                placeholder="Enter Paper Abstract Here... (Do not input line breaks. No more than 1024 tokens.)",
                lines=5,
            )
            validation_status = gr.Textbox(interactive=False, label="Validation Status")
            # Starts disabled; update_button_status enables it once the inputs validate.
            submit_button = gr.Button("Predict Impact", interactive=False)
        # Right column: the prediction result.
        with gr.Column():
            output = gr.Label(label="Predicted Impact")

    # Input event bindings: re-validate whenever either text box changes.
    for text_box in (title_input, abstract_input):
        text_box.change(
            update_button_status,
            inputs=[title_input, abstract_input],
            outputs=[validation_status, submit_button],
        )

    # Run the model when the button is clicked.
    submit_button.click(
        predict,
        inputs=[title_input, abstract_input],
        outputs=output,
    )

    # Clickable examples that fill in the title and abstract boxes.
    gr.Examples(
        examples=examples,
        inputs=[title_input, abstract_input],
        outputs=[validation_status, output],
        cache_examples=False,
    )
    gr.Markdown("""
    **Important Notes**  
    - It is intended as a tool for research and educational purposes only.
    - Predicted impact is a probabilistic value generated by the model and does not reflect paper quality or novelty.  
    - The author takes no responsibility for the prediction results.
    - To identify potentially impactful papers, this study uses the sigmoid+MSE approach to optimize NDCG values (over sigmoid+BCE), resulting in predicted values concentrated between 0.1 and 0.9 due to the sigmoid gradient effect.
    - Generally, it is considered a predicted influence score greater than 0.65 to indicate an impactful paper.
    """)
iface.launch()