"""Gradio web demo that predicts the normalized academic impact of a paper.

The app takes a paper title and abstract, validates them, and feeds a fixed
prompt to the ``ssocean/NAIP`` sequence-classification model. The sigmoid of
the single output logit is shown as the predicted impact.
"""
import re

import gradio as gr
import spaces
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import AutoModelForSequenceClassification, AutoTokenizer

model_path = r'ssocean/NAIP'
device = 'cuda:0'

# Lazily initialised inside `predict`, on the first request.
model = None
tokenizer = None

# Token budget advertised by the UI ("No more than 1024 tokens") and by the
# validation warning; inputs longer than this are truncated.
MAX_INPUT_TOKENS = 1024

# Word-count limits enforced by `validate_input`.
MIN_TITLE_WORDS = 3
MIN_ABSTRACT_WORDS = 50
MAX_TOTAL_WORDS = 1024

# Reason for +0.05: We observed that the predicted values in the web demo are
# generally around 0.05 lower than those in the local deployment (due to
# differences in software/hardware environments). Therefore, we apply this
# compensation in the web demo. Please do not use this in the local deployment.
WEB_DEMO_OFFSET = 0.05

# Blacklist: matches any character outside the 7-bit ASCII range.
_NON_ASCII_PATTERN = re.compile(r'[^\u0000-\u007F]')


@spaces.GPU(duration=60, enable_queue=True)
def predict(title: str, abstract: str) -> float:
    """Predict the normalized academic impact of a paper.

    Args:
        title: Paper title.
        abstract: Paper abstract.

    Returns:
        The sigmoid of the model logit plus the web-demo compensation
        (``WEB_DEMO_OFFSET``), capped at 1.0 and rounded to 4 decimals.
    """
    global model, tokenizer
    # Load both objects into locals first and publish them together, so a
    # failure half-way through never leaves `model` set with `tokenizer` unset.
    if model is None or tokenizer is None:
        loaded_tokenizer = AutoTokenizer.from_pretrained(model_path)
        loaded_model = AutoModelForSequenceClassification.from_pretrained(
            model_path,
            num_labels=1,
            load_in_8bit=True,
        )
        loaded_model.eval()
        tokenizer, model = loaded_tokenizer, loaded_model

    # The prompt must stay byte-identical: the model output depends on it.
    text = f'''Given a certain paper, Title: {title}\n Abstract: {abstract}. \n Predict its normalized academic impact (between 0 and 1):'''
    # Truncate explicitly so over-long inputs behave the way the validation
    # warning says they do instead of being passed through at full length.
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=MAX_INPUT_TOKENS,
    ).to(device)
    with torch.no_grad():
        outputs = model(**inputs)
    probability = torch.sigmoid(outputs.logits).item()
    return round(min(probability + WEB_DEMO_OFFSET, 1.0), 4)


# Example data
examples = [
    [
        "LoRA-IR: Taming Low-Rank Experts for Efficient All-in-One Image Restoration",
        ('''Prompt-based all-in-one image restoration (IR) frameworks have achieved remarkable performance by incorporating degradation-specific information into prompt modules. Nevertheless, handling the complex and diverse degradations encountered in real-world scenarios remains a significant challenge. To address this challenge, we propose LoRA-IR, a flexible framework that dynamically leverages compact low-rank experts to facilitate efficient all-in-one image restoration. Specifically, LoRA-IR consists of two training stages: degradation-guided pre-training and parameter-efficient fine-tuning. 
In the pre-training stage, we enhance the pre-trained CLIP model by introducing a simple mechanism that scales it to higher resolutions, allowing us to extract robust degradation representations that adaptively guide the IR network. In the fine-tuning stage, we refine the pre-trained IR network using low-rank adaptation (LoRA). Built upon a Mixture-of-Experts (MoE) architecture, LoRA-IR dynamically integrates multiple low-rank restoration experts through a degradation-guided router. This dynamic integration mechanism significantly enhances our model's adaptability to diverse and unknown degradations in complex real-world scenarios. Extensive experiments demonstrate that LoRA-IR achieves state-of-the-art performance across 14 image restoration tasks and 29 benchmarks. Code and pre-trained models will be available at: https://github.com/shallowdream204/LoRA-IR.''')
    ],
    [
        "ConsistentAvatar: Learning to Diffuse Fully Consistent Talking Head Avatar with Temporal Guidance",
        ('''Diffusion models have shown impressive potential on talking head generation. While plausible appearance and talking effect are achieved, these methods still suffer from temporal, 3D or expression inconsistency due to the error accumulation and inherent limitation of single-image generation ability. In this paper, we propose ConsistentAvatar, a novel framework for fully consistent and high-fidelity talking avatar generation. Instead of directly employing multi-modal conditions to the diffusion process, our method learns to first model the temporal representation for stability between adjacent frames. Specifically, we propose a Temporally-Sensitive Detail (TSD) map containing high-frequency feature and contours that vary significantly along the time axis. Using a temporal consistent diffusion module, we learn to align TSD of the initial result to that of the video frame ground truth. 
The final avatar is generated by a fully consistent diffusion module, conditioned on the aligned TSD, rough head normal, and emotion prompt embedding. We find that the aligned TSD, which represents the temporal patterns, constrains the diffusion process to generate temporally stable talking head. Further, its reliable guidance complements the inaccuracy of other conditions, suppressing the accumulated error while improving the consistency on various aspects. Extensive experiments demonstrate that ConsistentAvatar outperforms the state-of-the-art methods on the generated appearance, 3D, expression and temporal consistency. Project page: https://njust-yang.github.io/ConsistentAvatar.github.io/''')
    ]
]


def validate_input(title, abstract):
    """Validate that the title and abstract meet the input requirements.

    Args:
        title: Paper title; ``None`` is treated as an empty string.
        abstract: Paper abstract; ``None`` is treated as an empty string.

    Returns:
        A ``(valid, message)`` tuple. ``valid`` is False when the title or
        abstract is too short or contains non-ASCII characters. Over-long
        input is still valid, but the message is a truncation warning.
    """
    title = title or ""
    abstract = abstract or ""

    # `split()` without arguments ignores runs of whitespace, so blank or
    # padded input cannot be counted as words.
    title_words = title.split()
    abstract_words = abstract.split()

    if len(title_words) < MIN_TITLE_WORDS:
        return False, "The title must be at least 3 words long."
    if len(abstract_words) < MIN_ABSTRACT_WORDS:
        return False, "The abstract must be at least 50 words long."
    # Character checks run before the length warning so that over-long input
    # cannot bypass them.
    if _NON_ASCII_PATTERN.search(title):
        return False, "The title contains invalid characters. Only English letters and special symbols are allowed."
    if _NON_ASCII_PATTERN.search(abstract):
        return False, "The abstract contains invalid characters. Only English letters and special symbols are allowed."
    if len(title_words) + len(abstract_words) > MAX_TOTAL_WORDS:
        return True, "Warning, The input length is approaching tokenization limits (1024) and may be truncated without further warning!"
    return True, "Inputs are valid! Good to go!"


def update_button_status(title, abstract):
    """Update the validation message and the submit button state.

    Args:
        title: Current content of the title textbox.
        abstract: Current content of the abstract textbox.

    Returns:
        Two ``gr.update`` objects: the new validation status text, and the
        submit button's ``interactive`` flag (enabled only for valid input).
    """
    valid, message = validate_input(title, abstract)
    if not valid:
        return gr.update(value="Error: " + message), gr.update(interactive=False)
    return gr.update(value=message), gr.update(interactive=True)


# Build the Gradio interface
with gr.Blocks() as iface:
    gr.Markdown("""
    # 🧠 Predict Academic Impact of Newly Published Paper!
    ### Estimate the future academic impact of a paper using LLM [Read the full paper](https://arxiv.org/abs/2408.03934)
    """)
    with gr.Row():
        with gr.Column():
            title_input = gr.Textbox(
                lines=2,
                placeholder="Enter Paper Title Here...",
                label="Paper Title"
            )
            abstract_input = gr.Textbox(
                lines=5,
                placeholder="Enter Paper Abstract Here... (Do not input line breaks. No more than 1024 tokens.)",
                label="Paper Abstract"
            )
            validation_status = gr.Textbox(label="Validation Status", interactive=False)
            # Disabled until `update_button_status` reports valid input.
            submit_button = gr.Button("Predict Impact", interactive=False)
        with gr.Column():
            output = gr.Label(label="Predicted Impact")

    # Input event bindings: re-validate whenever either textbox changes.
    title_input.change(
        update_button_status,
        inputs=[title_input, abstract_input],
        outputs=[validation_status, submit_button]
    )
    abstract_input.change(
        update_button_status,
        inputs=[title_input, abstract_input],
        outputs=[validation_status, submit_button]
    )

    submit_button.click(
        predict,
        inputs=[title_input, abstract_input],
        outputs=output
    )

    gr.Examples(
        examples=examples,
        inputs=[title_input, abstract_input],
        outputs=[validation_status, output],
        cache_examples=False
    )
    gr.Markdown("""
    **Important Notes**
    - It is intended as a tool for research and educational purposes only.
    - Predicted impact is a probabilistic value generated by the model and does not reflect paper quality or novelty.
    - The author takes no responsibility for the prediction results.
    - To identify potentially impactful papers, this study uses the sigmoid+MSE approach to optimize NDCG values (over sigmoid+BCE), resulting in predicted values concentrated between 0.1 and 0.9 due to the sigmoid gradient effect.
    - Generally, it is considered a predicted influence score greater than 0.65 to indicate an impactful paper.
    """)

iface.launch()