# Hugging Face Spaces page banner: "Running on Zero".
import gradio as gr | |
import spaces | |
import torch | |
from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
import torch.nn.functional as F | |
import torch.nn as nn | |
import re | |
# Identifier passed to `from_pretrained` for both the model and the tokenizer.
model_path = r'ssocean/NAIP'
# Device the tokenized inputs are moved to before inference.
device = 'cuda:0'
# Lazily-populated caches; `predict` fills these on first use.
# (No `global` statement is needed here: these are already module-level names.)
model = None
tokenizer = None
# NOTE(review): this file imports `spaces`, but no `@spaces.GPU` decorator is
# visible on this function -- confirm whether the deployment requires one.
def predict(title, abstract):
    """Return the predicted impact score for a paper.

    The model and tokenizer are loaded on first use and cached in the
    module-level ``model`` and ``tokenizer`` globals.

    Args:
        title: Paper title, interpolated into the prompt.
        abstract: Paper abstract, interpolated into the prompt.

    Returns:
        float: ``sigmoid(logit) + 0.05``, capped at 1.0 and rounded to four
        decimal places.
    """
    global model, tokenizer
    # Check each cache independently: if a previous call loaded the model but
    # failed while loading the tokenizer, the next call must still recover
    # instead of calling a ``None`` tokenizer.
    if model is None:
        loaded_model = AutoModelForSequenceClassification.from_pretrained(
            model_path,
            num_labels=1,
            load_in_8bit=True,
        )
        loaded_model.eval()
        model = loaded_model
    if tokenizer is None:
        tokenizer = AutoTokenizer.from_pretrained(model_path)

    text = f'''Given a certain paper, Title: {title}\n Abstract: {abstract}. \n Predict its normalized academic impact (between 0 and 1):'''
    inputs = tokenizer(text, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model(**inputs)
    probability = torch.sigmoid(outputs.logits).item()

    # Reason for +0.05: predicted values in the web demo were observed to be
    # roughly 0.05 lower than in a local deployment (attributed to differences
    # in software/hardware environments), so the web demo compensates here.
    # Do not apply this offset in a local deployment.
    return round(min(probability + 0.05, 1.0), 4)
# Example inputs offered in the UI: each row is [title, abstract].
examples = [
    [
        "LoRA-IR: Taming Low-Rank Experts for Efficient All-in-One Image Restoration",
        '''Prompt-based all-in-one image restoration (IR) frameworks have achieved
remarkable performance by incorporating degradation-specific information into
prompt modules. Nevertheless, handling the complex and diverse degradations
encountered in real-world scenarios remains a significant challenge. To address
this challenge, we propose LoRA-IR, a flexible framework that dynamically
leverages compact low-rank experts to facilitate efficient all-in-one image
restoration. Specifically, LoRA-IR consists of two training stages:
degradation-guided pre-training and parameter-efficient fine-tuning. In the
pre-training stage, we enhance the pre-trained CLIP model by introducing a
simple mechanism that scales it to higher resolutions, allowing us to extract
robust degradation representations that adaptively guide the IR network. In the
fine-tuning stage, we refine the pre-trained IR network using low-rank
adaptation (LoRA). Built upon a Mixture-of-Experts (MoE) architecture, LoRA-IR
dynamically integrates multiple low-rank restoration experts through a
degradation-guided router. This dynamic integration mechanism significantly
enhances our model's adaptability to diverse and unknown degradations in
complex real-world scenarios. Extensive experiments demonstrate that LoRA-IR
achieves state-of-the-art performance across 14 image restoration tasks and 29
benchmarks. Code and pre-trained models will be available at:
https://github.com/shallowdream204/LoRA-IR.''',
    ],
    [
        "ConsistentAvatar: Learning to Diffuse Fully Consistent Talking Head Avatar with Temporal Guidance",
        '''Diffusion models have shown impressive potential on talking head generation.
While plausible appearance and talking effect are achieved, these methods still
suffer from temporal, 3D or expression inconsistency due to the error
accumulation and inherent limitation of single-image generation ability. In
this paper, we propose ConsistentAvatar, a novel framework for fully consistent
and high-fidelity talking avatar generation. Instead of directly employing
multi-modal conditions to the diffusion process, our method learns to first
model the temporal representation for stability between adjacent frames.
Specifically, we propose a Temporally-Sensitive Detail (TSD) map containing
high-frequency feature and contours that vary significantly along the time
axis. Using a temporal consistent diffusion module, we learn to align TSD of
the initial result to that of the video frame ground truth. The final avatar is
generated by a fully consistent diffusion module, conditioned on the aligned
TSD, rough head normal, and emotion prompt embedding. We find that the aligned
TSD, which represents the temporal patterns, constrains the diffusion process
to generate temporally stable talking head. Further, its reliable guidance
complements the inaccuracy of other conditions, suppressing the accumulated
error while improving the consistency on various aspects. Extensive experiments
demonstrate that ConsistentAvatar outperforms the state-of-the-art methods on
the generated appearance, 3D, expression and temporal consistency. Project
page: https://njust-yang.github.io/ConsistentAvatar.github.io/''',
    ],
]
# Blacklist pattern: matches any character outside the 7-bit ASCII range.
# Compiled once at import time instead of on every validation call.
_NON_ASCII_PATTERN = re.compile(r'[^\u0000-\u007F]')


def validate_input(title, abstract):
    """Check whether the title and abstract are acceptable for prediction.

    Args:
        title: Paper title text (``None`` is treated as empty).
        abstract: Paper abstract text (``None`` is treated as empty).

    Returns:
        tuple[bool, str]: ``(is_valid, message)``. ``is_valid`` is ``False``
        when the title has fewer than 3 words, the abstract has fewer than 50
        words, or either field contains a non-ASCII character. When the inputs
        are valid but exceed 1024 words in total, ``is_valid`` is ``True`` and
        the message is a truncation warning.
    """
    title = title or ""
    abstract = abstract or ""

    # ``split()`` with no argument splits on any whitespace run, so repeated or
    # trailing spaces are not counted as words and newlines/tabs separate words.
    title_words = len(title.split())
    abstract_words = len(abstract.split())

    # The threshold matches the message: a 3-word title is accepted.
    if title_words < 3:
        return False, "The title must be at least 3 words long."
    if abstract_words < 50:
        return False, "The abstract must be at least 50 words long."

    # Character checks run before the length warning so that an over-long
    # input cannot bypass them through the warning's early ``True`` return.
    if _NON_ASCII_PATTERN.search(title):
        return False, "The title contains invalid characters. Only English letters and special symbols are allowed."
    if _NON_ASCII_PATTERN.search(abstract):
        return False, "The abstract contains invalid characters. Only English letters and special symbols are allowed."

    # Sum the two counts rather than splitting ``title + abstract``, which
    # would fuse the last title word with the first abstract word.
    if title_words + abstract_words > 1024:
        return True, "Warning, The input length is approaching tokenization limits (1024) and may be truncated without further warning!"
    return True, "Inputs are valid! Good to go!"
def update_button_status(title, abstract):
    """Refresh the validation message and the submit button's enabled state.

    Runs ``validate_input`` on the current field values and returns two Gradio
    updates: the status text (prefixed with "Error: " when validation fails)
    and the button's ``interactive`` flag.
    """
    is_valid, message = validate_input(title, abstract)
    status_text = message if is_valid else "Error: " + message
    return gr.update(value=status_text), gr.update(interactive=is_valid)
# Static Markdown rendered at the top and bottom of the page.
_HEADER_MARKDOWN = """
# 🧠 Predict Academic Impact of Newly Published Paper!
### Estimate the future academic impact of a paper using LLM
[Read the full paper](https://arxiv.org/abs/2408.03934)
"""
_NOTES_MARKDOWN = """
**Important Notes**
- It is intended as a tool for research and educational purposes only.
- Predicted impact is a probabilistic value generated by the model and does not reflect paper quality or novelty.
- The author takes no responsibility for the prediction results.
- To identify potentially impactful papers, this study uses the sigmoid+MSE approach to optimize NDCG values (over sigmoid+BCE), resulting in predicted values concentrated between 0.1 and 0.9 due to the sigmoid gradient effect.
- Generally, it is considered a predicted influence score greater than 0.65 to indicate an impactful paper.
"""

# Build the Gradio interface.
# NOTE(review): the Row/Column nesting below is reconstructed from the call
# order; the original indentation was not available -- confirm the layout.
with gr.Blocks() as iface:
    gr.Markdown(_HEADER_MARKDOWN)
    with gr.Row():
        with gr.Column():
            title_input = gr.Textbox(
                lines=2,
                placeholder="Enter Paper Title Here...",
                label="Paper Title"
            )
            abstract_input = gr.Textbox(
                lines=5,
                placeholder="Enter Paper Abstract Here... (Do not input line breaks. No more than 1024 tokens.)",
                label="Paper Abstract"
            )
            validation_status = gr.Textbox(label="Validation Status", interactive=False)
            # The button starts disabled; validation enables it.
            submit_button = gr.Button("Predict Impact", interactive=False)
        with gr.Column():
            output = gr.Label(label="Predicted Impact")

    # Input event bindings: re-validate whenever either text field changes.
    form_fields = [title_input, abstract_input]
    for field in (title_input, abstract_input):
        field.change(
            update_button_status,
            inputs=form_fields,
            outputs=[validation_status, submit_button]
        )

    submit_button.click(
        predict,
        inputs=form_fields,
        outputs=output
    )

    gr.Examples(
        examples=examples,
        inputs=form_fields,
        outputs=[validation_status, output],
        cache_examples=False
    )

    gr.Markdown(_NOTES_MARKDOWN)

iface.launch()