Spaces:
Sleeping
Sleeping
import streamlit as st | |
import pandas as pd | |
from transformers import AutoModelForSequenceClassification, AutoTokenizer | |
import torch | |
from datasets import load_dataset | |
# Load model and tokenizer
model_path = "rb757/new_app"


@st.cache_resource
def _load_model_and_tokenizer(path):
    """Load the sequence-classification model and its tokenizer exactly once.

    Streamlit re-executes this script from top to bottom on every widget
    interaction (selectbox change, button click). Without caching, each rerun
    would instantiate the model and tokenizer again. ``st.cache_resource``
    keeps a single shared instance per distinct ``path`` for the lifetime of
    the server process.

    Args:
        path: Hub repository id or local directory passed to
            ``from_pretrained``.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    return (
        AutoModelForSequenceClassification.from_pretrained(path),
        AutoTokenizer.from_pretrained(path),
    )


# Module-level names are kept so the rest of the script is unaffected.
model, tokenizer = _load_model_and_tokenizer(model_path)
# Load the dataset
@st.cache_resource
def _load_hupd_sample():
    """Load the HUPD 'sample' configuration once per server process.

    Streamlit reruns the whole script on every interaction; caching the
    returned dataset object avoids calling ``load_dataset`` again on each
    rerun.

    Returns:
        The object returned by ``load_dataset``; the script indexes it with
        the ``'train'`` and ``'validation'`` keys.
    """
    return load_dataset(
        'HUPD/hupd',
        name='sample',
        data_files="https://huggingface.co/datasets/HUPD/hupd/resolve/main/hupd_metadata_2022-02-22.feather",
        train_filing_start_date='2016-01-01',
        train_filing_end_date='2016-01-21',
        val_filing_start_date='2016-01-22',
        val_filing_end_date='2016-01-31',
        trust_remote_code=True,
    )


@st.cache_data
def _splits_to_frames(_splits):
    """Convert the train/validation splits to DataFrames, cached across reruns.

    The leading underscore on ``_splits`` tells Streamlit not to hash the
    argument; the conversion is therefore computed once and reused.

    Args:
        _splits: Mapping with ``'train'`` and ``'validation'`` entries.

    Returns:
        A ``(train_df, val_df)`` tuple of pandas DataFrames.
    """
    return pd.DataFrame(_splits['train']), pd.DataFrame(_splits['validation'])


dataset_dict = _load_hupd_sample()

# Convert to DataFrame
train_df, val_df = _splits_to_frames(dataset_dict)

# Print columns to verify availability
print("Train set columns:", train_df.columns.tolist())
print("Validation set columns:", val_df.columns.tolist())
# Render the evaluation page only when the training split exposes a
# 'patent_number' column; otherwise report the problem in the UI.
if 'patent_number' in train_df.columns:
    # Page header.
    # NOTE(review): the leading "π" looks like a mis-decoded emoji — confirm
    # the intended character before changing it.
    st.title("π Milestone Patent Evaluation")
    st.write("Select a patent application to evaluate its patentability.")

    # Offer every distinct patent number from the training split.
    selected_patent = st.selectbox(
        "Select Patent Number",
        train_df['patent_number'].unique(),
    )

    # The first row matching the selection supplies all fields shown below.
    matching_rows = train_df[train_df['patent_number'] == selected_patent]
    record = matching_rows.iloc[0]

    # Free-text fields (also concatenated into the model input).
    title, abstract, claims, background, summary, description = (
        record[column]
        for column in (
            'title', 'abstract', 'claims', 'background', 'summary', 'description',
        )
    )
    # Metadata fields (display only).
    (
        cpc_label,
        ipc_label,
        filing_date,
        patent_issue_date,
        date_published,
        examiner_id,
    ) = (
        record[column]
        for column in (
            'cpc_label', 'ipc_label', 'filing_date',
            'patent_issue_date', 'date_published', 'examiner_id',
        )
    )

    # Title is rendered in bold, followed by a horizontal rule.
    st.markdown("### Title")
    st.markdown(f"**{title}**")
    st.markdown("---")

    # Long text sections: heading, text area, then a horizontal rule.
    long_sections = (
        ("### Abstract", "Abstract", abstract),
        ("### Claims", "Claims", claims),
        ("### Background", "Background", background),
        ("### Summary", "Summary", summary),
        ("### Description", "Description", description),
    )
    for heading, label, body in long_sections:
        st.markdown(heading)
        st.text_area(label, body, height=150)
        st.markdown("---")

    # Short metadata sections: heading followed by the value in bold.
    short_sections = (
        ("### CPC Label", cpc_label),
        ("### IPC Label", ipc_label),
        ("### Filing Date", filing_date),
        ("### Patent Issue Date", patent_issue_date),
        ("### Date Published", date_published),
        ("### Examiner ID", examiner_id),
    )
    for heading, value in short_sections:
        st.markdown(heading)
        st.markdown(f"**{value}**")

    # Run the classifier only when the user asks for a score.
    if st.button("Get Patentability Score"):
        # All free-text fields are joined with single spaces into one input.
        combined_text = f"{title} {abstract} {claims} {background} {summary} {description}"
        encoded = tokenizer(combined_text, return_tensors="pt", truncation=True, padding=True)

        # Inference only: no gradients are needed.
        with torch.no_grad():
            class_index = model(**encoded).logits.argmax(dim=-1).item()

        # The predicted class index selects the label shown to the user.
        decision_labels = ['REJECTED', 'ACCEPTED', 'PENDING', 'CONT-REJECTED', 'CONT-ACCEPTED', 'CONT-PENDING']
        st.success(f"Patentability Score: **{decision_labels[class_index]}**")
else:
    st.error("Column 'patent_number' not found in the training dataset.")