Spaces:

rb757
/

new_patent_app

Sleeping

App Files Files Community

new_patent_app / app.py

rb757

Add Streamlit app for patentability score prediction

b6898af 8 months ago

raw

history blame

4 kB

	import streamlit as st
	import pandas as pd
	from transformers import AutoModelForSequenceClassification, AutoTokenizer
	import torch
	from datasets import load_dataset

	# Load model and tokenizer
	model_path = "rb757/new_app"


	@st.cache_resource
	def _load_model_and_tokenizer(path):
	    """Load the sequence-classification model and its tokenizer from ``path``.

	    Streamlit re-executes this script from the top on every widget
	    interaction. ``st.cache_resource`` keeps a single shared copy of the
	    model objects alive across those reruns instead of rebuilding them
	    each time a user changes the selection or presses the button.

	    Args:
	        path: Model identifier or directory handed to ``from_pretrained``.

	    Returns:
	        A ``(model, tokenizer)`` tuple.
	    """
	    return (
	        AutoModelForSequenceClassification.from_pretrained(path),
	        AutoTokenizer.from_pretrained(path),
	    )


	model, tokenizer = _load_model_and_tokenizer(model_path)

	# Load the dataset
	@st.cache_resource
	def _load_hupd_sample():
	    """Load the HUPD 'sample' configuration and convert its splits to DataFrames.

	    The train split covers filings from 2016-01-01 to 2016-01-21 and the
	    validation split covers 2016-01-22 to 2016-01-31. Loading and converting
	    is cached because Streamlit reruns the whole script on every widget
	    interaction; without the cache each click would repeat this work.

	    The cached objects are shared between reruns, so callers must treat the
	    returned DataFrames as read-only.

	    Returns:
	        A ``(dataset_dict, train_df, val_df)`` tuple.
	    """
	    # NOTE(review): trust_remote_code=True allows the dataset repository's
	    # loading script to run locally -- confirm the source is trusted.
	    splits = load_dataset(
	        'HUPD/hupd',
	        name='sample',
	        data_files="https://huggingface.co/datasets/HUPD/hupd/resolve/main/hupd_metadata_2022-02-22.feather",
	        train_filing_start_date='2016-01-01',
	        train_filing_end_date='2016-01-21',
	        val_filing_start_date='2016-01-22',
	        val_filing_end_date='2016-01-31',
	        trust_remote_code=True,
	    )

	    # Convert to DataFrame
	    return splits, pd.DataFrame(splits['train']), pd.DataFrame(splits['validation'])


	dataset_dict, train_df, val_df = _load_hupd_sample()

	# Print columns to verify availability
	print("Train set columns:", train_df.columns.tolist())
	print("Validation set columns:", val_df.columns.tolist())

	# Long free-text columns rendered in text areas: (heading and widget label, column).
	_TEXT_FIELDS = (
	    ("Abstract", "abstract"),
	    ("Claims", "claims"),
	    ("Background", "background"),
	    ("Summary", "summary"),
	    ("Description", "description"),
	)

	# Short metadata columns rendered as plain markdown: (heading, column).
	_META_FIELDS = (
	    ("CPC Label", "cpc_label"),
	    ("IPC Label", "ipc_label"),
	    ("Filing Date", "filing_date"),
	    ("Patent Issue Date", "patent_issue_date"),
	    ("Date Published", "date_published"),
	    ("Examiner ID", "examiner_id"),
	)

	# Label shown for each class index returned by the classifier's argmax.
	_DECISION_LABELS = ('REJECTED', 'ACCEPTED', 'PENDING', 'CONT-REJECTED', 'CONT-ACCEPTED', 'CONT-PENDING')


	def _show_patent(record):
	    """Render one application's fields on the page.

	    Args:
	        record: Row of the training DataFrame, indexable by column name.
	    """
	    st.markdown("### Title")
	    st.markdown(f"{record['title']}")
	    st.markdown("---")

	    # Each long text section is followed by a horizontal rule.
	    for heading, column in _TEXT_FIELDS:
	        st.markdown(f"### {heading}")
	        st.text_area(heading, record[column], height=150)
	        st.markdown("---")

	    # Metadata entries are listed back to back without separators.
	    for heading, column in _META_FIELDS:
	        st.markdown(f"### {heading}")
	        st.markdown(f"{record[column]}")


	def _show_prediction(record):
	    """Classify the application text with the model and display the decision label.

	    Args:
	        record: Row of the training DataFrame, indexable by column name.
	    """
	    # Prepare the input text: title followed by the long text sections,
	    # separated by single spaces. The tokenizer is asked to truncate.
	    pieces = [f"{record['title']}"]
	    pieces.extend(f"{record[column]}" for _, column in _TEXT_FIELDS)
	    input_text = " ".join(pieces)
	    inputs = tokenizer(input_text, return_tensors="pt", truncation=True, padding=True)

	    # Get the model prediction; no gradients are needed for inference.
	    with torch.no_grad():
	        logits = model(**inputs).logits
	    label_index = int(torch.argmax(logits, dim=-1).item())

	    # Display the patentability score. A checkpoint with more classes than
	    # the label table would otherwise raise IndexError here.
	    if label_index < len(_DECISION_LABELS):
	        st.success(f"Patentability Score: {_DECISION_LABELS[label_index]}")
	    else:
	        st.error(f"Model returned an unknown class index: {label_index}")


	# Check if 'patent_number' exists
	if 'patent_number' not in train_df.columns:
	    st.error("Column 'patent_number' not found in the training dataset.")
	else:
	    # Title and description
	    st.title("📜 Milestone Patent Evaluation")
	    st.write("Select a patent application to evaluate its patentability.")

	    # Dropdown for patent numbers
	    patent_numbers = train_df['patent_number'].unique()
	    selected_patent = st.selectbox("Select Patent Number", patent_numbers)

	    # Retrieve relevant information. The match can be empty (for example
	    # an empty dataset, or a missing value that never compares equal), in
	    # which case taking the first row would raise IndexError.
	    matches = train_df[train_df['patent_number'] == selected_patent]
	    if matches.empty:
	        st.warning("No application found for the selected patent number.")
	    else:
	        patent_info = matches.iloc[0]

	        # Display the information
	        _show_patent(patent_info)

	        # Submit button
	        if st.button("Get Patentability Score"):
	            _show_prediction(patent_info)