Spaces:
Runtime error
Runtime error
Commit
·
b1fe96d
1
Parent(s):
164142a
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,7 +1,8 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
|
| 3 |
from datasets import load_dataset
|
| 4 |
-
|
|
|
|
| 5 |
dataset = load_dataset('HUPD/hupd',
|
| 6 |
name='sample',
|
| 7 |
data_files="https://huggingface.co/datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather",
|
|
@@ -11,7 +12,9 @@ dataset = load_dataset('HUPD/hupd',
|
|
| 11 |
val_filing_start_date='2016-01-30',
|
| 12 |
val_filing_end_date='2016-01-31',
|
| 13 |
)
|
|
|
|
| 14 |
exclude_pending = dataset.filter(lambda example: example["decision"] != "PENDING")
|
|
|
|
| 15 |
data = exclude_pending["validation"]
|
| 16 |
p_number = data["patent_number"]
|
| 17 |
p_abstract = data["abstract"]
|
|
@@ -20,7 +23,7 @@ p_decision = data["decision"]
|
|
| 20 |
abstract = ""
|
| 21 |
claims = ""
|
| 22 |
decision = ""
|
| 23 |
-
|
| 24 |
st.title("Patentability Score")
|
| 25 |
st.write("Select a patent ID and click submit to get patentability score.")
|
| 26 |
selected_id = st.selectbox("Patent ID:", p_number, index=len(p_number)-1)
|
|
@@ -28,26 +31,26 @@ selected_index = p_number.index(selected_id)
|
|
| 28 |
abstract = p_abstract[selected_index]
|
| 29 |
claims = p_claims[selected_index]
|
| 30 |
decision = p_decision[selected_index]
|
| 31 |
-
|
| 32 |
st.text_area("Abstract:", abstract)
|
| 33 |
st.text_area("Claims:", claims)
|
| 34 |
-
|
| 35 |
def get_pipeline():
    """Return a classification pipeline backed by the project-uspto checkpoint.

    The sequence-classification model and its tokenizer are both fetched
    from the "moonahhyun/project-uspto" repository and handed to a
    "sentiment-analysis" pipeline, which is returned to the caller.
    """
    repo_id = "moonahhyun/project-uspto"
    classifier_model = AutoModelForSequenceClassification.from_pretrained(repo_id)
    classifier_tokenizer = AutoTokenizer.from_pretrained(repo_id)
    return pipeline(
        "sentiment-analysis",
        model=classifier_model,
        tokenizer=classifier_tokenizer,
    )
|
| 40 |
-
|
| 41 |
-
# Load the model and perform sentiment analysis
|
| 42 |
if st.button("Submit"):
|
| 43 |
-
with st.spinner("Analyzing the
|
| 44 |
pl = get_pipeline()
|
| 45 |
result = pl(abstract)
|
| 46 |
label = result[0]['label']
|
| 47 |
score = result[0]['score']
|
|
|
|
| 48 |
if label == "ACCEPTED":
|
| 49 |
st.write(f"Patentability Score: {score}")
|
| 50 |
st.write(f"Higher % of being: {label}")
|
|
|
|
| 51 |
else:
|
| 52 |
score = 1 - score
|
| 53 |
st.write(f"Patentability Score: {score}")
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
|
| 3 |
from datasets import load_dataset
|
| 4 |
+
### Load HUPD dataset
|
| 5 |
+
# Sample a small subset of the dataset that corresponds to all patent applications submitted in Jan 2016.
|
| 6 |
dataset = load_dataset('HUPD/hupd',
|
| 7 |
name='sample',
|
| 8 |
data_files="https://huggingface.co/datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather",
|
|
|
|
| 12 |
val_filing_start_date='2016-01-30',
|
| 13 |
val_filing_end_date='2016-01-31',
|
| 14 |
)
|
| 15 |
+
### Exclude pending applications (also removed in finetuned model)
|
| 16 |
exclude_pending = dataset.filter(lambda example: example["decision"] != "PENDING")
|
| 17 |
+
### Extract necessary data lists
|
| 18 |
data = exclude_pending["validation"]
|
| 19 |
p_number = data["patent_number"]
|
| 20 |
p_abstract = data["abstract"]
|
|
|
|
| 23 |
abstract = ""
|
| 24 |
claims = ""
|
| 25 |
decision = ""
|
| 26 |
+
### Streamlit app
|
| 27 |
st.title("Patentability Score")
|
| 28 |
st.write("Select a patent ID and click submit to get patentability score.")
|
| 29 |
selected_id = st.selectbox("Patent ID:", p_number, index=len(p_number)-1)
|
|
|
|
| 31 |
abstract = p_abstract[selected_index]
|
| 32 |
claims = p_claims[selected_index]
|
| 33 |
decision = p_decision[selected_index]
|
|
|
|
| 34 |
st.text_area("Abstract:", abstract)
|
| 35 |
st.text_area("Claims:", claims)
|
| 36 |
+
### Load model and tokenizer
|
| 37 |
def get_pipeline():
    """Build the text-classification pipeline used to score an abstract.

    Loads the sequence-classification model and the tokenizer from the
    "moonahhyun/project-uspto" checkpoint, then wraps both in a
    "sentiment-analysis" pipeline.

    Returns:
        The pipeline object created from that model and tokenizer.
    """
    checkpoint = "moonahhyun/project-uspto"
    loaded_model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
    loaded_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    classifier = pipeline(
        "sentiment-analysis",
        model=loaded_model,
        tokenizer=loaded_tokenizer,
    )
    return classifier
|
| 42 |
+
### Perform patentability analysis
|
|
|
|
| 43 |
if st.button("Submit"):
|
| 44 |
+
with st.spinner("Analyzing the patentability..."):
|
| 45 |
pl = get_pipeline()
|
| 46 |
result = pl(abstract)
|
| 47 |
label = result[0]['label']
|
| 48 |
score = result[0]['score']
|
| 49 |
+
# Print score when label = accepted
|
| 50 |
if label == "ACCEPTED":
|
| 51 |
st.write(f"Patentability Score: {score}")
|
| 52 |
st.write(f"Higher % of being: {label}")
|
| 53 |
+
# Print 1 - score when label = rejected
|
| 54 |
else:
|
| 55 |
score = 1 - score
|
| 56 |
st.write(f"Patentability Score: {score}")
|