Spaces:
Runtime error
Runtime error
| import streamlit as st | |
| from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification | |
| from datasets import load_dataset | |
### Load HUPD dataset
# Use the small 'sample' configuration. The training window is the single day
# 2016-01-01 and the validation window runs from 2016-01-30 to 2016-01-31.
hupd_options = dict(
    name='sample',
    data_files="https://huggingface.co/datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather",
    icpr_label=None,
    train_filing_start_date='2016-01-01',
    train_filing_end_date='2016-01-01',
    val_filing_start_date='2016-01-30',
    val_filing_end_date='2016-01-31',
)
dataset = load_dataset('HUPD/hupd', **hupd_options)


### Exclude pending applications (also removed in finetuned model)
def _has_final_decision(example):
    """Return True when the example's decision is anything other than PENDING."""
    return example["decision"] != "PENDING"


exclude_pending = dataset.filter(_has_final_decision)
### Extract necessary data lists
data = exclude_pending["validation"]
# Pull each column the app needs out of the validation split, in one pass
# over the column names.
p_number, p_abstract, p_claims, p_decision = (
    data[column]
    for column in ("patent_number", "abstract", "claims", "decision")
)
# Start with empty text; these hold the fields of the selected application.
abstract = claims = decision = ""
### Streamlit app
st.title("Patentability Score")
st.write("Select a patent ID and click submit to get patentability score.")

# With no rows there is nothing to select, and the default index below would
# be -1, which is not a valid select-box index. Stop the script run instead.
if not p_number:
    st.error("No patent applications are available to select.")
    st.stop()

# Select by row position rather than by patent number. Looking the chosen
# number up again with list.index() returns the first match only, so rows
# sharing the same patent number would all show the first row's abstract,
# claims and decision. format_func keeps the patent number as the label.
selected_index = st.selectbox(
    "Patent ID:",
    list(range(len(p_number))),
    index=len(p_number) - 1,
    format_func=lambda row: str(p_number[row]),
)
selected_id = p_number[selected_index]
abstract = p_abstract[selected_index]
claims = p_claims[selected_index]
decision = p_decision[selected_index]

# Show the text of the selected application.
st.text_area("Abstract:", abstract)
st.text_area("Claims:", claims)
### Load model and tokenizer
# Streamlit re-executes this script on every interaction, so an uncached
# loader rebuilds the model and tokenizer each time "Submit" is clicked.
# Use st.cache_resource when this Streamlit version provides it; otherwise
# fall back to the legacy st.cache API (only evaluated if needed).
_cache_loader = getattr(st, "cache_resource", None) or st.cache(allow_output_mutation=True)


@_cache_loader
def get_pipeline():
    """Build the classification pipeline for the project-uspto checkpoint.

    Returns:
        A transformers pipeline created with the "sentiment-analysis" task,
        using the model and tokenizer loaded from "moonahhyun/project-uspto".
    """
    checkpoint = "moonahhyun/project-uspto"
    model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    return pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
### Perform patentability analysis
if st.button("Submit"):
    with st.spinner("Analyzing the patentability..."):
        pl = get_pipeline()
        # Truncate over-long text to the tokenizer's maximum length so a long
        # abstract does not fail at inference time.
        result = pl(abstract, truncation=True)
        label = result[0]['label']
        score = result[0]['score']
        # The reported score belongs to the predicted label. Show it as-is
        # for ACCEPTED and as its complement (1 - score) for any other label.
        if label != "ACCEPTED":
            score = 1 - score
        st.write(f"Patentability Score: {score}")
        st.write(f"Higher % of being: {label}")
        st.write(f"Actual Decision: {decision}")
else:
    st.write("Click 'Submit' for patentability score.")