| import gradio as gr | |
| from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Text2TextGenerationPipeline | |
# Hub checkpoint that supplies both the seq2seq weights and the tokenizer.
_checkpoint = "jpwahle/t5-large-word-sense-disambiguation"

# Load the weights first, then the tokenizer, and wrap both in a
# text-to-text generation pipeline used by the request handler below.
_wsd_model = AutoModelForSeq2SeqLM.from_pretrained(_checkpoint)
_wsd_tokenizer = AutoTokenizer.from_pretrained(_checkpoint)
pipe = Text2TextGenerationPipeline(model=_wsd_model, tokenizer=_wsd_tokenizer)
def wsd_gen(word: str, context: str, d1: str, d2: str, d3: str) -> str:
    """Return the description that the model selects for ``word`` in ``context``.

    Builds a single prompt from the target word, the three candidate
    descriptions and the context sentence, passes it to the module-level
    ``pipe`` and returns the generated text.

    Args:
        word: The word to disambiguate.
        context: The sentence in which the word is used.
        d1: First candidate description.
        d2: Second candidate description.
        d3: Third candidate description.

    Returns:
        The ``generated_text`` field of the first result produced by ``pipe``.
    """
    # Prompt order: question -> candidate descriptions -> context.
    # Compared with the earlier string concatenation this drops the doubled
    # "question: " prefix, the stray backslash before "descriptions" (an
    # invalid "\d" escape), and the dangling apostrophe after the context,
    # and it quotes all three descriptions with the same spacing.
    # NOTE(review): layout is meant to mirror the prompt shown on the model
    # card for this checkpoint -- confirm against the card.
    prompt = (
        f'question: which description describes the word " {word} " '
        f'best in the following context? '
        f'descriptions:[ " {d1} " , " {d2} " , or " {d3} " ] '
        f'context: {context}'
    )
    # The pipeline returns a list with one dict per input; only one was sent.
    return pipe(prompt)[0]['generated_text']
# Pre-filled demo rows. Column order matches the handler's parameters:
# word, context, description 1, description 2, description 3.
examples = [
    [
        "beat",
        'The underdog team "beat" the reigning champion.',
        " A main accent or rhythmic unit in music or poetry. ",
        " To strike repeatedly and violently so as to hurt or injure.",
        " To defeat (someone) in a game or other competitive situation. ",
    ],
    [
        "shell",
        'The first "shell" exploded in mid air taking out an enemy plane.',
        "The hard protective outer case of a mollusk or crustacean.",
        "An explosive artillery projectile or bomb.",
        "Something resembling or likened to a shell because of its shape or its function as an outer case.",
    ],
]
# Single-line text inputs for the demo form: the target word, its context
# sentence, and three candidate descriptions.
word_mask = gr.Textbox(
    lines=1,
    placeholder="Enter word to disambiguate",
    label="Based on the context, which description best matches this word: ",
)
input_context = gr.Textbox(
    lines=1,
    placeholder="Enter context",
    label="context: ",
)
# The three description boxes differ only in the number shown in the label.
input_desc1, input_desc2, input_desc3 = (
    gr.Textbox(
        lines=1,
        placeholder="Enter description",
        label=f"description {slot}: ",
    )
    for slot in (1, 2, 3)
)
# Texts shown on the demo page: a short blurb under the title and a longer
# article below the form.
_description = "Determines which 'sense' (meaning) of a word is activated by the use of the word in a particular context given three different descriptions."
_article = "This is an implementation of Google's T5-large model applied to Word Sense Disambiguation (WSD) and trained on the SemCor dataset. the SemCor dataset is a corpus made up of 352 documents for a total of 226,040 manually sense-annotated annotations used specifically used to train supervised WSD systems. The model used in this spaces was uploaded by Jan Philip Wahle (jpelhaw) in huggingface."

# Wire the handler to the five text inputs and a single text output.
demo = gr.Interface(
    fn=wsd_gen,
    inputs=[word_mask, input_context, input_desc1, input_desc2, input_desc3],
    outputs="textbox",
    examples=examples,
    title="T5-Word Sense Disambiguation",
    description=_description,
    theme="seafoam",
    article=_article,
    allow_flagging="never",
)

# Start the web app and open it in the default browser.
demo.launch(inbrowser=True)