import random
import re

import gradio as gr
import numpy as np
import torch
from keybert import KeyBERT
from transformers import T5ForConditionalGeneration, T5Tokenizer

DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
MAX_LEN = 512

# t5-base tokenizer paired with a T5 checkpoint fine-tuned for question
# generation; KeyBERT (over a DistilBERT sentence encoder) scores keyphrases.
tokenizer = T5Tokenizer.from_pretrained('t5-base')
model = T5ForConditionalGeneration.from_pretrained('Vaibhavbrkn/question-gen')
mod = KeyBERT('distilbert-base-nli-mean-tokens')
model.to(DEVICE)
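
# The question-generation checkpoint is prompted with strings of the form
#   "context: <passage> keyword: <keyphrase>"
# (this is how generate_question below builds the model inputs).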

# Sample passage for manual testing; the Gradio UI supplies its own `context`.
context = "The Transgender Persons Bill, 2016 was hurriedly passed in the Lok Sabha, amid much outcry from the very community it claims to protect."


def filter_keyword(data, ran=5):
    """Return up to `ran` deduplicated (keyphrase, score) pairs for `data`."""
    ap = []
    real = []
    res = re.sub(r'-', ' ', data)
    res = re.sub(r'[^\w\s\.\,]', '', res)
    # Collect keyphrase candidates of 1 to 3 tokens.
    for i in range(1, 4):
        ap.append(mod.extract_keywords(
            res, keyphrase_ngram_range=(1, i), diversity=0.7, top_n=ran * 2))
    # Keep only candidates that literally occur in the cleaned text.
    for i in range(3):
        for j in range(len(ap[i])):
            if ap[i][j][0].lower() in res.lower():
                real.append(ap[i][j])
    real = sorted(real, key=lambda x: x[1], reverse=True)
    # Deduplicate: skip any phrase already contained in an accepted phrase.
    ap = []
    st = ""
    for i in range(len(real)):
        if real[i][0] in st:
            continue
        ap.append(real[i])
        st += real[i][0] + " "
        if len(ap) == ran:
            break
    return ap
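
# Illustrative usage (not executed here): the return value keeps KeyBERT's
# (keyphrase, score) tuples, so e.g. filter_keyword(context, ran=3) would
# yield up to three phrases with their relevance scores; the exact phrases
# and scores depend on the embedding model.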


def generate_question(context, keyword):
    """Generate one question for (context, keyword) with the fine-tuned T5 model."""
    inputs = "context: " + context + " keyword: " + keyword
    source_tokenizer = tokenizer.encode_plus(
        inputs, max_length=512, padding="max_length", truncation=True,
        return_tensors="pt")
    outs = model.generate(
        input_ids=source_tokenizer['input_ids'].to(DEVICE),
        attention_mask=source_tokenizer['attention_mask'].to(DEVICE),
        max_length=50)
    dec = [tokenizer.decode(ids) for ids in outs][0]
    # Strip the special tokens T5 emits around the generated question.
    return dec.replace("<pad> ", "").replace("</s>", "")


# "Bad" label for questions whose keyword has a non-positive score
# (an alternative policy would be to mark the bottom 3).
def func(context, slide):
    slide = int(slide)
    randomness = 0.4
    # The first `orig` questions use the top-scored keywords; the remaining
    # `temp` questions use keywords sampled at random from the leftovers.
    orig = int(np.ceil(randomness * slide))
    temp = slide - orig
    ap = filter_keyword(context, ran=slide * 2)
    outputs = []
    print(slide)
    print(orig)
    print(ap)
    for i in range(min(orig, len(ap))):  # guard against too few keywords
        st = generate_question(context, ap[i][0])
        if ap[i][1] > 0.0:
            outputs.append((st, "Good"))
        else:
            outputs.append((st, "Bad"))
    del ap[:orig]
    print("first", outputs)
    print(temp)
    if temp > 0:
        for i in range(temp):
            if not ap:  # safety guard: nothing left to sample from
                break
            keyword = random.choice(ap)
            st = generate_question(context, keyword[0])
            if keyword[1] > 0.0:
                outputs.append((st, "Good"))
            else:
                outputs.append((st, "Bad"))
    print("second", outputs)
    return outputs
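
# Illustrative only: `func` returns a list of (question, "Good"/"Bad") pairs,
# which is the shape the KeyValues output component below expects; e.g.
# func(context, 2) generates two questions, each tagged from its keyword's score.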

# Gradio UI; this uses the legacy gr.inputs / gr.outputs API from older
# Gradio releases.
gr.Interface(func,
             [
                 gr.inputs.Textbox(lines=10, label="context"),
                 gr.inputs.Slider(minimum=1, maximum=5,
                                  default=1, label="No. of Questions"),
             ],
             gr.outputs.KeyValues()).launch()