Update app.py
app.py
@@ -28,6 +28,7 @@ os.environ["TOKENIZERS_PARALLELISM"] = "false"
 #os.system('pip install -q pytesseract')
 #os.system('conda install -c conda-forge poppler')
 import streamlit as st
+st.set_page_config(page_title="Anomaly_Detection_Tool", layout="wide", initial_sidebar_state="expanded")
 import torch
 from transformers import AutoTokenizer, AutoModelWithLMHead, GPT2LMHeadModel
 import docx2txt
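Note: `st.set_page_config()` only works as the first Streamlit command the script executes, which is why the call is placed directly after `import streamlit as st` and before `st.title()` or any widget; calling it later raises a `StreamlitAPIException`. A minimal sketch of the same pattern (the page title below is a placeholder, not taken from the app):

    import streamlit as st

    # must be the first st.* call in the script, and may only be called once
    st.set_page_config(page_title="Demo", layout="wide", initial_sidebar_state="expanded")

    st.title("Hello")  # everything after this renders into the wide layout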
@@ -51,7 +52,8 @@ import line_cor
 import altair as alt
 #pytesseract.pytesseract.tesseract_cmd = r"./Tesseract-OCR/tesseract.exe"
 from PIL import Image
-
+#@st.experimental_singleton
+@st.cache_resource(experimental_allow_widgets=True)
 def read_pdf(file):
     # images=pdf2image.convert_from_path(file)
     # # print(type(images))
@@ -85,20 +87,23 @@ def read_pdf(file):
     # all_page_text += text + " " #page.extractText()
     # return all_page_text
 st.title("NLP APPLICATION")
-
+#@st.experimental_singleton
+@st.cache_resource(experimental_allow_widgets=True)
 def text_analyzer(my_text):
     nlp = spacy.load('en_core_web_sm')
     docx = nlp(my_text)
     # tokens = [ token.text for token in docx]
     allData = [('"Token":{},\n"Lemma":{}'.format(token.text,token.lemma_))for token in docx ]
     return allData
-
+#@st.experimental_singleton
+@st.cache_resource(experimental_allow_widgets=True)
 def load_models():
     tokenizer = AutoTokenizer.from_pretrained('gpt2-large')
     model = GPT2LMHeadModel.from_pretrained('gpt2-large')
     return tokenizer, model
 # Function For Extracting Entities
-
+#@st.experimental_singleton
+@st.cache_resource(experimental_allow_widgets=True)
 def entity_analyzer(my_text):
     nlp = spacy.load('en_core_web_sm')
     docx = nlp(my_text)
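Note: `@st.cache_resource` is the stable replacement for the deprecated `@st.experimental_singleton`, which is why the old decorator survives only as a comment above each function. It caches the decorated function's return value across reruns and sessions, keyed by the function's arguments, so `load_models()` loads GPT-2 once per process while `text_analyzer()`/`entity_analyzer()` keep one cached result per distinct input text; `experimental_allow_widgets=True` additionally permits widget calls inside the cached body. A minimal sketch of the caching behaviour, reusing the model names from this diff:

    import streamlit as st
    from transformers import AutoTokenizer, GPT2LMHeadModel

    @st.cache_resource  # body runs once; later calls return the same objects
    def load_models():
        tokenizer = AutoTokenizer.from_pretrained('gpt2-large')
        model = GPT2LMHeadModel.from_pretrained('gpt2-large')
        return tokenizer, model

    tokenizer, model = load_models()  # no reload on subsequent reruns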
@@ -164,15 +169,20 @@ def main():
         #img = cv2.imread("scholarly_text.jpg")
         text = message
         if st.checkbox("Show Named Entities English/Bangla"):
+            st.cache_data.clear()
             entity_result = entity_analyzer(text)
             st.json(entity_result)
+
         if st.checkbox("Show Sentiment Analysis for English"):
+            st.cache_data.clear()
             blob = TextBlob(text)
             result_sentiment = blob.sentiment
             st.success(result_sentiment)
         if st.checkbox("Spell Corrections for English"):
+            st.cache_data.clear()
             st.success(TextBlob(text).correct())
         if st.checkbox("Text Generation"):
+            st.cache_data.clear()
             tokenizer, model = load_models()
             input_ids = tokenizer(text, return_tensors='pt').input_ids
             st.text("Using Hugging Face Transformer, Contrastive Search ..")
@@ -187,6 +197,7 @@ def main():
         # st.success(summary_result)
         if st.checkbox("Mark to English Text Summarization!"):
             #st.title("Summarize Your Text for English only!")
+            st.cache_data.clear()
             tokenizer = AutoTokenizer.from_pretrained('t5-base')
             model = AutoModelWithLMHead.from_pretrained('t5-base', return_dict=True)
             #st.text("Using Google T5 Transformer ..")
@@ -198,6 +209,7 @@ def main():
             summary = tokenizer.decode(summary_ids[0])
             st.success(summary)
         if st.button("refresh"):
+            st.cache_data.clear()
             st.experimental_rerun()
 if __name__ == '__main__':
     main()
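Note: `st.cache_data.clear()` wipes every entry created by `@st.cache_data`-decorated functions; it does not touch caches created with `@st.cache_resource` (as used above), which are cleared separately via `st.cache_resource.clear()`. A minimal sketch of clearing both alongside the refresh button added in this commit:

    import streamlit as st

    if st.button("refresh"):
        st.cache_data.clear()      # drop all @st.cache_data entries
        st.cache_resource.clear()  # drop all @st.cache_resource entries (models, pipelines)
        st.experimental_rerun()    # rerun the script from the top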