Update app.py
app.py
@@ -28,6 +28,7 @@ os.environ["TOKENIZERS_PARALLELISM"] = "false"
 #os.system('pip install -q pytesseract')
 #os.system('conda install -c conda-forge poppler')
 import streamlit as st
+st.set_page_config(page_title="Anomaly_Detection_Tool", layout="wide", initial_sidebar_state="expanded")
 import torch
 from transformers import AutoTokenizer, AutoModelWithLMHead, GPT2LMHeadModel
 import docx2txt
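Note: `st.set_page_config()` only works as the first Streamlit command the script executes, which is why the call is placed directly after `import streamlit as st` and before `st.title()` or any widget; calling it later raises a `StreamlitAPIException`. A minimal sketch of the same pattern (the page title below is a placeholder, not taken from the app):

    import streamlit as st

    # must be the first st.* call in the script, and may only be called once
    st.set_page_config(page_title="Demo", layout="wide", initial_sidebar_state="expanded")

    st.title("Hello")  # everything after this renders into the wide layout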
@@ -51,7 +52,8 @@ import line_cor
 import altair as alt
 #pytesseract.pytesseract.tesseract_cmd = r"./Tesseract-OCR/tesseract.exe"
 from PIL import Image
-
+#@st.experimental_singleton
+@st.cache_resource(experimental_allow_widgets=True)
 def read_pdf(file):
     # images=pdf2image.convert_from_path(file)
     # # print(type(images))
@@ -85,20 +87,23 @@ def read_pdf(file):
     # all_page_text += text + " " #page.extractText()
     # return all_page_text
 st.title("NLP APPLICATION")
-
+#@st.experimental_singleton
+@st.cache_resource(experimental_allow_widgets=True)
 def text_analyzer(my_text):
     nlp = spacy.load('en_core_web_sm')
     docx = nlp(my_text)
     # tokens = [ token.text for token in docx]
     allData = [('"Token":{},\n"Lemma":{}'.format(token.text,token.lemma_))for token in docx ]
     return allData
-
+#@st.experimental_singleton
+@st.cache_resource(experimental_allow_widgets=True)
 def load_models():
     tokenizer = AutoTokenizer.from_pretrained('gpt2-large')
     model = GPT2LMHeadModel.from_pretrained('gpt2-large')
     return tokenizer, model
 # Function For Extracting Entities
-
+#@st.experimental_singleton
+@st.cache_resource(experimental_allow_widgets=True)
 def entity_analyzer(my_text):
     nlp = spacy.load('en_core_web_sm')
     docx = nlp(my_text)
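Note: `@st.cache_resource` is the stable replacement for the deprecated `@st.experimental_singleton`, which is why the old decorator survives only as a comment above each function. It caches the decorated function's return value across reruns and sessions, keyed by the function's arguments, so `load_models()` loads GPT-2 once per process while `text_analyzer()`/`entity_analyzer()` keep one cached result per distinct input text; `experimental_allow_widgets=True` additionally permits widget calls inside the cached body. A minimal sketch of the caching behaviour, reusing the model names from this diff:

    import streamlit as st
    from transformers import AutoTokenizer, GPT2LMHeadModel

    @st.cache_resource  # body runs once; later calls return the same objects
    def load_models():
        tokenizer = AutoTokenizer.from_pretrained('gpt2-large')
        model = GPT2LMHeadModel.from_pretrained('gpt2-large')
        return tokenizer, model

    tokenizer, model = load_models()  # no reload on subsequent reruns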
@@ -164,15 +169,20 @@ def main():
         #img = cv2.imread("scholarly_text.jpg")
         text = message
         if st.checkbox("Show Named Entities English/Bangla"):
+            st.cache_data.clear()
             entity_result = entity_analyzer(text)
             st.json(entity_result)
+
         if st.checkbox("Show Sentiment Analysis for English"):
+            st.cache_data.clear()
             blob = TextBlob(text)
             result_sentiment = blob.sentiment
             st.success(result_sentiment)
         if st.checkbox("Spell Corrections for English"):
+            st.cache_data.clear()
             st.success(TextBlob(text).correct())
         if st.checkbox("Text Generation"):
+            st.cache_data.clear()
             tokenizer, model = load_models()
             input_ids = tokenizer(text, return_tensors='pt').input_ids
             st.text("Using Hugging Face Transformer, Contrastive Search ..")
@@ -187,6 +197,7 @@ def main():
         # st.success(summary_result)
         if st.checkbox("Mark to English Text Summarization!"):
             #st.title("Summarize Your Text for English only!")
+            st.cache_data.clear()
             tokenizer = AutoTokenizer.from_pretrained('t5-base')
             model = AutoModelWithLMHead.from_pretrained('t5-base', return_dict=True)
             #st.text("Using Google T5 Transformer ..")
@@ -198,6 +209,7 @@ def main():
             summary = tokenizer.decode(summary_ids[0])
             st.success(summary)
         if st.button("refresh"):
+            st.cache_data.clear()
             st.experimental_rerun()
 if __name__ == '__main__':
     main()
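Note: `st.cache_data.clear()` wipes every entry created by `@st.cache_data`-decorated functions; it does not touch caches created with `@st.cache_resource` (as used above), which are cleared separately via `st.cache_resource.clear()`. A minimal sketch of clearing both alongside the refresh button added in this commit:

    import streamlit as st

    if st.button("refresh"):
        st.cache_data.clear()      # drop all @st.cache_data entries
        st.cache_resource.clear()  # drop all @st.cache_resource entries (models, pipelines)
        st.experimental_rerun()    # rerun the script from the top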