Update app.py
Browse files
app.py
CHANGED
|
@@ -40,16 +40,26 @@ headers1 = {"Authorization": "Bearer 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"}
|
|
| 40 |
API_URL2 = "https://api-inference.huggingface.co/models/gpt2"
|
| 41 |
headers2 = {"Authorization": "Bearer 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"}
|
| 42 |
|
| 43 |
-
def read_pdf(file):
|
| 44 |
-
# images=pdf2image.convert_from_path(file)
|
| 45 |
-
# # print(type(images))
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
|
| 54 |
def engsum(output):
|
| 55 |
def query(payload):
|
|
@@ -90,7 +100,7 @@ def main():
|
|
| 90 |
#file = uploaded_photo.read() # Read the data
|
| 91 |
#image_result = open(uploaded_photo.name, 'wb') # creates a writable image and later we can write the decoded result
|
| 92 |
#image_result.write(file)
|
| 93 |
-
tet =
|
| 94 |
#tet = pytesseract.image_to_string(img, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(img)
|
| 95 |
values = st.slider('Select a approximate number of lines to see and summarize',value=[0, len(tet)//(7*100)])
|
| 96 |
text = tet[values[0]*7*10:values[1]*7*100] if values[0]!=len(tet)//(7*100) else tet[len(tet)//(7*100):]
|
|
|
|
| 40 |
API_URL2 = "https://api-inference.huggingface.co/models/gpt2"
|
| 41 |
headers2 = {"Authorization": "Bearer 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"}
|
| 42 |
|
| 43 |
+
# def read_pdf(file):
|
| 44 |
+
# # images=pdf2image.convert_from_path(file)
|
| 45 |
+
# # # print(type(images))
|
| 46 |
+
# pdfReader = PdfFileReader(file)
|
| 47 |
+
# count = pdfReader.numPages
|
| 48 |
+
# all_page_text = " "
|
| 49 |
+
# for i in range(count):
|
| 50 |
+
# page = pdfReader.getPage(i)
|
| 51 |
+
# all_page_text += page.extractText()+" "
|
| 52 |
+
# return all_page_text
|
| 53 |
+
def read_pdf_with_pdfplumber(file):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        file: Passed straight to ``pdfplumber.open``. The caller in this
            file hands over the uploaded file object directly
            (presumably a Streamlit upload -- TODO confirm).

    Returns:
        str: The text of all pages concatenated in page order, with no
        separator inserted between pages. A page that yields no text
        contributes nothing; a PDF with no pages gives an empty string.
    """
    # The context manager closes the PDF once the text has been collected.
    with pdfplumber.open(file) as pdf:
        # extract_text() may return None for a page with no text layer
        # (e.g. a scanned image) on some pdfplumber versions; the previous
        # `+=` accumulation raised TypeError in that case. Falling back to
        # "" keeps the result unchanged for pages that do contain text.
        return "".join(page.extract_text() or "" for page in pdf.pages)
|
| 63 |
|
| 64 |
def engsum(output):
|
| 65 |
def query(payload):
|
|
|
|
| 100 |
#file = uploaded_photo.read() # Read the data
|
| 101 |
#image_result = open(uploaded_photo.name, 'wb') # creates a writable image and later we can write the decoded result
|
| 102 |
#image_result.write(file)
|
| 103 |
+
tet = read_pdf_with_pdfplumber(uploaded_photo)
|
| 104 |
#tet = pytesseract.image_to_string(img, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(img)
|
| 105 |
values = st.slider('Select a approximate number of lines to see and summarize',value=[0, len(tet)//(7*100)])
|
| 106 |
text = tet[values[0]*7*10:values[1]*7*100] if values[0]!=len(tet)//(7*100) else tet[len(tet)//(7*100):]
|