Spaces:
Sleeping
Sleeping
Commit
·
8364708
1
Parent(s):
de791d6
modifications done
Browse files
- app.py +46 -11
- requirements.txt +2 -4
app.py
CHANGED
|
@@ -19,12 +19,30 @@ from typing import Optional, Tuple, List, Dict, Any
|
|
| 19 |
import gradio as gr
|
| 20 |
import pandas as pd
|
| 21 |
|
| 22 |
-
#
|
| 23 |
-
|
| 24 |
-
from
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
|
| 30 |
# UI Theme and Styling Constants
|
|
@@ -208,10 +226,10 @@ def validate_vlm_config(use_vlm: bool, vlm_api_key: str, vlm_provider: str = "ge
|
|
| 208 |
"""
|
| 209 |
Validate VLM configuration parameters.
|
| 210 |
"""
|
| 211 |
-
if use_vlm and vlm_provider
|
| 212 |
return "❌ Error: VLM API key is required when using VLM (except for Ollama)"
|
| 213 |
|
| 214 |
-
if use_vlm and vlm_api_key and vlm_provider
|
| 215 |
# Basic API key validation
|
| 216 |
if len(vlm_api_key.strip()) < 10:
|
| 217 |
return "❌ Error: VLM API key appears to be too short or invalid"
|
|
@@ -336,6 +354,10 @@ def run_full_parse(
|
|
| 336 |
if not pdf_file:
|
| 337 |
return ("No file provided.", None, [], [], "")
|
| 338 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 339 |
# Validate VLM configuration
|
| 340 |
vlm_error = validate_vlm_config(use_vlm, vlm_api_key, vlm_provider)
|
| 341 |
if vlm_error:
|
|
@@ -424,6 +446,10 @@ def run_extract(
|
|
| 424 |
if not pdf_file:
|
| 425 |
return ("No file provided.", "", [], [], "")
|
| 426 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 427 |
# Validate VLM configuration
|
| 428 |
vlm_error = validate_vlm_config(use_vlm, vlm_api_key, vlm_provider)
|
| 429 |
if vlm_error:
|
|
@@ -549,6 +575,10 @@ def run_docres_restoration(
|
|
| 549 |
if not pdf_file:
|
| 550 |
return ("No file provided.", None, None, None, [])
|
| 551 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 552 |
try:
|
| 553 |
# Initialize DocRes engine
|
| 554 |
device_str = None if device == "auto" else device
|
|
@@ -625,6 +655,10 @@ def run_enhanced_parse(
|
|
| 625 |
if not pdf_file:
|
| 626 |
return ("No file provided.", None, [], "", None, None, "")
|
| 627 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 628 |
# Validate VLM configuration if VLM is enabled
|
| 629 |
if use_vlm:
|
| 630 |
vlm_error = validate_vlm_config(use_vlm, vlm_api_key, vlm_provider)
|
|
@@ -752,6 +786,7 @@ def create_tips_markdown() -> str:
|
|
| 752 |
<li>Use <strong>DocRes Image Restoration</strong> for standalone image enhancement without parsing.</li>
|
| 753 |
<li>DocRes tasks: <code>appearance</code> (default), <code>dewarping</code>, <code>deshadowing</code>, <code>deblurring</code>, <code>binarization</code>, <code>end2end</code>.</li>
|
| 754 |
<li>Outputs are saved under <code>outputs/<pdf_stem>/</code>.</li>
|
|
|
|
| 755 |
</ul>
|
| 756 |
</div>
|
| 757 |
"""
|
|
@@ -774,7 +809,7 @@ with gr.Blocks(title="Doctra - Document Parser", theme=THEME, css=CUSTOM_CSS) as
|
|
| 774 |
with gr.Row():
|
| 775 |
pdf = gr.File(file_types=[".pdf"], label="PDF")
|
| 776 |
use_vlm = gr.Checkbox(label="Use VLM (optional)", value=False)
|
| 777 |
-
vlm_provider = gr.Dropdown(["
|
| 778 |
vlm_api_key = gr.Textbox(type="password", label="VLM API Key", placeholder="Optional if VLM disabled")
|
| 779 |
|
| 780 |
with gr.Accordion("Advanced", open=False):
|
|
@@ -814,7 +849,7 @@ with gr.Blocks(title="Doctra - Document Parser", theme=THEME, css=CUSTOM_CSS) as
|
|
| 814 |
pdf_e = gr.File(file_types=[".pdf"], label="PDF")
|
| 815 |
target = gr.Dropdown(["tables", "charts", "both"], value="both", label="Target")
|
| 816 |
use_vlm_e = gr.Checkbox(label="Use VLM (optional)", value=False)
|
| 817 |
-
vlm_provider_e = gr.Dropdown(["
|
| 818 |
vlm_api_key_e = gr.Textbox(type="password", label="VLM API Key", placeholder="Optional if VLM disabled")
|
| 819 |
|
| 820 |
with gr.Accordion("Advanced", open=False):
|
|
@@ -909,7 +944,7 @@ with gr.Blocks(title="Doctra - Document Parser", theme=THEME, css=CUSTOM_CSS) as
|
|
| 909 |
|
| 910 |
with gr.Row():
|
| 911 |
use_vlm_enhanced = gr.Checkbox(label="Use VLM (optional)", value=False)
|
| 912 |
-
vlm_provider_enhanced = gr.Dropdown(["
|
| 913 |
vlm_api_key_enhanced = gr.Textbox(type="password", label="VLM API Key", placeholder="Optional if VLM disabled")
|
| 914 |
|
| 915 |
with gr.Accordion("Advanced Settings", open=False):
|
|
|
|
| 19 |
import gradio as gr
|
| 20 |
import pandas as pd
|
| 21 |
|
| 22 |
+
# Mock google.genai to avoid import errors
|
| 23 |
+
import sys
|
| 24 |
+
from unittest.mock import MagicMock
|
| 25 |
+
|
| 26 |
+
# Create a mock google.genai module
|
| 27 |
+
mock_google_genai = MagicMock()
|
| 28 |
+
sys.modules['google.genai'] = mock_google_genai
|
| 29 |
+
sys.modules['google.genai.types'] = MagicMock()
|
| 30 |
+
|
| 31 |
+
# Now import Doctra components
|
| 32 |
+
try:
|
| 33 |
+
from doctra.parsers.structured_pdf_parser import StructuredPDFParser
|
| 34 |
+
from doctra.parsers.table_chart_extractor import ChartTablePDFParser
|
| 35 |
+
from doctra.parsers.enhanced_pdf_parser import EnhancedPDFParser
|
| 36 |
+
from doctra.ui.docres_wrapper import DocResUIWrapper
|
| 37 |
+
from doctra.utils.pdf_io import render_pdf_to_images
|
| 38 |
+
except ImportError as e:
|
| 39 |
+
print(f"Warning: Some Doctra components may not be available: {e}")
|
| 40 |
+
# Create mock classes if imports fail
|
| 41 |
+
StructuredPDFParser = None
|
| 42 |
+
ChartTablePDFParser = None
|
| 43 |
+
EnhancedPDFParser = None
|
| 44 |
+
DocResUIWrapper = None
|
| 45 |
+
render_pdf_to_images = None
|
| 46 |
|
| 47 |
|
| 48 |
# UI Theme and Styling Constants
|
|
|
|
| 226 |
"""
|
| 227 |
Validate VLM configuration parameters.
|
| 228 |
"""
|
| 229 |
+
if use_vlm and vlm_provider not in ["ollama"] and not vlm_api_key:
|
| 230 |
return "❌ Error: VLM API key is required when using VLM (except for Ollama)"
|
| 231 |
|
| 232 |
+
if use_vlm and vlm_api_key and vlm_provider not in ["ollama"]:
|
| 233 |
# Basic API key validation
|
| 234 |
if len(vlm_api_key.strip()) < 10:
|
| 235 |
return "❌ Error: VLM API key appears to be too short or invalid"
|
|
|
|
| 354 |
if not pdf_file:
|
| 355 |
return ("No file provided.", None, [], [], "")
|
| 356 |
|
| 357 |
+
# Check if Doctra components are available
|
| 358 |
+
if StructuredPDFParser is None:
|
| 359 |
+
return ("❌ Error: Doctra library not properly installed. Please check the requirements.", None, [], [], "")
|
| 360 |
+
|
| 361 |
# Validate VLM configuration
|
| 362 |
vlm_error = validate_vlm_config(use_vlm, vlm_api_key, vlm_provider)
|
| 363 |
if vlm_error:
|
|
|
|
| 446 |
if not pdf_file:
|
| 447 |
return ("No file provided.", "", [], [], "")
|
| 448 |
|
| 449 |
+
# Check if Doctra components are available
|
| 450 |
+
if ChartTablePDFParser is None:
|
| 451 |
+
return ("❌ Error: Doctra library not properly installed. Please check the requirements.", "", [], [], "")
|
| 452 |
+
|
| 453 |
# Validate VLM configuration
|
| 454 |
vlm_error = validate_vlm_config(use_vlm, vlm_api_key, vlm_provider)
|
| 455 |
if vlm_error:
|
|
|
|
| 575 |
if not pdf_file:
|
| 576 |
return ("No file provided.", None, None, None, [])
|
| 577 |
|
| 578 |
+
# Check if Doctra components are available
|
| 579 |
+
if DocResUIWrapper is None:
|
| 580 |
+
return ("❌ Error: Doctra library not properly installed. Please check the requirements.", None, None, None, [])
|
| 581 |
+
|
| 582 |
try:
|
| 583 |
# Initialize DocRes engine
|
| 584 |
device_str = None if device == "auto" else device
|
|
|
|
| 655 |
if not pdf_file:
|
| 656 |
return ("No file provided.", None, [], "", None, None, "")
|
| 657 |
|
| 658 |
+
# Check if Doctra components are available
|
| 659 |
+
if EnhancedPDFParser is None:
|
| 660 |
+
return ("❌ Error: Doctra library not properly installed. Please check the requirements.", None, [], "", None, None, "")
|
| 661 |
+
|
| 662 |
# Validate VLM configuration if VLM is enabled
|
| 663 |
if use_vlm:
|
| 664 |
vlm_error = validate_vlm_config(use_vlm, vlm_api_key, vlm_provider)
|
|
|
|
| 786 |
<li>Use <strong>DocRes Image Restoration</strong> for standalone image enhancement without parsing.</li>
|
| 787 |
<li>DocRes tasks: <code>appearance</code> (default), <code>dewarping</code>, <code>deshadowing</code>, <code>deblurring</code>, <code>binarization</code>, <code>end2end</code>.</li>
|
| 788 |
<li>Outputs are saved under <code>outputs/<pdf_stem>/</code>.</li>
|
| 789 |
+
<li><strong>Note:</strong> Google Gemini VLM may not be available due to dependency conflicts. Use OpenAI, Anthropic, or other VLM providers.</li>
|
| 790 |
</ul>
|
| 791 |
</div>
|
| 792 |
"""
|
|
|
|
| 809 |
with gr.Row():
|
| 810 |
pdf = gr.File(file_types=[".pdf"], label="PDF")
|
| 811 |
use_vlm = gr.Checkbox(label="Use VLM (optional)", value=False)
|
| 812 |
+
vlm_provider = gr.Dropdown(["openai", "anthropic", "openrouter", "ollama"], value="openai", label="VLM Provider")
|
| 813 |
vlm_api_key = gr.Textbox(type="password", label="VLM API Key", placeholder="Optional if VLM disabled")
|
| 814 |
|
| 815 |
with gr.Accordion("Advanced", open=False):
|
|
|
|
| 849 |
pdf_e = gr.File(file_types=[".pdf"], label="PDF")
|
| 850 |
target = gr.Dropdown(["tables", "charts", "both"], value="both", label="Target")
|
| 851 |
use_vlm_e = gr.Checkbox(label="Use VLM (optional)", value=False)
|
| 852 |
+
vlm_provider_e = gr.Dropdown(["openai", "anthropic", "openrouter", "ollama"], value="openai", label="VLM Provider")
|
| 853 |
vlm_api_key_e = gr.Textbox(type="password", label="VLM API Key", placeholder="Optional if VLM disabled")
|
| 854 |
|
| 855 |
with gr.Accordion("Advanced", open=False):
|
|
|
|
| 944 |
|
| 945 |
with gr.Row():
|
| 946 |
use_vlm_enhanced = gr.Checkbox(label="Use VLM (optional)", value=False)
|
| 947 |
+
vlm_provider_enhanced = gr.Dropdown(["openai", "anthropic", "openrouter", "ollama"], value="openai", label="VLM Provider")
|
| 948 |
vlm_api_key_enhanced = gr.Textbox(type="password", label="VLM API Key", placeholder="Optional if VLM disabled")
|
| 949 |
|
| 950 |
with gr.Accordion("Advanced Settings", open=False):
|
requirements.txt
CHANGED
|
@@ -1,7 +1,5 @@
|
|
| 1 |
# Core dependencies
|
| 2 |
gradio>=4.0.0,<5
|
| 3 |
-
# Use older websockets version to avoid conflicts
|
| 4 |
-
websockets==11.0.3
|
| 5 |
pandas>=2.0.0
|
| 6 |
numpy>=1.21.0
|
| 7 |
pillow>=9.0.0
|
|
@@ -25,10 +23,10 @@ paddlex>=3.0.0
|
|
| 25 |
openai>=1.0.0
|
| 26 |
anthropic>=0.3.0
|
| 27 |
google-generativeai>=0.3.0
|
| 28 |
-
google-genai>=1.31.0
|
| 29 |
httpx>=0.24.0
|
| 30 |
|
| 31 |
-
# websockets
|
|
|
|
| 32 |
|
| 33 |
# Doctra library (install from source)
|
| 34 |
git+https://github.com/AdemBoukhris457/Doctra.git
|
|
|
|
| 1 |
# Core dependencies
|
| 2 |
gradio>=4.0.0,<5
|
|
|
|
|
|
|
| 3 |
pandas>=2.0.0
|
| 4 |
numpy>=1.21.0
|
| 5 |
pillow>=9.0.0
|
|
|
|
| 23 |
openai>=1.0.0
|
| 24 |
anthropic>=0.3.0
|
| 25 |
google-generativeai>=0.3.0
|
|
|
|
| 26 |
httpx>=0.24.0
|
| 27 |
|
| 28 |
+
# Note: google-genai removed due to websockets conflict with Gradio
|
| 29 |
+
# The app will work with other VLM providers (OpenAI, Anthropic, etc.)
|
| 30 |
|
| 31 |
# Doctra library (install from source)
|
| 32 |
git+https://github.com/AdemBoukhris457/Doctra.git
|