Spaces:

DaVinciCode
/

doctra-document-parser

Sleeping

App Files Files Community

DaVinciCode committed on Oct 12, 2025

Commit

8364708

1 Parent(s): de791d6

modifications done

Browse files

Files changed (2) hide show

app.py +46 -11
requirements.txt +2 -4

app.py CHANGED Viewed

@@ -19,12 +19,30 @@ from typing import Optional, Tuple, List, Dict, Any
 import gradio as gr
 import pandas as pd
-# Import Doctra components
-from doctra.parsers.structured_pdf_parser import StructuredPDFParser
-from doctra.parsers.table_chart_extractor import ChartTablePDFParser
-from doctra.parsers.enhanced_pdf_parser import EnhancedPDFParser
-from doctra.ui.docres_wrapper import DocResUIWrapper
-from doctra.utils.pdf_io import render_pdf_to_images
 # UI Theme and Styling Constants
@@ -208,10 +226,10 @@ def validate_vlm_config(use_vlm: bool, vlm_api_key: str, vlm_provider: str = "ge
     """
     Validate VLM configuration parameters.
     """
-    if use_vlm and vlm_provider != "ollama" and not vlm_api_key:
         return "❌ Error: VLM API key is required when using VLM (except for Ollama)"
-    if use_vlm and vlm_api_key and vlm_provider != "ollama":
         # Basic API key validation
         if len(vlm_api_key.strip()) < 10:
             return "❌ Error: VLM API key appears to be too short or invalid"
@@ -336,6 +354,10 @@ def run_full_parse(
     if not pdf_file:
         return ("No file provided.", None, [], [], "")
     # Validate VLM configuration
     vlm_error = validate_vlm_config(use_vlm, vlm_api_key, vlm_provider)
     if vlm_error:
@@ -424,6 +446,10 @@ def run_extract(
     if not pdf_file:
         return ("No file provided.", "", [], [], "")
     # Validate VLM configuration
     vlm_error = validate_vlm_config(use_vlm, vlm_api_key, vlm_provider)
     if vlm_error:
@@ -549,6 +575,10 @@ def run_docres_restoration(
     if not pdf_file:
         return ("No file provided.", None, None, None, [])
     try:
         # Initialize DocRes engine
         device_str = None if device == "auto" else device
@@ -625,6 +655,10 @@ def run_enhanced_parse(
     if not pdf_file:
         return ("No file provided.", None, [], "", None, None, "")
     # Validate VLM configuration if VLM is enabled
     if use_vlm:
         vlm_error = validate_vlm_config(use_vlm, vlm_api_key, vlm_provider)
@@ -752,6 +786,7 @@ def create_tips_markdown() -> str:
     <li>Use <strong>DocRes Image Restoration</strong> for standalone image enhancement without parsing.</li>
     <li>DocRes tasks: <code>appearance</code> (default), <code>dewarping</code>, <code>deshadowing</code>, <code>deblurring</code>, <code>binarization</code>, <code>end2end</code>.</li>
     <li>Outputs are saved under <code>outputs/&lt;pdf_stem&gt;/</code>.</li>
   </ul>
 </div>
     """
@@ -774,7 +809,7 @@ with gr.Blocks(title="Doctra - Document Parser", theme=THEME, css=CUSTOM_CSS) as
         with gr.Row():
             pdf = gr.File(file_types=[".pdf"], label="PDF")
             use_vlm = gr.Checkbox(label="Use VLM (optional)", value=False)
-            vlm_provider = gr.Dropdown(["gemini", "openai", "anthropic", "openrouter", "ollama"], value="gemini", label="VLM Provider")
             vlm_api_key = gr.Textbox(type="password", label="VLM API Key", placeholder="Optional if VLM disabled")
         with gr.Accordion("Advanced", open=False):
@@ -814,7 +849,7 @@ with gr.Blocks(title="Doctra - Document Parser", theme=THEME, css=CUSTOM_CSS) as
             pdf_e = gr.File(file_types=[".pdf"], label="PDF")
             target = gr.Dropdown(["tables", "charts", "both"], value="both", label="Target")
             use_vlm_e = gr.Checkbox(label="Use VLM (optional)", value=False)
-            vlm_provider_e = gr.Dropdown(["gemini", "openai", "anthropic", "openrouter", "ollama"], value="gemini", label="VLM Provider")
             vlm_api_key_e = gr.Textbox(type="password", label="VLM API Key", placeholder="Optional if VLM disabled")
         with gr.Accordion("Advanced", open=False):
@@ -909,7 +944,7 @@ with gr.Blocks(title="Doctra - Document Parser", theme=THEME, css=CUSTOM_CSS) as
         with gr.Row():
             use_vlm_enhanced = gr.Checkbox(label="Use VLM (optional)", value=False)
-            vlm_provider_enhanced = gr.Dropdown(["gemini", "openai", "anthropic", "openrouter", "ollama"], value="gemini", label="VLM Provider")
             vlm_api_key_enhanced = gr.Textbox(type="password", label="VLM API Key", placeholder="Optional if VLM disabled")
         with gr.Accordion("Advanced Settings", open=False):

 import gradio as gr
 import pandas as pd
+# Mock google.genai to avoid import errors
+import sys
+from unittest.mock import MagicMock
+# Create a mock google.genai module
+mock_google_genai = MagicMock()
+sys.modules['google.genai'] = mock_google_genai
+sys.modules['google.genai.types'] = MagicMock()
+# Now import Doctra components
+try:
+    from doctra.parsers.structured_pdf_parser import StructuredPDFParser
+    from doctra.parsers.table_chart_extractor import ChartTablePDFParser
+    from doctra.parsers.enhanced_pdf_parser import EnhancedPDFParser
+    from doctra.ui.docres_wrapper import DocResUIWrapper
+    from doctra.utils.pdf_io import render_pdf_to_images
+except ImportError as e:
+    print(f"Warning: Some Doctra components may not be available: {e}")
+    # Fall back to None placeholders if imports fail; handlers check for None before use
+    StructuredPDFParser = None
+    ChartTablePDFParser = None
+    EnhancedPDFParser = None
+    DocResUIWrapper = None
+    render_pdf_to_images = None
 # UI Theme and Styling Constants
     """
     Validate VLM configuration parameters.
     """
+    if use_vlm and vlm_provider not in ["ollama"] and not vlm_api_key:
         return "❌ Error: VLM API key is required when using VLM (except for Ollama)"
+    if use_vlm and vlm_api_key and vlm_provider not in ["ollama"]:
         # Basic API key validation
         if len(vlm_api_key.strip()) < 10:
             return "❌ Error: VLM API key appears to be too short or invalid"
     if not pdf_file:
         return ("No file provided.", None, [], [], "")
+    # Check if Doctra components are available
+    if StructuredPDFParser is None:
+        return ("❌ Error: Doctra library not properly installed. Please check the requirements.", None, [], [], "")
     # Validate VLM configuration
     vlm_error = validate_vlm_config(use_vlm, vlm_api_key, vlm_provider)
     if vlm_error:
     if not pdf_file:
         return ("No file provided.", "", [], [], "")
+    # Check if Doctra components are available
+    if ChartTablePDFParser is None:
+        return ("❌ Error: Doctra library not properly installed. Please check the requirements.", "", [], [], "")
     # Validate VLM configuration
     vlm_error = validate_vlm_config(use_vlm, vlm_api_key, vlm_provider)
     if vlm_error:
     if not pdf_file:
         return ("No file provided.", None, None, None, [])
+    # Check if Doctra components are available
+    if DocResUIWrapper is None:
+        return ("❌ Error: Doctra library not properly installed. Please check the requirements.", None, None, None, [])
     try:
         # Initialize DocRes engine
         device_str = None if device == "auto" else device
     if not pdf_file:
         return ("No file provided.", None, [], "", None, None, "")
+    # Check if Doctra components are available
+    if EnhancedPDFParser is None:
+        return ("❌ Error: Doctra library not properly installed. Please check the requirements.", None, [], "", None, None, "")
     # Validate VLM configuration if VLM is enabled
     if use_vlm:
         vlm_error = validate_vlm_config(use_vlm, vlm_api_key, vlm_provider)
     <li>Use <strong>DocRes Image Restoration</strong> for standalone image enhancement without parsing.</li>
     <li>DocRes tasks: <code>appearance</code> (default), <code>dewarping</code>, <code>deshadowing</code>, <code>deblurring</code>, <code>binarization</code>, <code>end2end</code>.</li>
     <li>Outputs are saved under <code>outputs/&lt;pdf_stem&gt;/</code>.</li>
+    <li><strong>Note:</strong> Google Gemini VLM may not be available due to dependency conflicts. Use OpenAI, Anthropic, or other VLM providers.</li>
   </ul>
 </div>
     """
         with gr.Row():
             pdf = gr.File(file_types=[".pdf"], label="PDF")
             use_vlm = gr.Checkbox(label="Use VLM (optional)", value=False)
+            vlm_provider = gr.Dropdown(["openai", "anthropic", "openrouter", "ollama"], value="openai", label="VLM Provider")
             vlm_api_key = gr.Textbox(type="password", label="VLM API Key", placeholder="Optional if VLM disabled")
         with gr.Accordion("Advanced", open=False):
             pdf_e = gr.File(file_types=[".pdf"], label="PDF")
             target = gr.Dropdown(["tables", "charts", "both"], value="both", label="Target")
             use_vlm_e = gr.Checkbox(label="Use VLM (optional)", value=False)
+            vlm_provider_e = gr.Dropdown(["openai", "anthropic", "openrouter", "ollama"], value="openai", label="VLM Provider")
             vlm_api_key_e = gr.Textbox(type="password", label="VLM API Key", placeholder="Optional if VLM disabled")
         with gr.Accordion("Advanced", open=False):
         with gr.Row():
             use_vlm_enhanced = gr.Checkbox(label="Use VLM (optional)", value=False)
+            vlm_provider_enhanced = gr.Dropdown(["openai", "anthropic", "openrouter", "ollama"], value="openai", label="VLM Provider")
             vlm_api_key_enhanced = gr.Textbox(type="password", label="VLM API Key", placeholder="Optional if VLM disabled")
         with gr.Accordion("Advanced Settings", open=False):

requirements.txt CHANGED Viewed

@@ -1,7 +1,5 @@
 # Core dependencies
 gradio>=4.0.0,<5
-# Use older websockets version to avoid conflicts
-websockets==11.0.3
 pandas>=2.0.0
 numpy>=1.21.0
 pillow>=9.0.0
@@ -25,10 +23,10 @@ paddlex>=3.0.0
 openai>=1.0.0
 anthropic>=0.3.0
 google-generativeai>=0.3.0
-google-genai>=1.31.0
 httpx>=0.24.0
-# websockets version already specified above
 # Doctra library (install from source)
 git+https://github.com/AdemBoukhris457/Doctra.git

 # Core dependencies
 gradio>=4.0.0,<5
 pandas>=2.0.0
 numpy>=1.21.0
 pillow>=9.0.0
 openai>=1.0.0
 anthropic>=0.3.0
 google-generativeai>=0.3.0
 httpx>=0.24.0
+# Note: google-genai removed due to websockets conflict with Gradio
+# The app will work with other VLM providers (OpenAI, Anthropic, etc.)
 # Doctra library (install from source)
 git+https://github.com/AdemBoukhris457/Doctra.git