DaVinciCode committed on
Commit
8364708
·
1 Parent(s): de791d6

modifications done

Browse files
Files changed (2) hide show
  1. app.py +46 -11
  2. requirements.txt +2 -4
app.py CHANGED
@@ -19,12 +19,30 @@ from typing import Optional, Tuple, List, Dict, Any
19
  import gradio as gr
20
  import pandas as pd
21
 
22
- # Import Doctra components
23
- from doctra.parsers.structured_pdf_parser import StructuredPDFParser
24
- from doctra.parsers.table_chart_extractor import ChartTablePDFParser
25
- from doctra.parsers.enhanced_pdf_parser import EnhancedPDFParser
26
- from doctra.ui.docres_wrapper import DocResUIWrapper
27
- from doctra.utils.pdf_io import render_pdf_to_images
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
 
30
  # UI Theme and Styling Constants
@@ -208,10 +226,10 @@ def validate_vlm_config(use_vlm: bool, vlm_api_key: str, vlm_provider: str = "ge
208
  """
209
  Validate VLM configuration parameters.
210
  """
211
- if use_vlm and vlm_provider != "ollama" and not vlm_api_key:
212
  return "❌ Error: VLM API key is required when using VLM (except for Ollama)"
213
 
214
- if use_vlm and vlm_api_key and vlm_provider != "ollama":
215
  # Basic API key validation
216
  if len(vlm_api_key.strip()) < 10:
217
  return "❌ Error: VLM API key appears to be too short or invalid"
@@ -336,6 +354,10 @@ def run_full_parse(
336
  if not pdf_file:
337
  return ("No file provided.", None, [], [], "")
338
 
 
 
 
 
339
  # Validate VLM configuration
340
  vlm_error = validate_vlm_config(use_vlm, vlm_api_key, vlm_provider)
341
  if vlm_error:
@@ -424,6 +446,10 @@ def run_extract(
424
  if not pdf_file:
425
  return ("No file provided.", "", [], [], "")
426
 
 
 
 
 
427
  # Validate VLM configuration
428
  vlm_error = validate_vlm_config(use_vlm, vlm_api_key, vlm_provider)
429
  if vlm_error:
@@ -549,6 +575,10 @@ def run_docres_restoration(
549
  if not pdf_file:
550
  return ("No file provided.", None, None, None, [])
551
 
 
 
 
 
552
  try:
553
  # Initialize DocRes engine
554
  device_str = None if device == "auto" else device
@@ -625,6 +655,10 @@ def run_enhanced_parse(
625
  if not pdf_file:
626
  return ("No file provided.", None, [], "", None, None, "")
627
 
 
 
 
 
628
  # Validate VLM configuration if VLM is enabled
629
  if use_vlm:
630
  vlm_error = validate_vlm_config(use_vlm, vlm_api_key, vlm_provider)
@@ -752,6 +786,7 @@ def create_tips_markdown() -> str:
752
  <li>Use <strong>DocRes Image Restoration</strong> for standalone image enhancement without parsing.</li>
753
  <li>DocRes tasks: <code>appearance</code> (default), <code>dewarping</code>, <code>deshadowing</code>, <code>deblurring</code>, <code>binarization</code>, <code>end2end</code>.</li>
754
  <li>Outputs are saved under <code>outputs/&lt;pdf_stem&gt;/</code>.</li>
 
755
  </ul>
756
  </div>
757
  """
@@ -774,7 +809,7 @@ with gr.Blocks(title="Doctra - Document Parser", theme=THEME, css=CUSTOM_CSS) as
774
  with gr.Row():
775
  pdf = gr.File(file_types=[".pdf"], label="PDF")
776
  use_vlm = gr.Checkbox(label="Use VLM (optional)", value=False)
777
- vlm_provider = gr.Dropdown(["gemini", "openai", "anthropic", "openrouter", "ollama"], value="gemini", label="VLM Provider")
778
  vlm_api_key = gr.Textbox(type="password", label="VLM API Key", placeholder="Optional if VLM disabled")
779
 
780
  with gr.Accordion("Advanced", open=False):
@@ -814,7 +849,7 @@ with gr.Blocks(title="Doctra - Document Parser", theme=THEME, css=CUSTOM_CSS) as
814
  pdf_e = gr.File(file_types=[".pdf"], label="PDF")
815
  target = gr.Dropdown(["tables", "charts", "both"], value="both", label="Target")
816
  use_vlm_e = gr.Checkbox(label="Use VLM (optional)", value=False)
817
- vlm_provider_e = gr.Dropdown(["gemini", "openai", "anthropic", "openrouter", "ollama"], value="gemini", label="VLM Provider")
818
  vlm_api_key_e = gr.Textbox(type="password", label="VLM API Key", placeholder="Optional if VLM disabled")
819
 
820
  with gr.Accordion("Advanced", open=False):
@@ -909,7 +944,7 @@ with gr.Blocks(title="Doctra - Document Parser", theme=THEME, css=CUSTOM_CSS) as
909
 
910
  with gr.Row():
911
  use_vlm_enhanced = gr.Checkbox(label="Use VLM (optional)", value=False)
912
- vlm_provider_enhanced = gr.Dropdown(["gemini", "openai", "anthropic", "openrouter", "ollama"], value="gemini", label="VLM Provider")
913
  vlm_api_key_enhanced = gr.Textbox(type="password", label="VLM API Key", placeholder="Optional if VLM disabled")
914
 
915
  with gr.Accordion("Advanced Settings", open=False):
 
19
  import gradio as gr
20
  import pandas as pd
21
 
22
+ # Mock google.genai to avoid import errors
23
+ import sys
24
+ from unittest.mock import MagicMock
25
+
26
+ # Create a mock google.genai module
27
+ mock_google_genai = MagicMock()
28
+ sys.modules['google.genai'] = mock_google_genai
29
+ sys.modules['google.genai.types'] = MagicMock()
30
+
31
+ # Now import Doctra components
32
+ try:
33
+ from doctra.parsers.structured_pdf_parser import StructuredPDFParser
34
+ from doctra.parsers.table_chart_extractor import ChartTablePDFParser
35
+ from doctra.parsers.enhanced_pdf_parser import EnhancedPDFParser
36
+ from doctra.ui.docres_wrapper import DocResUIWrapper
37
+ from doctra.utils.pdf_io import render_pdf_to_images
38
+ except ImportError as e:
39
+ print(f"Warning: Some Doctra components may not be available: {e}")
40
+ # Fall back to None placeholders if imports fail; handlers check for None before use
41
+ StructuredPDFParser = None
42
+ ChartTablePDFParser = None
43
+ EnhancedPDFParser = None
44
+ DocResUIWrapper = None
45
+ render_pdf_to_images = None
46
 
47
 
48
  # UI Theme and Styling Constants
 
226
  """
227
  Validate VLM configuration parameters.
228
  """
229
+ if use_vlm and vlm_provider not in ["ollama"] and not vlm_api_key:
230
  return "❌ Error: VLM API key is required when using VLM (except for Ollama)"
231
 
232
+ if use_vlm and vlm_api_key and vlm_provider not in ["ollama"]:
233
  # Basic API key validation
234
  if len(vlm_api_key.strip()) < 10:
235
  return "❌ Error: VLM API key appears to be too short or invalid"
 
354
  if not pdf_file:
355
  return ("No file provided.", None, [], [], "")
356
 
357
+ # Check if Doctra components are available
358
+ if StructuredPDFParser is None:
359
+ return ("❌ Error: Doctra library not properly installed. Please check the requirements.", None, [], [], "")
360
+
361
  # Validate VLM configuration
362
  vlm_error = validate_vlm_config(use_vlm, vlm_api_key, vlm_provider)
363
  if vlm_error:
 
446
  if not pdf_file:
447
  return ("No file provided.", "", [], [], "")
448
 
449
+ # Check if Doctra components are available
450
+ if ChartTablePDFParser is None:
451
+ return ("❌ Error: Doctra library not properly installed. Please check the requirements.", "", [], [], "")
452
+
453
  # Validate VLM configuration
454
  vlm_error = validate_vlm_config(use_vlm, vlm_api_key, vlm_provider)
455
  if vlm_error:
 
575
  if not pdf_file:
576
  return ("No file provided.", None, None, None, [])
577
 
578
+ # Check if Doctra components are available
579
+ if DocResUIWrapper is None:
580
+ return ("❌ Error: Doctra library not properly installed. Please check the requirements.", None, None, None, [])
581
+
582
  try:
583
  # Initialize DocRes engine
584
  device_str = None if device == "auto" else device
 
655
  if not pdf_file:
656
  return ("No file provided.", None, [], "", None, None, "")
657
 
658
+ # Check if Doctra components are available
659
+ if EnhancedPDFParser is None:
660
+ return ("❌ Error: Doctra library not properly installed. Please check the requirements.", None, [], "", None, None, "")
661
+
662
  # Validate VLM configuration if VLM is enabled
663
  if use_vlm:
664
  vlm_error = validate_vlm_config(use_vlm, vlm_api_key, vlm_provider)
 
786
  <li>Use <strong>DocRes Image Restoration</strong> for standalone image enhancement without parsing.</li>
787
  <li>DocRes tasks: <code>appearance</code> (default), <code>dewarping</code>, <code>deshadowing</code>, <code>deblurring</code>, <code>binarization</code>, <code>end2end</code>.</li>
788
  <li>Outputs are saved under <code>outputs/&lt;pdf_stem&gt;/</code>.</li>
789
+ <li><strong>Note:</strong> Google Gemini VLM may not be available due to dependency conflicts. Use OpenAI, Anthropic, or other VLM providers.</li>
790
  </ul>
791
  </div>
792
  """
 
809
  with gr.Row():
810
  pdf = gr.File(file_types=[".pdf"], label="PDF")
811
  use_vlm = gr.Checkbox(label="Use VLM (optional)", value=False)
812
+ vlm_provider = gr.Dropdown(["openai", "anthropic", "openrouter", "ollama"], value="openai", label="VLM Provider")
813
  vlm_api_key = gr.Textbox(type="password", label="VLM API Key", placeholder="Optional if VLM disabled")
814
 
815
  with gr.Accordion("Advanced", open=False):
 
849
  pdf_e = gr.File(file_types=[".pdf"], label="PDF")
850
  target = gr.Dropdown(["tables", "charts", "both"], value="both", label="Target")
851
  use_vlm_e = gr.Checkbox(label="Use VLM (optional)", value=False)
852
+ vlm_provider_e = gr.Dropdown(["openai", "anthropic", "openrouter", "ollama"], value="openai", label="VLM Provider")
853
  vlm_api_key_e = gr.Textbox(type="password", label="VLM API Key", placeholder="Optional if VLM disabled")
854
 
855
  with gr.Accordion("Advanced", open=False):
 
944
 
945
  with gr.Row():
946
  use_vlm_enhanced = gr.Checkbox(label="Use VLM (optional)", value=False)
947
+ vlm_provider_enhanced = gr.Dropdown(["openai", "anthropic", "openrouter", "ollama"], value="openai", label="VLM Provider")
948
  vlm_api_key_enhanced = gr.Textbox(type="password", label="VLM API Key", placeholder="Optional if VLM disabled")
949
 
950
  with gr.Accordion("Advanced Settings", open=False):
requirements.txt CHANGED
@@ -1,7 +1,5 @@
1
  # Core dependencies
2
  gradio>=4.0.0,<5
3
- # Use older websockets version to avoid conflicts
4
- websockets==11.0.3
5
  pandas>=2.0.0
6
  numpy>=1.21.0
7
  pillow>=9.0.0
@@ -25,10 +23,10 @@ paddlex>=3.0.0
25
  openai>=1.0.0
26
  anthropic>=0.3.0
27
  google-generativeai>=0.3.0
28
- google-genai>=1.31.0
29
  httpx>=0.24.0
30
 
31
- # websockets version already specified above
 
32
 
33
  # Doctra library (install from source)
34
  git+https://github.com/AdemBoukhris457/Doctra.git
 
1
  # Core dependencies
2
  gradio>=4.0.0,<5
 
 
3
  pandas>=2.0.0
4
  numpy>=1.21.0
5
  pillow>=9.0.0
 
23
  openai>=1.0.0
24
  anthropic>=0.3.0
25
  google-generativeai>=0.3.0
 
26
  httpx>=0.24.0
27
 
28
+ # Note: google-genai removed due to websockets conflict with Gradio
29
+ # The app will work with other VLM providers (OpenAI, Anthropic, etc.)
30
 
31
  # Doctra library (install from source)
32
  git+https://github.com/AdemBoukhris457/Doctra.git