"""
Hugging Face Models Interface for OpenManus Gradio App
Advanced UI for accessing and using HuggingFace models
"""

import base64
import io
from typing import Dict, Optional, Tuple

import gradio as gr
import PIL.Image

from app.agent.huggingface_agent import HuggingFaceAgent
from app.huggingface_models import HuggingFaceModels, ModelCategory

# Language codes offered by the language dropdowns. Kept as tuples so every
# dropdown receives its own list copy.
_TARGET_LANGUAGES = ("en", "es", "fr", "de", "it", "pt", "ru", "ja", "ko", "zh")
_SOURCE_LANGUAGES = ("auto",) + _TARGET_LANGUAGES


class HuggingFaceModelsInterface:
    """Gradio interface for HuggingFace models integration.

    The ``_create_*_interface`` methods build one tab each and must be called
    inside an active ``gr.Blocks`` context (``create_interface`` does this).
    The async ``_generate_*`` / ``_transcribe_*`` / ``_translate_*`` style
    methods are the event handlers wired to the buttons; they delegate to the
    ``HuggingFaceAgent`` and convert failures into UI-friendly values instead
    of raising.
    """

    def __init__(self, hf_agent: HuggingFaceAgent):
        """Store the agent used for inference calls and load the model catalog."""
        self.hf_agent = hf_agent
        self.models = HuggingFaceModels()

    @staticmethod
    def _is_blank(text: Optional[str]) -> bool:
        """Return True when *text* is None, empty, or whitespace only."""
        return not text or not text.strip()

    def create_interface(self) -> gr.Blocks:
        """Create the HuggingFace models Gradio interface.

        Returns:
            The ``gr.Blocks`` container holding one tab per feature area.
        """
        with gr.Blocks(title="🤗 HuggingFace Models") as interface:
            gr.Markdown("# 🤗 HuggingFace Models Integration")
            gr.Markdown(
                "Access thousands of state-of-the-art AI models via HuggingFace Inference API"
            )

            with gr.Tabs():
                # Core AI Tabs
                with gr.TabItem("📝 Text Generation"):
                    self._create_text_generation_interface()
                with gr.TabItem("🎨 Image Generation"):
                    self._create_image_generation_interface()
                with gr.TabItem("🎵 Audio Processing"):
                    self._create_audio_processing_interface()
                with gr.TabItem("👁️ Image Analysis"):
                    self._create_image_analysis_interface()
                with gr.TabItem("🔍 Text Analysis"):
                    self._create_text_analysis_interface()

                # New Advanced Tabs
                with gr.TabItem("🎬 Video Generation"):
                    self._create_video_generation_interface()
                with gr.TabItem("💻 Code Generation"):
                    self._create_code_generation_interface()
                with gr.TabItem("🧊 3D Creation"):
                    self._create_3d_generation_interface()
                with gr.TabItem("📄 Document Processing"):
                    self._create_document_processing_interface()
                with gr.TabItem("🔗 Multimodal AI"):
                    self._create_multimodal_interface()
                with gr.TabItem("🎭 Creative Content"):
                    self._create_creative_content_interface()
                with gr.TabItem("🎮 Game Development"):
                    self._create_game_development_interface()
                # Label emoji restored: it was a mis-encoded replacement
                # character; 🔬 matches the heading inside the tab.
                with gr.TabItem("🔬 Science & Research"):
                    self._create_science_research_interface()
                with gr.TabItem("💼 Business Tools"):
                    self._create_business_tools_interface()
                with gr.TabItem("🗂️ Model Browser"):
                    self._create_model_browser_interface()

        return interface

    def _create_text_generation_interface(self):
        """Create text generation interface and wire it to ``_generate_text``."""
        gr.Markdown("## Generate text with powerful language models")

        with gr.Row():
            with gr.Column():
                text_model_dropdown = gr.Dropdown(
                    choices=[
                        model.name for model in self.models.TEXT_GENERATION_MODELS
                    ],
                    value="MiniMax-M2",
                    label="Text Generation Model",
                    info="Choose from the latest and most powerful language models",
                )
                text_prompt = gr.Textbox(
                    label="Prompt",
                    placeholder="Enter your text prompt here...",
                    lines=4,
                    max_lines=10,
                )
                with gr.Row():
                    text_max_tokens = gr.Slider(
                        minimum=10, maximum=2048, value=200, step=10, label="Max Tokens"
                    )
                    text_temperature = gr.Slider(
                        minimum=0.0,
                        maximum=2.0,
                        value=0.7,
                        step=0.1,
                        label="Temperature",
                    )
                text_generate_btn = gr.Button("🚀 Generate Text", variant="primary")

            with gr.Column():
                text_output = gr.Textbox(
                    label="Generated Text", lines=10, max_lines=20, interactive=False
                )
                text_info = gr.JSON(label="Model Info", visible=False)

        text_generate_btn.click(
            fn=self._generate_text,
            inputs=[
                text_model_dropdown,
                text_prompt,
                text_max_tokens,
                text_temperature,
            ],
            outputs=[text_output, text_info],
        )

    def _create_image_generation_interface(self):
        """Create image generation interface and wire it to ``_generate_image``."""
        gr.Markdown("## Create stunning images from text descriptions")

        with gr.Row():
            with gr.Column():
                image_model_dropdown = gr.Dropdown(
                    choices=[model.name for model in self.models.TEXT_TO_IMAGE_MODELS],
                    value="FLUX.1 Dev",
                    label="Image Generation Model",
                    info="State-of-the-art text-to-image models",
                )
                image_prompt = gr.Textbox(
                    label="Image Prompt",
                    placeholder="Describe the image you want to create...",
                    lines=3,
                )
                image_negative_prompt = gr.Textbox(
                    label="Negative Prompt (Optional)",
                    placeholder="What to avoid in the image...",
                    lines=2,
                )
                with gr.Row():
                    image_width = gr.Slider(
                        minimum=256, maximum=2048, value=1024, step=64, label="Width"
                    )
                    image_height = gr.Slider(
                        minimum=256, maximum=2048, value=1024, step=64, label="Height"
                    )
                image_generate_btn = gr.Button("🎨 Generate Image", variant="primary")

            with gr.Column():
                image_output = gr.Image(label="Generated Image", type="pil")
                image_info = gr.JSON(label="Generation Info", visible=False)

        image_generate_btn.click(
            fn=self._generate_image,
            inputs=[
                image_model_dropdown,
                image_prompt,
                image_negative_prompt,
                image_width,
                image_height,
            ],
            outputs=[image_output, image_info],
        )

    def _create_audio_processing_interface(self):
        """Create audio processing interface (speech recognition and TTS tabs)."""
        gr.Markdown("## Speech recognition and text-to-speech")

        with gr.Tabs():
            with gr.TabItem("🎤 Speech Recognition"):
                with gr.Row():
                    with gr.Column():
                        asr_model_dropdown = gr.Dropdown(
                            choices=[model.name for model in self.models.ASR_MODELS],
                            value="Whisper Large v3",
                            label="Speech Recognition Model",
                        )
                        # type="filepath": the handler receives a path and
                        # reads the bytes itself.
                        audio_input = gr.Audio(
                            label="Upload Audio File", type="filepath"
                        )
                        asr_language = gr.Dropdown(
                            choices=list(_SOURCE_LANGUAGES),
                            value="auto",
                            label="Language (auto-detect if not specified)",
                        )
                        transcribe_btn = gr.Button("📝 Transcribe", variant="primary")

                    with gr.Column():
                        transcription_output = gr.Textbox(
                            label="Transcription", lines=8, interactive=False
                        )

            with gr.TabItem("🔊 Text-to-Speech"):
                with gr.Row():
                    with gr.Column():
                        tts_model_dropdown = gr.Dropdown(
                            choices=[model.name for model in self.models.TTS_MODELS],
                            value="Kokoro 82M",
                            label="Text-to-Speech Model",
                        )
                        tts_text = gr.Textbox(
                            label="Text to Speak",
                            placeholder="Enter text to convert to speech...",
                            lines=4,
                        )
                        tts_voice = gr.Textbox(
                            label="Voice ID (Optional)",
                            placeholder="Leave empty for default voice",
                        )
                        synthesize_btn = gr.Button("🔊 Synthesize", variant="primary")

                    with gr.Column():
                        audio_output = gr.Audio(label="Generated Audio")

        transcribe_btn.click(
            fn=self._transcribe_audio,
            inputs=[asr_model_dropdown, audio_input, asr_language],
            outputs=[transcription_output],
        )
        synthesize_btn.click(
            fn=self._synthesize_speech,
            inputs=[tts_model_dropdown, tts_text, tts_voice],
            outputs=[audio_output],
        )

    def _create_image_analysis_interface(self):
        """Create image analysis interface and wire it to ``_analyze_image``."""
        gr.Markdown("## Analyze and classify images")

        with gr.Row():
            with gr.Column():
                analysis_model_dropdown = gr.Dropdown(
                    choices=[
                        model.name for model in self.models.IMAGE_CLASSIFICATION_MODELS
                    ],
                    value="ViT Base Patch16",
                    label="Image Analysis Model",
                )
                analysis_task = gr.Radio(
                    choices=[
                        "General Classification",
                        "NSFW Detection",
                        "Emotion Recognition",
                        "Deepfake Detection",
                    ],
                    value="General Classification",
                    label="Analysis Task",
                )
                image_input = gr.Image(label="Upload Image", type="pil")
                analyze_btn = gr.Button("🔍 Analyze Image", variant="primary")

            with gr.Column():
                analysis_output = gr.JSON(label="Analysis Results")
                analysis_confidence = gr.Plot(label="Confidence Scores")

        analyze_btn.click(
            fn=self._analyze_image,
            inputs=[analysis_model_dropdown, analysis_task, image_input],
            outputs=[analysis_output, analysis_confidence],
        )

    def _create_text_analysis_interface(self):
        """Create text analysis interface (translation and summarization tabs)."""
        gr.Markdown("## Analyze, translate, and summarize text")

        with gr.Tabs():
            with gr.TabItem("🌍 Translation"):
                with gr.Row():
                    with gr.Column():
                        translation_text = gr.Textbox(
                            label="Text to Translate", lines=5
                        )
                        with gr.Row():
                            source_lang = gr.Dropdown(
                                choices=list(_SOURCE_LANGUAGES),
                                value="auto",
                                label="Source Language",
                            )
                            target_lang = gr.Dropdown(
                                choices=list(_TARGET_LANGUAGES),
                                value="en",
                                label="Target Language",
                            )
                        translate_btn = gr.Button("🌍 Translate", variant="primary")

                    with gr.Column():
                        translation_output = gr.Textbox(
                            label="Translation", lines=5, interactive=False
                        )

            with gr.TabItem("📄 Summarization"):
                with gr.Row():
                    with gr.Column():
                        summary_text = gr.Textbox(
                            label="Text to Summarize",
                            lines=8,
                            placeholder="Paste long text here...",
                        )
                        summary_length = gr.Slider(
                            minimum=50,
                            maximum=500,
                            value=150,
                            step=25,
                            label="Summary Length",
                        )
                        summarize_btn = gr.Button("📄 Summarize", variant="primary")

                    with gr.Column():
                        summary_output = gr.Textbox(
                            label="Summary", lines=8, interactive=False
                        )

        translate_btn.click(
            fn=self._translate_text,
            inputs=[translation_text, source_lang, target_lang],
            outputs=[translation_output],
        )
        summarize_btn.click(
            fn=self._summarize_text,
            inputs=[summary_text, summary_length],
            outputs=[summary_output],
        )

    def _create_model_browser_interface(self):
        """Create model browser interface.

        The table is refreshed both when the category changes and when the
        refresh button is clicked.
        """
        gr.Markdown("## Browse available HuggingFace models")

        category_dropdown = gr.Dropdown(
            choices=[cat.value for cat in ModelCategory],
            value="text-generation",
            label="Model Category",
        )
        refresh_btn = gr.Button("🔄 Refresh Models")
        models_display = gr.DataFrame(
            headers=["Model Name", "Model ID", "Description", "Compatible"],
            label="Available Models",
            interactive=False,
        )

        def update_models(category):
            """Return table rows for *category*; empty when nothing is listed."""
            if not category:
                return []
            listing = self.hf_agent.get_available_hf_models(category)
            if "models" not in listing:
                return []
            return [
                [
                    model["name"],
                    model["model_id"],
                    # Long descriptions are cut at 100 chars to keep rows compact.
                    (
                        model["description"][:100] + "..."
                        if len(model["description"]) > 100
                        else model["description"]
                    ),
                    "✅" if model["endpoint_compatible"] else "❌",
                ]
                for model in listing["models"]
            ]

        category_dropdown.change(
            fn=update_models, inputs=[category_dropdown], outputs=[models_display]
        )
        refresh_btn.click(
            fn=update_models, inputs=[category_dropdown], outputs=[models_display]
        )

    async def _generate_text(
        self, model_name: str, prompt: str, max_tokens: int, temperature: float
    ) -> Tuple[str, Dict]:
        """Generate text using selected model.

        Returns:
            ``(generated_text, raw_result)``. On an agent-reported error or an
            exception the text is ``"Error: ..."`` and the dict is empty.
        """
        try:
            result = await self.hf_agent.generate_text_with_hf(
                prompt=prompt,
                model_name=model_name,
                max_tokens=max_tokens,
                temperature=temperature,
            )
            if "error" in result:
                return f"Error: {result['error']}", {}

            # The payload is either a list of generations or a single dict.
            payload = result["result"] if "result" in result else None
            generated_text = ""
            if isinstance(payload, list):
                # An empty list yields empty text instead of an IndexError.
                if payload:
                    generated_text = payload[0].get("generated_text", "")
            elif isinstance(payload, dict):
                generated_text = payload.get("generated_text", "")
            return generated_text, result
        except Exception as e:
            return f"Error: {e}", {}

    async def _generate_image(
        self,
        model_name: str,
        prompt: str,
        negative_prompt: str,
        width: int,
        height: int,
    ) -> Tuple[Optional[PIL.Image.Image], Dict]:
        """Generate image using selected model.

        Returns:
            ``(image, info)``. ``image`` is None when the agent reports an
            error, when the result carries no ``image_base64`` key, or when an
            exception occurs (``info`` is then ``{"error": ...}``).
        """
        try:
            result = await self.hf_agent.generate_image_with_hf(
                prompt=prompt,
                model_name=model_name,
                negative_prompt=negative_prompt or None,
                width=width,
                height=height,
            )
            if "error" in result or "image_base64" not in result:
                return None, result

            # Convert base64 to PIL Image
            image_data = base64.b64decode(result["image_base64"])
            return PIL.Image.open(io.BytesIO(image_data)), result
        except Exception as e:
            return None, {"error": str(e)}

    async def _transcribe_audio(
        self, model_name: str, audio_path: str, language: str
    ) -> str:
        """Transcribe audio using selected model.

        ``language == "auto"`` is forwarded to the agent as None.

        Returns:
            The transcription, a prompt to upload a file, or ``"Error: ..."``.
        """
        try:
            if not audio_path:
                return "Please upload an audio file"

            with open(audio_path, "rb") as f:
                audio_data = f.read()

            result = await self.hf_agent.transcribe_audio_with_hf(
                audio_data=audio_data,
                model_name=model_name,
                language=language if language != "auto" else None,
            )
            if "error" in result:
                return f"Error: {result['error']}"
            return result.get("transcription", "No transcription available")
        except Exception as e:
            return f"Error: {e}"

    async def _synthesize_speech(
        self, model_name: str, text: str, voice_id: str
    ) -> Optional[bytes]:
        """Synthesize speech using selected model.

        Returns:
            Decoded audio bytes, or None for blank input, an agent-reported
            error, a result without ``audio_base64``, or any exception.
        """
        try:
            if self._is_blank(text):
                return None

            result = await self.hf_agent.synthesize_speech_with_hf(
                text=text, model_name=model_name, voice_id=voice_id or None
            )
            if "error" in result or "audio_base64" not in result:
                return None
            return base64.b64decode(result["audio_base64"])
        except Exception:
            # Best effort: the audio output is simply left empty on failure.
            return None

    async def _analyze_image(
        self, model_name: str, task: str, image: PIL.Image.Image
    ) -> Tuple[Dict, Optional[gr.Plot]]:
        """Analyze image using selected model.

        For tasks other than "General Classification" a task-specific model
        name overrides *model_name*.

        Returns:
            ``(result, None)``; the second slot feeds the confidence plot,
            which this handler never populates.
        """
        try:
            if image is None:
                return {"error": "Please upload an image"}, None

            # Convert PIL to bytes
            buffer = io.BytesIO()
            image.save(buffer, format="PNG")
            image_bytes = buffer.getvalue()

            # Map task to model
            task_models = {
                "NSFW Detection": "NSFW Image Detection",
                "Emotion Recognition": "Facial Emotions Detection",
                "Deepfake Detection": "Deepfake Detection",
                "General Classification": model_name,
            }
            selected_model = task_models.get(task, model_name)

            result = await self.hf_agent.classify_image_with_hf(
                image_data=image_bytes, model_name=selected_model
            )
            # Error payloads and successes are both shown in the JSON panel.
            return result, None
        except Exception as e:
            return {"error": str(e)}, None

    async def _translate_text(
        self, text: str, source_lang: str, target_lang: str
    ) -> str:
        """Translate text.

        ``source_lang == "auto"`` is forwarded to the agent as None.

        Returns:
            The translated text, a prompt to enter text, or ``"Error: ..."``.
        """
        try:
            if self._is_blank(text):
                return "Please enter text to translate"

            result = await self.hf_agent.translate_with_hf(
                text=text,
                source_language=source_lang if source_lang != "auto" else None,
                target_language=target_lang,
            )
            if "error" in result:
                return f"Error: {result['error']}"
            return result.get("translation", {}).get(
                "translation_text", "Translation failed"
            )
        except Exception as e:
            return f"Error: {e}"

    async def _summarize_text(self, text: str, max_length: int) -> str:
        """Summarize text.

        Returns:
            The summary, a prompt to enter text, or an ``"Error: ..."`` string.
        """
        try:
            if self._is_blank(text):
                return "Please enter text to summarize"

            result = await self.hf_agent.summarize_with_hf(
                text=text, max_length=max_length
            )
            if "error" in result:
                return f"Error: {result['error']}"
            return result.get("summary", {}).get("summary_text", "Summarization failed")
        except Exception:
            return "Error: Summarization failed"

    def _create_video_generation_interface(self):
        """Create video generation interface.

        Layout only: no event handler is attached to the button in this method.
        """
        gr.Markdown("## 🎬 Create videos from text descriptions")

        with gr.Row():
            with gr.Column():
                video_model_dropdown = gr.Dropdown(
                    choices=[
                        model.name for model in self.models.VIDEO_GENERATION_MODELS
                    ],
                    value="Stable Video Diffusion",
                    label="Video Generation Model",
                )
                video_prompt = gr.Textbox(
                    label="Video Description",
                    placeholder="Describe the video you want to create...",
                    lines=3,
                )
                with gr.Row():
                    video_duration = gr.Slider(
                        minimum=1,
                        maximum=30,
                        value=5,
                        step=1,
                        label="Duration (seconds)",
                    )
                    video_fps = gr.Slider(
                        minimum=12, maximum=60, value=24, step=1, label="FPS"
                    )
                generate_video_btn = gr.Button("🎬 Generate Video", variant="primary")

            with gr.Column():
                video_output = gr.Video(label="Generated Video")
                video_info = gr.JSON(label="Generation Info", visible=False)

    def _create_code_generation_interface(self):
        """Create code generation interface (code and app generation tabs).

        Layout only: no event handlers are attached to the buttons in this
        method.
        """
        gr.Markdown("## 💻 Generate code from natural language")

        with gr.Tabs():
            with gr.TabItem("Code Generation"):
                with gr.Row():
                    with gr.Column():
                        code_model_dropdown = gr.Dropdown(
                            choices=[
                                model.name
                                for model in self.models.CODE_GENERATION_MODELS
                            ],
                            value="CodeLlama 34B Instruct",
                            label="Code Generation Model",
                        )
                        code_prompt = gr.Textbox(
                            label="Code Description",
                            placeholder="Describe the code you want to generate...",
                            lines=4,
                        )
                        code_language = gr.Dropdown(
                            choices=[
                                "python",
                                "javascript",
                                "java",
                                "cpp",
                                "c",
                                "rust",
                                "go",
                                "swift",
                            ],
                            value="python",
                            label="Programming Language",
                        )
                        generate_code_btn = gr.Button(
                            "💻 Generate Code", variant="primary"
                        )

                    with gr.Column():
                        code_output = gr.Code(label="Generated Code", language="python")

            with gr.TabItem("App Generation"):
                with gr.Row():
                    with gr.Column():
                        app_description = gr.Textbox(
                            label="App Description",
                            placeholder="Describe the application you want to create...",
                            lines=5,
                        )
                        app_type = gr.Dropdown(
                            choices=[
                                "web_app",
                                "mobile_app",
                                "desktop_app",
                                "api",
                                "cli_tool",
                            ],
                            value="web_app",
                            label="Application Type",
                        )
                        generate_app_btn = gr.Button(
                            "🚀 Generate App", variant="primary"
                        )

                    with gr.Column():
                        app_output = gr.Code(
                            label="Generated App Code", language="python"
                        )

    def _create_3d_generation_interface(self):
        """Create 3D model generation interface (text-to-3D and image-to-3D).

        Layout only: no event handlers are attached to the buttons in this
        method.
        """
        gr.Markdown("## 🧊 Create 3D models and assets")

        with gr.Tabs():
            with gr.TabItem("Text to 3D"):
                with gr.Row():
                    with gr.Column():
                        three_d_model_dropdown = gr.Dropdown(
                            choices=[
                                model.name for model in self.models.THREE_D_MODELS
                            ],
                            value="Shap-E",
                            label="3D Generation Model",
                        )
                        three_d_prompt = gr.Textbox(
                            label="3D Object Description",
                            placeholder="Describe the 3D object you want to create...",
                            lines=3,
                        )
                        three_d_resolution = gr.Slider(
                            minimum=32,
                            maximum=256,
                            value=64,
                            step=32,
                            label="Resolution",
                        )
                        generate_3d_btn = gr.Button(
                            "🧊 Generate 3D Model", variant="primary"
                        )

                    with gr.Column():
                        three_d_output = gr.File(label="Generated 3D Model")

            with gr.TabItem("Image to 3D"):
                with gr.Row():
                    with gr.Column():
                        image_3d_input = gr.Image(label="Input Image", type="pil")
                        convert_3d_btn = gr.Button(
                            "🔄 Convert to 3D", variant="primary"
                        )

                    with gr.Column():
                        image_3d_output = gr.File(label="3D Model from Image")

    def _create_document_processing_interface(self):
        """Create document processing interface (OCR and document analysis).

        Layout only: no event handlers are attached to the buttons in this
        method.
        """
        gr.Markdown("## 📄 Process and analyze documents")

        with gr.Tabs():
            with gr.TabItem("OCR"):
                with gr.Row():
                    with gr.Column():
                        # Only catalog entries whose name mentions "ocr" are
                        # offered here.
                        ocr_model_dropdown = gr.Dropdown(
                            choices=[
                                model.name
                                for model in self.models.DOCUMENT_PROCESSING_MODELS
                                if "ocr" in model.name.lower()
                            ],
                            value="TrOCR Large",
                            label="OCR Model",
                        )
                        ocr_image_input = gr.Image(label="Document Image", type="pil")
                        ocr_language = gr.Dropdown(
                            choices=list(_SOURCE_LANGUAGES),
                            value="auto",
                            label="Language",
                        )
                        extract_text_btn = gr.Button(
                            "📝 Extract Text", variant="primary"
                        )

                    with gr.Column():
                        ocr_output = gr.Textbox(label="Extracted Text", lines=10)

            with gr.TabItem("Document Analysis"):
                with gr.Row():
                    with gr.Column():
                        doc_file_input = gr.File(
                            label="Upload Document",
                            file_types=[".pdf", ".png", ".jpg", ".jpeg"],
                        )
                        analyze_doc_btn = gr.Button(
                            "🔍 Analyze Document", variant="primary"
                        )

                    with gr.Column():
                        doc_analysis_output = gr.JSON(label="Document Analysis")

    def _create_multimodal_interface(self):
        """Create multimodal AI interface.

        Layout only: no event handler is attached to the button in this method.
        """
        gr.Markdown("## 🔗 Combine vision, text, and reasoning")

        with gr.Row():
            with gr.Column():
                multimodal_model_dropdown = gr.Dropdown(
                    choices=[model.name for model in self.models.MULTIMODAL_MODELS],
                    value="BLIP-2",
                    label="Multimodal Model",
                )
                multimodal_image = gr.Image(label="Input Image", type="pil")
                multimodal_text = gr.Textbox(
                    label="Text Query/Instruction",
                    placeholder="Ask questions about the image or give instructions...",
                    lines=3,
                )
                multimodal_task = gr.Radio(
                    choices=[
                        "Visual Q&A",
                        "Image Captioning",
                        "Multimodal Chat",
                        "Cross-modal Generation",
                    ],
                    value="Visual Q&A",
                    label="Task Type",
                )
                process_multimodal_btn = gr.Button("🔗 Process", variant="primary")

            with gr.Column():
                multimodal_output = gr.Textbox(label="Response", lines=8)

    def _create_creative_content_interface(self):
        """Create creative content generation interface.

        Layout only: no event handler is attached to the button in this method.
        """
        gr.Markdown("## 🎭 Generate creative content")

        with gr.Tabs():
            with gr.TabItem("Creative Writing"):
                with gr.Row():
                    with gr.Column():
                        creative_model_dropdown = gr.Dropdown(
                            choices=[
                                model.name
                                for model in self.models.CREATIVE_CONTENT_MODELS
                            ],
                            value="GPT-3.5 Creative",
                            label="Creative Writing Model",
                        )
                        creative_prompt = gr.Textbox(
                            label="Creative Prompt",
                            placeholder="Provide a creative writing prompt...",
                            lines=4,
                        )
                        creative_type = gr.Dropdown(
                            choices=["story", "poem", "article", "script", "blog"],
                            value="story",
                            label="Content Type",
                        )
                        creative_length = gr.Slider(
                            minimum=100,
                            maximum=2000,
                            value=500,
                            step=100,
                            label="Length (words)",
                        )
                        generate_creative_btn = gr.Button(
                            "🎭 Generate Content", variant="primary"
                        )

                    with gr.Column():
                        creative_output = gr.Textbox(
                            label="Generated Content", lines=15
                        )

    def _create_game_development_interface(self):
        """Create game development interface (character and level tabs).

        Layout only: no event handlers are attached to the buttons in this
        method.
        """
        gr.Markdown("## 🎮 Generate game content and assets")

        with gr.Tabs():
            with gr.TabItem("Character Generation"):
                with gr.Row():
                    with gr.Column():
                        character_prompt = gr.Textbox(
                            label="Character Description",
                            placeholder="Describe your game character...",
                            lines=4,
                        )
                        character_type = gr.Dropdown(
                            choices=["hero", "villain", "npc", "companion", "boss"],
                            value="hero",
                            label="Character Type",
                        )
                        generate_character_btn = gr.Button(
                            "👾 Generate Character", variant="primary"
                        )

                    with gr.Column():
                        character_output = gr.Textbox(
                            label="Character Profile", lines=10
                        )

            with gr.TabItem("Level Design"):
                with gr.Row():
                    with gr.Column():
                        level_description = gr.Textbox(
                            label="Level Description",
                            placeholder="Describe your game level...",
                            lines=4,
                        )
                        level_type = gr.Dropdown(
                            choices=[
                                "dungeon",
                                "outdoor",
                                "city",
                                "space",
                                "underwater",
                            ],
                            value="dungeon",
                            label="Environment Type",
                        )
                        generate_level_btn = gr.Button(
                            "🗺️ Generate Level", variant="primary"
                        )

                    with gr.Column():
                        level_output = gr.Textbox(label="Level Design", lines=10)

    def _create_science_research_interface(self):
        """Create science and research interface (writing and data analysis).

        Layout only: no event handlers are attached to the buttons in this
        method.
        """
        gr.Markdown("## 🔬 Scientific research and analysis tools")

        with gr.Tabs():
            with gr.TabItem("Research Writing"):
                with gr.Row():
                    with gr.Column():
                        research_model_dropdown = gr.Dropdown(
                            choices=[
                                model.name
                                for model in self.models.SCIENCE_RESEARCH_MODELS
                            ],
                            value="SciBERT",
                            label="Research Model",
                        )
                        research_topic = gr.Textbox(
                            label="Research Topic",
                            placeholder="Enter your research topic...",
                            lines=3,
                        )
                        research_type = gr.Dropdown(
                            choices=[
                                "abstract",
                                "introduction",
                                "methodology",
                                "discussion",
                                "conclusion",
                            ],
                            value="abstract",
                            label="Section Type",
                        )
                        generate_research_btn = gr.Button(
                            "📊 Generate Research Content", variant="primary"
                        )

                    with gr.Column():
                        research_output = gr.Textbox(
                            label="Research Content", lines=12
                        )

            with gr.TabItem("Data Analysis"):
                with gr.Row():
                    with gr.Column():
                        data_file = gr.File(
                            label="Upload Data File",
                            file_types=[".csv", ".xlsx", ".json"],
                        )
                        analysis_type = gr.Dropdown(
                            choices=[
                                "descriptive",
                                "statistical",
                                "predictive",
                                "clustering",
                            ],
                            value="descriptive",
                            label="Analysis Type",
                        )
                        analyze_data_btn = gr.Button(
                            "📈 Analyze Data", variant="primary"
                        )

                    with gr.Column():
                        data_analysis_output = gr.JSON(label="Analysis Results")

    def _create_business_tools_interface(self):
        """Create business tools interface (email and report generation).

        Layout only: no event handlers are attached to the buttons in this
        method.
        """
        gr.Markdown("## 💼 Professional business content generation")

        with gr.Tabs():
            with gr.TabItem("Email Generation"):
                with gr.Row():
                    with gr.Column():
                        email_type = gr.Dropdown(
                            choices=[
                                "formal",
                                "casual",
                                "marketing",
                                "follow_up",
                                "invitation",
                            ],
                            value="formal",
                            label="Email Type",
                        )
                        email_context = gr.Textbox(
                            label="Email Context",
                            placeholder="Provide context for the email...",
                            lines=4,
                        )
                        email_tone = gr.Dropdown(
                            choices=["professional", "friendly", "urgent", "persuasive"],
                            value="professional",
                            label="Tone",
                        )
                        generate_email_btn = gr.Button(
                            "📧 Generate Email", variant="primary"
                        )

                    with gr.Column():
                        email_output = gr.Textbox(label="Generated Email", lines=10)

            with gr.TabItem("Report Generation"):
                with gr.Row():
                    with gr.Column():
                        report_type = gr.Dropdown(
                            choices=[
                                "quarterly",
                                "annual",
                                "project",
                                "analysis",
                                "summary",
                            ],
                            value="project",
                            label="Report Type",
                        )
                        report_data = gr.Textbox(
                            label="Report Data/Context",
                            placeholder="Provide data or context for the report...",
                            lines=6,
                        )
                        generate_report_btn = gr.Button(
                            "📋 Generate Report", variant="primary"
                        )

                    with gr.Column():
                        report_output = gr.Textbox(label="Generated Report", lines=15)