import gradio as gr import numpy as np import base64 import re import asyncio from groq import Groq from fastrtc import ( Stream, AsyncStreamHandler, AdditionalOutputs, wait_for_item, get_cloudflare_turn_credentials_async, # Sử dụng Cloudflare free ) from gradio.utils import get_space class VoiceCodingHandler(AsyncStreamHandler): """FastRTC Handler cho Voice Coding""" def __init__(self, groq_client: Groq): super().__init__( expected_layout="mono", output_sample_rate=24000, input_sample_rate=16000, ) self.groq_client = groq_client self.input_queue = asyncio.Queue() self.output_queue = asyncio.Queue() self.is_active = False # Prompts self.system_prompt = "You are an AI coding assistant. Your task is to write single-file HTML applications based on a user's request. Only return the necessary code. Include all necessary imports and styles. You may also be asked to edit your original response. Respond in Vietnamese when appropriate." self.user_prompt = "Please write a single-file HTML application to fulfill the following request.\nThe message:{user_message}\nCurrent code you have written:{code}" self.current_history = [{"role": "system", "content": self.system_prompt}] self.current_code = "" def copy(self): return VoiceCodingHandler(self.groq_client) def extract_html_content(self, text): """Extract content including HTML tags.""" if not text: return None match = re.search(r".*?", text, re.DOTALL) return match.group(0) if match else text # Return full text if no HTML found async def start_up(self): """Khởi động handler""" self.is_active = True print("✅ Voice Coding Handler started") async def receive(self, frame: tuple[int, np.ndarray]) -> None: """Nhận audio frame""" if not self.is_active: return sample_rate, array = frame array = array.squeeze() # Xử lý audio trong background asyncio.create_task(self._process_audio(array, sample_rate)) async def _process_audio(self, audio_data: np.ndarray, sample_rate: int): """Xử lý audio và generate code""" try: print("🎤 Processing audio for voice coding...") # Tạm thời sử dụng text input thay vì audio transcription # Trong thực tế, bạn sẽ tích hợp với VOSK/Whisper transcription = await self._mock_transcribe_audio() if transcription: print(f"🎯 Received request: {transcription}") # Generate loading state await self.output_queue.put(AdditionalOutputs({ "type": "loading", "message": "🦙 Llama đang code...", "history": self.current_history, "code": self.current_code })) # Generate code await self._generate_code(transcription) except Exception as e: print(f"❌ Lỗi xử lý audio: {e}") async def _mock_transcribe_audio(self) -> str: """Mock transcription - trong thực tế sẽ tích hợp với ASR""" # Tạm thời return test text # Bạn có thể tích hợp với VOSK/Whisper sau return "Tạo trang web hello world với màu nền xanh và chữ màu trắng" async def _generate_code(self, user_message: str): """Generate code từ text input""" try: # Format user message user_msg_formatted = self.user_prompt.format( user_message=user_message, code=self.current_code ) # Update history self.current_history.append({"role": "user", "content": user_msg_formatted}) # Generate code với Groq print("🦙 Generating code with Llama...") response = self.groq_client.chat.completions.create( model="llama-3.1-8b-instant", # Sử dụng model có sẵn messages=self.current_history, temperature=0.7, max_tokens=1024, top_p=0.9, stream=False, ) output = response.choices[0].message.content print("✅ Code generated successfully") # Extract HTML code html_code = self.extract_html_content(output) # Update state self.current_history.append({"role": "assistant", "content": output}) self.current_code = html_code # Send result await self.output_queue.put(AdditionalOutputs({ "type": "code_generated", "history": self.current_history, "code": html_code, "message": "✅ Code đã được generate!" })) except Exception as e: print(f"❌ Lỗi generate code: {e}") await self.output_queue.put(AdditionalOutputs({ "type": "error", "message": f"❌ Lỗi: {str(e)}", "history": self.current_history, "code": self.current_code })) async def emit(self): """Emit outputs""" try: return await wait_for_item(self.output_queue) except Exception as e: print(f"❌ Lỗi emit: {e}") return None async def shutdown(self): """Dừng handler""" self.is_active = False print("🛑 Voice Coding Handler stopped") class VoiceCodingService: """Dịch vụ Voice Coding sử dụng FastRTC""" def __init__(self, groq_client: Groq): self.groq_client = groq_client # Sử dụng Cloudflare TURN miễn phí hoặc None cho local development try: self.rtc_configuration = asyncio.run(get_cloudflare_turn_credentials_async()) print("✅ Using Cloudflare TURN servers") except Exception as e: print(f"⚠️ Cannot get TURN credentials, using None: {e}") self.rtc_configuration = None # Sẽ hoạt động trên local network # HTML templates self.sandbox_html = """

🎮 Sandbox Preview

Code sẽ được hiển thị ở đây sau khi generate

Chức năng voice đang được phát triển. Vui lòng sử dụng text input.

""" self.loading_html = """

🦙 Llama đang code...

""" def extract_html_content(self, text): """Extract content including HTML tags.""" if not text: return "" match = re.search(r".*?", text, re.DOTALL) return match.group(0) if match else f"\n

{text}

" def create_stream(self): """Tạo FastRTC stream""" return Stream( VoiceCodingHandler(self.groq_client), modality="audio", mode="send-receive", rtc_configuration=self.rtc_configuration, concurrency_limit=3, time_limit=120, ) def display_in_sandbox(self, code): """Hiển thị code trong sandbox iframe""" if not code or "No code" in code: return self.sandbox_html try: # Kiểm tra xem code có phải HTML không if any(tag in code.lower() for tag in ['' else: # Nếu không phải HTML, hiển thị dưới dạng text return f'

Generated Content:

{code}

' except Exception as e: print(f"❌ Lỗi display sandbox: {e}") return f'

Lỗi hiển thị sandbox: {str(e)}