|
|
import gradio as gr |
|
|
import numpy as np |
|
|
import base64 |
|
|
import re |
|
|
import asyncio |
|
|
from groq import Groq |
|
|
from fastrtc import ( |
|
|
Stream, |
|
|
AsyncStreamHandler, |
|
|
AdditionalOutputs, |
|
|
wait_for_item, |
|
|
get_cloudflare_turn_credentials_async, |
|
|
) |
|
|
from gradio.utils import get_space |
|
|
|
|
|
class VoiceCodingHandler(AsyncStreamHandler):
    """FastRTC audio handler that turns a spoken request into a single-file HTML app.

    Incoming audio frames trigger a transcription step (currently mocked)
    followed by a Groq chat completion.  Results are queued on
    ``output_queue`` as ``AdditionalOutputs`` payloads and handed to the
    stream by ``emit``.

    Every payload is a dict with the keys ``type`` ("loading",
    "code_generated" or "error"), ``message``, ``history`` and ``code``.
    """

    def __init__(self, groq_client: Groq):
        """Create an idle handler; ``start_up`` must run before frames are accepted.

        Args:
            groq_client: Groq client used for chat completions.  It is shared
                with every handler produced by ``copy``.
        """
        super().__init__(
            expected_layout="mono",
            output_sample_rate=24000,
            input_sample_rate=16000,
        )
        self.groq_client = groq_client
        self.input_queue = asyncio.Queue()
        self.output_queue = asyncio.Queue()
        self.is_active = False

        # Single-flight guard: only one request is processed at a time so that
        # concurrent completions cannot interleave their writes to the history.
        self._is_processing = False
        # The event loop keeps only weak references to tasks; holding them here
        # prevents an in-flight task from being garbage-collected.
        self._background_tasks = set()

        self.system_prompt = (
            "You are an AI coding assistant. Your task is to write single-file "
            "HTML applications based on a user's request. Only return the "
            "necessary code. Include all necessary imports and styles. You may "
            "also be asked to edit your original response. Respond in "
            "Vietnamese when appropriate."
        )
        self.user_prompt = (
            "Please write a single-file HTML application to fulfill the "
            "following request.\n"
            "The message:{user_message}\n"
            "Current code you have written:{code}"
        )

        self.current_history = [{"role": "system", "content": self.system_prompt}]
        self.current_code = ""

    def copy(self):
        """Return a fresh handler with its own queues and history, sharing the Groq client."""
        return VoiceCodingHandler(self.groq_client)

    def extract_html_content(self, text):
        """Return the first complete HTML document found in ``text``.

        The DOCTYPE and closing tag are matched case-insensitively, so
        ``<!doctype html>`` is recognised as well.

        Args:
            text: Raw model output; may be ``None`` or empty.

        Returns:
            ``None`` for empty input, the ``<!DOCTYPE html>...</html>`` span
            when one exists, otherwise ``text`` unchanged.
        """
        if not text:
            return None
        match = re.search(
            r"<!DOCTYPE html>.*?</html>", text, re.DOTALL | re.IGNORECASE
        )
        return match.group(0) if match else text

    async def start_up(self):
        """Mark the handler as active so ``receive`` starts accepting frames."""
        self.is_active = True
        print("✅ Voice Coding Handler started")

    async def receive(self, frame: tuple[int, np.ndarray]) -> None:
        """Accept one audio frame and start processing it in the background.

        Frames are dropped while the handler is inactive or while a previous
        request is still being processed.

        Args:
            frame: ``(sample_rate, samples)`` pair.
        """
        if not self.is_active or self._is_processing:
            return

        sample_rate, array = frame
        array = array.squeeze()

        # NOTE: transcription is mocked, so every accepted frame results in a
        # request; the guard above at least keeps those requests sequential.
        self._is_processing = True
        task = asyncio.create_task(self._process_audio(array, sample_rate))
        self._background_tasks.add(task)
        task.add_done_callback(self._on_task_done)

    def _on_task_done(self, task):
        """Forget a finished background task and allow the next frame through."""
        self._background_tasks.discard(task)
        self._is_processing = False

    def _payload(self, kind, message, code):
        """Build an ``AdditionalOutputs`` payload with a snapshot of the history."""
        return AdditionalOutputs({
            "type": kind,
            "message": message,
            # Copy so later history updates do not alter a queued payload.
            "history": list(self.current_history),
            "code": code,
        })

    async def _process_audio(self, audio_data: np.ndarray, sample_rate: int):
        """Transcribe the audio and, if a request was heard, generate code for it.

        Errors are logged and swallowed so a bad frame never kills the stream.
        """
        try:
            print("🎤 Processing audio for voice coding...")

            transcription = await self._mock_transcribe_audio()
            if not transcription:
                return

            print(f"🎯 Received request: {transcription}")
            await self.output_queue.put(
                self._payload("loading", "🦙 Llama đang code...", self.current_code)
            )
            await self._generate_code(transcription)
        except Exception as e:
            print(f"❌ Lỗi xử lý audio: {e}")

    async def _mock_transcribe_audio(self) -> str:
        """Return a fixed transcription; placeholder until a real ASR is integrated."""
        return "Tạo trang web hello world với màu nền xanh và chữ màu trắng"

    async def _generate_code(self, user_message: str):
        """Ask the model for code and queue the result (or an error) for ``emit``.

        The user turn and the assistant reply are added to the history only
        after a successful, non-empty completion, so a failed request leaves
        the conversation state untouched.

        Args:
            user_message: The transcribed request.
        """
        try:
            user_entry = {
                "role": "user",
                "content": self.user_prompt.format(
                    user_message=user_message,
                    code=self.current_code,
                ),
            }
            messages = [*self.current_history, user_entry]

            print("🦙 Generating code with Llama...")
            # The Groq client is synchronous; run it in a worker thread so the
            # event loop keeps serving audio while the request is in flight.
            response = await asyncio.to_thread(
                self.groq_client.chat.completions.create,
                model="llama-3.1-8b-instant",
                messages=messages,
                temperature=0.7,
                max_tokens=1024,
                top_p=0.9,
                stream=False,
            )

            output = response.choices[0].message.content
            if not output:
                # Storing an empty reply would put ``None`` into the history
                # and format the next prompt with the literal text "None".
                raise ValueError("Model returned an empty response")
            print("✅ Code generated successfully")

            html_code = self.extract_html_content(output)

            self.current_history.append(user_entry)
            self.current_history.append({"role": "assistant", "content": output})
            self.current_code = html_code

            await self.output_queue.put(
                self._payload("code_generated", "✅ Code đã được generate!", html_code)
            )
        except Exception as e:
            print(f"❌ Lỗi generate code: {e}")
            await self.output_queue.put(
                self._payload("error", f"❌ Lỗi: {str(e)}", self.current_code)
            )

    async def emit(self):
        """Return the next queued output via ``wait_for_item``; ``None`` if waiting fails."""
        try:
            return await wait_for_item(self.output_queue)
        except Exception as e:
            print(f"❌ Lỗi emit: {e}")
            return None

    async def shutdown(self):
        """Mark the handler as inactive so further frames are ignored."""
        self.is_active = False
        print("🛑 Voice Coding Handler stopped")
|
|
|
|
|
class VoiceCodingService:
    """Voice Coding service built on FastRTC.

    Holds the Groq client and the WebRTC configuration, creates the audio
    stream, and renders generated code as an HTML preview.
    """

    def __init__(self, groq_client: Groq):
        """Store the client, fetch TURN credentials and prepare the static HTML snippets.

        Args:
            groq_client: Groq client passed on to every ``VoiceCodingHandler``.
        """
        self.groq_client = groq_client

        # Best effort: without TURN credentials the stream is created with
        # ``rtc_configuration=None``.
        try:
            self.rtc_configuration = asyncio.run(get_cloudflare_turn_credentials_async())
            print("✅ Using Cloudflare TURN servers")
        except Exception as e:
            print(f"⚠️ Cannot get TURN credentials, using None: {e}")
            self.rtc_configuration = None

        # Placeholder shown while there is nothing to preview.
        self.sandbox_html = """
        <div style="text-align: center; padding: 20px; border: 2px dashed #ccc; border-radius: 10px;">
            <h3>🎮 Sandbox Preview</h3>
            <p>Code sẽ được hiển thị ở đây sau khi generate</p>
            <p><small>Chức năng voice đang được phát triển. Vui lòng sử dụng text input.</small></p>
        </div>
        """

        # Spinner markup for the "generation in progress" state.
        self.loading_html = """
        <div style="text-align: center; padding: 20px;">
            <div class="spinner"></div>
            <p>🦙 Llama đang code...</p>
        </div>
        <style>
        .spinner {
            border: 4px solid #f3f3f3;
            border-top: 4px solid #3498db;
            border-radius: 50%;
            width: 40px;
            height: 40px;
            animation: spin 2s linear infinite;
            margin: 0 auto;
        }
        @keyframes spin {
            0% { transform: rotate(0deg); }
            100% { transform: rotate(360deg); }
        }
        </style>
        """

    def extract_html_content(self, text):
        """Return the first complete HTML document found in ``text``.

        The DOCTYPE and closing tag are matched case-insensitively, so
        ``<!doctype html>`` is recognised as well.

        Args:
            text: Raw model output; may be ``None`` or empty.

        Returns:
            ``"<!-- No code generated -->"`` for empty input, the
            ``<!DOCTYPE html>...</html>`` span when one exists, otherwise the
            text wrapped in ``<pre>``.  The wrapped text is NOT escaped here;
            pass the result through ``display_in_sandbox`` (which escapes)
            rather than inserting it into a page directly.
        """
        if not text:
            return "<!-- No code generated -->"
        match = re.search(
            r"<!DOCTYPE html>.*?</html>", text, re.DOTALL | re.IGNORECASE
        )
        return match.group(0) if match else f"<!-- Generated Code -->\n<pre>{text}</pre>"

    def create_stream(self):
        """Create the FastRTC audio stream backed by a ``VoiceCodingHandler``."""
        return Stream(
            VoiceCodingHandler(self.groq_client),
            modality="audio",
            mode="send-receive",
            rtc_configuration=self.rtc_configuration,
            concurrency_limit=3,
            time_limit=120,
        )

    def display_in_sandbox(self, code):
        """Render ``code`` as preview HTML.

        Args:
            code: Generated code or text; may be ``None`` or empty.

        Returns:
            * the placeholder (``sandbox_html``) when there is nothing to show,
            * an ``<iframe>`` with a base64 ``data:`` URI when ``code`` looks
              like an HTML document,
            * otherwise the text, HTML-escaped, inside a ``<pre>`` block.
            If rendering fails, an escaped error message is returned instead.
        """
        # Function-scope import so this block does not depend on the
        # module-level import list.
        import html

        if not code:
            return self.sandbox_html

        try:
            lowered = code.lower()
            if any(tag in lowered for tag in ("<html", "<!doctype", "<body", "<head")):
                encoded_html = base64.b64encode(code.encode("utf-8")).decode("utf-8")
                data_uri = f"data:text/html;charset=utf-8;base64,{encoded_html}"
                return f'<iframe src="{data_uri}" width="100%" height="600px" style="border: 1px solid #ccc; border-radius: 5px;"></iframe>'

            # Checked only after the document test so that a real page which
            # merely contains the words "No code" is still rendered.
            if "No code" in code:
                return self.sandbox_html

            # Model output is untrusted: escape it so it is displayed as text
            # instead of being interpreted as markup by the host page.
            return f'<div style="padding: 20px; background: #f5f5f5; border-radius: 5px;"><h4>Generated Content:</h4><pre style="white-space: pre-wrap;">{html.escape(code)}</pre></div>'
        except Exception as e:
            print(f"❌ Lỗi display sandbox: {e}")
            return f'<div style="color: red; padding: 20px;">Lỗi hiển thị sandbox: {html.escape(str(e))}</div>'