# voicebot/services/voice_coding_service.py
# datbkpro's picture
# Update services/voice_coding_service.py
# 3dde460 verified
import asyncio
import base64
import html
import re

import gradio as gr
import numpy as np
from fastrtc import (
    AdditionalOutputs,
    AsyncStreamHandler,
    Stream,
    get_cloudflare_turn_credentials_async,  # Uses Cloudflare's free TURN service
    wait_for_item,
)
from gradio.utils import get_space
from groq import Groq
class VoiceCodingHandler(AsyncStreamHandler):
    """FastRTC stream handler that drives the voice-coding assistant.

    An incoming audio frame triggers a (currently mocked) transcription whose
    text is sent to a Groq-hosted Llama model. Progress, results and errors
    are published on ``output_queue`` as ``AdditionalOutputs`` payloads, each
    wrapping a dict with ``type``, ``message``, ``history`` and ``code`` keys.
    """

    # Case-insensitive because models frequently emit ``<!doctype html>``.
    _HTML_DOCUMENT_RE = re.compile(
        r"<!DOCTYPE html>.*?</html>", re.DOTALL | re.IGNORECASE
    )

    def __init__(self, groq_client: Groq):
        """Create a handler bound to ``groq_client``.

        Args:
            groq_client: Client used for the chat-completion requests.
        """
        super().__init__(
            expected_layout="mono",
            output_sample_rate=24000,
            input_sample_rate=16000,
        )
        self.groq_client = groq_client
        self.input_queue = asyncio.Queue()
        self.output_queue = asyncio.Queue()
        self.is_active = False
        # Strong reference to the in-flight processing task. The event loop
        # only keeps weak references to tasks, so an unreferenced task may be
        # garbage-collected before it finishes.
        self._processing_task = None
        # Prompts
        self.system_prompt = "You are an AI coding assistant. Your task is to write single-file HTML applications based on a user's request. Only return the necessary code. Include all necessary imports and styles. You may also be asked to edit your original response. Respond in Vietnamese when appropriate."
        self.user_prompt = "Please write a single-file HTML application to fulfill the following request.\nThe message:{user_message}\nCurrent code you have written:{code}"
        self.current_history = [{"role": "system", "content": self.system_prompt}]
        self.current_code = ""

    def copy(self):
        """Return a fresh handler that shares only the Groq client."""
        return VoiceCodingHandler(self.groq_client)

    def extract_html_content(self, text):
        """Return the first complete HTML document found in ``text``.

        Args:
            text: Raw model output.

        Returns:
            ``None`` when ``text`` is empty; the ``<!DOCTYPE html>…</html>``
            span when one is present (matched case-insensitively); otherwise
            ``text`` unchanged.
        """
        if not text:
            return None
        match = self._HTML_DOCUMENT_RE.search(text)
        # Fall back to the full text when no HTML document is found.
        return match.group(0) if match else text

    async def start_up(self):
        """Mark the handler as active so ``receive`` starts accepting frames."""
        self.is_active = True
        print("✅ Voice Coding Handler started")

    async def receive(self, frame: tuple[int, np.ndarray]) -> None:
        """Accept one audio frame and start processing it in the background.

        Frames are dropped while the handler is inactive or while a previous
        request is still being processed. Without that guard every single
        frame would start its own concurrent model request, all mutating the
        same conversation state.

        Args:
            frame: ``(sample_rate, samples)`` pair.
        """
        if not self.is_active:
            return
        pending = self._processing_task
        if pending is not None and not pending.done():
            return
        sample_rate, array = frame
        array = array.squeeze()
        # Process in the background so the stream is never blocked.
        self._processing_task = asyncio.create_task(
            self._process_audio(array, sample_rate)
        )

    async def _process_audio(self, audio_data: np.ndarray, sample_rate: int):
        """Transcribe the audio, announce a loading state, then generate code.

        ``audio_data`` and ``sample_rate`` are not used yet because the
        transcription step is still a mock.
        """
        try:
            print("🎤 Processing audio for voice coding...")
            # Temporary: a fixed text stands in for real audio transcription.
            # A VOSK/Whisper integration is meant to replace this later.
            transcription = await self._mock_transcribe_audio()
            if transcription:
                print(f"🎯 Received request: {transcription}")
                # Publish a loading state before the slow model call.
                await self.output_queue.put(AdditionalOutputs({
                    "type": "loading",
                    "message": "🦙 Llama đang code...",
                    # Snapshot, so later history updates cannot alter a
                    # payload that has already been queued.
                    "history": list(self.current_history),
                    "code": self.current_code
                }))
                await self._generate_code(transcription)
        except Exception as e:
            print(f"❌ Lỗi xử lý audio: {e}")

    async def _mock_transcribe_audio(self) -> str:
        """Return a fixed test request in place of a real ASR result."""
        return "Tạo trang web hello world với màu nền xanh và chữ màu trắng"

    async def _generate_code(self, user_message: str):
        """Ask the model for code and publish the outcome on ``output_queue``.

        On success the user/assistant turns are appended to
        ``current_history`` and ``current_code`` is replaced. On any failure
        (including an empty completion) the conversation state is left
        untouched and an ``error`` payload is published instead.

        Args:
            user_message: Transcribed request text.
        """
        try:
            user_turn = {
                "role": "user",
                "content": self.user_prompt.format(
                    user_message=user_message,
                    code=self.current_code
                ),
            }
            print("🦙 Generating code with Llama...")
            # The Groq client is synchronous; run it in a worker thread so the
            # event loop keeps serving the stream while the model responds.
            # The user turn is sent through a temporary list and committed to
            # the history only after a successful reply.
            response = await asyncio.to_thread(
                self.groq_client.chat.completions.create,
                model="llama-3.1-8b-instant",
                messages=[*self.current_history, user_turn],
                temperature=0.7,
                max_tokens=1024,
                top_p=0.9,
                stream=False,
            )
            output = response.choices[0].message.content
            if not output:
                # Keep ``None``/empty content out of the history and out of
                # the next prompt's "current code" slot.
                raise ValueError("Model returned an empty response")
            print("✅ Code generated successfully")
            html_code = self.extract_html_content(output)
            # Commit both turns together.
            self.current_history.extend(
                (user_turn, {"role": "assistant", "content": output})
            )
            self.current_code = html_code
            await self.output_queue.put(AdditionalOutputs({
                "type": "code_generated",
                "history": list(self.current_history),
                "code": html_code,
                "message": "✅ Code đã được generate!"
            }))
        except Exception as e:
            print(f"❌ Lỗi generate code: {e}")
            await self.output_queue.put(AdditionalOutputs({
                "type": "error",
                "message": f"❌ Lỗi: {str(e)}",
                "history": list(self.current_history),
                "code": self.current_code
            }))

    async def emit(self):
        """Return the next queued output, or ``None`` if waiting fails."""
        try:
            return await wait_for_item(self.output_queue)
        except Exception as e:
            print(f"❌ Lỗi emit: {e}")
            return None

    async def shutdown(self):
        """Deactivate the handler and cancel any in-flight processing task."""
        self.is_active = False
        pending = self._processing_task
        if pending is not None and not pending.done():
            pending.cancel()
        self._processing_task = None
        print("🛑 Voice Coding Handler stopped")
class VoiceCodingService:
    """Voice-coding service built on FastRTC.

    Holds the Groq client, the WebRTC configuration and the HTML snippets
    used by the UI, and provides helpers to build the stream and to render
    generated code in a sandbox preview.
    """

    # Case-insensitive because models frequently emit ``<!doctype html>``.
    _HTML_DOCUMENT_RE = re.compile(
        r"<!DOCTYPE html>.*?</html>", re.DOTALL | re.IGNORECASE
    )
    # Substrings (lower-case) that mark a payload as an HTML document.
    _HTML_MARKERS = ("<html", "<!doctype", "<body", "<head")
    # Wrapper that ``extract_html_content`` puts around non-HTML output.
    _FALLBACK_PREFIX = "<!-- Generated Code -->\n<pre>"
    _FALLBACK_SUFFIX = "</pre>"

    def __init__(self, groq_client: Groq):
        """Store the client, fetch TURN credentials and set up HTML templates.

        Args:
            groq_client: Client handed to every ``VoiceCodingHandler``.
        """
        self.groq_client = groq_client
        # Use Cloudflare's free TURN servers, or None for local development.
        # NOTE(review): asyncio.run() raises RuntimeError when an event loop
        # is already running in this thread, which lands in the fallback
        # below. Confirm this constructor is only called from synchronous
        # code. The credentials are fetched once here; verify that their
        # lifetime suits a long-running app.
        try:
            self.rtc_configuration = asyncio.run(get_cloudflare_turn_credentials_async())
            print("✅ Using Cloudflare TURN servers")
        except Exception as e:
            print(f"⚠️ Cannot get TURN credentials, using None: {e}")
            self.rtc_configuration = None  # still works on a local network
        # HTML templates
        self.sandbox_html = """
<div style="text-align: center; padding: 20px; border: 2px dashed #ccc; border-radius: 10px;">
<h3>🎮 Sandbox Preview</h3>
<p>Code sẽ được hiển thị ở đây sau khi generate</p>
<p><small>Chức năng voice đang được phát triển. Vui lòng sử dụng text input.</small></p>
</div>
"""
        self.loading_html = """
<div style="text-align: center; padding: 20px;">
<div class="spinner"></div>
<p>🦙 Llama đang code...</p>
</div>
<style>
.spinner {
border: 4px solid #f3f3f3;
border-top: 4px solid #3498db;
border-radius: 50%;
width: 40px;
height: 40px;
animation: spin 2s linear infinite;
margin: 0 auto;
}
@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}
</style>
"""

    def extract_html_content(self, text):
        """Return the first complete HTML document found in ``text``.

        Args:
            text: Raw model output.

        Returns:
            A ``<!-- No code generated -->`` placeholder when ``text`` is
            empty; the ``<!DOCTYPE html>…</html>`` span when one is present
            (matched case-insensitively); otherwise ``text`` wrapped in a
            ``<!-- Generated Code -->`` comment plus ``<pre>`` element.
        """
        if not text:
            return "<!-- No code generated -->"
        match = self._HTML_DOCUMENT_RE.search(text)
        if match:
            return match.group(0)
        return f"{self._FALLBACK_PREFIX}{text}{self._FALLBACK_SUFFIX}"

    def create_stream(self):
        """Build the FastRTC audio stream backed by a ``VoiceCodingHandler``."""
        return Stream(
            VoiceCodingHandler(self.groq_client),
            modality="audio",
            mode="send-receive",
            rtc_configuration=self.rtc_configuration,
            concurrency_limit=3,
            time_limit=120,
        )

    def display_in_sandbox(self, code):
        """Render ``code`` as an HTML snippet for the sandbox preview.

        Args:
            code: Generated code or text; may be empty or ``None``.

        Returns:
            * ``sandbox_html`` when there is nothing to show (empty input, or
              non-HTML text containing the ``No code`` placeholder phrase);
            * an ``<iframe>`` with a base64 ``data:`` URI when ``code`` looks
              like an HTML document;
            * otherwise the text, HTML-escaped, inside a ``<pre>`` box;
            * a red error box if rendering raises.
        """
        if not code:
            return self.sandbox_html
        try:
            lowered = code.lower()
            # Check for a real document first. The "No code" placeholder test
            # below is a plain substring match and would otherwise hide any
            # generated page whose content merely contains that phrase.
            if any(marker in lowered for marker in self._HTML_MARKERS):
                encoded_html = base64.b64encode(code.encode("utf-8")).decode("utf-8")
                data_uri = f"data:text/html;charset=utf-8;base64,{encoded_html}"
                return f'<iframe src="{data_uri}" width="100%" height="600px" style="border: 1px solid #ccc; border-radius: 5px;"></iframe>'
            if "No code" in code:
                return self.sandbox_html
            text = code
            # Peel off the wrapper added by extract_html_content so that only
            # the raw text gets escaped and the wrapper markup is not shown.
            wrapper_len = len(self._FALLBACK_PREFIX) + len(self._FALLBACK_SUFFIX)
            if (
                len(text) >= wrapper_len
                and text.startswith(self._FALLBACK_PREFIX)
                and text.endswith(self._FALLBACK_SUFFIX)
            ):
                text = text[len(self._FALLBACK_PREFIX):-len(self._FALLBACK_SUFFIX)]
            # Not HTML: show it as text. Escape it so stray markup in the
            # model output is displayed literally instead of being injected
            # into the host page.
            return f'<div style="padding: 20px; background: #f5f5f5; border-radius: 5px;"><h4>Generated Content:</h4><pre style="white-space: pre-wrap;">{html.escape(text)}</pre></div>'
        except Exception as e:
            print(f"❌ Lỗi display sandbox: {e}")
            return f'<div style="color: red; padding: 20px;">Lỗi hiển thị sandbox: {html.escape(str(e))}</div>'