Spaces:

datbkpro
/

voicebot

Running

App Files Files Community

voicebot / services /voice_coding_service.py

datbkpro

Update services/voice_coding_service.py

3dde460 verified about 1 month ago

raw

history blame contribute delete

9.44 kB

	import asyncio
	import base64
	import html
	import re

	import gradio as gr
	import numpy as np
	from fastrtc import (
	    AdditionalOutputs,
	    AsyncStreamHandler,
	    Stream,
	    get_cloudflare_turn_credentials_async,  # free Cloudflare TURN servers
	    wait_for_item,
	)
	from gradio.utils import get_space
	from groq import Groq

	class VoiceCodingHandler(AsyncStreamHandler):
	    """FastRTC stream handler that turns incoming audio into generated HTML code.

	    An accepted audio frame is (mock-)transcribed into a request, sent to the
	    Groq chat API together with the conversation history and the current code,
	    and the outcome is published on ``output_queue`` as ``AdditionalOutputs``.
	    """

	    # A complete HTML document, with or without a doctype, in any letter case.
	    _HTML_DOCUMENT_RE = re.compile(
	        r"<!DOCTYPE html>.*?</html>|<html\b.*?</html>",
	        re.DOTALL | re.IGNORECASE,
	    )

	    def __init__(self, groq_client: Groq):
	        """Create a handler that generates code through ``groq_client``."""
	        super().__init__(
	            expected_layout="mono",
	            output_sample_rate=24000,
	            input_sample_rate=16000,
	        )
	        self.groq_client = groq_client
	        self.input_queue = asyncio.Queue()
	        self.output_queue = asyncio.Queue()
	        self.is_active = False

	        # Strong references to in-flight background tasks: the event loop only
	        # keeps weak references, so an unreferenced task may be collected
	        # before it finishes.
	        self._pending_tasks = set()

	        # Prompts
	        self.system_prompt = "You are an AI coding assistant. Your task is to write single-file HTML applications based on a user's request. Only return the necessary code. Include all necessary imports and styles. You may also be asked to edit your original response. Respond in Vietnamese when appropriate."
	        self.user_prompt = "Please write a single-file HTML application to fulfill the following request.\nThe message:{user_message}\nCurrent code you have written:{code}"

	        self.current_history = [{"role": "system", "content": self.system_prompt}]
	        self.current_code = ""

	    def copy(self):
	        """Return a fresh handler (new queues and history) sharing the Groq client."""
	        return VoiceCodingHandler(self.groq_client)

	    def extract_html_content(self, text):
	        """Return the first complete HTML document found in ``text``.

	        Returns ``None`` for empty input and the full ``text`` unchanged when
	        it contains no complete HTML document.
	        """
	        if not text:
	            return None
	        match = self._HTML_DOCUMENT_RE.search(text)
	        return match.group(0) if match else text

	    async def start_up(self):
	        """Mark the handler as active so incoming frames are processed."""
	        self.is_active = True
	        print("✅ Voice Coding Handler started")

	    async def receive(self, frame: tuple[int, np.ndarray]) -> None:
	        """Accept one audio frame and start processing it in the background.

	        Frames are ignored while the handler is inactive or while a previous
	        request is still being processed; otherwise every frame would start
	        its own generation request and the requests would race on the shared
	        history and code state.
	        """
	        if not self.is_active or self._pending_tasks:
	            return

	        sample_rate, array = frame
	        array = array.squeeze()

	        # Process the audio in the background, keeping a reference to the task.
	        task = asyncio.create_task(self._process_audio(array, sample_rate))
	        self._pending_tasks.add(task)
	        task.add_done_callback(self._pending_tasks.discard)

	    async def _process_audio(self, audio_data: np.ndarray, sample_rate: int):
	        """Transcribe the audio and, if a request was recognised, generate code."""
	        try:
	            print("🎤 Processing audio for voice coding...")

	            # Transcription is mocked for now; a real ASR engine (VOSK/Whisper)
	            # is expected to be plugged in here later.
	            transcription = await self._mock_transcribe_audio()
	            if not transcription:
	                return

	            print(f"🎯 Received request: {transcription}")

	            # Publish a loading state. The history is snapshotted so later
	            # updates do not change a payload that is already queued.
	            await self.output_queue.put(AdditionalOutputs({
	                "type": "loading",
	                "message": "🦙 Llama đang code...",
	                "history": list(self.current_history),
	                "code": self.current_code,
	            }))

	            await self._generate_code(transcription)

	        except Exception as e:
	            print(f"❌ Lỗi xử lý audio: {e}")

	    async def _mock_transcribe_audio(self) -> str:
	        """Return a fixed test request in place of real speech recognition."""
	        return "Tạo trang web hello world với màu nền xanh và chữ màu trắng"

	    async def _generate_code(self, user_message: str):
	        """Ask the model for code for ``user_message`` and publish the result.

	        The conversation history and current code are updated only when the
	        request succeeds, so a failed call does not leave a user turn without
	        an assistant reply. Failures are published as an ``"error"`` payload.
	        """
	        try:
	            user_turn = {
	                "role": "user",
	                "content": self.user_prompt.format(
	                    user_message=user_message,
	                    code=self.current_code,
	                ),
	            }
	            messages = [*self.current_history, user_turn]

	            print("🦙 Generating code with Llama...")
	            # The Groq client call is synchronous; run it in a worker thread so
	            # the event loop (and the audio stream) is not blocked meanwhile.
	            response = await asyncio.to_thread(
	                self.groq_client.chat.completions.create,
	                model="llama-3.1-8b-instant",
	                messages=messages,
	                temperature=0.7,
	                max_tokens=1024,
	                top_p=0.9,
	                stream=False,
	            )

	            output = response.choices[0].message.content
	            if not output:
	                raise ValueError("model returned an empty response")
	            print("✅ Code generated successfully")

	            html_code = self.extract_html_content(output)

	            # Commit both turns together now that the exchange succeeded.
	            self.current_history.extend(
	                [user_turn, {"role": "assistant", "content": output}]
	            )
	            self.current_code = html_code

	            await self.output_queue.put(AdditionalOutputs({
	                "type": "code_generated",
	                "history": list(self.current_history),
	                "code": html_code,
	                "message": "✅ Code đã được generate!",
	            }))

	        except Exception as e:
	            print(f"❌ Lỗi generate code: {e}")
	            await self.output_queue.put(AdditionalOutputs({
	                "type": "error",
	                "message": f"❌ Lỗi: {str(e)}",
	                "history": list(self.current_history),
	                "code": self.current_code,
	            }))

	    async def emit(self):
	        """Return the next queued output, or ``None`` if fetching it failed."""
	        try:
	            return await wait_for_item(self.output_queue)
	        except Exception as e:
	            print(f"❌ Lỗi emit: {e}")
	            return None

	    async def shutdown(self):
	        """Deactivate the handler and cancel any processing still in flight."""
	        self.is_active = False
	        for task in tuple(self._pending_tasks):
	            task.cancel()
	        print("🛑 Voice Coding Handler stopped")

	class VoiceCodingService:
	    """Voice Coding service built on FastRTC.

	    Holds the Groq client and the WebRTC configuration, creates the audio
	    stream, and renders generated code as HTML for the sandbox preview.
	    """

	    # A complete HTML document, with or without a doctype, in any letter case.
	    _HTML_DOCUMENT_RE = re.compile(
	        r"<!DOCTYPE html>.*?</html>|<html\b.*?</html>",
	        re.DOTALL | re.IGNORECASE,
	    )
	    # Substrings that mark ``code`` as an HTML page to render in an iframe.
	    _HTML_MARKERS = ("<html", "<!doctype", "<body", "<head")
	    # Returned by extract_html_content() when there is nothing to show.
	    _NO_CODE_COMMENT = "<!-- No code generated -->"
	    # Prefix of the escaped fallback markup built by extract_html_content().
	    _FALLBACK_PREFIX = "<!-- Generated Code -->\n<pre>"

	    def __init__(self, groq_client: "Groq"):
	        """Store the client, fetch TURN credentials and set up HTML templates."""
	        self.groq_client = groq_client

	        # Use the free Cloudflare TURN servers; fall back to None (local
	        # network only) when credentials cannot be obtained.
	        # NOTE(review): asyncio.run() raises RuntimeError when an event loop is
	        # already running in this thread, which also ends in the None fallback.
	        try:
	            self.rtc_configuration = asyncio.run(get_cloudflare_turn_credentials_async())
	            print("✅ Using Cloudflare TURN servers")
	        except Exception as e:
	            print(f"⚠️ Cannot get TURN credentials, using None: {e}")
	            self.rtc_configuration = None

	        # HTML templates
	        self.sandbox_html = """
	<div style="text-align: center; padding: 20px; border: 2px dashed #ccc; border-radius: 10px;">
	<h3>🎮 Sandbox Preview</h3>
	<p>Code sẽ được hiển thị ở đây sau khi generate</p>
	<p><small>Chức năng voice đang được phát triển. Vui lòng sử dụng text input.</small></p>
	</div>
	"""

	        self.loading_html = """
	<div style="text-align: center; padding: 20px;">
	<div class="spinner"></div>
	<p>🦙 Llama đang code...</p>
	</div>
	<style>
	.spinner {
	border: 4px solid #f3f3f3;
	border-top: 4px solid #3498db;
	border-radius: 50%;
	width: 40px;
	height: 40px;
	animation: spin 2s linear infinite;
	margin: 0 auto;
	}
	@keyframes spin {
	0% { transform: rotate(0deg); }
	100% { transform: rotate(360deg); }
	}
	</style>
	"""

	    def extract_html_content(self, text):
	        """Return displayable HTML for a model response.

	        Returns the first complete HTML document found in ``text``. When there
	        is none, the text is HTML-escaped and wrapped in ``<pre>`` so it is
	        shown literally instead of being interpreted as markup. Empty input
	        yields a "no code" HTML comment.
	        """
	        if not text:
	            return self._NO_CODE_COMMENT
	        match = self._HTML_DOCUMENT_RE.search(text)
	        if match:
	            return match.group(0)
	        return f"{self._FALLBACK_PREFIX}{html.escape(text, quote=False)}</pre>"

	    def create_stream(self):
	        """Create the FastRTC audio stream backed by a VoiceCodingHandler."""
	        return Stream(
	            VoiceCodingHandler(self.groq_client),
	            modality="audio",
	            mode="send-receive",
	            rtc_configuration=self.rtc_configuration,
	            concurrency_limit=3,
	            time_limit=120,
	        )

	    def display_in_sandbox(self, code):
	        """Return HTML that previews ``code`` in the sandbox area.

	        HTML pages are rendered inside an iframe through a base64 data URI.
	        Fallback markup produced by ``extract_html_content`` is embedded as
	        is (its text is already escaped). Any other content is escaped and
	        shown as plain text. Empty input and "No code" placeholders yield the
	        default sandbox template; on failure an error ``<div>`` is returned.
	        """
	        if not code:
	            return self.sandbox_html

	        try:
	            lowered = code.lower()
	            if any(marker in lowered for marker in self._HTML_MARKERS):
	                encoded_html = base64.b64encode(code.encode("utf-8")).decode("utf-8")
	                data_uri = f"data:text/html;charset=utf-8;base64,{encoded_html}"
	                return f'<iframe src="{data_uri}" width="100%" height="600px" style="border: 1px solid #ccc; border-radius: 5px;"></iframe>'

	            if code.startswith(self._FALLBACK_PREFIX):
	                body = code
	            elif "No code" in code:
	                # Checked only after the branches above so that generated
	                # content which merely mentions "No code" is still displayed.
	                return self.sandbox_html
	            else:
	                # Not HTML: escape it so the text cannot inject markup.
	                body = html.escape(code, quote=False)
	            return f'<div style="padding: 20px; background: #f5f5f5; border-radius: 5px;"><h4>Generated Content:</h4><pre style="white-space: pre-wrap;">{body}</pre></div>'
	        except Exception as e:
	            print(f"❌ Lỗi display sandbox: {e}")
	            return f'<div style="color: red; padding: 20px;">Lỗi hiển thị sandbox: {html.escape(str(e))}</div>'