Spaces:

marcosremar2
/

cosyvoice

Configuration error

cosyvoice / simple_tts_test.py

Marcos Remar

Initial CosyVoice code without binary files

0ea7b2a 5 months ago

3.06 kB

	#!/usr/bin/env python3
	"""Smoke test for CosyVoice zero-shot text-to-speech.

	Steps, in order:

	1. Print the sub-directories of ``pretrained_models`` and up to ten files
	   from each of them.
	2. Check that the CosyVoice-300M directory contains the expected model
	   files; if some are missing, unpack every ``.zip`` archive found there.
	3. Build a ``CosyVoice`` instance, run ``inference_zero_shot`` on one
	   sentence and save each result as ``test_output_<i>.wav``.

	The process exits with status 0 on success and 1 on any failure, so the
	script can be used from a shell or a CI job.
	"""
	import os
	import sys
	import traceback
	import zipfile

	# Presumably needed so the cosyvoice imports in main() can resolve the
	# Matcha-TTS package shipped under third_party/ -- confirm against the repo.
	sys.path.append('third_party/Matcha-TTS')

	MODELS_DIR = 'pretrained_models'
	MODEL_PATH = 'pretrained_models/CosyVoice-300M'
	REQUIRED_FILES = ('flow.pt', 'speech_tokenizer_v1.onnx', 'campplus.onnx')
	PROMPT_WAV = 'asset/zero_shot_prompt.wav'
	PROMPT_SAMPLE_RATE = 16000
	MAX_FILES_SHOWN = 10


	def _list_available_models(models_dir, max_files=MAX_FILES_SHOWN):
	    """Print each model directory under *models_dir* with its first files."""
	    print("Available models:")
	    # sorted(): os.listdir() returns entries in arbitrary order, which would
	    # make both the output and the "first N files" selection unstable.
	    for model in sorted(os.listdir(models_dir)):
	        model_dir = os.path.join(models_dir, model)
	        if not os.path.isdir(model_dir):
	            continue
	        print(f"\n{model}:")
	        for name in sorted(os.listdir(model_dir))[:max_files]:
	            print(f" - {name}")


	def _find_missing_files(model_path, required_files):
	    """Return the names in *required_files* that do not exist in *model_path*."""
	    return [
	        name
	        for name in required_files
	        if not os.path.exists(os.path.join(model_path, name))
	    ]


	def _extract_zip_archives(model_path):
	    """Unpack every ``.zip`` file found directly in *model_path* into it."""
	    for name in sorted(os.listdir(model_path)):
	        if not name.endswith('.zip'):
	            continue
	        print(f"Found zip file: {name}")
	        zip_path = os.path.join(model_path, name)
	        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
	            zip_ref.extractall(model_path)
	        print(f"Extracted: {name}")


	def main():
	    """Run the smoke test.

	    Returns:
	        int: 0 when speech was generated and saved, 1 when any step raised.
	    """
	    print("=== Simple CosyVoice TTS Test ===\n")

	    try:
	        # Check what models are available
	        _list_available_models(MODELS_DIR)

	        # Imported here, inside the try, so that a missing dependency is
	        # reported through the hint logic below instead of a bare traceback.
	        from cosyvoice.cli.cosyvoice import CosyVoice
	        from cosyvoice.utils.file_utils import load_wav
	        import torchaudio

	        print("\nInitializing CosyVoice-300M model...")

	        # Check if required files exist
	        missing = _find_missing_files(MODEL_PATH, REQUIRED_FILES)
	        if missing:
	            print(f"Missing files in {MODEL_PATH}: {missing}")
	            print("\nTrying to extract zip files if present...")
	            _extract_zip_archives(MODEL_PATH)

	        # Built unconditionally: if files are still missing after the
	        # extraction attempt, the constructor's own error is reported below.
	        cosyvoice = CosyVoice(MODEL_PATH, load_jit=False, load_trt=False, fp16=False)

	        # Load prompt audio
	        prompt_speech_16k = load_wav(PROMPT_WAV, PROMPT_SAMPLE_RATE)

	        # Generate speech
	        text = "Olá, este é um teste do CosyVoice. Sistema de síntese de voz funcionando!"
	        # NOTE(review): prompt_text is presumably expected to be the transcript
	        # of the prompt audio -- confirm it matches asset/zero_shot_prompt.wav.
	        prompt_text = "Teste de voz."

	        print(f"\nGenerating speech for: '{text}'")

	        results = cosyvoice.inference_zero_shot(
	            text, prompt_text, prompt_speech_16k, stream=False
	        )
	        for i, result in enumerate(results):
	            output_file = f'test_output_{i}.wav'
	            speech = result['tts_speech']
	            torchaudio.save(output_file, speech, cosyvoice.sample_rate)
	            print(f"✓ Generated: {output_file}")

	            # Show file info
	            size = os.path.getsize(output_file) / 1024
	            # assumes speech is shaped (channels, samples) -- TODO confirm
	            duration = speech.shape[1] / cosyvoice.sample_rate
	            print(f" Size: {size:.1f} KB, Duration: {duration:.1f}s")

	        print("\n✓ Test completed successfully!")

	    except Exception as e:  # top-level boundary: report, then fail the run
	        print(f"\nError: {e}")
	        traceback.print_exc()

	        # Try to give more specific help
	        message = str(e)
	        if "speech_tokenizer_v1.onnx" in message:
	            print("\n⚠️ Model files may still be downloading. Please wait and try again.")
	        elif "No module named" in message:
	            print("\n⚠️ Missing dependency. Make sure virtual environment is activated.")
	        return 1

	    return 0


	if __name__ == "__main__":
	    sys.exit(main())