Spaces:
Configuration error
Configuration error
#!/usr/bin/env python3
"""Smoke test for CosyVoice zero-shot text-to-speech.

The script:

1. lists the first few files of every model directory under
   ``pretrained_models``;
2. checks that ``pretrained_models/CosyVoice-300M`` contains the files listed
   in ``REQUIRED_FILES`` and, if any are absent, extracts every ``.zip``
   archive found in that directory;
3. loads the model, synthesizes one sentence with ``inference_zero_shot`` and
   saves each generated segment as ``test_output_<i>.wav``.

All paths are relative to the current working directory. The process exit
status is 0 on success and 1 when any step raised an exception.
"""
import os
import sys
import traceback
import zipfile

# Matcha-TTS is vendored under third_party; it must be on sys.path before the
# cosyvoice package is imported.
sys.path.append('third_party/Matcha-TTS')

MODELS_DIR = 'pretrained_models'
MODEL_PATH = 'pretrained_models/CosyVoice-300M'
REQUIRED_FILES = ['flow.pt', 'speech_tokenizer_v1.onnx', 'campplus.onnx']
PROMPT_WAV = 'asset/zero_shot_prompt.wav'
MAX_FILES_SHOWN = 10


def _list_models(models_dir):
    """Print each sub-directory of *models_dir* with its first few entries."""
    print("Available models:")
    for model in os.listdir(models_dir):
        model_path = os.path.join(models_dir, model)
        if os.path.isdir(model_path):
            files = os.listdir(model_path)
            print(f"\n{model}:")
            for name in files[:MAX_FILES_SHOWN]:
                print(f" - {name}")


def _missing_files(model_path, required_files):
    """Return the names in *required_files* that do not exist in *model_path*."""
    return [
        name for name in required_files
        if not os.path.exists(os.path.join(model_path, name))
    ]


def _extract_zip_archives(model_path):
    """Extract every ``.zip`` archive in *model_path* into *model_path*."""
    for name in os.listdir(model_path):
        if name.endswith('.zip'):
            print(f"Found zip file: {name}")
            zip_path = os.path.join(model_path, name)
            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                zip_ref.extractall(model_path)
            print(f"Extracted: {name}")


def _run_tts():
    """Load CosyVoice-300M and synthesize the test sentence to WAV files."""
    # Imported lazily so that a missing dependency is reported by main()
    # together with the activation hint instead of aborting at import time.
    from cosyvoice.cli.cosyvoice import CosyVoice
    from cosyvoice.utils.file_utils import load_wav
    import torchaudio

    print("\nInitializing CosyVoice-300M model...")

    missing = _missing_files(MODEL_PATH, REQUIRED_FILES)
    if missing:
        print(f"Missing files in {MODEL_PATH}: {missing}")
        print("\nTrying to extract zip files if present...")
        _extract_zip_archives(MODEL_PATH)

    # Loading is attempted even if files were reported missing; the model
    # constructor raises if something it needs is still absent.
    cosyvoice = CosyVoice(MODEL_PATH, load_jit=False, load_trt=False, fp16=False)

    prompt_speech_16k = load_wav(PROMPT_WAV, 16000)

    text = "Olá, este é um teste do CosyVoice. Sistema de síntese de voz funcionando!"
    prompt_text = "Teste de voz."
    print(f"\nGenerating speech for: '{text}'")

    segments = cosyvoice.inference_zero_shot(
        text, prompt_text, prompt_speech_16k, stream=False
    )
    for index, segment in enumerate(segments):
        output_file = f'test_output_{index}.wav'
        torchaudio.save(output_file, segment['tts_speech'], cosyvoice.sample_rate)
        print(f"✓ Generated: {output_file}")

        size = os.path.getsize(output_file) / 1024
        # Dimension 1 of the saved tensor is used as the sample count.
        duration = segment['tts_speech'].shape[1] / cosyvoice.sample_rate
        print(f" Size: {size:.1f} KB, Duration: {duration:.1f}s")


def main():
    """Run the smoke test.

    Returns:
        0 if every step completed, 1 if any step raised an exception. The
        exception is printed with its traceback rather than propagated.
    """
    print("=== Simple CosyVoice TTS Test ===\n")
    try:
        _list_models(MODELS_DIR)
        _run_tts()
        print("\n✓ Test completed successfully!")
    except Exception as e:  # top-level boundary: report and signal failure
        print(f"\nError: {e}")
        traceback.print_exc()

        # Try to give more specific help for the most common failures.
        message = str(e)
        if "speech_tokenizer_v1.onnx" in message:
            print("\n⚠️ Model files may still be downloading. Please wait and try again.")
        elif "No module named" in message:
            print("\n⚠️ Missing dependency. Make sure virtual environment is activated.")
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())