File size: 967 Bytes
0ea7b2a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
#!/usr/bin/env python3
"""Smoke test: load a CosyVoice SFT model and synthesize one sentence to WAV.

Run from a directory that contains ``third_party/Matcha-TTS`` and
``pretrained_models/CosyVoice-300M-SFT``; both paths are relative to the
current working directory. The process exit status is 0 on success and 1 on
any failure.
"""
import sys

# Relative to the current working directory, not to this file.
# NOTE(review): presumably needed so cosyvoice can import Matcha-TTS -- confirm.
sys.path.append('third_party/Matcha-TTS')


def main():
    """Run the smoke test and return a process exit status.

    Loads the model, prints at most the first five speakers it reports,
    runs ``inference_sft`` once with a fixed sentence and speaker id, and
    saves each yielded item's ``tts_speech`` as ``test_output_<i>.wav`` in
    the current working directory.

    Returns:
        int: 0 if every step completed, 1 if any step raised an exception.
    """
    try:
        # Imported inside the try so that a missing or broken installation is
        # reported (and turned into a non-zero status) like any other failure.
        from cosyvoice.cli.cosyvoice import CosyVoice
        # Not used below; kept so this module's import is still exercised.
        from cosyvoice.utils.file_utils import load_wav  # noqa: F401
        import torchaudio

        print("Loading CosyVoice model...")
        model = CosyVoice(
            'pretrained_models/CosyVoice-300M-SFT',
            load_jit=False,
            load_trt=False,
            fp16=False,
        )

        print("Available speakers:")
        speakers = model.list_available_spks()
        for spk in speakers[:5]:
            print(f"  - {spk}")

        print("\nGenerating speech...")
        # Generate a simple test: one file per item yielded by inference_sft.
        results = model.inference_sft('你好,我是通义生成式语音大模型', '中文女', stream=False)
        for i, result in enumerate(results):
            torchaudio.save(f'test_output_{i}.wav', result['tts_speech'], model.sample_rate)
            print(f"Saved test_output_{i}.wav")

        print("Test successful!")

    except Exception as e:
        # Top-level boundary: report everything, then signal failure to the
        # caller instead of exiting with status 0.
        print(f"Error: {e}")
        import traceback
        traceback.print_exc()
        return 1

    return 0


if __name__ == "__main__":
    sys.exit(main())