---
# Default configuration for voice model RL training.
#
# Every setting is a top-level key, grouped by the section comments below.
# `split_ratios` and `reward_weights` are nested mappings.

# Model settings
model_name: "facebook/wav2vec2-base"
device: "cpu"  # or "cuda" if GPU available
checkpoint: null  # null = no checkpoint path set

# Data settings
data_path: "data/raw"
# Fractions for each data split; the three values sum to 1.0.
split_ratios:
  train: 0.7
  val: 0.15
  test: 0.15

# RL algorithm settings
algorithm: "ppo"  # or "reinforce"
learning_rate: 0.0003
gamma: 0.99

# Reward function settings
# Per-component weights; the three values sum to 1.0.
reward_weights:
  clarity: 0.33
  naturalness: 0.33
  accuracy: 0.34

# Training settings
num_episodes: 100
batch_size: 32
episode_length: 10

# Checkpointing
# NOTE(review): interval is presumably counted in episodes -- confirm
# against the training loop.
checkpoint_interval: 10
checkpoint_dir: "checkpoints"
max_checkpoints: 5

# Logging and monitoring
log_interval: 5
log_dir: "logs"

# Reproducibility
random_seed: 42