ROOT_DIR=/workspace/hanrui/junquan/SpecForge
export TORCHINDUCTOR_CACHE_DIR=$ROOT_DIR/cache/compiled_kernels
export SPECFORGE_DATA_NUM_PROC=16
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
export PATH=/workspace/hanrui/specforge/bin:$PATH
export PYTHONPATH=$ROOT_DIR:$PYTHONPATH

NUM_GPUS=${1:-8}

/workspace/hanrui/specforge/bin/python3 -m torch.distributed.run \
    --standalone \
    --nproc_per_node $NUM_GPUS \
    $ROOT_DIR/scripts/train_dflash_lora.py \
    --model-path /workspace/Qwen3-8B \
    --train-data-path /workspace/hanrui/datasets/Nemotron-CodeAlpaca-qwen3-8b-800K \
    --output-dir $ROOT_DIR/outputs/qwen3-8b-dflash-lora \
    --lora-config $ROOT_DIR/configs/qwen3-8b-dflash-lora.json \
    --block-size 16 \
    --max-length 2048 \
    --batch-size 1 \
    --num-epochs 3 \
    --learning-rate 2e-4 \
    --accumulation-steps 8 \
    --loss-decay-gamma 7 \
    --attention-backend flex_attention \
    --lm-head-chunk-size 256 \
    --gradient-checkpointing \
    --chat-template qwen \
    --log-interval 50 \
    --save-interval 500 \
    --cache-dir $ROOT_DIR/cache
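The GPU count comes from the script's first positional argument and falls back to 8 when omitted (`NUM_GPUS=${1:-8}`). Assuming standard data-parallel semantics, the effective global batch size per optimizer step is batch-size x accumulation-steps x NUM_GPUS, i.e. 1 x 8 x 8 = 64 at the default GPU count. As a minimal usage sketch, assuming the script above is saved under a hypothetical name such as train_dflash_lora.sh:

    # hypothetical filename; launches the same run on 4 GPUs instead of the default 8
    bash train_dflash_lora.sh 4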