"""Smoke test: load a 4-bit quantized Llama 3.2 1B model with Unsloth.

Attempts to load the model on the current GPU and reports how much CUDA
memory the load consumed, or prints the failure reason.
"""

import torch
from unsloth import FastLanguageModel


def main() -> None:
    """Try the 4-bit model load and report success (with memory usage) or failure.

    Prints to stdout in both cases; never raises.
    """
    # Broad `except Exception` is deliberate here: this is a top-level
    # smoke-test boundary, and any load failure (CUDA OOM, missing driver,
    # network/download error) should be reported rather than crash the script.
    try:
        model, tokenizer = FastLanguageModel.from_pretrained(
            model_name="unsloth/Llama-3.2-1B-Instruct-bnb-4bit",
            max_seq_length=1024,
            load_in_4bit=True,
        )
        print("Successfully loaded model in 4-bit on this GPU.")
        # memory_allocated() returns bytes; convert to MiB for readability.
        print(f"Memory allocated: {torch.cuda.memory_allocated() / 1024**2:.1f} MB")
    except Exception as e:
        print(f"Failed to load model: {e}")


if __name__ == "__main__":
    main()