Source: examples/unsloth
Unsloth#
This example shows how to launch Unsloth jobs with SkyPilot.
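The unsloth.yaml task below is launched from the command line (see the Usage comment inside the file). SkyPilot also exposes a Python API for the same workflow; the following is a rough sketch only, since the exact return values and blocking behavior of sky.launch vary across SkyPilot versions:

import sky

# Build the task from the YAML file in this directory and launch it on a
# cluster named "myclus", mirroring `sky launch -c myclus unsloth.yaml`.
task = sky.Task.from_yaml("unsloth.yaml")
sky.launch(task, cluster_name="myclus")

# Tear the cluster down when finished, mirroring `sky down myclus`.
sky.down("myclus")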
Included files#
unsloth.yaml
# Runs the Unsloth example app via SkyPilot.
#
# The example app downloads the LAION OIG dataset, loads the Mistral
# model in 4-bit precision, patches the model with fast LoRA weights,
# and finally initializes and trains it with the SFTTrainer using the
# specified hyperparameters and the LAION dataset.
#
# Usage:
#   sky launch -c myclus unsloth.yaml
#   sky down myclus

resources:
  accelerators: T4:1
  disk_size: 128

file_mounts:
  # Creates a new bucket my-unsloth-checkpoints and mounts it at /outputs.
  /outputs:
    name: my-unsloth-checkpoints  # Ensure this name is unique.

workdir: .

setup: |
  set -ex

  pip install --upgrade pip

  cuda_version=$(nvcc --version | grep "release" | awk '{print $6}' | cut -c 2-)
  if [[ "$cuda_version" == "12.1"* ]]; then
    pip install --upgrade --force-reinstall --no-cache-dir torch==2.2.0 triton \
      --index-url https://download.pytorch.org/whl/cu121
    pip install ipython
    pip install "unsloth[cu121-torch220] @ git+https://github.com/unslothai/unsloth.git"
  else
    pip install --upgrade --force-reinstall --no-cache-dir torch==2.2.0 triton \
      --index-url https://download.pytorch.org/whl/cu118
    pip install ipython
    pip install "unsloth[cu118-torch220] @ git+https://github.com/unslothai/unsloth.git"
  fi

run: |
  python unsloth_example.py --output-dir /outputs
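Because /outputs is backed by the my-unsloth-checkpoints bucket, checkpoints written by the trainer outlive the cluster. If a run is interrupted, a relaunched job could pick up from the latest checkpoint instead of starting over. A minimal sketch, assuming the same trainer object constructed in unsloth_example.py below and relying on the standard Hugging Face resume_from_checkpoint behavior:

import os

# Resume from the newest checkpoint in /outputs if one exists; otherwise
# start training from scratch. `trainer` is the SFTTrainer built in
# unsloth_example.py.
has_checkpoint = os.path.isdir("/outputs") and any(
    name.startswith("checkpoint-") for name in os.listdir("/outputs")
)
trainer.train(resume_from_checkpoint=has_checkpoint)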
unsloth_example.py
# Use the Unsloth library to fine-tune a Mistral model.
import argparse

from datasets import load_dataset
import torch
from transformers import TrainingArguments
from trl import SFTTrainer
from unsloth import FastLanguageModel

max_seq_length = 2048

# [1] Get the LAION dataset.
url = "https://huggingface.co/datasets/laion/OIG/resolve/main/unified_chip2.jsonl"
dataset = load_dataset("json", data_files={"train": url}, split="train")

# Pre-quantized 4-bit models available from Unsloth (listed for reference).
fourbit_models = [
    "unsloth/mistral-7b-bnb-4bit",
    "unsloth/llama-2-7b-bnb-4bit",
    "unsloth/llama-2-13b-bnb-4bit",
    "unsloth/codellama-34b-bnb-4bit",
    "unsloth/tinyllama-bnb-4bit",
]

# [2] Load the Mistral model.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/mistral-7b-bnb-4bit",
    max_seq_length=max_seq_length,
    dtype=None,
    load_in_4bit=True,
)

# [3] Do model patching and add fast LoRA weights.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing=True,
    random_state=3407,
    max_seq_length=max_seq_length,
)

# [4] Parse the output directory for checkpoints.
parser = argparse.ArgumentParser()
parser.add_argument("--output-dir", type=str, default="/outputs")
args = parser.parse_args()

# [5] Initialize and train the model using the SFTTrainer.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        warmup_steps=10,
        max_steps=60,
        fp16=not torch.cuda.is_bf16_supported(),
        bf16=torch.cuda.is_bf16_supported(),
        logging_steps=1,
        output_dir=args.output_dir,
        optim="adamw_8bit",
        seed=3407,
        save_steps=10,
    ),
)
trainer.train()
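After training, the LoRA adapter checkpoints live under /outputs and therefore persist in the my-unsloth-checkpoints bucket. A minimal sketch of loading one back for inference, assuming Unsloth's from_pretrained can load a saved adapter directory as in its own saving/loading examples; the path /outputs/checkpoint-60 is hypothetical, so substitute the latest checkpoint-* directory actually saved:

from unsloth import FastLanguageModel

# Load the saved LoRA adapter on top of its 4-bit base model.
# "/outputs/checkpoint-60" is a hypothetical path; use the latest checkpoint dir.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="/outputs/checkpoint-60",
    max_seq_length=2048,
    dtype=None,
    load_in_4bit=True,
)
FastLanguageModel.for_inference(model)  # enable Unsloth's faster inference mode

inputs = tokenizer(["Explain what LoRA fine-tuning does."], return_tensors="pt").to("cuda")
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True)[0])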