Source: examples/unsloth
Unsloth#
This example shows how to launch Unsloth jobs with SkyPilot.
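The unsloth.yaml task below is launched from the command line (see the Usage comment inside the file). SkyPilot also exposes a Python API for the same workflow; the following is a rough sketch only, since the exact return values and blocking behavior of sky.launch vary across SkyPilot versions:

import sky

# Build the task from the YAML file in this directory and launch it on a
# cluster named "myclus", mirroring `sky launch -c myclus unsloth.yaml`.
task = sky.Task.from_yaml("unsloth.yaml")
sky.launch(task, cluster_name="myclus")

# Tear the cluster down when finished, mirroring `sky down myclus`.
sky.down("myclus")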
Included files#
unsloth.yaml
# Runs the Unsloth example app via SkyPilot.
#
# The example app downloads the LAION OIG dataset, loads the Mistral
# model in 4-bit precision, patches the model with fast LoRA weights,
# and finally initializes and trains it with the SFTTrainer using the
# specified hyperparameters and the LAION dataset.
#
# Usage:
#   sky launch -c myclus unsloth.yaml
#   sky down myclus

resources:
  accelerators: T4:1
  disk_size: 128

file_mounts:
  # Creates a new bucket my-unsloth-checkpoints and mounts it at /outputs.
  /outputs:
    name: my-unsloth-checkpoints  # Ensure this name is unique.

workdir: .

setup: |
  set -ex

  pip install --upgrade pip

  cuda_version=$(nvcc --version | grep "release" | awk '{print $6}' | cut -c 2-)
  if [[ "$cuda_version" == "12.1"* ]]; then
    pip install --upgrade --force-reinstall --no-cache-dir torch==2.2.0 triton \
      --index-url https://download.pytorch.org/whl/cu121
    pip install ipython
    pip install "unsloth[cu121-torch220] @ git+https://github.com/unslothai/unsloth.git"
  else
    pip install --upgrade --force-reinstall --no-cache-dir torch==2.2.0 triton \
      --index-url https://download.pytorch.org/whl/cu118
    pip install ipython
    pip install "unsloth[cu118-torch220] @ git+https://github.com/unslothai/unsloth.git"
  fi

run: |
  python unsloth_example.py --output-dir /outputs
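Because /outputs is backed by the my-unsloth-checkpoints bucket, checkpoints written by the trainer outlive the cluster. If a run is interrupted, a relaunched job could pick up from the latest checkpoint instead of starting over. A minimal sketch, assuming the same trainer object constructed in unsloth_example.py below and relying on the standard Hugging Face resume_from_checkpoint behavior:

import os

# Resume from the newest checkpoint in /outputs if one exists; otherwise
# start training from scratch. `trainer` is the SFTTrainer built in
# unsloth_example.py.
has_checkpoint = os.path.isdir("/outputs") and any(
    name.startswith("checkpoint-") for name in os.listdir("/outputs")
)
trainer.train(resume_from_checkpoint=has_checkpoint)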
unsloth_example.py
# Use the Unsloth library to fine-tune a Mistral model.
import argparse

from datasets import load_dataset
import torch
from transformers import TrainingArguments
from trl import SFTTrainer
from unsloth import FastLanguageModel

max_seq_length = 2048

# [1] Get the LAION dataset.
url = "https://huggingface.co/datasets/laion/OIG/resolve/main/unified_chip2.jsonl"
dataset = load_dataset("json", data_files={"train": url}, split="train")

# Pre-quantized 4-bit models available from Unsloth (listed for reference).
fourbit_models = [
    "unsloth/mistral-7b-bnb-4bit",
    "unsloth/llama-2-7b-bnb-4bit",
    "unsloth/llama-2-13b-bnb-4bit",
    "unsloth/codellama-34b-bnb-4bit",
    "unsloth/tinyllama-bnb-4bit",
]

# [2] Load the Mistral model.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/mistral-7b-bnb-4bit",
    max_seq_length=max_seq_length,
    dtype=None,
    load_in_4bit=True,
)

# [3] Do model patching and add fast LoRA weights.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing=True,
    random_state=3407,
    max_seq_length=max_seq_length,
)

# [4] Parse the output directory for checkpoints.
parser = argparse.ArgumentParser()
parser.add_argument("--output-dir", type=str, default="/outputs")
args = parser.parse_args()

# [5] Initialize and train the model using the SFTTrainer.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        warmup_steps=10,
        max_steps=60,
        fp16=not torch.cuda.is_bf16_supported(),
        bf16=torch.cuda.is_bf16_supported(),
        logging_steps=1,
        output_dir=args.output_dir,
        optim="adamw_8bit",
        seed=3407,
        save_steps=10,
    ),
)
trainer.train()
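After training, the LoRA adapter checkpoints live under /outputs and therefore persist in the my-unsloth-checkpoints bucket. A minimal sketch of loading one back for inference, assuming Unsloth's from_pretrained can load a saved adapter directory as in its own saving/loading examples; the path /outputs/checkpoint-60 is hypothetical, so substitute the latest checkpoint-* directory actually saved:

from unsloth import FastLanguageModel

# Load the saved LoRA adapter on top of its 4-bit base model.
# "/outputs/checkpoint-60" is a hypothetical path; use the latest checkpoint dir.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="/outputs/checkpoint-60",
    max_seq_length=2048,
    dtype=None,
    load_in_4bit=True,
)
FastLanguageModel.for_inference(model)  # enable Unsloth's faster inference mode

inputs = tokenizer(["Explain what LoRA fine-tuning does."], return_tensors="pt").to("cuda")
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True)[0])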