diff --git a/.github/azure-gpu-test-with-thunder.yml b/.azure/gpu-test-with-thunder.yml similarity index 91% rename from .github/azure-gpu-test-with-thunder.yml rename to .azure/gpu-test-with-thunder.yml index c7c2a2fa0e..cb0d0dfa92 100644 --- a/.github/azure-gpu-test-with-thunder.yml +++ b/.azure/gpu-test-with-thunder.yml @@ -44,13 +44,13 @@ jobs: - script: | pip install --upgrade pip - pip install '.[all,test]' + pip install '.[extra,all,test]' displayName: 'Install dependencies' - script: | pip uninstall -y torchvision torchaudio pip install --pre 'nvfuser-cu121[torch]' --extra-index-url https://pypi.nvidia.com - displayName: 'Install PyTorch nightly' + displayName: 'Install nvFuser' - bash: | set -e @@ -58,7 +58,7 @@ jobs: python -c "import torch ; mgpu = torch.cuda.device_count() ; assert mgpu == 2, f'GPU: {mgpu}'" displayName: "Env details" - - bash: pytest -v --disable-pytest-warnings --strict-markers --color=yes + - bash: pytest -v displayName: 'Ordinary tests' env: PL_RUN_CUDA_TESTS: "1" diff --git a/.github/azure-gpu-test.yml b/.azure/gpu-test.yml similarity index 100% rename from .github/azure-gpu-test.yml rename to .azure/gpu-test.yml diff --git a/.github/workflows/cpu-tests.yml b/.github/workflows/cpu-tests.yml index bf257b2945..c1b06e966e 100644 --- a/.github/workflows/cpu-tests.yml +++ b/.github/workflows/cpu-tests.yml @@ -18,17 +18,47 @@ env: HF_TOKEN: ${{ secrets.HF_TOKEN }} jobs: - cpu-tests: + testing-imports: runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: + os: [ "ubuntu-22.04", "macOS-14", "windows-2022" ] + python-version: [ "3.10" ] + timeout-minutes: 10 + + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install minimal dependencies + run: | + pip install . + pip list + + - name: Testing package imports + # make sure all modules are still importable with only the minimal dependencies available + run: | + modules=$( + find litgpt -type f -name "*.py" | \ + sed 's/\.py$//' | sed 's/\//./g' | \ + sed 's/.__init__//g' | xargs -I {} echo "import {};" + ) + echo "$modules" + python -c "$modules" + + pytester: + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: ["ubuntu-22.04"] + python-version: ["3.9", "3.10", "3.11"] include: - - {os: "macOS-14", python-version: "3.10"} - - {os: "ubuntu-22.04", python-version: "3.11"} - - {os: "ubuntu-22.04", python-version: "3.10"} - - {os: "ubuntu-22.04", python-version: "3.9"} - - {os: "windows-2022", python-version: "3.9"} + - {os: "macOS-14", python-version: "3.9"} # without Thunder + - {os: "windows-2022", python-version: "3.9"} # without Thunder timeout-minutes: 25 steps: @@ -42,25 +72,24 @@ jobs: cache-dependency-path: | pyproject.toml - - name: Install minimal dependencies - run: | - # python -m pip install --upgrade pip - pip install . 
- pip list - # make sure all modules are still importable with only the minimal dependencies available - modules=$( - find litgpt -type f -name "*.py" | \ - sed 's/\.py$//' | sed 's/\//./g' | \ - sed 's/.__init__//g' | xargs -I {} echo "import {};" - ) - echo "$modules" - python -c "$modules" - - - name: Install all dependencies + - name: Install dependencies run: | - pip install '.[all,test]' + pip install '.[extra,all,test]' pip list - name: Run tests - run: | - pytest -v --disable-pytest-warnings --strict-markers --color=yes --timeout 120 + run: pytest -v litgpt/ tests/ --timeout 120 + + testing-guardian: + runs-on: ubuntu-latest + needs: [pytester, testing-imports] + if: always() + steps: + - run: echo "${{ needs.pytester.result }}" + - name: failing... + if: needs.pytester.result == 'failure' + run: exit 1 + - name: cancelled or skipped... + if: contains(fromJSON('["cancelled", "skipped"]'), needs.pytester.result) + timeout-minutes: 1 + run: sleep 90 diff --git a/extensions/thunder/README.md b/extensions/thunder/README.md index 835dc43f91..713cbaf2e7 100644 --- a/extensions/thunder/README.md +++ b/extensions/thunder/README.md @@ -460,7 +460,7 @@ After applying the DDP transformation, the backward trace will include the expec With `L.Fabric`, this is how to use them: ```python -from extensions.thunder.strategies import ThunderFSDPStrategy, ThunderDDPStrategy +from extensions.extensions.thunder.strategies import ThunderFSDPStrategy, ThunderDDPStrategy # fully-sharded data parallel strategy = ThunderFSDPStrategy( diff --git a/extensions/thunder/__init__.py b/extensions/thunder/__init__.py new file mode 100644 index 0000000000..77568f817b --- /dev/null +++ b/extensions/thunder/__init__.py @@ -0,0 +1,6 @@ +import sys +from pathlib import Path + +# support running without installing as a package, adding extensions to the Python path +wd = Path(__file__).parent.parent.resolve() +sys.path.append(str(wd)) diff --git a/extensions/thunder/strategies/thunder_ddp.py b/extensions/thunder/strategies/thunder_ddp.py index a036a19551..d775456554 100644 --- a/extensions/thunder/strategies/thunder_ddp.py +++ b/extensions/thunder/strategies/thunder_ddp.py @@ -22,18 +22,17 @@ _sync_ddp_if_available, ) from lightning.fabric.utilities.rank_zero import rank_zero_only -from lightning_utilities.core.imports import RequirementCache from lightning_utilities.core.rank_zero import rank_zero_only as utils_rank_zero_only from torch import Tensor from torch.nn import Module from typing_extensions import override +from litgpt.utils import _THUNDER_AVAILABLE + if TYPE_CHECKING: from thunder import Executor -_THUNDER_AVAILABLE = RequirementCache("lightning-thunder", "thunder") - class ThunderDDPStrategy(ParallelStrategy): def __init__( diff --git a/extensions/thunder/strategies/thunder_fsdp.py b/extensions/thunder/strategies/thunder_fsdp.py index 323355f731..ac777fefe6 100644 --- a/extensions/thunder/strategies/thunder_fsdp.py +++ b/extensions/thunder/strategies/thunder_fsdp.py @@ -25,12 +25,12 @@ from lightning.fabric.utilities.rank_zero import rank_zero_only from lightning.fabric.utilities.seed import reset_seed from lightning.fabric.utilities.types import _PATH, _Stateful -from lightning_utilities.core.imports import RequirementCache from lightning_utilities.core.rank_zero import rank_zero_only as utils_rank_zero_only from torch import Tensor from torch.nn import Module from torch.optim import Optimizer from typing_extensions import override +from litgpt.utils import _THUNDER_AVAILABLE from 
extensions.thunder.strategies.thunder_ddp import _ThunderDataParalellBackwardSyncControl if TYPE_CHECKING: @@ -42,9 +42,6 @@ _BUCKETING_STRATEGY = Union[FSDPBucketingStrategy, Literal["NONE", "LAYER", "BLOCK"]] -_THUNDER_AVAILABLE = RequirementCache("lightning-thunder", "thunder") - - class ThunderFSDPStrategy(ParallelStrategy, _Sharded): def __init__( self, diff --git a/extensions/thunder/unsloth/__init__.py b/extensions/thunder/unsloth/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/extensions/thunder/unsloth/executor.py b/extensions/thunder/unsloth/executor.py index 1779daf8ee..876bd07b8e 100644 --- a/extensions/thunder/unsloth/executor.py +++ b/extensions/thunder/unsloth/executor.py @@ -1,11 +1,8 @@ # Copyright Lightning AI. Licensed under the Apache License 2.0, see LICENSE file. import sys +import torch from pathlib import Path from typing import Optional, Tuple - -import thunder -import thunder.torch as ltorch -import torch from thunder.core.proxies import TensorProxy from thunder.core.transforms import get_grad, mean_backward, put_grads from thunder.extend import OperatorExecutor, register_executor @@ -13,6 +10,11 @@ from torch import Tensor import litgpt.model +from litgpt.utils import _THUNDER_AVAILABLE + +if _THUNDER_AVAILABLE: + import thunder + import thunder.torch as ltorch sys.path.append(str(Path(__file__).parent)) diff --git a/extensions/thunder/unsloth/kernels/cross_entropy_loss.py b/extensions/thunder/unsloth/kernels/cross_entropy_loss.py index 17ab2fa970..a3700c1ec0 100644 --- a/extensions/thunder/unsloth/kernels/cross_entropy_loss.py +++ b/extensions/thunder/unsloth/kernels/cross_entropy_loss.py @@ -13,11 +13,14 @@ # limitations under the License. import torch -import triton -import triton.language as tl +from litgpt.utils import _TRITON_AVAILABLE from .utils import MAX_FUSED_SIZE, calculate_settings +if _TRITON_AVAILABLE: + import triton + import triton.language as tl + @triton.jit def _cross_entropy_forward( diff --git a/extensions/thunder/unsloth/kernels/rope_embedding.py b/extensions/thunder/unsloth/kernels/rope_embedding.py index fdd8fb9183..f4db865fbc 100644 --- a/extensions/thunder/unsloth/kernels/rope_embedding.py +++ b/extensions/thunder/unsloth/kernels/rope_embedding.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import triton -import triton.language as tl -import torch +from litgpt.utils import _TRITON_AVAILABLE from .utils import calculate_settings +if _TRITON_AVAILABLE: + import triton + import triton.language as tl + ROPE_GROUP_SIZE = 4 @triton.heuristics({"BACKWARD_PASS": lambda args: args["BACKWARD_PASS"],}) diff --git a/extensions/thunder/unsloth/kernels/swiglu.py b/extensions/thunder/unsloth/kernels/swiglu.py index 8d48ef29a4..7a3f4f3c9b 100644 --- a/extensions/thunder/unsloth/kernels/swiglu.py +++ b/extensions/thunder/unsloth/kernels/swiglu.py @@ -13,8 +13,12 @@ # limitations under the License. import torch -import triton -import triton.language as tl + +from litgpt.utils import _TRITON_AVAILABLE + +if _TRITON_AVAILABLE: + import triton + import triton.language as tl @triton.jit diff --git a/extensions/thunder/unsloth/kernels/utils.py b/extensions/thunder/unsloth/kernels/utils.py index 676394573e..3f94f6df7d 100644 --- a/extensions/thunder/unsloth/kernels/utils.py +++ b/extensions/thunder/unsloth/kernels/utils.py @@ -12,7 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import triton + +from litgpt.utils import _TRITON_AVAILABLE + +if _TRITON_AVAILABLE: + import triton MAX_FUSED_SIZE = 65536 # 2**16 next_power_of_2 = triton.next_power_of_2 diff --git a/extensions/xla/__init__ b/extensions/xla/__init__ new file mode 100644 index 0000000000..77568f817b --- /dev/null +++ b/extensions/xla/__init__ @@ -0,0 +1,6 @@ +import sys +from pathlib import Path + +# support running without installing as a package, adding extensions to the Python path +wd = Path(__file__).parent.parent.resolve() +sys.path.append(str(wd)) diff --git a/extensions/xla/finetune/__init__ b/extensions/xla/finetune/__init__ new file mode 100644 index 0000000000..e69de29bb2 diff --git a/extensions/xla/finetune/adapter.py b/extensions/xla/finetune/adapter.py index 41334e0501..6bcf1b58d1 100644 --- a/extensions/xla/finetune/adapter.py +++ b/extensions/xla/finetune/adapter.py @@ -22,9 +22,9 @@ wd = Path(__file__).parents[3].resolve() sys.path.append(str(wd)) -from extensions.xla.generate.base import generate -from extensions.xla.scripts.prepare_alpaca import generate_prompt -from extensions.xla.utils import rank_print, sequential_load_and_fsdp_wrap +from xla.generate.base import generate +from xla.scripts.prepare_alpaca import generate_prompt +from xla.utils import rank_print, sequential_load_and_fsdp_wrap eval_interval = 200 save_interval = 200 diff --git a/extensions/xla/generate/__init__ b/extensions/xla/generate/__init__ new file mode 100644 index 0000000000..e69de29bb2 diff --git a/extensions/xla/generate/adapter.py b/extensions/xla/generate/adapter.py index 04ddb665c7..4e1af9c5e3 100644 --- a/extensions/xla/generate/adapter.py +++ b/extensions/xla/generate/adapter.py @@ -18,8 +18,8 @@ wd = Path(__file__).parents[3].resolve() sys.path.append(str(wd)) -from extensions.xla.generate.base import generate -from extensions.xla.utils import rank_print +from xla.generate.base import generate +from xla.utils import rank_print def setup( diff --git a/extensions/xla/generate/base.py b/extensions/xla/generate/base.py index 54bdbf78a8..d696e756ea 100644 --- a/extensions/xla/generate/base.py +++ b/extensions/xla/generate/base.py @@ -19,7 +19,7 @@ wd = Path(__file__).parents[3].resolve() sys.path.append(str(wd)) -from extensions.xla.utils import rank_print +from xla.utils import rank_print # xla does not support `inference_mode`: RuntimeError: Cannot set version_counter for inference tensor diff --git a/extensions/xla/scripts/__init__ b/extensions/xla/scripts/__init__ new file mode 100644 index 0000000000..e69de29bb2 diff --git a/litgpt/utils.py b/litgpt/utils.py index eb2cca09f9..9b4976e5c9 100644 --- a/litgpt/utils.py +++ b/litgpt/utils.py @@ -12,6 +12,8 @@ import sys from dataclasses import asdict, is_dataclass from io import BytesIO + +from lightning_utilities.core.imports import package_available from packaging import version from pathlib import Path import subprocess @@ -35,6 +37,9 @@ if TYPE_CHECKING: from litgpt import GPT, Config +_THUNDER_AVAILABLE = package_available("thunder") +_TRITON_AVAILABLE = package_available("triton") + def init_out_dir(out_dir: Path) -> Path: if not isinstance(out_dir, Path): @@ -815,3 +820,17 @@ def select_sft_generate_example(eval, data): else: raise ValueError(f"Unknown evaluation example type: {eval.evaluate_example}") return instruction + + + +def _RunIf(thunder: bool = False, **kwargs): + import pytest + from lightning.fabric.utilities.testing import _runif_reasons + + reasons, marker_kwargs = _runif_reasons(**kwargs) + + if thunder and not 
package_available("thunder"): + # if we require Thunder, but it's not available, we should skip + reasons.append("Thunder") + + return pytest.mark.skipif(condition=len(reasons) > 0, reason=f"Requires: [{' + '.join(reasons)}]", **marker_kwargs) diff --git a/pyproject.toml b/pyproject.toml index 3c9da5fb2e..1a3e4717db 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,16 +9,18 @@ readme = "README.md" license = { file = "LICENSE" } dependencies = [ - "torch>=2.5.0,<2.6.0", - "numpy<2.0", - "lightning>=2.5.0,<2.6.0", - "jsonargparse[signatures]>=4.30.1,<=4.32.1; python_version<='3.9'", # 4.33 does not seem to be compatible with Python 3.9 - "jsonargparse[signatures]>=4.37.0; python_version>'3.9'", # required to work with python3.12+ - "huggingface_hub>=0.23.5", # download models - "safetensors>=0.4.3", # download models - "tokenizers>=0.15.2", # tokenization in most models - "tqdm>=4.66.0", # convert_hf_checkpoint - "lightning-thunder>=0.2.0.dev20250119 ; python_version >= '3.10' and sys_platform == 'linux'", + "torch >=2.5.0,<2.6.0", + "numpy <2.0", # for older Torch versions + "lightning >=2.5.0,<2.6.0", + "jsonargparse[signatures] >=4.30.1,<=4.32.1; python_version<='3.9'", # 4.33 does not seem to be compatible with Python 3.9 + "jsonargparse[signatures] >=4.37.0; python_version>'3.9'", # required to work with python3.12+ + # download models: + "huggingface_hub >=0.23.5", + "safetensors >=0.4.3", # download models + # tokenization in most models: + "tokenizers >=0.15.2", + # convert_hf_checkpoint + "tqdm >=4.66.0", ] [project.urls] @@ -29,38 +31,52 @@ documentation = "https://github.com/lightning-AI/litgpt/tutorials" litgpt = "litgpt.__main__:main" [project.optional-dependencies] +extra = [ + # compilaton: + "lightning-thunder >=0.2.0.dev20250119 ; python_version >= '3.10' and sys_platform == 'linux'" +] test = [ - "pytest>=8.1.1", - "pytest-rerunfailures>=14.0", - "pytest-timeout>=2.3.1", - "pytest-dependency>=0.6.0", - "transformers==4.47.1", # numerical comparisons - "einops>=0.7.0", - "protobuf>=4.23.4", + "pytest >=8.1.1", + "pytest-rerunfailures >=14.0", + "pytest-timeout >=2.3.1", + "pytest-dependency >=0.6.0", + "transformers ==4.47.1", # numerical comparisons + "einops >=0.7.0", + "protobuf >=4.23.4", ] all = [ - "bitsandbytes >=0.44.0,<0.44.2; sys_platform == 'linux' or sys_platform == 'win32'", # quantization - "bitsandbytes >=0.42.0,<0.43.0 ; sys_platform == 'darwin'", # quantization - "sentencepiece>=0.2.0", # llama-based models - "requests>=2.31.0", # litgpt.data - "litdata==0.2.17", # litgpt.data - "litserve<=0.2.4", # litgpt.deploy - "zstandard>=0.22.0", # litgpt.data.prepare_slimpajama.py - "pandas>=1.9.0", # litgpt.data.prepare_starcoder.py - "pyarrow>=15.0.2", # litgpt.data.prepare_starcoder.py - "tensorboard>=2.14.0", # litgpt.pretrain - "torchmetrics>=1.3.1", # litgpt.pretrain - "datasets>=2.18.0", # litgpt.evaluate - "transformers==4.47.1", # litgpt.evaluate - "lm-eval>=0.4.2", # litgpt.evaluate - "huggingface_hub[hf_transfer]>=0.21.0", # download - "uvloop>=0.2.0 ; sys_platform != 'win32'" # litdata, only on non-Windows + # quantization: + "bitsandbytes >=0.44.0,<0.44.2; sys_platform == 'linux' or sys_platform == 'win32'", + "bitsandbytes >=0.42.0,<0.43.0 ; sys_platform == 'darwin'", + # llama-based models: + "sentencepiece >=0.2.0", + # litgpt.data: + "requests >=2.31.0", + "litdata ==0.2.17", + # litgpt.deploy: + "litserve <=0.2.4", + # litgpt.data.prepare_slimpajama.py: + "zstandard >=0.22.0", + # litgpt.data.prepare_starcoder.py: + "pandas >=1.9.0", + "pyarrow 
>=15.0.2", + # litgpt.pretrain: + "tensorboard >=2.14.0", + "torchmetrics >=1.3.1", + # litgpt.evaluate: + "datasets >=2.18.0", + "transformers ==4.47.1", + "lm-eval >=0.4.2", + # download: + "huggingface_hub[hf_transfer] >=0.21.0", + # litdata, only on non-Windows: + "uvloop >=0.2.0 ; sys_platform != 'win32'" ] [build-system] requires = [ - "setuptools>=68.2.2", - "wheel>=0.41.2", + "setuptools >=68.2.2", + "wheel >=0.41.2", ] build-backend = "setuptools.build_meta" @@ -76,3 +92,11 @@ litgpt = [ "LICENSE", "README.md", ] + +[tool.pytest.ini_options] +addopts = [ + "--strict-markers", + #"--doctest-modules", + "--color=yes", + "--disable-pytest-warnings", +] diff --git a/tests/__init__.py b/tests/__init__.py deleted file mode 100644 index 2f22d66b14..0000000000 --- a/tests/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright Lightning AI. Licensed under the Apache License 2.0, see LICENSE file. - -import warnings - -import pytest - -warnings.filterwarnings("ignore", category=pytest.PytestWarning, message=r".*\(rm_rf\) error removing.*") diff --git a/tests/conftest.py b/tests/conftest.py index 8867442e9a..ae470af291 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,14 +1,21 @@ # Copyright Lightning AI. Licensed under the Apache License 2.0, see LICENSE file. import os +import sys import shutil from pathlib import Path from typing import List, Optional +# support running without installing as a package, adding extensions to the Python path +wd = Path(__file__).parent.parent.resolve() +if wd.is_dir(): + sys.path.append(str(wd)) +else: + import warnings + warnings.warn(f"Could not find extensions directory at {wd}") + import pytest import torch -from lightning.fabric.utilities.testing import _runif_reasons -from lightning_utilities.core.imports import RequirementCache @pytest.fixture() @@ -86,14 +93,14 @@ def mock_tokenizer(): @pytest.fixture() def alpaca_path(tmp_path): - file = Path(__file__).parent / "data" / "fixtures" / "alpaca.json" + file = Path(__file__).parent / "data" / "_fixtures" / "alpaca.json" shutil.copyfile(file, tmp_path / "alpaca.json") return tmp_path / "alpaca.json" @pytest.fixture() def dolly_path(tmp_path): - file = Path(__file__).parent / "data" / "fixtures" / "dolly.json" + file = Path(__file__).parent / "data" / "_fixtures" / "dolly.json" shutil.copyfile(file, tmp_path / "dolly.json") return tmp_path / "dolly.json" @@ -103,24 +110,11 @@ def longform_path(tmp_path): path = tmp_path / "longform" path.mkdir() for split in ("train", "val"): - file = Path(__file__).parent / "data" / "fixtures" / f"longform_{split}.json" + file = Path(__file__).parent / "data" / "_fixtures" / f"longform_{split}.json" shutil.copyfile(file, path / f"{split}.json") return path -def RunIf(thunder: Optional[bool] = None, **kwargs): - reasons, marker_kwargs = _runif_reasons(**kwargs) - - if thunder is not None: - thunder_available = bool(RequirementCache("lightning-thunder", "thunder")) - if thunder and not thunder_available: - reasons.append("Thunder") - elif not thunder and thunder_available: - reasons.append("not Thunder") - - return pytest.mark.skipif(condition=len(reasons) > 0, reason=f"Requires: [{' + '.join(reasons)}]", **marker_kwargs) - - # https://github.com/Lightning-AI/lightning/blob/6e517bd55b50166138ce6ab915abd4547702994b/tests/tests_fabric/conftest.py#L140 def pytest_collection_modifyitems(items: List[pytest.Function], config: pytest.Config) -> None: initial_size = len(items) @@ -148,7 +142,7 @@ def pytest_collection_modifyitems(items: List[pytest.Function], config: 
pytest.C marker.name == "skipif" and marker.kwargs.get(kwarg) for marker in test.own_markers ) if not has_runif_with_kwarg: - # the test has `@RunIf(kwarg=True)`, filter it out + # the test has `@_RunIf(kwarg=True)`, filter it out items.pop(i) filtered += 1 diff --git a/tests/convert/__init__.py b/tests/convert/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/test_convert_hf_checkpoint.py b/tests/convert/test_hf_checkpoint.py similarity index 100% rename from tests/test_convert_hf_checkpoint.py rename to tests/convert/test_hf_checkpoint.py diff --git a/tests/test_convert_lit_checkpoint.py b/tests/convert/test_lit_checkpoint.py similarity index 99% rename from tests/test_convert_lit_checkpoint.py rename to tests/convert/test_lit_checkpoint.py index 9e0cd93c35..f7c271955d 100644 --- a/tests/test_convert_lit_checkpoint.py +++ b/tests/convert/test_lit_checkpoint.py @@ -33,7 +33,7 @@ copy_weights_qwen_2_5, qkv_reassemble, ) -from tests.conftest import RunIf +from litgpt.utils import _RunIf @pytest.mark.parametrize("model_name", ("pythia-14m", "falcon-7b", "Llama-2-7b-hf", "phi-2")) @@ -392,7 +392,7 @@ def test_against_original_stablelm_zephyr_3b(): # the reference does softmax upscaled to fp32 during attention. additionally, the final layernorm input # is slightly different pytest.mark.xfail(raises=AssertionError, strict=False), - RunIf(min_cuda_gpus=1), + _RunIf(min_cuda_gpus=1), ], ), ], @@ -452,7 +452,7 @@ def test_against_original_gemma(model_name, device, dtype): # the reference does softmax upscaled to fp32 during attention. additionally, the final layernorm input # is slightly different pytest.mark.xfail(raises=AssertionError, strict=False), - RunIf(min_cuda_gpus=1), + _RunIf(min_cuda_gpus=1), ], ), ], @@ -540,7 +540,7 @@ def test_check_conversion_supported_lora(): # the reference does softmax upscaled to fp32 during attention. 
additionally, the final layernorm input # is slightly different pytest.mark.xfail(raises=AssertionError, strict=False), - RunIf(min_cuda_gpus=1), + _RunIf(min_cuda_gpus=1), ], ), ], diff --git a/tests/test_convert_pretrained_checkpoint.py b/tests/convert/test_pretrained_checkpoint.py similarity index 100% rename from tests/test_convert_pretrained_checkpoint.py rename to tests/convert/test_pretrained_checkpoint.py diff --git a/tests/data/fixtures/alpaca.json b/tests/data/_fixtures/alpaca.json similarity index 100% rename from tests/data/fixtures/alpaca.json rename to tests/data/_fixtures/alpaca.json diff --git a/tests/data/fixtures/dolly.json b/tests/data/_fixtures/dolly.json similarity index 100% rename from tests/data/fixtures/dolly.json rename to tests/data/_fixtures/dolly.json diff --git a/tests/data/fixtures/longform_train.json b/tests/data/_fixtures/longform_train.json similarity index 100% rename from tests/data/fixtures/longform_train.json rename to tests/data/_fixtures/longform_train.json diff --git a/tests/data/fixtures/longform_val.json b/tests/data/_fixtures/longform_val.json similarity index 100% rename from tests/data/fixtures/longform_val.json rename to tests/data/_fixtures/longform_val.json diff --git a/tests/ext_thunder/__init__.py b/tests/ext_thunder/__init__.py new file mode 100644 index 0000000000..ac655de35c --- /dev/null +++ b/tests/ext_thunder/__init__.py @@ -0,0 +1,10 @@ +import sys +from pathlib import Path + +# support running without installing as a package, adding extensions to the Python path +wd = Path(__file__).parent.parent.parent.resolve() +if wd.is_dir(): + sys.path.append(str(wd)) +else: + import warnings + warnings.warn(f"Could not find extensions directory at {wd}") diff --git a/tests/test_thunder_ddp.py b/tests/ext_thunder/test_thunder_ddp.py similarity index 86% rename from tests/test_thunder_ddp.py rename to tests/ext_thunder/test_thunder_ddp.py index fe54f252d5..7146d076a5 100644 --- a/tests/test_thunder_ddp.py +++ b/tests/ext_thunder/test_thunder_ddp.py @@ -3,24 +3,23 @@ import pytest import torch -from tests.conftest import RunIf +from litgpt.utils import _RunIf from lightning import Fabric -# support running without installing as a package -wd = Path(__file__).parent.parent.resolve() -sys.path.append(str(wd)) +from litgpt.utils import _THUNDER_AVAILABLE -from extensions.thunder.strategies.thunder_ddp import ThunderDDPStrategy -from extensions.thunder.strategies.thunder_fsdp import ThunderFSDPStrategy +if _THUNDER_AVAILABLE: + from extensions.thunder.strategies.thunder_ddp import ThunderDDPStrategy + from extensions.thunder.strategies.thunder_fsdp import ThunderFSDPStrategy -@RunIf(thunder=True) +@_RunIf(thunder=True) def test_thunder_strategy_input_parsing(): with pytest.raises(ValueError, match="doesn't have an effect with `jit=False"): ThunderDDPStrategy(jit=False, executors=("python",)) -@RunIf(min_cuda_gpus=2, thunder=True, standalone=True) +@_RunIf(min_cuda_gpus=2, thunder=True, standalone=True) @pytest.mark.parametrize("choice", ["ddp", "thunder_ddp", "fsdp", "thunder_fsdp"]) def test_no_backward_sync(choice): if choice == "thunder_ddp": @@ -68,7 +67,7 @@ def test_no_backward_sync(choice): assert model.weight.grad is None -@RunIf(min_cuda_gpus=2, thunder=True, standalone=True) +@_RunIf(min_cuda_gpus=2, thunder=True, standalone=True) @pytest.mark.parametrize("jit", (False, True)) def test_jit_before_setup(jit): import thunder @@ -86,7 +85,7 @@ def test_jit_before_setup(jit): assert "all_reduce" in 
thunder.last_backward_traces(tmodel)[-1].python() -@RunIf(min_cuda_gpus=1, thunder=True) +@_RunIf(min_cuda_gpus=1, thunder=True) def test_setup_already_traced(): import thunder diff --git a/tests/test_thunder_fsdp.py b/tests/ext_thunder/test_thunder_fsdp.py similarity index 95% rename from tests/test_thunder_fsdp.py rename to tests/ext_thunder/test_thunder_fsdp.py index 84de117574..a62fa582df 100644 --- a/tests/test_thunder_fsdp.py +++ b/tests/ext_thunder/test_thunder_fsdp.py @@ -5,22 +5,26 @@ import pytest import torch -from tests.conftest import RunIf + +from litgpt.utils import _THUNDER_AVAILABLE +from litgpt.utils import _RunIf from lightning.fabric import Fabric from lightning.fabric.utilities.imports import _TORCH_GREATER_EQUAL_2_3 +if _THUNDER_AVAILABLE: + from extensions.thunder.strategies.thunder_fsdp import ThunderFSDPStrategy + # support running without installing as a package wd = Path(__file__).parent.parent.resolve() sys.path.append(str(wd)) -from extensions.thunder.strategies.thunder_fsdp import ThunderFSDPStrategy - -@RunIf(thunder=True) +@_RunIf(thunder=True) def test_thunder_strategy_input_parsing(): from thunder.distributed import FSDPBucketingStrategy, FSDPType strategy = ThunderFSDPStrategy(bucketing_strategy="BlOcK", executors=("python",), sharding_strategy="zero3") + assert strategy.bucketing_strategy is FSDPBucketingStrategy.BLOCK assert strategy.sharding_strategy is FSDPType.ZERO3 @@ -28,7 +32,7 @@ def test_thunder_strategy_input_parsing(): ThunderFSDPStrategy(jit=False, executors=("python",)) -@RunIf(thunder=True) +@_RunIf(thunder=True) def test_save_checkpoint_invalid_settings_raise(tmp_path): strategy = ThunderFSDPStrategy(state_dict_type="full") with pytest.raises(TypeError, match="not supported"): @@ -87,7 +91,7 @@ def reset_parameters(self): self.buf = torch.empty_like(self.buf) -@RunIf(min_cuda_gpus=2, thunder=True, standalone=True) +@_RunIf(min_cuda_gpus=2, thunder=True, standalone=True) def test_materialize_meta_tensors(): strategy = ThunderFSDPStrategy() fabric = Fabric(accelerator="cuda", devices=2, strategy=strategy) @@ -125,7 +129,7 @@ def __eq__(self, other): ) -@RunIf(min_cuda_gpus=2, thunder=True, standalone=True) +@_RunIf(min_cuda_gpus=2, thunder=True, standalone=True) def test_save_load_full_checkpoint(tmp_path): strategy = ThunderFSDPStrategy(state_dict_type="full", broadcast_from=0) fabric = Fabric(accelerator="cuda", devices=2, strategy=strategy) @@ -176,7 +180,7 @@ def test_save_load_full_checkpoint(tmp_path): assert state["primitive"] == 123 -@RunIf(min_cuda_gpus=2, thunder=True, standalone=True) +@_RunIf(min_cuda_gpus=2, thunder=True, standalone=True) def test_load_full_checkpoint_only_model(tmp_path): strategy = ThunderFSDPStrategy() fabric = Fabric(accelerator="cuda", devices=2, strategy=strategy) @@ -245,7 +249,7 @@ def set_up_planner(self, state_dict, metadata, is_coordinator): return state_dict -@RunIf(min_cuda_gpus=2, thunder=True, standalone=True) +@_RunIf(min_cuda_gpus=2, thunder=True, standalone=True) def test_save_load_sharded_checkpoint(tmp_path): strategy = ThunderFSDPStrategy(state_dict_type="sharded", broadcast_from=0) fabric = Fabric(accelerator="cuda", devices=2, strategy=strategy) @@ -298,7 +302,7 @@ def test_save_load_sharded_checkpoint(tmp_path): assert state["primitive"] == 123 -@RunIf(min_cuda_gpus=2, thunder=True, standalone=True) +@_RunIf(min_cuda_gpus=2, thunder=True, standalone=True) @pytest.mark.parametrize("jit", (False, True)) def test_jit_before_setup(jit): import thunder @@ -316,7 +320,7 @@ def 
test_jit_before_setup(jit): assert "all_gather" in thunder.last_traces(tmodel)[-1].python() -@RunIf(min_cuda_gpus=1, thunder=True) +@_RunIf(min_cuda_gpus=1, thunder=True) def test_setup_already_traced(): import thunder diff --git a/tests/test_thunder_pretrain.py b/tests/ext_thunder/test_thunder_pretrain.py similarity index 77% rename from tests/test_thunder_pretrain.py rename to tests/ext_thunder/test_thunder_pretrain.py index e941ad7949..42d95d423d 100644 --- a/tests/test_thunder_pretrain.py +++ b/tests/ext_thunder/test_thunder_pretrain.py @@ -1,37 +1,32 @@ import os -import sys from contextlib import redirect_stdout from io import StringIO -from pathlib import Path from unittest.mock import Mock import torch -from tests.conftest import RunIf from torch.utils.data import DataLoader from litgpt import Config from litgpt.args import EvalArgs, TrainArgs +from litgpt.utils import _THUNDER_AVAILABLE, _RunIf -# support running without installing as a package -wd = Path(__file__).parent.parent.resolve() -sys.path.append(str(wd)) +if _THUNDER_AVAILABLE: + import extensions.thunder.pretrain as thunder_pretrain -import extensions.thunder.pretrain as pretrain - -@RunIf(min_cuda_gpus=1, thunder=True) +@_RunIf(min_cuda_gpus=1, thunder=True) def test_pretrain(tmp_path, monkeypatch): model_config = Config(block_size=2, n_layer=2, n_embd=8, n_head=4, padded_vocab_size=8) dataset = torch.tensor([[0, 1, 2], [3, 4, 5], [0, 1, 2]]) dataloader = DataLoader(dataset) - monkeypatch.setattr(pretrain, "get_dataloaders", Mock(return_value=(dataloader, dataloader))) - monkeypatch.setattr(pretrain, "save_hyperparameters", Mock()) + monkeypatch.setattr(thunder_pretrain, "get_dataloaders", Mock(return_value=(dataloader, dataloader))) + monkeypatch.setattr(thunder_pretrain, "save_hyperparameters", Mock()) out_dir = tmp_path / "out" stdout = StringIO() with redirect_stdout(stdout): - pretrain.setup( + thunder_pretrain.setup( devices=1, model_config=model_config, out_dir=out_dir, diff --git a/tests/test_thunder_unsloth_executor.py b/tests/ext_thunder/test_unsloth_executor.py similarity index 96% rename from tests/test_thunder_unsloth_executor.py rename to tests/ext_thunder/test_unsloth_executor.py index c5a30082c5..113fa7b120 100644 --- a/tests/test_thunder_unsloth_executor.py +++ b/tests/ext_thunder/test_unsloth_executor.py @@ -4,10 +4,10 @@ from litgpt import GPT, Config from litgpt.model import apply_rope, build_rope_cache from litgpt.utils import chunked_cross_entropy -from tests.conftest import RunIf +from litgpt.utils import _RunIf -@RunIf(min_cuda_gpus=1, thunder=True) +@_RunIf(min_cuda_gpus=1, thunder=True) @pytest.mark.parametrize("reduction", ["none", "mean"]) def test_unsloth_cross_entropy(reduction): import thunder @@ -46,7 +46,7 @@ def foo(logits, labels): @pytest.mark.skip(reason='out of date') -@RunIf(min_cuda_gpus=1, thunder=True) +@_RunIf(min_cuda_gpus=1, thunder=True) def test_unsloth_rope(): import thunder from thunder.core.transforms import grad @@ -83,7 +83,7 @@ def foo(x, cos, sin): torch.testing.assert_close(actual, expected) -@RunIf(min_cuda_gpus=1, thunder=True) +@_RunIf(min_cuda_gpus=1, thunder=True) def test_unsloth_swiglu(): import thunder from thunder.core.transforms import grad @@ -120,7 +120,7 @@ def test_unsloth_swiglu(): torch.testing.assert_close(actual, expected) -@RunIf(min_cuda_gpus=1, thunder=True) +@_RunIf(min_cuda_gpus=1, thunder=True) def test_unsloth_gpt(): import thunder from thunder.core.transforms import grad diff --git a/tests/generate/__init__.py b/tests/generate/__init__.py 
new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/test_generate_adapter.py b/tests/generate/test_adapter.py similarity index 100% rename from tests/test_generate_adapter.py rename to tests/generate/test_adapter.py diff --git a/tests/test_generate.py b/tests/generate/test_main.py similarity index 100% rename from tests/test_generate.py rename to tests/generate/test_main.py diff --git a/tests/test_generate_sequentially.py b/tests/generate/test_sequentially.py similarity index 94% rename from tests/test_generate_sequentially.py rename to tests/generate/test_sequentially.py index 2d7603eb60..1b5c7eda9e 100644 --- a/tests/test_generate_sequentially.py +++ b/tests/generate/test_sequentially.py @@ -4,7 +4,6 @@ import math import subprocess import sys -from collections import defaultdict from dataclasses import asdict from pathlib import Path from re import escape @@ -18,7 +17,8 @@ from litgpt.generate.sequentially import layer_to_device, replace_device, sequential from litgpt.model import GPT, Block from litgpt.scripts.download import download_from_hub -from tests.conftest import RunIf +from litgpt.utils import _RunIf +from .utils import find_forward_hooks @pytest.mark.parametrize( @@ -152,7 +152,7 @@ def _test_model_1device(accelerator): assert model.max_seq_length == 15 -@RunIf(min_cuda_gpus=1) +@_RunIf(min_cuda_gpus=1) def test_model_1device_cuda(): _test_model_1device("cuda") @@ -161,19 +161,7 @@ def test_model_1device_cpu(): _test_model_1device("cpu") -def find_forward_hooks(module): - mapping = defaultdict(list) - for name, submodule in module.named_modules(): - for hook in submodule._forward_pre_hooks.values(): - hook_data = ("forward_pre_hook", hook.func.__name__, hook.args, hook.keywords) - mapping[name].append(hook_data) - for hook in submodule._forward_hooks.values(): - hook_data = ("forward_hook", hook.func.__name__, hook.args, hook.keywords) - mapping[name].append(hook_data) - return dict(mapping) - - -@RunIf(min_cuda_gpus=2) +@_RunIf(min_cuda_gpus=2) def test_model_forward_hooks(): fabric = Fabric(accelerator="cuda", devices=1) with torch.device("meta"): @@ -287,7 +275,7 @@ def test_model_forward_hooks(): root = Path(__file__).parent.parent.resolve() -@RunIf(min_cuda_gpus=2) +@_RunIf(min_cuda_gpus=2) def test_base_with_sequentially(tmp_path): # download the tokenizer download_from_hub(repo_id="EleutherAI/pythia-14m", tokenizer_only=True, checkpoint_dir=tmp_path) diff --git a/tests/test_generate_tp.py b/tests/generate/test_tp.py similarity index 97% rename from tests/test_generate_tp.py rename to tests/generate/test_tp.py index b10b891535..381e7e5841 100644 --- a/tests/test_generate_tp.py +++ b/tests/generate/test_tp.py @@ -11,8 +11,8 @@ from litgpt import GPT, Config from litgpt.generate.tp import tensor_parallel, tensor_parallel_linear from litgpt.scripts.download import download_from_hub -from tests.conftest import RunIf -from tests.test_generate_sequentially import find_forward_hooks +from litgpt.utils import _RunIf +from .utils import find_forward_hooks def test_tensor_parallel_linear(): @@ -105,7 +105,7 @@ def test_tensor_parallel_llama(name, expected): root = Path(__file__).parent.parent.resolve() -@RunIf(min_cuda_gpus=2) +@_RunIf(min_cuda_gpus=2) def test_tp(tmp_path): # download the tokenizer download_from_hub(repo_id="EleutherAI/pythia-14m", tokenizer_only=True, checkpoint_dir=tmp_path) diff --git a/tests/generate/utils.py b/tests/generate/utils.py new file mode 100644 index 0000000000..41ab86e990 --- /dev/null +++ b/tests/generate/utils.py @@ -0,0 +1,13 
@@ +from collections import defaultdict + + +def find_forward_hooks(module): + mapping = defaultdict(list) + for name, submodule in module.named_modules(): + for hook in submodule._forward_pre_hooks.values(): + hook_data = ("forward_pre_hook", hook.func.__name__, hook.args, hook.keywords) + mapping[name].append(hook_data) + for hook in submodule._forward_hooks.values(): + hook_data = ("forward_hook", hook.func.__name__, hook.args, hook.keywords) + mapping[name].append(hook_data) + return dict(mapping) \ No newline at end of file diff --git a/tests/run_standalone_tests.sh b/tests/run_standalone_tests.sh index a6c8a3f4f9..c4002a14d9 100644 --- a/tests/run_standalone_tests.sh +++ b/tests/run_standalone_tests.sh @@ -11,7 +11,7 @@ export PL_RUN_STANDALONE_TESTS=1 defaults="-m pytest --no-header -v --disable-pytest-warnings --strict-markers --color=yes -s --timeout 120" echo "Using defaults: ${defaults}" -# find tests marked as `@RunIf(standalone=True)`. done manually instead of with pytest because it is faster +# find tests marked as `@_RunIf(standalone=True)`. done manually instead of with pytest because it is faster grep_output=$(grep --recursive --word-regexp . --regexp 'standalone=True' --include '*.py' --exclude 'test_thunder*.py') # file paths, remove duplicates diff --git a/tests/test_adapter.py b/tests/test_adapter.py index 9deb7be1f7..e80d658b4b 100644 --- a/tests/test_adapter.py +++ b/tests/test_adapter.py @@ -25,7 +25,7 @@ from litgpt.data import Alpaca from litgpt.scripts.convert_hf_checkpoint import copy_weights_gemma_2, copy_weights_hf_llama from litgpt.scripts.convert_lit_checkpoint import qkv_reassemble as make_qkv_interleaved -from tests.conftest import RunIf +from litgpt.utils import _RunIf def test_config_identical(): @@ -118,7 +118,7 @@ def test_adapter_gpt_init_weights(): assert (param == 0).all() -@RunIf(dynamo=True) +@_RunIf(dynamo=True) @torch.inference_mode() def test_adapter_compile(): model = GPT.from_name("pythia-14m", n_layer=3) @@ -138,7 +138,7 @@ def test_adapter_compile(): assert explanation.graph_break_count == 0 -@RunIf(min_cuda_gpus=1) +@_RunIf(min_cuda_gpus=1) def test_adapter_bitsandbytes(monkeypatch, tmp_path, fake_checkpoint_dir, alpaca_path): if not _BITSANDBYTES_AVAILABLE: pytest.skip("BNB not available") @@ -301,7 +301,7 @@ def test_against_hf_gemma(model_name): # the reference does softmax upscaled to fp32 during attention. 
additionally, the final layernorm input # is slightly different pytest.mark.xfail(raises=AssertionError, strict=False), - RunIf(min_cuda_gpus=1), + _RunIf(min_cuda_gpus=1), ], ), ], diff --git a/tests/test_adapter_v2.py b/tests/test_adapter_v2.py index ca00a5d641..5e68879c87 100644 --- a/tests/test_adapter_v2.py +++ b/tests/test_adapter_v2.py @@ -26,7 +26,7 @@ from litgpt.model import GPT as BaseGPT from litgpt.scripts.convert_hf_checkpoint import copy_weights_gemma_2, copy_weights_hf_llama from litgpt.scripts.convert_lit_checkpoint import qkv_reassemble as make_qkv_interleaved -from tests.conftest import RunIf +from litgpt.utils import _RunIf def test_config_identical(): @@ -147,7 +147,7 @@ def test_base_model_can_be_adapter_v2_loaded(name): assert adapter_filter(k, None) -@RunIf(dynamo=True) +@_RunIf(dynamo=True) @torch.inference_mode() def test_adapter_v2_compile(): model = AdapterV2GPT.from_name("pythia-14m", n_layer=3) @@ -314,7 +314,7 @@ def test_against_original_gemma_2(model_name): torch.testing.assert_close(ours_y, theirs_y, rtol=3e-5, atol=3e-5) # some macOS devices have numerical differences, hence the tol bump -@RunIf(min_cuda_gpus=1) +@_RunIf(min_cuda_gpus=1) def test_adapter_v2_bitsandbytes(monkeypatch, tmp_path, fake_checkpoint_dir, alpaca_path): if not _BITSANDBYTES_AVAILABLE: pytest.skip("BNB not available") diff --git a/tests/test_api.py b/tests/test_api.py index cf1443dd31..7143a4e586 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -9,7 +9,7 @@ import re import torch from unittest.mock import MagicMock, patch -from tests.conftest import RunIf +from litgpt.utils import _RunIf from lightning.fabric.accelerators import CUDAAccelerator from litgpt.api import ( @@ -166,7 +166,7 @@ def test_model_not_initialized(tmp_path): llm.generate("text") -@RunIf(min_cuda_gpus=2) +@_RunIf(min_cuda_gpus=2) def test_more_than_1_device_for_sequential_gpu(tmp_path): device_count = CUDAAccelerator.auto_device_count() @@ -196,7 +196,7 @@ def test_more_than_1_device_for_sequential_gpu(tmp_path): assert str(llm.model.transformer.h[last_layer_idx].mlp.fc.weight.device) == f"cuda:{device_count-1}" -@RunIf(min_cuda_gpus=2) +@_RunIf(min_cuda_gpus=2) def test_more_than_1_device_for_tensor_parallel_gpu(tmp_path): with patch("torch.backends.mps.is_available", return_value=USE_MPS): llm = LLM.load( @@ -209,7 +209,7 @@ def test_more_than_1_device_for_tensor_parallel_gpu(tmp_path): assert isinstance(llm.generate("What do llamas eat?"), str) -@RunIf(min_cuda_gpus=1) +@_RunIf(min_cuda_gpus=1) def test_sequential_tp_incompatibility_with_random_weights(tmp_path): with patch("torch.backends.mps.is_available", return_value=USE_MPS): @@ -255,7 +255,7 @@ def test_initialization_for_trainer(tmp_path): assert isinstance(llm.generate("hello world"), str) -@RunIf(min_cuda_gpus=1) +@_RunIf(min_cuda_gpus=1) def test_quantization_is_applied(tmp_path): with patch("torch.backends.mps.is_available", return_value=USE_MPS): llm = LLM.load( @@ -266,7 +266,7 @@ def test_quantization_is_applied(tmp_path): assert "NF4Linear" in strtype, strtype -@RunIf(min_cuda_gpus=1) +@_RunIf(min_cuda_gpus=1) def test_fixed_kv_cache(tmp_path): with patch("torch.backends.mps.is_available", return_value=USE_MPS): llm = LLM.load( diff --git a/tests/test_batch.py b/tests/test_batch.py index 1c220ac34c..540523a1b4 100644 --- a/tests/test_batch.py +++ b/tests/test_batch.py @@ -12,7 +12,7 @@ ) from litgpt.api import LLM, GPT from litgpt.scripts.download import download_from_hub -from tests.conftest import RunIf +from litgpt.utils import 
_RunIf warnings.filterwarnings("ignore") @@ -97,7 +97,7 @@ def test_batched_equivalence(tmp_path): assert all(t == tok_2 for t in toks_2), f"{tok_2} != {toks_2}" -@RunIf(min_cuda_gpus=1) +@_RunIf(min_cuda_gpus=1) def test_simple_batch(): old_allow_tf32 = torch.backends.cuda.matmul.allow_tf32 torch.backends.cuda.matmul.allow_tf32 = False @@ -138,7 +138,7 @@ def test_simple_batch(): torch.backends.cuda.matmul.allow_tf32 = old_allow_tf32 -@RunIf(min_cuda_gpus=1) +@_RunIf(min_cuda_gpus=1) def test_batch_generate(tmp_path): torch.use_deterministic_algorithms(True) @@ -263,7 +263,7 @@ def find_unique_stop(triplets): # print() -@RunIf(min_cuda_gpus=1) +@_RunIf(min_cuda_gpus=1) def test_batch_generate_equivalence(tmp_path): torch.use_deterministic_algorithms(True) diff --git a/tests/test_ci.py b/tests/test_ci.py index e1db31aeaf..13584f822d 100644 --- a/tests/test_ci.py +++ b/tests/test_ci.py @@ -1,9 +1,9 @@ # Copyright Lightning AI. Licensed under the Apache License 2.0, see LICENSE file. -from tests.conftest import RunIf +from litgpt.utils import _RunIf from lightning.fabric.plugins.precision.bitsandbytes import _BITSANDBYTES_AVAILABLE -@RunIf(min_cuda_gpus=1) +@_RunIf(min_cuda_gpus=1) def test_gpu_ci_installs_bitsandbytes(): assert _BITSANDBYTES_AVAILABLE, str(_BITSANDBYTES_AVAILABLE) diff --git a/tests/test_lora.py b/tests/test_lora.py index c417d588a4..ceed1ddd96 100644 --- a/tests/test_lora.py +++ b/tests/test_lora.py @@ -37,7 +37,7 @@ from litgpt.model import GPT as BaseGPT from litgpt.scripts.convert_hf_checkpoint import copy_weights_gemma_2, copy_weights_hf_llama from litgpt.scripts.convert_lit_checkpoint import qkv_reassemble as make_qkv_interleaved -from tests.conftest import RunIf +from litgpt.utils import _RunIf def test_lora_layer_replacement(): @@ -393,7 +393,7 @@ def test_lora_qkv_linear_weights_merged_status(rank, enable_lora, expected_merge assert layer.merged == expected_merged -@RunIf(min_cuda_gpus=1) +@_RunIf(min_cuda_gpus=1) def test_lora_merge_with_bitsandbytes(): if not _BITSANDBYTES_AVAILABLE: pytest.skip("BNB not available") @@ -495,7 +495,7 @@ def test_base_model_can_be_lora_loaded(name): assert lora_filter(k, None) -@RunIf(dynamo=True) +@_RunIf(dynamo=True) @torch.inference_mode() def test_lora_compile(): model = LoRAGPT.from_name( @@ -687,7 +687,7 @@ def test_against_original_gemma_2(model_name): torch.testing.assert_close(ours_y, theirs_y, rtol=3e-5, atol=3e-5) -@RunIf(min_cuda_gpus=1) +@_RunIf(min_cuda_gpus=1) def test_lora_bitsandbytes(monkeypatch, tmp_path, fake_checkpoint_dir, alpaca_path): if not _BITSANDBYTES_AVAILABLE: pytest.skip("BNB not available") @@ -809,7 +809,7 @@ def test_lora_bitsandbytes(monkeypatch, tmp_path, fake_checkpoint_dir, alpaca_pa assert "of non-trainable parameters: 1,888" in logs -@RunIf(standalone=True, min_cuda_gpus=2) +@_RunIf(standalone=True, min_cuda_gpus=2) def test_lora_model_fsdp_init(): config = Config( n_layer=1, diff --git a/tests/test_model.py b/tests/test_model.py index 4e5189968d..81e76dfaab 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -42,7 +42,7 @@ copy_weights_qwen_2_5, ) from litgpt.scripts.convert_lit_checkpoint import qkv_reassemble as make_qkv_interleaved -from tests.conftest import RunIf +from litgpt.utils import _RunIf @torch.inference_mode() @@ -61,7 +61,7 @@ # the reference does softmax upscaled to fp32 during attention. 
additionally, the final layernorm input # is slightly different pytest.mark.xfail(raises=AssertionError, strict=False), - RunIf(min_cuda_gpus=1), + _RunIf(min_cuda_gpus=1), ], ), ], @@ -130,7 +130,7 @@ def test_against_gpt_neox_model(rotary_pct, batch_size, n_embd, parallel_residua # the reference does softmax upscaled to fp32 during attention. additionally, the final layernorm input # is slightly different pytest.mark.xfail(raises=AssertionError, strict=False), - RunIf(min_cuda_gpus=1), + _RunIf(min_cuda_gpus=1), ], ), ], @@ -176,7 +176,7 @@ def test_against_hf_falcon(kwargs, device, dtype): # the reference does softmax upscaled to fp32 during attention. additionally, the final layernorm input # is slightly different pytest.mark.xfail(raises=AssertionError, strict=False), - RunIf(min_cuda_gpus=1), + _RunIf(min_cuda_gpus=1), ], ), ], @@ -240,7 +240,7 @@ def test_against_original_open_llama_3b(device, dtype): # the reference does softmax upscaled to fp32 during attention. additionally, the final layernorm input # is slightly different pytest.mark.xfail(raises=AssertionError, strict=False), - RunIf(min_cuda_gpus=1), + _RunIf(min_cuda_gpus=1), ], ), ], @@ -290,7 +290,7 @@ def test_against_hf_llama_2_and_3(ours_kwargs, device, dtype): pytest.param( torch.device("cuda"), torch.float16, - marks=[pytest.mark.xfail(raises=AssertionError, strict=False), RunIf(min_cuda_gpus=1)], + marks=[pytest.mark.xfail(raises=AssertionError, strict=False), _RunIf(min_cuda_gpus=1)], ), ], ) @@ -339,7 +339,7 @@ def test_against_hf_phi(model_name, device, dtype): pytest.param( torch.device("cuda"), torch.float16, - marks=[pytest.mark.xfail(raises=AssertionError, strict=False), RunIf(min_cuda_gpus=1)], + marks=[pytest.mark.xfail(raises=AssertionError, strict=False), _RunIf(min_cuda_gpus=1)], ), ], ) @@ -402,7 +402,7 @@ def test_against_hf_phi_3(model_name, device, dtype): # the reference does softmax upscaled to fp32 during attention. additionally, the final layernorm input # is slightly different pytest.mark.xfail(raises=AssertionError, strict=False), - RunIf(min_cuda_gpus=1), + _RunIf(min_cuda_gpus=1), ], ), ], @@ -468,7 +468,7 @@ def test_against_mistral_hf_models(device, dtype, model_name): # the reference does softmax upscaled to fp32 during attention. additionally, the final layernorm input # is slightly different pytest.mark.xfail(raises=AssertionError, strict=False), - RunIf(min_cuda_gpus=1), + _RunIf(min_cuda_gpus=1), ], ), ], @@ -574,7 +574,7 @@ def test_against_hf_mixtral(model_name): # the reference does softmax upscaled to fp32 during attention. additionally, the final layernorm input # is slightly different pytest.mark.xfail(raises=AssertionError, strict=False), - RunIf(min_cuda_gpus=1), + _RunIf(min_cuda_gpus=1), ], ), ], @@ -632,7 +632,7 @@ def test_against_olmo(model_name, device, dtype): # the reference does softmax upscaled to fp32 during attention. additionally, the final layernorm input # is slightly different pytest.mark.xfail(raises=AssertionError, strict=False), - RunIf(min_cuda_gpus=1), + _RunIf(min_cuda_gpus=1), ], ), ], @@ -683,7 +683,7 @@ def test_against_original_stablelm_zephyr_3b(device, dtype): # the reference does softmax upscaled to fp32 during attention. additionally, the final layernorm input # is slightly different pytest.mark.xfail(raises=AssertionError, strict=False), - RunIf(min_cuda_gpus=1), + _RunIf(min_cuda_gpus=1), ], ), ], @@ -740,7 +740,7 @@ def test_against_original_gemma(model_name, device, dtype): # the reference does softmax upscaled to fp32 during attention. 
additionally, the final layernorm input # is slightly different pytest.mark.xfail(raises=AssertionError, strict=False), - RunIf(min_cuda_gpus=1), + _RunIf(min_cuda_gpus=1), ], ), ], @@ -810,7 +810,7 @@ def test_against_original_gemma_2(model_name, device, dtype): # the reference does softmax upscaled to fp32 during attention. additionally, the final layernorm input # is slightly different pytest.mark.xfail(raises=AssertionError, strict=False), - RunIf(min_cuda_gpus=1), + _RunIf(min_cuda_gpus=1), ], ), ], @@ -872,7 +872,7 @@ def test_against_original_qwen_2_5(model_name, device, dtype): # the reference does softmax upscaled to fp32 during attention. additionally, the final layernorm input # is slightly different pytest.mark.xfail(raises=AssertionError, strict=False), - RunIf(min_cuda_gpus=1), + _RunIf(min_cuda_gpus=1), ], ), ], @@ -932,7 +932,7 @@ def test_against_original_salamandra(model_name, device, dtype): # the reference does softmax upscaled to fp32 during attention. additionally, the final layernorm input # is slightly different pytest.mark.xfail(raises=AssertionError, strict=False), - RunIf(min_cuda_gpus=1), + _RunIf(min_cuda_gpus=1), ], ), ], @@ -991,7 +991,7 @@ def test_against_original_smollm2(model_name, device, dtype): # the reference does softmax upscaled to fp32 during attention. additionally, the final layernorm input # is slightly different pytest.mark.xfail(raises=AssertionError, strict=False), - RunIf(min_cuda_gpus=1), + _RunIf(min_cuda_gpus=1), ], ), ], @@ -1038,7 +1038,7 @@ def test_against_hf_falcon3(model_name, device, dtype): torch.testing.assert_close(ours_y, theirs_y) -@RunIf(dynamo=True) +@_RunIf(dynamo=True) @torch.inference_mode() def test_model_compile(): model = GPT.from_name("pythia-14m", n_layer=3) @@ -1110,7 +1110,7 @@ def test_model_kv_cache_amp(): ) -@RunIf(min_cuda_gpus=1) +@_RunIf(min_cuda_gpus=1) @pytest.mark.parametrize("config", deepcopy(config_module.configs), ids=[c["name"] for c in config_module.configs]) @torch.inference_mode() def test_sdpa_choice(config): @@ -1162,7 +1162,7 @@ def assert_sdpa_backend(original_fn, q, k, v, mask): model(x) -@RunIf(min_cuda_gpus=1) +@_RunIf(min_cuda_gpus=1) @pytest.mark.parametrize("config", deepcopy(config_module.configs), ids=[c["name"] for c in config_module.configs]) @torch.inference_mode() def test_sdpa_choice_kv_cache(config): @@ -1216,7 +1216,7 @@ def assert_sdpa_backend(original_fn, q, k, v, mask): model(x, input_pos) -@RunIf(min_cuda_gpus=2, standalone=True) +@_RunIf(min_cuda_gpus=2, standalone=True) def test_rope_init_under_fsdp(): """Check that the rope cache is properly initialized""" fabric = Fabric(devices=2, strategy="fsdp", accelerator="cuda") @@ -1235,7 +1235,7 @@ def test_rope_init_under_fsdp(): torch.testing.assert_close(model.sin, sin) -@RunIf(min_cuda_gpus=1) +@_RunIf(min_cuda_gpus=1) def test_reset_parameters_device(): with torch.device("meta"): model = GPT.from_name("pythia-14m", n_layer=1) diff --git a/tests/test_pretrain.py b/tests/test_pretrain.py index 3b28894793..ef07bee702 100644 --- a/tests/test_pretrain.py +++ b/tests/test_pretrain.py @@ -15,10 +15,10 @@ from litgpt.args import EvalArgs, TrainArgs from litgpt.config import Config from litgpt.pretrain import initialize_weights -from tests.conftest import RunIf +from litgpt.utils import _RunIf -@RunIf(min_cuda_gpus=1, standalone=True) +@_RunIf(min_cuda_gpus=1, standalone=True) @mock.patch("litgpt.pretrain.save_hyperparameters") def test_optimizer_args(_, tmp_path): model_config = Config(block_size=2, n_layer=2, n_embd=4, n_head=2, 
padded_vocab_size=8) @@ -39,7 +39,7 @@ def test_optimizer_args(_, tmp_path): ) -@RunIf(min_cuda_gpus=2, standalone=True) +@_RunIf(min_cuda_gpus=2, standalone=True) # Set CUDA_VISIBLE_DEVICES for FSDP hybrid-shard, if fewer GPUs are used than are available @mock.patch.dict(os.environ, {"CUDA_VISIBLE_DEVICES": "0,1"}) # If we were to use `save_hyperparameters()`, we would have to patch `sys.argv` or otherwise @@ -86,7 +86,7 @@ def test_pretrain(_, tmp_path): torch.distributed.barrier() -@RunIf(min_cuda_gpus=2, standalone=True) +@_RunIf(min_cuda_gpus=2, standalone=True) # Set CUDA_VISIBLE_DEVICES for FSDP hybrid-shard, if fewer GPUs are used than are available @mock.patch.dict(os.environ, {"CUDA_VISIBLE_DEVICES": "0,1"}) @mock.patch("litgpt.pretrain.L.Fabric.load_raw") diff --git a/tests/test_prompts.py b/tests/test_prompts.py index 2ecf1e7d06..052bc82a8a 100644 --- a/tests/test_prompts.py +++ b/tests/test_prompts.py @@ -101,7 +101,7 @@ def test_save_load_prompt_style(tmp_path): save_prompt_style(CustomPromptStyle(), checkpoint_dir) with open(checkpoint_dir / "prompt_style.yaml", "r", encoding="utf-8") as file: contents = yaml.safe_load(file) - assert contents == {"class_path": "tests.test_prompts.CustomPromptStyle"} + assert contents == {"class_path": "test_prompts.CustomPromptStyle"} loaded = load_prompt_style(checkpoint_dir) assert isinstance(loaded, CustomPromptStyle) diff --git a/tests/test_readme.py b/tests/test_readme.py index 95b03e1474..fc810b3880 100644 --- a/tests/test_readme.py +++ b/tests/test_readme.py @@ -10,7 +10,7 @@ import pytest import requests -from tests.conftest import RunIf +from litgpt.utils import _RunIf REPO_ID = Path("EleutherAI/pythia-14m") CUSTOM_TEXTS_DIR = Path("custom_texts") @@ -72,7 +72,7 @@ def test_chat_with_model(): assert "What food do llamas eat?" 
in result.stdout -@RunIf(min_cuda_gpus=1) +@_RunIf(min_cuda_gpus=1) @pytest.mark.dependency(depends=["test_download_model"]) def test_chat_with_quantized_model(): command = ["litgpt", "generate", "checkpoints" / REPO_ID, "--quantize", "bnb.nf4", "--precision", "bf16-true"] diff --git a/tests/test_serve.py b/tests/test_serve.py index 381249fb88..8810b152c2 100644 --- a/tests/test_serve.py +++ b/tests/test_serve.py @@ -6,7 +6,7 @@ import torch import requests import subprocess -from tests.conftest import RunIf +from litgpt.utils import _RunIf import threading import time import yaml @@ -57,7 +57,7 @@ def run_server(): server_thread.join() -@RunIf(min_cuda_gpus=1) +@_RunIf(min_cuda_gpus=1) def test_quantize(tmp_path): seed_everything(123) ours_config = Config.from_name("pythia-14m") @@ -100,7 +100,7 @@ def run_server(): server_thread.join() -@RunIf(min_cuda_gpus=2) +@_RunIf(min_cuda_gpus=2) def test_multi_gpu_serve(tmp_path): seed_everything(123) ours_config = Config.from_name("pythia-14m") diff --git a/tests/test_trainer_support.py b/tests/test_trainer_support.py index 61a4208141..27b2445e70 100644 --- a/tests/test_trainer_support.py +++ b/tests/test_trainer_support.py @@ -3,7 +3,7 @@ import os from pathlib import Path import pytest -from tests.conftest import RunIf +from litgpt.utils import _RunIf import torch from litgpt.api import LLM @@ -50,7 +50,7 @@ def test_download_model(): @pytest.mark.dependency(depends=["test_download_model"]) -@RunIf(min_cuda_gpus=1) +@_RunIf(min_cuda_gpus=1) def test_usecase1_pretraining_from_random_weights(tmp_path): llm = LLM.load("EleutherAI/pythia-14m", tokenizer_dir="EleutherAI/pythia-14m", init="random") llm.save("pythia-14m-random-weights") @@ -74,7 +74,7 @@ def test_usecase1_pretraining_from_random_weights(tmp_path): @pytest.mark.dependency(depends=["test_download_model"]) -@RunIf(min_cuda_gpus=1) +@_RunIf(min_cuda_gpus=1) def test_usecase2_continued_pretraining_from_checkpoint(tmp_path): lit_model = LitLLM(checkpoint_dir="EleutherAI/pythia-14m") data = Alpaca2k() @@ -94,7 +94,7 @@ def test_usecase2_continued_pretraining_from_checkpoint(tmp_path): @pytest.mark.dependency(depends=["test_download_model", "test_usecase2_continued_pretraining_from_checkpoint"]) -@RunIf(min_cuda_gpus=1) +@_RunIf(min_cuda_gpus=1) def test_usecase3_resume_from_trainer_checkpoint(tmp_path): def find_latest_checkpoint(directory): @@ -130,7 +130,7 @@ def find_latest_checkpoint(directory): @pytest.mark.dependency(depends=["test_download_model", "test_usecase2_continued_pretraining_from_checkpoint"]) -@RunIf(min_cuda_gpus=1) +@_RunIf(min_cuda_gpus=1) def test_usecase4_manually_save_and_resume(tmp_path): lit_model = LitLLM(checkpoint_dir="EleutherAI/pythia-14m") diff --git a/tests/test_utils.py b/tests/test_utils.py index e58434e894..d58c4c30ec 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -12,7 +12,7 @@ import torch import torch.nn.functional as F import yaml -from tests.conftest import RunIf +from litgpt.utils import _RunIf from lightning import Fabric from lightning.fabric.loggers import CSVLogger, TensorBoardLogger from lightning.fabric.plugins import BitsandbytesPrecision @@ -57,7 +57,7 @@ def test_find_multiple(): # match fails on windows. why did they have to use backslashes? 
-@RunIf(skip_windows=True) +@_RunIf(skip_windows=True) def test_check_valid_checkpoint_dir(tmp_path): os.chdir(tmp_path) @@ -181,7 +181,7 @@ def test_num_parameters(): assert num_parameters(model, requires_grad=False) == 2 -@RunIf(min_cuda_gpus=1) +@_RunIf(min_cuda_gpus=1) @pytest.mark.parametrize("mode", ["nf4", "nf4-dq", "fp4", "fp4-dq", "int8", "int8-training"]) def test_num_parameters_bitsandbytes(mode): plugin = BitsandbytesPrecision(mode=mode)
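The recurring pattern in this patch is: probe optional dependencies once in `litgpt/utils.py` via `lightning_utilities`' `package_available`, guard the heavy imports behind the resulting flag, and skip the corresponding tests when the requirement is missing. A minimal, self-contained sketch of that pattern is shown below; the pure-Python fallback and the test function are illustrative assumptions, not code from this PR.

```python
# Minimal sketch of the optional-dependency guard used throughout this patch.
# Assumes lightning_utilities and pytest are installed; the fallback branch and
# the test below are illustrative, not part of the litgpt codebase.
import pytest
from lightning_utilities.core.imports import package_available

_TRITON_AVAILABLE = package_available("triton")

if _TRITON_AVAILABLE:
    import triton  # only imported when the optional dependency is present


def next_power_of_2(n: int) -> int:
    """Smallest power of two >= n, for n >= 1."""
    if _TRITON_AVAILABLE:
        # defer to Triton's helper when it is installed
        return triton.next_power_of_2(n)
    # pure-Python fallback so the module stays importable without Triton
    return 1 << (n - 1).bit_length()


@pytest.mark.skipif(not _TRITON_AVAILABLE, reason="Requires: [triton]")
def test_next_power_of_2_matches_triton():
    for n in (1, 3, 1024, 65537):
        assert next_power_of_2(n) == triton.next_power_of_2(n)
```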