fix: resolve failing CI #1944

Merged · 34 commits · Feb 25, 2025
@@ -44,21 +44,21 @@ jobs:

- script: |
pip install --upgrade pip
pip install '.[all,test]'
pip install '.[extra,all,test]'
displayName: 'Install dependencies'

- script: |
pip uninstall -y torchvision torchaudio
pip install --pre 'nvfuser-cu121[torch]' --extra-index-url https://pypi.nvidia.com
displayName: 'Install PyTorch nightly'
displayName: 'Install nvFuser'

- bash: |
set -e
pip list
python -c "import torch ; mgpu = torch.cuda.device_count() ; assert mgpu == 2, f'GPU: {mgpu}'"
displayName: "Env details"

- bash: pytest -v --disable-pytest-warnings --strict-markers --color=yes
- bash: pytest -v
displayName: 'Ordinary tests'
env:
PL_RUN_CUDA_TESTS: "1"
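The "Env details" step above asserts that the CI machine exposes exactly two CUDA devices before the multi-GPU suite runs. A standalone sketch of that check, assuming only that PyTorch is installed on the runner:

```python
# Sanity-check the GPU topology before launching multi-GPU tests.
import torch

num_gpus = torch.cuda.device_count()
assert num_gpus == 2, f"expected 2 CUDA devices, found {num_gpus}"
print(f"CUDA devices available: {num_gpus}")
```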
File renamed without changes.
77 changes: 53 additions & 24 deletions .github/workflows/cpu-tests.yml
@@ -18,17 +18,47 @@ env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}

jobs:
cpu-tests:
testing-imports:
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ "ubuntu-22.04", "macOS-14", "windows-2022" ]
python-version: [ "3.10" ]
timeout-minutes: 10

steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}

- name: Install minimal dependencies
run: |
pip install .
pip list

- name: Testing package imports
# make sure all modules are still importable with only the minimal dependencies available
run: |
modules=$(
find litgpt -type f -name "*.py" | \
sed 's/\.py$//' | sed 's/\//./g' | \
sed 's/.__init__//g' | xargs -I {} echo "import {};"
)
echo "$modules"
python -c "$modules"

pytester:
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: ["ubuntu-22.04"]
python-version: ["3.9", "3.10", "3.11"]
include:
- {os: "macOS-14", python-version: "3.10"}
- {os: "ubuntu-22.04", python-version: "3.11"}
- {os: "ubuntu-22.04", python-version: "3.10"}
- {os: "ubuntu-22.04", python-version: "3.9"}
- {os: "windows-2022", python-version: "3.9"}
- {os: "macOS-14", python-version: "3.9"} # without Thunder
- {os: "windows-2022", python-version: "3.9"} # without Thunder
timeout-minutes: 25

steps:
@@ -42,25 +72,24 @@ jobs:
cache-dependency-path: |
pyproject.toml

- name: Install minimal dependencies
run: |
# python -m pip install --upgrade pip
pip install .
pip list
# make sure all modules are still importable with only the minimal dependencies available
modules=$(
find litgpt -type f -name "*.py" | \
sed 's/\.py$//' | sed 's/\//./g' | \
sed 's/.__init__//g' | xargs -I {} echo "import {};"
)
echo "$modules"
python -c "$modules"

- name: Install all dependencies
- name: Install dependencies
run: |
pip install '.[all,test]'
pip install '.[extra,all,test]'
pip list

- name: Run tests
run: |
pytest -v --disable-pytest-warnings --strict-markers --color=yes --timeout 120
run: pytest -v litgpt/ tests/ --timeout 120

testing-guardian:
runs-on: ubuntu-latest
needs: [pytester, testing-imports]
if: always()
steps:
- run: echo "${{ needs.pytester.result }}"
- name: failing...
if: needs.pytester.result == 'failure'
run: exit 1
- name: cancelled or skipped...
if: contains(fromJSON('["cancelled", "skipped"]'), needs.pytester.result)
timeout-minutes: 1
run: sleep 90
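The new `testing-imports` job builds one long `import ...;` string with `find`/`sed` and feeds it to `python -c`, so every module must be importable with only the minimal dependencies installed. Below is a rough Python equivalent of that shell pipeline, offered only as a sketch (module discovery from the `litgpt/` tree; run from the repository root):

```python
# Enumerate every module under litgpt/ and import it; any optional dependency
# that leaks into module scope surfaces here as an ImportError.
import importlib
from pathlib import Path


def iter_module_names(package_dir: str = "litgpt"):
    for path in Path(package_dir).rglob("*.py"):
        parts = list(path.with_suffix("").parts)
        if parts[-1] == "__init__":
            parts = parts[:-1]  # litgpt/__init__.py -> litgpt
        yield ".".join(parts)


if __name__ == "__main__":
    for name in sorted(set(iter_module_names())):
        importlib.import_module(name)
        print(f"imported {name}")
```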
2 changes: 1 addition & 1 deletion extensions/thunder/README.md
@@ -460,7 +460,7 @@ After applying the DDP transformation, the backward trace will include the expected
With `L.Fabric`, this is how to use them:

```python
from extensions.thunder.strategies import ThunderFSDPStrategy, ThunderDDPStrategy
from extensions.extensions.thunder.strategies import ThunderFSDPStrategy, ThunderDDPStrategy

# fully-sharded data parallel
strategy = ThunderFSDPStrategy(
6 changes: 6 additions & 0 deletions extensions/thunder/__init__.py
@@ -0,0 +1,6 @@
import sys
from pathlib import Path

# support running without installing as a package, adding extensions to the Python path
wd = Path(__file__).parent.parent.resolve()
sys.path.append(str(wd))
5 changes: 2 additions & 3 deletions extensions/thunder/strategies/thunder_ddp.py
@@ -22,18 +22,17 @@
_sync_ddp_if_available,
)
from lightning.fabric.utilities.rank_zero import rank_zero_only
from lightning_utilities.core.imports import RequirementCache
from lightning_utilities.core.rank_zero import rank_zero_only as utils_rank_zero_only
from torch import Tensor
from torch.nn import Module
from typing_extensions import override

from litgpt.utils import _THUNDER_AVAILABLE

if TYPE_CHECKING:
from thunder import Executor


_THUNDER_AVAILABLE = RequirementCache("lightning-thunder", "thunder")


class ThunderDDPStrategy(ParallelStrategy):
def __init__(
5 changes: 1 addition & 4 deletions extensions/thunder/strategies/thunder_fsdp.py
@@ -25,12 +25,12 @@
from lightning.fabric.utilities.rank_zero import rank_zero_only
from lightning.fabric.utilities.seed import reset_seed
from lightning.fabric.utilities.types import _PATH, _Stateful
from lightning_utilities.core.imports import RequirementCache
from lightning_utilities.core.rank_zero import rank_zero_only as utils_rank_zero_only
from torch import Tensor
from torch.nn import Module
from torch.optim import Optimizer
from typing_extensions import override
from litgpt.utils import _THUNDER_AVAILABLE
from extensions.thunder.strategies.thunder_ddp import _ThunderDataParalellBackwardSyncControl

if TYPE_CHECKING:
@@ -42,9 +42,6 @@
_BUCKETING_STRATEGY = Union[FSDPBucketingStrategy, Literal["NONE", "LAYER", "BLOCK"]]


_THUNDER_AVAILABLE = RequirementCache("lightning-thunder", "thunder")


class ThunderFSDPStrategy(ParallelStrategy, _Sharded):
def __init__(
self,
Empty file.
10 changes: 6 additions & 4 deletions extensions/thunder/unsloth/executor.py
@@ -1,18 +1,20 @@
# Copyright Lightning AI. Licensed under the Apache License 2.0, see LICENSE file.
import sys
import torch
from pathlib import Path
from typing import Optional, Tuple

import thunder
import thunder.torch as ltorch
import torch
from thunder.core.proxies import TensorProxy
from thunder.core.transforms import get_grad, mean_backward, put_grads
from thunder.extend import OperatorExecutor, register_executor
from thunder.torch import ne, sum, true_divide
from torch import Tensor

import litgpt.model
from litgpt.utils import _THUNDER_AVAILABLE

if _THUNDER_AVAILABLE:
import thunder
import thunder.torch as ltorch

sys.path.append(str(Path(__file__).parent))

7 changes: 5 additions & 2 deletions extensions/thunder/unsloth/kernels/cross_entropy_loss.py
@@ -13,11 +13,14 @@
# limitations under the License.

import torch
import triton
import triton.language as tl

from litgpt.utils import _TRITON_AVAILABLE
from .utils import MAX_FUSED_SIZE, calculate_settings

if _TRITON_AVAILABLE:
import triton
import triton.language as tl


@triton.jit
def _cross_entropy_forward(
8 changes: 5 additions & 3 deletions extensions/thunder/unsloth/kernels/rope_embedding.py
@@ -12,11 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import triton
import triton.language as tl
import torch
from litgpt.utils import _TRITON_AVAILABLE
from .utils import calculate_settings

if _TRITON_AVAILABLE:
import triton
import triton.language as tl

ROPE_GROUP_SIZE = 4

@triton.heuristics({"BACKWARD_PASS": lambda args: args["BACKWARD_PASS"],})
8 changes: 6 additions & 2 deletions extensions/thunder/unsloth/kernels/swiglu.py
@@ -13,8 +13,12 @@
# limitations under the License.

import torch
import triton
import triton.language as tl

from litgpt.utils import _TRITON_AVAILABLE

if _TRITON_AVAILABLE:
import triton
import triton.language as tl


@triton.jit
6 changes: 5 additions & 1 deletion extensions/thunder/unsloth/kernels/utils.py
@@ -12,7 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import triton

from litgpt.utils import _TRITON_AVAILABLE

if _TRITON_AVAILABLE:
import triton

MAX_FUSED_SIZE = 65536 # 2**16
next_power_of_2 = triton.next_power_of_2
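The kernel modules above all switch to the same guard: import Triton only when it is installed, so the minimal-install `testing-imports` job can still import them. A minimal sketch of the pattern, assuming `package_available` from `lightning_utilities` (the helper `litgpt/utils.py` uses below); the entry-point name is hypothetical:

```python
from lightning_utilities.core.imports import package_available

_TRITON_AVAILABLE = package_available("triton")

if _TRITON_AVAILABLE:
    import triton
    import triton.language as tl  # noqa: F401


def run_fused_kernel(*args, **kwargs):
    # Fail with a clear error instead of a NameError on `triton` at call time.
    if not _TRITON_AVAILABLE:
        raise ModuleNotFoundError("Triton is required to run the fused kernels")
    ...
```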
6 changes: 6 additions & 0 deletions extensions/xla/__init__
@@ -0,0 +1,6 @@
import sys
from pathlib import Path

# support running without installing as a package, adding extensions to the Python path
wd = Path(__file__).parent.parent.resolve()
sys.path.append(str(wd))
Empty file.
6 changes: 3 additions & 3 deletions extensions/xla/finetune/adapter.py
@@ -22,9 +22,9 @@
wd = Path(__file__).parents[3].resolve()
sys.path.append(str(wd))

from extensions.xla.generate.base import generate
from extensions.xla.scripts.prepare_alpaca import generate_prompt
from extensions.xla.utils import rank_print, sequential_load_and_fsdp_wrap
from xla.generate.base import generate
from xla.scripts.prepare_alpaca import generate_prompt
from xla.utils import rank_print, sequential_load_and_fsdp_wrap

eval_interval = 200
save_interval = 200
Empty file.
4 changes: 2 additions & 2 deletions extensions/xla/generate/adapter.py
@@ -18,8 +18,8 @@
wd = Path(__file__).parents[3].resolve()
sys.path.append(str(wd))

from extensions.xla.generate.base import generate
from extensions.xla.utils import rank_print
from xla.generate.base import generate
from xla.utils import rank_print


def setup(
2 changes: 1 addition & 1 deletion extensions/xla/generate/base.py
@@ -19,7 +19,7 @@
wd = Path(__file__).parents[3].resolve()
sys.path.append(str(wd))

from extensions.xla.utils import rank_print
from xla.utils import rank_print


# xla does not support `inference_mode`: RuntimeError: Cannot set version_counter for inference tensor
Empty file.
19 changes: 19 additions & 0 deletions litgpt/utils.py
@@ -12,6 +12,8 @@
import sys
from dataclasses import asdict, is_dataclass
from io import BytesIO

from lightning_utilities.core.imports import package_available
from packaging import version
from pathlib import Path
import subprocess
@@ -35,6 +37,9 @@
if TYPE_CHECKING:
from litgpt import GPT, Config

_THUNDER_AVAILABLE = package_available("thunder")
_TRITON_AVAILABLE = package_available("triton")


def init_out_dir(out_dir: Path) -> Path:
if not isinstance(out_dir, Path):
@@ -815,3 +820,17 @@ def select_sft_generate_example(eval, data):
else:
raise ValueError(f"Unknown evaluation example type: {eval.evaluate_example}")
return instruction



def _RunIf(thunder: bool = False, **kwargs):
import pytest
from lightning.fabric.utilities.testing import _runif_reasons

reasons, marker_kwargs = _runif_reasons(**kwargs)

if thunder and not package_available("thunder"):
# if we require Thunder, but it's not available, we should skip
reasons.append("Thunder")

return pytest.mark.skipif(condition=len(reasons) > 0, reason=f"Requires: [{' + '.join(reasons)}]", **marker_kwargs)
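`_RunIf` wraps Fabric's `_runif_reasons` into a `pytest.mark.skipif` marker and adds a Thunder-availability check on top. A hedged usage sketch (the test name is made up, and the `min_cuda_gpus` keyword is assumed to be among those forwarded to `_runif_reasons`):

```python
from litgpt.utils import _RunIf


@_RunIf(thunder=True, min_cuda_gpus=1)
def test_thunder_compile_smoke():
    # Collected by pytest; skipped when lightning-thunder or a CUDA device is missing.
    import thunder

    assert thunder is not None
```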