From ea45210c42b06a5ecfab2300fedd98e73e027033 Mon Sep 17 00:00:00 2001
From: Aryan Gupta
Date: Thu, 29 Jun 2023 14:20:20 +0530
Subject: [PATCH 1/5] Add support for "mps" device in ignite.distributed.base

---
 ignite/distributed/comp_models/base.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ignite/distributed/comp_models/base.py b/ignite/distributed/comp_models/base.py
index 82466f2244d..d5742cc8404 100644
--- a/ignite/distributed/comp_models/base.py
+++ b/ignite/distributed/comp_models/base.py
@@ -325,6 +325,8 @@ def get_node_rank(self) -> int:
     def device(self) -> torch.device:
         if torch.cuda.is_available():
             return torch.device("cuda")
+        if torch.backends.mps.is_available():
+            return torch.device("mps")
         return torch.device("cpu")

     def backend(self) -> Optional[str]:
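With patch 1 applied, ignite.distributed resolves its default device in the order cuda, then mps, then cpu. A minimal sketch of the observable effect (assumes an Apple-Silicon machine with the Metal backend available and no CUDA; idist.device() and torch.backends.mps are existing APIs):

    import torch
    import ignite.distributed as idist

    # Outside any distributed context, idist.device() now prefers "mps"
    # over "cpu" whenever torch reports the Metal backend as available.
    device = idist.device()
    assert device.type in ("cuda", "mps", "cpu")

    x = torch.ones(2, 2, device=device)  # tensors land on the resolved device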
From c9808c1146b138e2823a6a598ca5acddbc9c35bc Mon Sep 17 00:00:00 2001
From: Aryan Gupta
Date: Wed, 12 Jul 2023 16:38:59 +0530
Subject: [PATCH 2/5] Made changes in the supervised_trainer API to have mps
 devices, Added some tests

---
 ignite/engine/__init__.py                     | 19 ++++++++---
 tests/ignite/engine/test_create_supervised.py | 34 +++++++++++++++++--
 2 files changed, 46 insertions(+), 7 deletions(-)

diff --git a/ignite/engine/__init__.py b/ignite/engine/__init__.py
index 299afadba9a..43f9e9255c6 100644
--- a/ignite/engine/__init__.py
+++ b/ignite/engine/__init__.py
@@ -91,6 +91,8 @@ def supervised_training_step(
         Added Gradient Accumulation.
     .. versionchanged:: 0.4.11
         Added `model_transform` to transform model's output
+    .. versionchanged:: 0.4.13
+        Added support for ``mps`` device
     """

     if gradient_accumulation_steps <= 0:
@@ -374,9 +376,12 @@ def update(engine: Engine, batch: Sequence[torch.Tensor]) -> Union[Any, Tuple[to

 def _check_arg(
-    on_tpu: bool, amp_mode: Optional[str], scaler: Optional[Union[bool, "torch.cuda.amp.GradScaler"]]
+    on_tpu: bool, on_mps: bool, amp_mode: Optional[str], scaler: Optional[Union[bool, "torch.cuda.amp.GradScaler"]]
 ) -> Tuple[Optional[str], Optional["torch.cuda.amp.GradScaler"]]:
-    """Checking tpu, amp and GradScaler instance combinations."""
+    """Checking tpu, mps, amp and GradScaler instance combinations."""
+    if on_mps and amp_mode:
+        raise ValueError("amp_mode cannot be used with mps device. Consider using amp_mode=None or device='cuda'.")
+
     if on_tpu and not idist.has_xla_support:
         raise RuntimeError("In order to run on TPU, please install PyTorch XLA")

@@ -525,11 +530,14 @@ def output_transform_fn(x, y, y_pred, loss):
         Added Gradient Accumulation argument for all supervised training methods.
     .. versionchanged:: 0.4.11
         Added ``model_transform`` to transform model's output
+    .. versionchanged:: 0.4.13
+        Added support for ``mps`` device
     """

     device_type = device.type if isinstance(device, torch.device) else device
     on_tpu = "xla" in device_type if device_type is not None else False
-    mode, _scaler = _check_arg(on_tpu, amp_mode, scaler)
+    on_mps = "mps" in device_type if device_type is not None else False
+    mode, _scaler = _check_arg(on_tpu, on_mps, amp_mode, scaler)

     if mode == "amp":
         _update = supervised_training_step_amp(
@@ -754,10 +762,13 @@ def create_supervised_evaluator(
         Added ``amp_mode`` argument for automatic mixed precision.
     .. versionchanged:: 0.4.12
         Added ``model_transform`` to transform model's output
+    .. versionchanged:: 0.4.13
+        Added support for ``mps`` device
     """
     device_type = device.type if isinstance(device, torch.device) else device
     on_tpu = "xla" in device_type if device_type is not None else False
-    mode, _ = _check_arg(on_tpu, amp_mode, None)
+    on_mps = "mps" in device_type if device_type is not None else False
+    mode, _ = _check_arg(on_tpu, amp_mode, None, None)

     metrics = metrics or {}
     if mode == "amp":

diff --git a/tests/ignite/engine/test_create_supervised.py b/tests/ignite/engine/test_create_supervised.py
index a13a95c9198..eb98c116ce7 100644
--- a/tests/ignite/engine/test_create_supervised.py
+++ b/tests/ignite/engine/test_create_supervised.py
@@ -184,7 +184,8 @@ def _test_create_mocked_supervised_trainer(
     data = [(x, y)]

     on_tpu = "xla" in trainer_device if trainer_device is not None else False
-    mode, _ = _check_arg(on_tpu, amp_mode, scaler)
+    on_mps = "mps" in trainer_device if trainer_device is not None else False
+    mode, _ = _check_arg(on_tpu, on_mps, amp_mode, scaler)

     if model_device == trainer_device or ((model_device == "cpu") ^ (trainer_device == "cpu")):
         trainer.run(data)
@@ -336,7 +337,8 @@ def _test_create_evaluation_step_amp(

     device_type = evaluator_device.type if isinstance(evaluator_device, torch.device) else evaluator_device
     on_tpu = "xla" in device_type if device_type is not None else False
-    mode, _ = _check_arg(on_tpu, amp_mode, None)
+    on_mps = "mps" in device_type if device_type is not None else False
+    mode, _ = _check_arg(on_tpu, on_mps, amp_mode, None)

     evaluate_step = supervised_evaluation_step_amp(model, evaluator_device, output_transform=output_transform_mock)
@@ -371,7 +373,8 @@ def _test_create_evaluation_step(

     device_type = evaluator_device.type if isinstance(evaluator_device, torch.device) else evaluator_device
     on_tpu = "xla" in device_type if device_type is not None else False
-    mode, _ = _check_arg(on_tpu, amp_mode, None)
+    on_mps = "mps" in device_type if device_type is not None else False
+    mode, _ = _check_arg(on_tpu, on_mps, amp_mode, None)

     evaluate_step = supervised_evaluation_step(model, evaluator_device, output_transform=output_transform_mock)
@@ -451,6 +454,18 @@ def test_create_supervised_trainer_on_cuda():
     )
     _test_create_mocked_supervised_trainer(model_device=model_device, trainer_device=trainer_device)

+@pytest.mark.skipif(not torch.backends.mps.is_available(), reason="Skip if no MPS")
+def test_create_supervised_trainer_on_mps():
+    model_device = trainer_device = "mps"
+    _test_create_supervised_trainer_wrong_accumulation(model_device=model_device, trainer_device=trainer_device)
+    _test_create_supervised_trainer(
+        gradient_accumulation_steps=1, model_device=model_device, trainer_device=trainer_device
+    )
+    _test_create_supervised_trainer(
+        gradient_accumulation_steps=3, model_device=model_device, trainer_device=trainer_device
+    )
+    _test_create_mocked_supervised_trainer(model_device=model_device, trainer_device=trainer_device)
+

 @pytest.mark.skipif(Version(torch.__version__) < Version("1.6.0"), reason="Skip if < 1.6.0")
 @pytest.mark.skipif(not torch.cuda.is_available(), reason="Skip if no GPU")
@@ -618,6 +633,19 @@ def test_create_supervised_evaluator_on_cuda_with_model_on_cpu():
     _test_mocked_supervised_evaluator(evaluator_device="cuda")


+@pytest.mark.skipif(not torch.backends.mps.is_available(), reason="Skip if no MPS Backend")
+def test_create_supervised_evaluator_on_mps():
+    model_device = evaluator_device = "mps"
+    _test_create_supervised_evaluator(model_device=model_device, evaluator_device=evaluator_device)
+    _test_mocked_supervised_evaluator(model_device=model_device, evaluator_device=evaluator_device)
+
+
+@pytest.mark.skipif(not torch.backends.mps.is_available(), reason="Skip if no MPS Backend")
+def test_create_supervised_evaluator_on_mps_with_model_on_cpu():
+    _test_create_supervised_evaluator(evaluator_device="mps")
+    _test_mocked_supervised_evaluator(evaluator_device="mps")
+
+
 @pytest.mark.skipif(Version(torch.__version__) < Version("1.6.0"), reason="Skip if < 1.6.0")
 @pytest.mark.skipif(not torch.cuda.is_available(), reason="Skip if no GPU")
 def test_create_supervised_evaluator_on_cuda_amp():
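Patch 2 threads the new on_mps flag through _check_arg so that create_supervised_trainer and create_supervised_evaluator accept device="mps". A minimal usage sketch (the model, optimizer, and criterion below are hypothetical placeholders; assumes an MPS-capable machine):

    import torch
    from torch import nn, optim
    from ignite.engine import create_supervised_trainer

    device = torch.device("mps")
    model = nn.Linear(4, 2).to(device)
    optimizer = optim.SGD(model.parameters(), lr=0.1)
    criterion = nn.CrossEntropyLoss()

    # "mps" is now accepted like "cuda" or "cpu" for the device argument.
    trainer = create_supervised_trainer(model, optimizer, criterion, device=device)

Note that combining amp_mode with an mps device raises the new ValueError, since torch.cuda.amp autocast and GradScaler are CUDA-side features.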
From be57ea52acb4db644e8774a8900eb1fd6a70657c Mon Sep 17 00:00:00 2001
From: guptaaryan16
Date: Wed, 12 Jul 2023 11:11:59 +0000
Subject: [PATCH 3/5] autopep8 fix

---
 ignite/engine/__init__.py                     | 2 +-
 tests/ignite/engine/test_create_supervised.py | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/ignite/engine/__init__.py b/ignite/engine/__init__.py
index 43f9e9255c6..d8a77b052c8 100644
--- a/ignite/engine/__init__.py
+++ b/ignite/engine/__init__.py
@@ -380,7 +380,7 @@ def _check_arg(
 ) -> Tuple[Optional[str], Optional["torch.cuda.amp.GradScaler"]]:
     """Checking tpu, mps, amp and GradScaler instance combinations."""
     if on_mps and amp_mode:
-        raise ValueError("amp_mode cannot be used with mps device. Consider using amp_mode=None or device='cuda'.")
+        raise ValueError("amp_mode cannot be used with mps device. Consider using amp_mode=None or device='cuda'.")

     if on_tpu and not idist.has_xla_support:
         raise RuntimeError("In order to run on TPU, please install PyTorch XLA")

diff --git a/tests/ignite/engine/test_create_supervised.py b/tests/ignite/engine/test_create_supervised.py
index eb98c116ce7..7ecf318ec96 100644
--- a/tests/ignite/engine/test_create_supervised.py
+++ b/tests/ignite/engine/test_create_supervised.py
@@ -454,6 +454,7 @@ def test_create_supervised_trainer_on_cuda():
     )
     _test_create_mocked_supervised_trainer(model_device=model_device, trainer_device=trainer_device)

+
 @pytest.mark.skipif(not torch.backends.mps.is_available(), reason="Skip if no MPS")
 def test_create_supervised_trainer_on_mps():
     model_device = trainer_device = "mps"
From 60b15f17351f2ef35ad5a551762987a1ee031681 Mon Sep 17 00:00:00 2001
From: Aryan Gupta
Date: Wed, 12 Jul 2023 17:05:40 +0530
Subject: [PATCH 4/5] Added lint fixes

---
 ignite/engine/__init__.py                     | 4 ++--
 tests/ignite/engine/test_create_supervised.py | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/ignite/engine/__init__.py b/ignite/engine/__init__.py
index 43f9e9255c6..03af6d10afb 100644
--- a/ignite/engine/__init__.py
+++ b/ignite/engine/__init__.py
@@ -380,7 +380,7 @@ def _check_arg(
 ) -> Tuple[Optional[str], Optional["torch.cuda.amp.GradScaler"]]:
     """Checking tpu, mps, amp and GradScaler instance combinations."""
     if on_mps and amp_mode:
-        raise ValueError("amp_mode cannot be used with mps device. Consider using amp_mode=None or device='cuda'.")
+        raise ValueError("amp_mode cannot be used with mps device. Consider using amp_mode=None or device='cuda'.")

     if on_tpu and not idist.has_xla_support:
         raise RuntimeError("In order to run on TPU, please install PyTorch XLA")
@@ -768,7 +768,7 @@ def create_supervised_evaluator(
     device_type = device.type if isinstance(device, torch.device) else device
     on_tpu = "xla" in device_type if device_type is not None else False
     on_mps = "mps" in device_type if device_type is not None else False
-    mode, _ = _check_arg(on_tpu, amp_mode, None, None)
+    mode, _ = _check_arg(on_tpu, on_mps, amp_mode, None)

     metrics = metrics or {}
     if mode == "amp":

diff --git a/tests/ignite/engine/test_create_supervised.py b/tests/ignite/engine/test_create_supervised.py
index eb98c116ce7..7ecf318ec96 100644
--- a/tests/ignite/engine/test_create_supervised.py
+++ b/tests/ignite/engine/test_create_supervised.py
@@ -454,6 +454,7 @@ def test_create_supervised_trainer_on_cuda():
     )
     _test_create_mocked_supervised_trainer(model_device=model_device, trainer_device=trainer_device)

+
 @pytest.mark.skipif(not torch.backends.mps.is_available(), reason="Skip if no MPS")
 def test_create_supervised_trainer_on_mps():
     model_device = trainer_device = "mps"
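Beyond lint, patch 4's substantive change is the corrected call in create_supervised_evaluator: patch 2 had passed amp_mode into the on_mps slot. A small pytest-style sketch of the resulting _check_arg contract (a hypothetical test, not part of the series; the test suite above imports _check_arg the same way):

    import pytest

    from ignite.engine import _check_arg

    # mps combined with any amp mode is rejected up front...
    with pytest.raises(ValueError, match="amp_mode cannot be used with mps device"):
        _check_arg(False, True, "amp", None)  # on_tpu=False, on_mps=True

    # ...while a plain mps run passes through with no amp mode and no scaler.
    mode, scaler = _check_arg(False, True, None, None)
    assert mode is None and scaler is None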
From 862785e85612830f3ebac9e635c238f88560696f Mon Sep 17 00:00:00 2001
From: vfdev
Date: Wed, 23 Aug 2023 10:18:13 +0200
Subject: [PATCH 5/5] Setup ci for mps tests

---
 .github/workflows/mps-tests.yml  | 150 +++++++++----------------------
 .github/workflows/unit-tests.yml |   1 +
 2 files changed, 41 insertions(+), 110 deletions(-)

diff --git a/.github/workflows/mps-tests.yml b/.github/workflows/mps-tests.yml
index b7154611afd..5717bd36a17 100644
--- a/.github/workflows/mps-tests.yml
+++ b/.github/workflows/mps-tests.yml
@@ -1,6 +1,9 @@
-name: Run unit tests on MPS Backend
+name: Run unit tests on M1
 on:
   push:
+    branches:
+      - master
+      - "*.*.*"
     paths:
       - "ignite/**"
       - "tests/ignite/**"
@@ -15,7 +18,7 @@ on:
       - "tests/run_code_style.sh"
       - "examples/**.py"
       - "requirements-dev.txt"
-      - ".github/workflows/unit-tests.yml"
+      - ".github/workflows/mps-tests.yml"
   workflow_dispatch:

 concurrency:
   # Cancel previous runs on the same branch
   group: mps-tests-${{ github.ref_name }}-${{ !(github.ref_protected) || github.sha }}
   cancel-in-progress: true

-# Cherry-picked from https://github.com/pytorch/test-infra/blob/main/.github/workflows/linux_job.yml
+# Cherry-picked from
+#   - https://github.com/pytorch/vision/main/.github/workflows/tests.yml
+#   - https://github.com/pytorch/test-infra/blob/main/.github/workflows/macos_job.yml

 jobs:
-  gpu-tests:
+  mps-tests:
     strategy:
       matrix:
         python-version:
           - "3.8"
-        pytorch-channel: [pytorch, pytorch-nightly]
+        pytorch-channel: ["pytorch"]
+        skip-distrib-tests: 1
       fail-fast: false
     runs-on: ["macos-m1-12"]
-    timeout-minutes: 45
+    timeout-minutes: 60

     steps:
       - name: Clean workspace

       - name: Checkout repository (pytorch/test-infra)
         uses: actions/checkout@v3
         with:
           repository: pytorch/test-infra
           path: test-infra

-      - name: Setup MacOS-M1
-        uses: ./test-infra/.github/actions/macos-job
-
-      - name: Pull docker image
-        uses: ./test-infra/.github/actions/pull-docker-image
-        with:
-          docker-image: ${{ env.DOCKER_IMAGE }}
+      - name: Setup miniconda
+        uses: ./test-infra/.github/actions/setup-miniconda
+        with:
+          python-version: ${{ matrix.python-version }}

       - name: Checkout repository (${{ github.repository }})
         uses: actions/checkout@v3
         with:
           repository: ${{ github.repository }}
           ref: ${{ github.ref }}
           path: ${{ github.repository }}
           fetch-depth: 1

-      - name: Start Pytorch container
-        working-directory: ${{ github.repository }}
-        run: |
-          docker run --name pthd --gpus=all --rm \
-            --cap-add=SYS_PTRACE \
-            --detach \
-            --ipc=host \
-            --security-opt seccomp=unconfined \
-            --shm-size=2g \
-            --tty \
-            --ulimit stack=10485760:83886080 \
-            -v $PWD:/work \
-            -w /work \
-            ${DOCKER_IMAGE}
-
-          script=$(cat << EOF
-
-          set -x
-
-          nvidia-smi
-          ls -alh
-
-          conda --version
-          python --version
-
-          EOF
-          )
-          docker exec -t pthd /bin/bash -c "${script}"
-
-      - name: Install PyTorch and dependencies
-        continue-on-error: false
-        run: |
-
-          script=$(cat << EOF
-
-          set -x
-
-          # Install PyTorch
-          if [ "${{ matrix.pytorch-channel }}" == "pytorch" ]; then
-            pip install --upgrade torch torchvision --index-url https://download.pytorch.org/whl/cu118
-          else
-            pip install --upgrade --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu118
-          fi
-
-          python -c "import torch; print(torch.__version__, ', CUDA is available: ', torch.cuda.is_available()); exit(not torch.cuda.is_available())"
-          pip list
-
-          # Install dependencies
-          pip install -r requirements-dev.txt
-          pip install -e .
-
-          EOF
-          )
-
-          docker exec -t pthd /bin/bash -c "${script}"
+      - name: Install PyTorch
+        if: ${{ matrix.pytorch-channel == 'pytorch' }}
+        shell: bash -l {0}
+        continue-on-error: false
+        run: pip install --upgrade torch torchvision --index-url https://download.pytorch.org/whl/cu118
+
+      - name: Install PyTorch (nightly)
+        if: ${{ matrix.pytorch-channel == 'pytorch-nightly' }}
+        run: pip install torch torchvision -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html --pre
+
+      - name: Install dependencies
+        run: |
+          pip install -r requirements-dev.txt
+          pip install -e .
+          pip list
+
+      # Download MNIST: https://github.com/pytorch/ignite/issues/1737
+      # to "/tmp" for unit tests
+      - name: Download MNIST
+        uses: pytorch-ignite/download-mnist-github-action@master
+        with:
+          target_dir: /tmp
+
+      # Copy MNIST to "." for the examples
+      - name: Copy MNIST
+        run: |
+          cp -R /tmp/MNIST .

-      - name: Run 1 Node 2 GPUs Unit Tests
-        continue-on-error: false
-        run: |
-
-          script=$(cat << EOF
-
-          set -x
-
-          bash tests/run_gpu_tests.sh 2
-
-          EOF
-          )
-
-          docker exec -t pthd /bin/bash -c "${script}"
+      - name: Run Tests
+        run: |
+          SKIP_DISTRIB_TESTS=${{ matrix.skip-distrib-tests }} bash tests/run_cpu_tests.sh

       - name: Upload coverage to Codecov
         uses: codecov/codecov-action@v3
         with:
           file: ${{ github.repository }}/coverage.xml
-          flags: gpu-2
+          flags: mps
           fail_ci_if_error: false
-      - name: Run examples in container
-        continue-on-error: false
-        run: |
-          SCRIPT=$(cat << EOF
-
-          set -x
-
-          # Install additional example dependencies
-          pip install fire
-
-          # Check training on cifar10, run without backend
-          ## initial run
-          CI=1 python examples/contrib/cifar10/main.py run --checkpoint_every=200 --stop_iteration=500
-          ## resume
-          CI=1 python examples/contrib/cifar10/main.py run --checkpoint_every=200 --num_epochs=7 --resume-from=/tmp/output-cifar10/resnet18_backend-None-1_stop-on-500/training_checkpoint_400.pt
-
-          # Check training on cifar10, run with NCCL backend using torchrun
-          ## initial run
-          CI=1 torchrun --nproc_per_node=2 examples/contrib/cifar10/main.py run --backend=nccl --checkpoint_every=200 --stop_iteration=500
-          ## resume
-          CI=1 torchrun --nproc_per_node=2 examples/contrib/cifar10/main.py run --backend=nccl --checkpoint_every=200 --num_epochs=7 --resume-from=/tmp/output-cifar10/resnet18_backend-nccl-2_stop-on-500/training_checkpoint_400.pt
-
-          # Check training on cifar10, run with NCCL backend using spawn
-          ## initial run
-          CI=1 python -u examples/contrib/cifar10/main.py run --backend=nccl --nproc_per_node=2 --checkpoint_every=200 --stop_iteration=500
-          ## resume
-          CI=1 python -u examples/contrib/cifar10/main.py run --backend=nccl --nproc_per_node=2 --checkpoint_every=200 --num_epochs=7 --resume-from=/tmp/output-cifar10/resnet18_backend-nccl-2_stop-on-500/training_checkpoint_400.pt
-
-          EOF
-          )
-
-          docker exec -t pthd /bin/bash -c "${script}"
-
-      - name: Teardown Linux
-        if: ${{ always() }}
-        uses: ./test-infra/.github/actions/teardown-linux
+      - name: Run MNIST Examples
+        run: python examples/mnist/mnist.py --epochs=1

diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml
index 2c409f7227a..bdef2d1a680 100644
--- a/.github/workflows/unit-tests.yml
+++ b/.github/workflows/unit-tests.yml
@@ -93,6 +93,7 @@ jobs:
       run: |
         pip install -r requirements-dev.txt
         python setup.py install
+        pip list

     - name: Check code formatting
       run: |
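Taken together, the series makes an end-to-end smoke test meaningful on Apple hardware. The sketch below loosely mirrors what the new workflow exercises in CI (a hedged example: the model and batch are placeholders, and it does nothing on machines without MPS):

    import torch
    from ignite.engine import create_supervised_evaluator

    if torch.backends.mps.is_available():
        model = torch.nn.Linear(2, 2).to("mps")
        evaluator = create_supervised_evaluator(model, device="mps")
        batches = [(torch.rand(4, 2), torch.randint(0, 2, (4,)))]
        state = evaluator.run(batches)
        print(state.output)  # (y_pred, y), both living on the mps device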