diff --git a/.dockerignore b/.dockerignore index 262da4d0dd..4e1161bfb2 100644 --- a/.dockerignore +++ b/.dockerignore @@ -8,7 +8,6 @@ docs/ .coverage/ coverage.xml .readthedocs.yml -*.md *.toml !README.md diff --git a/.github/workflows/cron.yml b/.github/workflows/cron.yml index 761b1f7ebc..e215ec98e4 100644 --- a/.github/workflows/cron.yml +++ b/.github/workflows/cron.yml @@ -3,6 +3,8 @@ name: crons on: schedule: - cron: "0 2 * * *" # at 02:00 UTC + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: jobs: cron-gpu: @@ -40,17 +42,14 @@ jobs: nvidia-smi export CUDA_VISIBLE_DEVICES=$(python -m tests.utils) echo $CUDA_VISIBLE_DEVICES - stop_time=$((LAUNCH_DELAY + $(date +%s))) - while [ $(date +%s) -lt $stop_time ]; do - python -c 'import torch; torch.rand(5, 3, device=torch.device("cuda:0"))'; - done + trap 'if pgrep python; then pkill python; fi;' ERR + python -c $'import torch\na,b=torch.zeros(1,device="cuda:0"),torch.zeros(1,device="cuda:1");\nwhile True:print(a,b)' > /dev/null & python -c "import torch; print(torch.__version__); print('{} of GPUs available'.format(torch.cuda.device_count()))" python -c 'import torch; print(torch.rand(5, 3, device=torch.device("cuda:0")))' BUILD_MONAI=1 ./runtests.sh --coverage --unittests # unit tests with coverage report - export CUDA_VISIBLE_DEVICES=$(python -m tests.utils) - echo $CUDA_VISIBLE_DEVICES BUILD_MONAI=1 ./runtests.sh --coverage --net # integration tests with coverage report coverage xml + if pgrep python; then pkill python; fi - name: Upload coverage uses: codecov/codecov-action@v1 with: @@ -83,17 +82,14 @@ jobs: nvidia-smi export CUDA_VISIBLE_DEVICES=$(python -m tests.utils) echo $CUDA_VISIBLE_DEVICES - stop_time=$((LAUNCH_DELAY + $(date +%s))) - while [ $(date +%s) -lt $stop_time ]; do - python -c 'import torch; torch.rand(5, 3, device=torch.device("cuda:0"))'; - done + trap 'if pgrep python; then pkill python; fi;' ERR + python -c $'import 
torch\na,b=torch.zeros(1,device="cuda:0"),torch.zeros(1,device="cuda:1");\nwhile True:print(a,b)' > /dev/null & python -c "import torch; print(torch.__version__); print('{} of GPUs available'.format(torch.cuda.device_count()))" python -c 'import torch; print(torch.rand(5, 3, device=torch.device("cuda:0")))' BUILD_MONAI=1 ./runtests.sh --coverage --unittests # unit tests with coverage report - export CUDA_VISIBLE_DEVICES=$(python -m tests.utils) - echo $CUDA_VISIBLE_DEVICES BUILD_MONAI=1 ./runtests.sh --coverage --net # integration tests with coverage report coverage xml + if pgrep python; then pkill python; fi - name: Upload coverage uses: codecov/codecov-action@v1 with: @@ -115,14 +111,15 @@ jobs: nvidia-smi export CUDA_VISIBLE_DEVICES=$(python -m tests.utils) echo $CUDA_VISIBLE_DEVICES + trap 'if pgrep python; then pkill python; fi;' ERR + python -c $'import torch\na,b=torch.zeros(1,device="cuda:0"),torch.zeros(1,device="cuda:1");\nwhile True:print(a,b)' > /dev/null & python -c "import torch; print(torch.__version__); print('{} of GPUs available'.format(torch.cuda.device_count()))" python -c 'import torch; print(torch.rand(5,3, device=torch.device("cuda:0")))' ngc --version BUILD_MONAI=1 ./runtests.sh --coverage --pytype --unittests # unit tests with pytype checks, coverage report - export CUDA_VISIBLE_DEVICES=$(python -m tests.utils) - echo $CUDA_VISIBLE_DEVICES BUILD_MONAI=1 ./runtests.sh --coverage --net # integration tests with coverage report coverage xml + if pgrep python; then pkill python; fi - name: Upload coverage uses: codecov/codecov-action@v1 with: @@ -156,8 +153,12 @@ jobs: cd tutorials python -m pip install -r requirements.txt - name: Run tutorial notebooks + timeout-minutes: 150 run: | export CUDA_VISIBLE_DEVICES=${{ steps.monai-install.outputs.devices }} echo $CUDA_VISIBLE_DEVICES + trap 'if pgrep python; then pkill python; fi;' ERR + python -c $'import torch\na,b=torch.zeros(1,device="cuda:0"),torch.zeros(1,device="cuda:1");\nwhile 
True:print(a,b)' > /dev/null & cd /opt/tutorials $(pwd)/runner.sh + if pgrep python; then pkill python; fi diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml new file mode 100644 index 0000000000..32f1fd2056 --- /dev/null +++ b/.github/workflows/docker.yml @@ -0,0 +1,123 @@ +name: docker +# versioning: compute a static version file +# local_docker: use the version file to build docker images +# docker_test_latest: test the latest internal docker image (has flake) +# docker_test_dockerhub: test the latest dockerhub release (no flake) +on: + # master only docker deployment and quick tests + push: + branches: + - master + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +jobs: + versioning: + # compute versioning file from python setup.py + # upload as artifact + # (also used in release.yml) + if: github.repository == 'Project-MONAI/MONAI' + container: + image: localhost:5000/local_monai:latest + runs-on: [self-hosted, linux, x64, build_only] + steps: + - uses: actions/checkout@v2 + # full history so that we can git describe + with: + ref: master + fetch-depth: 0 + - shell: bash + run: | + git describe + python setup.py build + cat build/lib/monai/_version.py + - name: Upload version + uses: actions/upload-artifact@v2 + with: + name: _version.py + path: build/lib/monai/_version.py + - name: Clean up directory + shell: bash + run: | + ls -al + rm -rf {*,.[^.]*} + + local_docker: + # builds two versions: local_monai:latest and local_monai:dockerhub + # latest: used for local tests + # dockerhub: release, no flake package + if: github.repository == 'Project-MONAI/MONAI' + needs: versioning + runs-on: [self-hosted, linux, x64, build_only] + steps: + - uses: actions/checkout@v2 + with: + ref: master + - name: Download version + uses: actions/download-artifact@v2 + with: + name: _version.py + - name: docker_build + shell: bash + run: | + # get tag info for versioning + cat _version.py + mv _version.py monai/ + # 
build and run original docker image for local registry + docker build -t localhost:5000/local_monai:latest -f Dockerfile . + docker push localhost:5000/local_monai:latest + # build once more w/ tag "latest": remove flake package as it is not needed on hub.docker.com + sed -i '/flake/d' requirements-dev.txt + docker build -t projectmonai/monai:latest -f Dockerfile . + # also push as tag "dockerhub" to local registry + docker image tag projectmonai/monai:latest localhost:5000/local_monai:dockerhub + docker push localhost:5000/local_monai:dockerhub + # distribute as always w/ tag "latest" to hub.docker.com + echo "${{ secrets.DOCKER_PW }}" | docker login -u projectmonai --password-stdin + docker push projectmonai/monai:latest + docker logout + docker image prune -f + + docker_test_latest: + if: github.repository == 'Project-MONAI/MONAI' + needs: local_docker + container: + image: localhost:5000/local_monai:latest + runs-on: [self-hosted, linux, x64, common] + steps: + - name: Import + run: | + export CUDA_VISIBLE_DEVICES=$(python -m tests.utils) + echo $CUDA_VISIBLE_DEVICES + trap 'if pgrep python; then pkill python; fi;' ERR + python -c $'import torch\na,b=torch.zeros(1,device="cuda:0"),torch.zeros(1,device="cuda:1");\nwhile True:print(a,b)' > /dev/null & + python -c 'import monai; monai.config.print_config()' + cd /opt/monai + ls -al + ngc --version + python -m tests.min_tests + if pgrep python; then pkill python; fi + env: + QUICKTEST: True + + docker_test_dockerhub: + if: github.repository == 'Project-MONAI/MONAI' + needs: local_docker + container: + image: localhost:5000/local_monai:dockerhub + runs-on: [self-hosted, linux, x64, common] + steps: + - name: Import + run: | + export CUDA_VISIBLE_DEVICES=$(python -m tests.utils) + echo $CUDA_VISIBLE_DEVICES + trap 'if pgrep python; then pkill python; fi;' ERR + python -c $'import torch\na,b=torch.zeros(1,device="cuda:0"),torch.zeros(1,device="cuda:1");\nwhile True:print(a,b)' > /dev/null & + python -c 'import monai; 
monai.config.print_config()' + cd /opt/monai + ls -al + ngc --version + python -m tests.min_tests + if pgrep python; then pkill python; fi + env: + QUICKTEST: True diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 5f160e6e8e..e78393f357 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -42,9 +42,14 @@ jobs: nvidia-smi export CUDA_VISIBLE_DEVICES=$(python -m tests.utils) echo $CUDA_VISIBLE_DEVICES + trap 'if pgrep python; then pkill python; fi;' ERR + python -c $'import torch\na,b=torch.zeros(1,device="cuda:0"),torch.zeros(1,device="cuda:1");\nwhile True:print(a,b)' > /dev/null & python -c "import torch; print(torch.__version__); print('{} of GPUs available'.format(torch.cuda.device_count()))" python -c 'import torch; print(torch.rand(5,3, device=torch.device("cuda:0")))' - BUILD_MONAI=1 ./runtests.sh --unittests --net + BUILD_MONAI=1 ./runtests.sh --net + BUILD_MONAI=1 ./runtests.sh --unittests + if pgrep python; then pkill python; fi + shell: bash - name: Add reaction uses: peter-evans/create-or-update-comment@v1 with: diff --git a/.github/workflows/pythonapp.yml b/.github/workflows/pythonapp.yml index e5803028a0..fe3642d7e3 100644 --- a/.github/workflows/pythonapp.yml +++ b/.github/workflows/pythonapp.yml @@ -41,7 +41,7 @@ jobs: # Git hub actions have 2 cores, so parallize pytype $(pwd)/runtests.sh --codeformat -j 2 - quick-py3: # full dependencies installed + quick-py3: # full dependencies installed tests for different OS runs-on: ${{ matrix.os }} strategy: fail-fast: false @@ -105,7 +105,7 @@ jobs: env: QUICKTEST: True - min-dep-py3: # min dependencies installed + min-dep-os: # min dependencies installed tests for different OS runs-on: ${{ matrix.os }} strategy: fail-fast: false @@ -154,6 +154,51 @@ jobs: env: QUICKTEST: True + min-dep-py3: # min dependencies installed tests for different python + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: [3.6, 
3.7] + timeout-minutes: 40 + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Prepare pip wheel + run: | + which python + python -m pip install --user --upgrade pip setuptools wheel + - name: cache weekly timestamp + id: pip-cache + run: | + echo "::set-output name=datew::$(date '+%Y-%V')" + echo "::set-output name=dir::$(pip cache dir)" + shell: bash + - name: cache for pip + uses: actions/cache@v2 + id: cache + with: + path: ${{ steps.pip-cache.outputs.dir }} + key: ubuntu-latest-latest-pip-${{ steps.pip-cache.outputs.datew }} + - name: Install the dependencies + run: | + # min. requirements + python -m pip install torch==1.8.1 + python -m pip install -r requirements-min.txt + python -m pip list + BUILD_MONAI=0 python setup.py develop # no compile of extensions + shell: bash + - name: Run quick tests (CPU ${{ runner.os }}) + run: | + python -c 'import torch; print(torch.__version__); print(torch.rand(5,3))' + python -c "import monai; monai.config.print_config()" + python -m tests.min_tests + env: + QUICKTEST: True + GPU-quick-py3: # GPU with full dependencies if: github.repository == 'Project-MONAI/MONAI' strategy: @@ -236,21 +281,28 @@ jobs: python -m pip install --upgrade pip wheel python -m pip install ${{ matrix.pytorch }} python -m pip install -r requirements-dev.txt + python -m pip list - name: Run quick tests (GPU) run: | - python -m pip list nvidia-smi + export LAUNCH_DELAY=$(python -c "import numpy; print(numpy.random.randint(30) * 5)") + echo "Sleep $LAUNCH_DELAY" + sleep $LAUNCH_DELAY export CUDA_VISIBLE_DEVICES=$(coverage run -m tests.utils) echo $CUDA_VISIBLE_DEVICES + trap 'if pgrep python; then pkill python; fi;' ERR + python -c $'import torch\na,b=torch.zeros(1,device="cuda:0"),torch.zeros(1,device="cuda:1");\nwhile True:print(a,b)' > /dev/null & python -c "import torch; print(torch.__version__); print('{} of 
GPUs available'.format(torch.cuda.device_count()))" python -c 'import torch; print(torch.rand(5, 3, device=torch.device("cuda:0")))' python -c "import monai; monai.config.print_config()" BUILD_MONAI=1 ./runtests.sh --quick --unittests - if [ ${{ matrix.environment }} == "PT18+CUDA112" ]; then + if [ ${{ matrix.environment }} = "PT18+CUDA112" ]; then # test the clang-format tool downloading once coverage run -m tests.clang_format_utils fi coverage xml + if pgrep python; then pkill python; fi + shell: bash - name: Upload coverage uses: codecov/codecov-action@v1 with: diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 840194b1da..00e28ecd52 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -83,3 +83,70 @@ jobs: password: ${{ secrets.TEST_PYPI }} repository_url: https://test.pypi.org/legacy/ + versioning: + # compute versioning file from python setup.py + # upload as artifact + # (also used in docker.yml) + if: github.repository == 'Project-MONAI/MONAI' + needs: packaging + container: + image: localhost:5000/local_monai:latest + runs-on: [self-hosted, linux, x64, build_only] + steps: + - uses: actions/checkout@v2 + # full history so that we can git describe + with: + ref: master + fetch-depth: 0 + - shell: bash + run: | + git describe + python setup.py build + cat build/lib/monai/_version.py + - name: Upload version + uses: actions/upload-artifact@v2 + with: + name: _version.py + path: build/lib/monai/_version.py + - name: Clean up directory + shell: bash + run: | + ls -al + rm -rf {*,.[^.]*} + + release_tag_docker: + if: github.repository == 'Project-MONAI/MONAI' + needs: versioning + runs-on: [self-hosted, linux, x64, build_only] + steps: + - uses: actions/checkout@v2 + with: + ref: master + - name: Download version + uses: actions/download-artifact@v2 + with: + name: _version.py + - name: Set tag + id: versioning + run: echo ::set-output name=tag::${GITHUB_REF#refs/*/} + - name: Check tag + env: + 
RELEASE_VERSION: ${{ steps.versioning.outputs.tag }} + run: | + echo "$RELEASE_VERSION" + cat _version.py + - if: startsWith(github.ref, 'refs/tags/') + name: build with the tag + env: + RELEASE_VERSION: ${{ steps.versioning.outputs.tag }} + shell: bash + run: | + # get tag info for versioning + mv _version.py monai/ + # remove flake package as it is not needed on hub.docker.com + sed -i '/flake/d' requirements-dev.txt + docker build -t projectmonai/monai:"$RELEASE_VERSION" -f Dockerfile . + # distribute with a tag to hub.docker.com + echo "${{ secrets.DOCKER_PW }}" | docker login -u projectmonai --password-stdin + docker push projectmonai/monai:"$RELEASE_VERSION" + docker logout diff --git a/.github/workflows/setupapp.yml b/.github/workflows/setupapp.yml index e5cb9a7cf1..1b4c37b6e8 100644 --- a/.github/workflows/setupapp.yml +++ b/.github/workflows/setupapp.yml @@ -47,17 +47,15 @@ jobs: nvidia-smi export CUDA_VISIBLE_DEVICES=$(python -m tests.utils) echo $CUDA_VISIBLE_DEVICES - stop_time=$((LAUNCH_DELAY + $(date +%s))) - while [ $(date +%s) -lt $stop_time ]; do - python -c 'import torch; torch.rand(5, 3, device=torch.device("cuda:0"))'; - done + trap 'if pgrep python; then pkill python; fi;' ERR + python -c $'import torch\na,b=torch.zeros(1,device="cuda:0"),torch.zeros(1,device="cuda:1");\nwhile True:print(a,b)' > /dev/null & python -c "import torch; print(torch.__version__); print('{} of GPUs available'.format(torch.cuda.device_count()))" python -c 'import torch; print(torch.rand(5, 3, device=torch.device("cuda:0")))' BUILD_MONAI=1 ./runtests.sh --coverage --unittests # unit tests with coverage report - export CUDA_VISIBLE_DEVICES=$(python -m tests.utils) - echo $CUDA_VISIBLE_DEVICES BUILD_MONAI=1 ./runtests.sh --coverage --net # integration tests with coverage report coverage xml + if pgrep python; then pkill python; fi + shell: bash - name: Upload coverage uses: codecov/codecov-action@v1 with: @@ -148,44 +146,3 @@ jobs: python -m tests.min_tests env: 
QUICKTEST: True - - local_docker: - if: github.repository == 'Project-MONAI/MONAI' - runs-on: [self-hosted, linux, x64, build_only] - # we only push built container if it is built from master branch - steps: - - uses: actions/checkout@v2 - with: - ref: master - - name: docker_build - run: | - # build and run original docker image for local registry - docker build -t localhost:5000/local_monai:latest -f Dockerfile . - docker push localhost:5000/local_monai:latest - # build once more w/ tag "latest": remove flake package as it is not needed on hub.docker.com - sed -i '/flake/d' requirements-dev.txt - docker build -t projectmonai/monai:latest -f Dockerfile . - # also push as tag "dockerhub" to local registry - docker image tag projectmonai/monai:latest localhost:5000/local_monai:dockerhub - docker push localhost:5000/local_monai:dockerhub - # distribute as always w/ tag "latest" to hub.docker.com - echo "${{ secrets.DOCKER_PW }}" | docker login -u projectmonai --password-stdin - docker push projectmonai/monai:latest - docker logout - - docker: - if: github.repository == 'Project-MONAI/MONAI' - needs: local_docker - container: - image: localhost:5000/local_monai:latest - runs-on: [self-hosted, linux, x64, common] - steps: - - name: Import - run: | - python -c 'import monai; monai.config.print_config()' - cd /opt/monai - ls -al - ngc --version - python -m tests.min_tests - env: - QUICKTEST: True diff --git a/.github/workflows/weekly-preview.yml b/.github/workflows/weekly-preview.yml index bb68a0801d..df5f52f57a 100644 --- a/.github/workflows/weekly-preview.yml +++ b/.github/workflows/weekly-preview.yml @@ -32,7 +32,7 @@ jobs: export YEAR_WEEK=$(date +'%y%U') echo "Year week for tag is ${YEAR_WEEK}" if ! [[ $YEAR_WEEK =~ ^[0-9]{4}$ ]] ; then echo "Wrong 'year week' format. 
Should be 4 digits."; exit 1 ; fi - git tag "0.5.dev${YEAR_WEEK}" + git tag "0.6.dev${YEAR_WEEK}" git log -1 git tag --list python setup.py sdist bdist_wheel diff --git a/CHANGELOG.md b/CHANGELOG.md index 56e65a7d92..2a6882d08b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,87 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). ## [Unreleased] +## [0.5.0] - 2021-04-09 ### Added +* Overview document for [feature highlights in v0.5.0](https://github.com/Project-MONAI/MONAI/blob/master/docs/source/highlights.md) +* Invertible spatial transforms + * `InvertibleTransform` base APIs + * Batch inverse and decollating APIs + * Inverse of `Compose` + * Batch inverse event handling + * Test-time augmentation as an application +* Initial support of learning-based image registration: + * Bending energy, LNCC, and global mutual information loss + * Fully convolutional architectures + * Dense displacement field, dense velocity field computation + * Warping with high-order interpolation with C++/CUDA implementations +* Deepgrow modules for interactive segmentation: + * Workflows with simulations of clicks + * Distance-based transforms for guidance signals +* Digital pathology support: + * Efficient whole slide imaging IO and sampling with Nvidia cuCIM and SmartCache + * FROC measurements for lesion + * Probabilistic post-processing for lesion detection + * TorchVision classification model adaptor for fully convolutional analysis +* 12 new transforms, grid patch dataset, `ThreadDataLoader`, EfficientNets B0-B7 +* 4 iteration events for the engine for finer control of workflows +* New C++/CUDA extensions: + * Conditional random field + * Fast bilateral filtering using the permutohedral lattice +* Metrics summary reporting and saving APIs +* DiceCELoss, DiceFocalLoss, a multi-scale wrapper for segmentation loss computation +* Data loading 
utilities: + * `decollate_batch` + * `PadListDataCollate` with inverse support +* Support of slicing syntax for `Dataset` +* Initial Torchscript support for the loss modules +* Learning rate finder +* Allow for missing keys in the dictionary-based transforms +* Support of checkpoint loading for transfer learning +* Various summary and plotting utilities for Jupyter notebooks +* Contributor Covenant Code of Conduct +* Major CI/CD enhancements covering the tutorial repository +* Fully compatible with PyTorch 1.8 +* Initial nightly CI/CD pipelines using Nvidia Blossom Infrastructure + +### Changed +* Enhanced `list_data_collate` error handling +* Unified iteration metric APIs +* `densenet*` extensions are renamed to `DenseNet*` +* `se_res*` network extensions are renamed to `SERes*` +* Transform base APIs are rearranged into `compose`, `inverse`, and `transform` +* `_do_transform` flag for the random augmentations is unified via `RandomizableTransform` +* Decoupled post-processing steps, e.g. `softmax`, `to_onehot_y`, from the metrics computations +* Moved the distributed samplers to `monai.data.samplers` from `monai.data.utils` +* Engine's data loaders now accept generic iterables as input +* Workflows now accept additional custom events and state properties +* Various type hints according to Numpy 1.20 +* Refactored testing utility `runtests.sh` to have `--unittest` and `--net` (integration tests) options +* Base Docker image upgraded to `nvcr.io/nvidia/pytorch:21.02-py3` from `nvcr.io/nvidia/pytorch:20.10-py3` +* Docker images are now built with self-hosted environments +* Primary contact email updated to `monai.contact@gmail.com` +* Now using GitHub Discussions as the primary communication forum + +### Removed +* Compatibility tests for PyTorch 1.5.x +* Format specific loaders, e.g. 
`LoadNifti`, `NiftiDataset` +* Assert statements from non-test files +* `from module import *` statements, addressed flake8 F403 + +### Fixed +* Uses American English spelling for code, as per PyTorch +* Code coverage now takes multiprocessing runs into account +* SmartCache with initial shuffling +* `ConvertToMultiChannelBasedOnBratsClasses` now supports channel-first inputs +* Checkpoint handler to save with non-root permissions +* Fixed an issue for exiting the distributed unit tests +* Unified `DynUNet` to have single tensor output w/o deep supervision +* `SegmentationSaver` now supports user-specified data types and a `squeeze_end_dims` flag +* Fixed `*Saver` event handlers output filenames with a `data_root_dir` option +* Load image functions now ensure little-endian +* Fixed the test runner to support regex-based test case matching +* Usability issues in the event handlers + ## [0.4.0] - 2020-12-15 ### Added * Overview document for [feature highlights in v0.4.0](https://github.com/Project-MONAI/MONAI/blob/master/docs/source/highlights.md) @@ -173,7 +254,8 @@ the postprocessing steps should be used before calling the metrics methods [highlights]: https://github.com/Project-MONAI/MONAI/blob/master/docs/source/highlights.md -[Unreleased]: https://github.com/Project-MONAI/MONAI/compare/0.4.0...HEAD +[Unreleased]: https://github.com/Project-MONAI/MONAI/compare/0.5.0...HEAD +[0.5.0]: https://github.com/Project-MONAI/MONAI/compare/0.4.0...0.5.0 [0.4.0]: https://github.com/Project-MONAI/MONAI/compare/0.3.0...0.4.0 [0.3.0]: https://github.com/Project-MONAI/MONAI/compare/0.2.0...0.3.0 [0.2.0]: https://github.com/Project-MONAI/MONAI/compare/0.1.0...0.2.0 diff --git a/Dockerfile b/Dockerfile index 57ea567869..23be9ae1c3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -30,10 +30,9 @@ RUN cp /tmp/requirements.txt /tmp/req.bak \ # please specify exact files and folders to be copied -- else, 
basically always, the Docker build process cannot cache # this or anything below it and always will build from at most here; one file change leads to no caching from here on... -COPY LICENSE setup.py setup.cfg versioneer.py runtests.sh .gitignore .gitattributes README.md MANIFEST.in ./ +COPY LICENSE CHANGELOG.md CODE_OF_CONDUCT.md CONTRIBUTING.md README.md versioneer.py setup.py setup.cfg runtests.sh MANIFEST.in ./ COPY tests ./tests COPY monai ./monai -COPY .git ./.git RUN BUILD_MONAI=1 FORCE_CUDA=1 python setup.py develop \ && rm -rf build __pycache__ diff --git a/docs/images/3d_paired.png b/docs/images/3d_paired.png new file mode 100644 index 0000000000..dd751c8e16 Binary files /dev/null and b/docs/images/3d_paired.png differ diff --git a/docs/images/deepgrow.png b/docs/images/deepgrow.png new file mode 100644 index 0000000000..ec675009d1 Binary files /dev/null and b/docs/images/deepgrow.png differ diff --git a/docs/images/invert_transforms.png b/docs/images/invert_transforms.png new file mode 100644 index 0000000000..fa3863f373 Binary files /dev/null and b/docs/images/invert_transforms.png differ diff --git a/docs/images/lr_finder.png b/docs/images/lr_finder.png new file mode 100644 index 0000000000..ed9ba69770 Binary files /dev/null and b/docs/images/lr_finder.png differ diff --git a/docs/images/metrics_report.png b/docs/images/metrics_report.png new file mode 100644 index 0000000000..a317fcdc21 Binary files /dev/null and b/docs/images/metrics_report.png differ diff --git a/docs/images/pathology.png b/docs/images/pathology.png new file mode 100644 index 0000000000..da12ad23e7 Binary files /dev/null and b/docs/images/pathology.png differ diff --git a/docs/images/tta.png b/docs/images/tta.png new file mode 100644 index 0000000000..6c4e18ffa0 Binary files /dev/null and b/docs/images/tta.png differ diff --git a/docs/requirements.txt b/docs/requirements.txt index f05bc5b9ca..acc983129f 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -2,17 +2,15 @@ 
torch>=1.5 pytorch-ignite==0.4.4 numpy>=1.17 -itk>=5.0 +itk>=5.0, <=5.1.2 nibabel -cucim==0.18.1 -openslide-python==1.1.2 parameterized scikit-image>=0.14.2 tensorboard commonmark==0.9.1 recommonmark==0.6.0 -Sphinx==3.3.0 -sphinx-rtd-theme==0.5.0 +Sphinx==3.5.3 +sphinx-rtd-theme==0.5.2 sphinxcontrib-applehelp sphinxcontrib-devhelp sphinxcontrib-htmlhelp diff --git a/docs/source/handlers.rst b/docs/source/handlers.rst index a629b28b27..7c8498e37a 100644 --- a/docs/source/handlers.rst +++ b/docs/source/handlers.rst @@ -110,3 +110,23 @@ SmartCache handler ------------------ .. autoclass:: SmartCacheHandler :members: + +Parameter Scheduler handler +--------------------------- +.. autoclass:: ParamSchedulerHandler + :members: + +EarlyStop handler +----------------- +.. autoclass:: EarlyStopHandler + :members: + +GarbageCollector handler +------------------------ +.. autoclass:: GarbageCollector + :members: + +Transform inverter +------------------ +.. autoclass:: TransformInverter + :members: diff --git a/docs/source/highlights.md b/docs/source/highlights.md index 5baaa75f4c..3cb5cf6c71 100644 --- a/docs/source/highlights.md +++ b/docs/source/highlights.md @@ -1,4 +1,4 @@ -# Modules in v0.4.0 +# Modules in v0.5.0 MONAI aims at supporting deep learning in medical image analysis at multiple granularities. This figure shows a typical example of the end-to-end workflow in medical deep learning area: @@ -26,6 +26,7 @@ The rest of this page provides more details for each module. * [Workflows](#workflows) * [Research](#research) * [GPU acceleration](#gpu-acceleration) +* [Applications](#applications) ## Medical image data I/O, processing and augmentation Medical images require highly specialized methods for I/O, preprocessing, and augmentation. Medical images are often in specialized formats with rich meta-information, and the data volumes are often high-dimensional. These require carefully designed manipulation procedures. 
The medical imaging focus of MONAI is enabled by powerful and flexible image transformations that facilitate user-friendly, reproducible, optimized medical data pre-processing pipelines. @@ -129,6 +130,25 @@ The `ImageReader` API is quite straight-forward, users can easily extend for the With these pre-defined image readers, MONAI can load images in formats: `NIfTI`, `DICOM`, `PNG`, `JPG`, `BMP`, `NPY/NPZ`, etc. +### 11. Save transform data into NIfTI or PNG files +To convert images into files or debug the transform chain, MONAI provides `SaveImage` transform. Users can inject this transform into the transform chain to save the results. + +### 12. Automatically ensure `channel-first` data shape +Medical images have different shape formats. They can be `channel-last`, `channel-first` or even `no-channel`. We may, for example, want to load several `no-channel` images and stack them as `channel-first` data. To improve the user experience, MONAI provided an `EnsureChannelFirst` transform to automatically detect data shape according to the meta information and convert it to the `channel-first` format consistently. + +### 13. Invert spatial transforms and test-time augmentations +It is often desirable to invert the previously applied spatial transforms (resize, flip, rotate, zoom, crop, pad, etc.) with the deep learning workflows, for example, to resume to the original imaging space after processing the image data in a normalized data space. We enhance almost all the spatial transforms with an `inverse` operation and release this experimental feature in v0.5.0. Users can easily invert all the spatial transforms for one transformed data item or a batch of data items. It also can be achieved within the workflows by using the `TransformInverter` handler. + +If the pipeline includes random transformations, users may want to observe the effect that these transformations have on the output. 
The typical approach is that we pass the same input through the transforms multiple times with different random realizations. Then use the inverse transforms to move all the results to a common space, and calculate the metrics. MONAI provided `TestTimeAugmentation` for this feature, which by default will calculate the `mode`, `mean`, `standard deviation` and `volume variation coefficient`. + +[Invert transforms and TTA tutorials](https://github.com/Project-MONAI/tutorials/blob/master/modules/inverse_transforms_and_test_time_augmentations.ipynb) introduce details about the API with usage examples. + +(1) The last column is the inverted data of model output: +![image](../images/invert_transforms.png) + +(2) The TTA results of `mode`, `mean` and `standard deviation`: +![image](../images/tta.png) + ## Datasets ### 1. Cache IO and transforms data to accelerate training Users often need to train the model with many (potentially thousands of) epochs over the data to achieve the desired model quality. A native PyTorch implementation may repeatedly load data and run the same preprocessing steps for every epoch during training, which can be time-consuming and unnecessary, especially when the medical image volumes are large. @@ -194,14 +214,18 @@ The common workflow of predefined datasets: The `partition_dataset` utility in MONAI can perform several kinds of mechanism to partition dataset for training and validation or cross-validation. It supports shuffling based on a specified random seed, and will return a set of datasets, each dataset contains one partition. And it can split the dataset based on specified ratios or evenly split into `num_partitions`. For given class labels, it can also make sure the same ratio of classes in every partition. ## Losses -There are domain-specific loss functions in the medical imaging research which are not typically used in the generic computer vision tasks. 
As an important module of MONAI, these loss functions are implemented in PyTorch, such as `DiceLoss`, `GeneralizedDiceLoss`, `MaskedDiceLoss`, `TverskyLoss` and `FocalLoss`, etc. +There are domain-specific loss functions in the medical imaging research which are not typically used in the generic computer vision tasks. As an important module of MONAI, these loss functions are implemented in PyTorch, such as `DiceLoss`, `GeneralizedDiceLoss`, `MaskedDiceLoss`, `TverskyLoss`, `FocalLoss`, `DiceCELoss`, and `DiceFocalLoss`, etc. ## Optimizers MONAI provides several advanced features in optimizers to help accelerate the training or fine-tuning progress. For example, `Novograd` optimizer can be used to converge obviously faster than traditional optimizers. And users can easily define different learning rates for the model layers based [on the `generate_param_groups` utility API](https://github.com/Project-MONAI/tutorials/blob/master/modules/layer_wise_learning_rate.ipynb). +Another important feature is `LearningRateFinder`. The learning rate range test increases the learning rate in a pre-training run between two boundaries in a linear or exponential manner. It provides valuable information on how well the network can be trained over a range of learning rates and what the optimal learning rates are. [LearningRateFinder tutorial](https://github.com/Project-MONAI/tutorials/blob/master/modules/learning_rate.ipynb) indicates the API usage examples. +![image](../images/lr_finder.png) + ## Network architectures Some deep neural network architectures have shown to be particularly effective for medical imaging analysis tasks. MONAI implements reference networks with the aims of both flexibility and code readability. +### 1. Predefined layers and blocks To leverage the common network layers and blocks, MONAI provides several predefined layers and blocks which are compatible with 1D, 2D and 3D networks. 
Users can easily integrate the layer factories in their own networks. For example: @@ -215,6 +239,8 @@ name, dimension = Conv.CONVTRANS, 3 conv_type = Conv[name, dimension] add_module('conv1', conv_type(in_channels, out_channels, kernel_size=1, bias=False)) ``` + +### 2. Implementation of generic 2D/3D networks And there are several 1D/2D/3D-compatible implementations of intermediate blocks and generic networks, such as UNet, DynUNet, DenseNet, GAN, AHNet, VNet, SENet(and SEResNet, SEResNeXt), SegResNet, etc. All the networks can support PyTorch serialization pipeline based on `torch.jit.script`. ## Evaluation @@ -237,6 +263,10 @@ Various useful evaluation metrics have been used to measure the quality of medic For example, `Mean Dice` score can be used for segmentation tasks, and the area under the ROC curve(`ROCAUC`) for classification tasks. We continue to integrate more options. +### 3. Metrics report generation +During evaluation, users usually save the metrics of every input image, then analyze the bad cases to improve the deep learning pipeline. To save detailed information of metrics, MONAI provided a handler `MetricsSaver`, which can save the final metric values, raw metric of every model output channel of every input image, metrics summary report of operations: `mean`, `median`, `max`, `min`, `90percent`, `std`, etc. The `MeanDice` reports of validation with prostate dataset are as below: +![image](../images/metrics_report.png) + ## Visualization Beyond the simple point and curve plotting, MONAI provides intuitive interfaces to visualize multidimensional data as GIF animations in TensorBoard. This could provide a quick qualitative assessment of the model by visualizing, for example, the volumetric inputs, segmentation maps, and intermediate feature maps. A runnable example with visualization is available at [UNet training example](https://github.com/Project-MONAI/tutorials/blob/master/3d_segmentation/torch/unet_training_dict.py). 
@@ -255,10 +285,10 @@ A rich set of formats will be supported soon, along with relevant statistics and To quickly set up training and evaluation experiments, MONAI provides a set of workflows to significantly simplify the modules and allow for fast prototyping. These features decouple the domain-specific components and the generic machine learning processes. They also provide a set of unify APIs for higher level applications (such as AutoML, Federated Learning). -The trainers and evaluators of the workflows are compatible with pytorch-ignite `Engine` and `Event-Handler` mechanism. There are rich event handlers in MONAI to independently attach to the trainer or evaluator. +The trainers and evaluators of the workflows are compatible with pytorch-ignite `Engine` and `Event-Handler` mechanism. There are rich event handlers in MONAI to independently attach to the trainer or evaluator, and users can register additional `custom events` to workflows. ### 1. General workflows pipeline -The workflow and event handlers are shown as below: +The workflow and some of MONAI event handlers are shown as below: ![image](../images/workflows.png) The end-to-end training and evaluation examples are available at [Workflow examples](https://github.com/Project-MONAI/tutorials/tree/master/modules/engines). @@ -273,6 +303,11 @@ Models ensemble is a popular strategy in machine learning and deep learning area ![image](../images/models_ensemble.png) More details of practice is at [Model ensemble tutorial](https://github.com/Project-MONAI/tutorials/blob/master/modules/models_ensemble.ipynb). +### 3. Transfer learning for different input / output classes +`Transfer-learning` is a very common and efficient training approach, especially in the medical-specific domain where obtaining large datasets for training can be difficult. So transfer-learning from a pre-trained checkpoint can significantly improve the model metrics and shorten training time. 
+ +MONAI provided `CheckpointLoader` to load a checkpoint for the workflow before training, and it allows some `layer names` of current network don't match the checkpoint, or some `layer shapes` don't match the checkpoint, which can be useful if the current task has different input image classes or output classes. + ## Research There are several research prototypes in MONAI corresponding to the recently published papers that address advanced research problems. We always welcome contributions in forms of comments, suggestions, and code implementations. @@ -311,7 +346,30 @@ More details is available at [Fast training tutorial](https://github.com/Project ### 2. Distributed data parallel Distributed data parallel is an important feature of PyTorch to connect multiple GPU devices on single or multiple nodes to train or evaluate models. MONAI provides demos for reference: train/evaluate with PyTorch DDP, train/evaluate with Horovod, train/evaluate with Ignite DDP, partition dataset and train with SmartCacheDataset, as well as a real world training example based on Decathlon challenge Task01 - Brain Tumor segmentation. The demo contains distributed caching, training, and validation. We tried to train this example on NVIDIA NGC server, got some performance benchmarks for reference(PyTorch 1.6, CUDA 11, NVIDIA V100 GPUs): + ![image](../images/distributed_training.png) ### 3. C++/CUDA optimized modules -To accelerate some heavy computation progress, C++/CUDA implementation can be an impressive method, which usually brings even hundreds of times faster performance. MONAI contains some C++/CUDA optimized modules, like `Resampler`,and fully support C++/CUDA programs in CI/CD and building package. +To accelerate some heavy computation progress, C++/CUDA implementation can be an impressive method, which usually brings even hundreds of times faster performance. 
MONAI contains some C++/CUDA optimized modules, like `Resampler`, `Conditional random field (CRF)`, `Fast bilateral filtering using the permutohedral lattice`, and fully support C++/CUDA programs in CI/CD and building package. + +## Applications +The research area of medical image deep learning is expanding fast. To apply the latest achievements into applications, MONAI contains many application components to build end-to-end solutions or prototypes for other similar use cases. + +### 1. DeepGrow modules for interactive segmentation +[A reimplementation](https://github.com/Project-MONAI/MONAI/tree/master/monai/apps/deepgrow) of the DeepGrow components, which is deep learning based semi-automated segmentation approach that aims to be a "smart" interactive tool for region of interest delineation in medical images, originally proposed by: + +Sakinis, Tomas, et al. "Interactive segmentation of medical images through fully convolutional neural networks." arXiv preprint arXiv:1903.08205 (2019). + +![image](../images/deepgrow.png) + +### 2. Lesion detection in digital pathology +[Implementation](https://github.com/Project-MONAI/MONAI/tree/master/monai/apps/pathology) of the pathology detection components, which includes efficient whole slide imaging IO and sampling with NVIDIA cuCIM library and SmartCache mechanism, FROC measurements for lesion and probabilistic post-processing for lesion detection. + +![image](../images/pathology.png) + +### 3. Learning-based image registration +Starting from v0.5.0, MONAI provides experimental features for building learning-based 2D/3D registration workflows. These include image similarity measures as loss functions, bending energy as model regularization, network architectures, warping modules. The components can be used to build the major unsupervised and weakly-supervised algorithms. 
+ +The following figure shows the registration of CT images acquired at different time points for a single patient using MONAI: + +![3dreg](../images/3d_paired.png) diff --git a/docs/source/losses.rst b/docs/source/losses.rst index 5e19219fee..eea6656a24 100644 --- a/docs/source/losses.rst +++ b/docs/source/losses.rst @@ -48,6 +48,11 @@ Segmentation Losses .. autoclass:: DiceCELoss :members: +`DiceFocalLoss` +~~~~~~~~~~~~~~~ +.. autoclass:: DiceFocalLoss + :members: + `FocalLoss` ~~~~~~~~~~~ .. autoclass:: FocalLoss diff --git a/docs/source/networks.rst b/docs/source/networks.rst index 15d7cb80b0..baee107620 100644 --- a/docs/source/networks.rst +++ b/docs/source/networks.rst @@ -35,6 +35,11 @@ Blocks .. autoclass:: Swish :members: +`MemoryEfficientSwish` +~~~~~~~~~~~~~~~~~~~~~~ +.. autoclass:: MemoryEfficientSwish + :members: + `Mish` ~~~~~~ .. autoclass:: Mish @@ -99,7 +104,7 @@ Blocks .. autoclass:: SEResNetBottleneck :members: -`Squeeze-and-Excitation ResneXt Bottleneck` +`Squeeze-and-Excitation ResNeXt Bottleneck` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: SEResNeXtBottleneck :members: @@ -292,6 +297,11 @@ Nets .. autoclass:: DenseNet :members: +`EfficientNet` +~~~~~~~~~~~~~~ +.. autoclass:: EfficientNet + :members: + `SegResNet` ~~~~~~~~~~~ .. autoclass:: SegResNet diff --git a/docs/source/transforms.rst b/docs/source/transforms.rst index a726b25435..4f039b9c35 100644 --- a/docs/source/transforms.rst +++ b/docs/source/transforms.rst @@ -305,6 +305,11 @@ Post-processing :members: :special-members: __call__ +`Prob NMS` +"""""""""" +.. autoclass:: ProbNMS + :members: + `VoteEnsemble` """""""""""""" .. autoclass:: VoteEnsemble diff --git a/docs/source/utils.rst b/docs/source/utils.rst index 071d9ecefd..855954fd29 100644 --- a/docs/source/utils.rst +++ b/docs/source/utils.rst @@ -27,11 +27,6 @@ Misc .. automodule:: monai.utils.misc :members: -Prob NMS --------- -.. automodule:: monai.utils.prob_nms -.. 
autoclass:: ProbNMS - :members: Profiling --------- diff --git a/monai/_version.py b/monai/_version.py index 1b31d5fd1a..79f569dd79 100644 --- a/monai/_version.py +++ b/monai/_version.py @@ -1,3 +1,4 @@ + # This file helps to compute a version number in source trees obtained from # git-archive tarball (such as those provided by githubs download-from-tag # feature). Distribution tarballs (built by setup.py sdist) and build @@ -5,7 +6,7 @@ # that just contains the computed version number. # This file is released into the public domain. Generated by -# versioneer-0.18 (https://github.com/warner/python-versioneer) +# versioneer-0.19 (https://github.com/python-versioneer/python-versioneer) """Git implementation of _version.py.""" @@ -56,7 +57,7 @@ class NotThisMethod(Exception): def register_vcs_handler(vcs, method): # decorator - """Decorator to mark a method as the handler for a particular VCS.""" + """Create decorator to mark a method as the handler of a VCS.""" def decorate(f): """Store f in HANDLERS[vcs][method].""" if vcs not in HANDLERS: @@ -92,9 +93,7 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, if verbose: print("unable to find command, tried %s" % (commands,)) return None, None - stdout = p.communicate()[0].strip() - if sys.version_info[0] >= 3: - stdout = stdout.decode() + stdout = p.communicate()[0].strip().decode() if p.returncode != 0: if verbose: print("unable to run %s (error)" % dispcmd) @@ -164,6 +163,10 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): raise NotThisMethod("no keywords at all, weird") date = keywords.get("date") if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant # datestamp. 
However we prefer "%ci" (which expands to an "ISO-8601 # -like" string, which we must then edit to make compliant), because @@ -299,6 +302,9 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): # commit date: see ISO-8601 comment in git_versions_from_keywords() date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) return pieces @@ -337,18 +343,18 @@ def render_pep440(pieces): def render_pep440_pre(pieces): - """TAG[.post.devDISTANCE] -- No -dirty. + """TAG[.post0.devDISTANCE] -- No -dirty. Exceptions: - 1: no tags. 0.post.devDISTANCE + 1: no tags. 0.post0.devDISTANCE """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"]: - rendered += ".post.dev%d" % pieces["distance"] + rendered += ".post0.dev%d" % pieces["distance"] else: # exception #1 - rendered = "0.post.dev%d" % pieces["distance"] + rendered = "0.post0.dev%d" % pieces["distance"] return rendered @@ -494,7 +500,7 @@ def get_versions(): # versionfile_source is the relative path from the top of the source # tree (where the .git directory might live) to this file. Invert # this to find the root from __file__. 
- for i in cfg.versionfile_source.split('/'): # lgtm[py/unused-loop-variable] + for i in cfg.versionfile_source.split('/'): root = os.path.dirname(root) except NameError: return {"version": "0+unknown", "full-revisionid": None, diff --git a/monai/apps/deepgrow/transforms.py b/monai/apps/deepgrow/transforms.py index c58d4c1123..3d8f08bc01 100644 --- a/monai/apps/deepgrow/transforms.py +++ b/monai/apps/deepgrow/transforms.py @@ -16,7 +16,7 @@ from monai.config import IndexSelection, KeysCollection from monai.networks.layers import GaussianFilter from monai.transforms import Resize, SpatialCrop -from monai.transforms.transform import MapTransform, RandomizableTransform, Transform +from monai.transforms.transform import MapTransform, Randomizable, Transform from monai.transforms.utils import generate_spatial_bounding_box from monai.utils import InterpolateMode, ensure_tuple_rep, min_version, optional_import @@ -61,7 +61,7 @@ def __call__(self, data): return d -class AddInitialSeedPointd(RandomizableTransform): +class AddInitialSeedPointd(Randomizable): """ Add random guidance as initial seed point for a given label. @@ -86,7 +86,6 @@ def __init__( sid: str = "sid", connected_regions: int = 5, ): - super().__init__(prob=1.0, do_transform=True) self.label = label self.sids_key = sids self.sid_key = sid @@ -284,7 +283,7 @@ def __call__(self, data): return d -class AddRandomGuidanced(RandomizableTransform): +class AddRandomGuidanced(Randomizable): """ Add random guidance based on discrepancies that were found between label and prediction. 
@@ -320,7 +319,6 @@ def __init__( probability: str = "probability", batched: bool = True, ): - super().__init__(prob=1.0, do_transform=True) self.guidance = guidance self.discrepancy = discrepancy self.probability = probability diff --git a/monai/apps/pathology/datasets.py b/monai/apps/pathology/datasets.py index 01902d1ee2..cba8cd2da9 100644 --- a/monai/apps/pathology/datasets.py +++ b/monai/apps/pathology/datasets.py @@ -283,7 +283,7 @@ def _load_a_patch(self, index): """ Load sample given the index - Since index is sequential and the patches are comming in an stream from different images, + Since index is sequential and the patches are coming in an stream from different images, this method, first, finds the whole slide image and the patch that should be extracted, then it loads the patch and provide it with its image name and the corresponding mask location. """ diff --git a/monai/apps/pathology/handlers.py b/monai/apps/pathology/handlers.py index 046e403e0f..f0790c20b1 100644 --- a/monai/apps/pathology/handlers.py +++ b/monai/apps/pathology/handlers.py @@ -1,3 +1,14 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import logging import os from typing import TYPE_CHECKING, Dict, Optional diff --git a/monai/apps/pathology/metrics.py b/monai/apps/pathology/metrics.py index 63b9d073a7..2140de0080 100644 --- a/monai/apps/pathology/metrics.py +++ b/monai/apps/pathology/metrics.py @@ -9,14 +9,26 @@ # See the License for the specific language governing permissions and # limitations under the License. -import json -from typing import Dict, List, Tuple, Union +from typing import TYPE_CHECKING, Dict, List, Tuple import numpy as np from monai.apps.pathology.utils import PathologyProbNMS, compute_isolated_tumor_cells, compute_multi_instance_mask from monai.data.image_reader import WSIReader from monai.metrics import compute_fp_tp_probs, compute_froc_curve_data, compute_froc_score +from monai.utils import min_version, optional_import + +if TYPE_CHECKING: + from tqdm import tqdm + + has_tqdm = True +else: + tqdm, has_tqdm = optional_import("tqdm", "4.47.0", min_version, "tqdm") + +if not has_tqdm: + + def tqdm(x): + return x class LesionFROC: @@ -51,7 +63,7 @@ class LesionFROC: def __init__( self, - data: Union[List[Dict], str], + data: List[Dict], grow_distance: int = 75, itc_diameter: int = 200, eval_thresholds: Tuple = (0.25, 0.5, 1, 2, 4, 8), @@ -61,10 +73,7 @@ def __init__( image_reader_name: str = "cuCIM", ) -> None: - if isinstance(data, str): - self.data = self._load_data(data) - else: - self.data = data + self.data = data self.grow_distance = grow_distance self.itc_diameter = itc_diameter self.eval_thresholds = eval_thresholds @@ -75,11 +84,6 @@ def __init__( box_size=nms_box_size, ) - def _load_data(self, file_path: str) -> List[Dict]: - with open(file_path, "r") as f: - data: List[Dict] = json.load(f) - return data - def prepare_inference_result(self, sample: Dict): """ Prepare the probability map for detection evaluation. 
@@ -131,7 +135,7 @@ def compute_fp_tp(self): total_num_targets = 0 num_images = len(self.data) - for sample in self.data: + for sample in tqdm(self.data): probs, y_coord, x_coord = self.prepare_inference_result(sample) ground_truth, itc_labels = self.prepare_ground_truth(sample) # compute FP and TP probabilities for a pair of an image and an ground truth mask diff --git a/monai/apps/pathology/transforms.py b/monai/apps/pathology/transforms.py new file mode 100644 index 0000000000..1c631c0be2 --- /dev/null +++ b/monai/apps/pathology/transforms.py @@ -0,0 +1,193 @@ +# modified from sources: +# - Original implementation from Macenko paper in Matlab: https://github.com/mitkovetta/staining-normalization +# - Implementation in Python: https://github.com/schaugf/HEnorm_python +# - Link to Macenko et al., 2009 paper: http://wwwx.cs.unc.edu/~mn/sites/default/files/macenko2009.pdf + +from typing import TYPE_CHECKING + +from monai.transforms.transform import Transform +from monai.utils import exact_version, optional_import + +if TYPE_CHECKING: + import cupy as cp +else: + cp, _ = optional_import("cupy", "8.6.0", exact_version) + + +class ExtractStainsMacenko(Transform): + """Class to extract a target stain from an image, using the Macenko method for stain deconvolution. + + Args: + tli: transmitted light intensity. Defaults to 240. + alpha: tolerance in percentile for the pseudo-min (alpha percentile) + and pseudo-max (100 - alpha percentile). Defaults to 1. + beta: absorbance threshold for transparent pixels. Defaults to 0.15 + max_cref: reference maximum stain concentrations for Hematoxylin & Eosin (H&E). + Defaults to None. 
+ """ + + def __init__(self, tli: float = 240, alpha: float = 1, beta: float = 0.15, max_cref: cp.ndarray = None) -> None: + self.tli = tli + self.alpha = alpha + self.beta = beta + + self.max_cref = max_cref + if self.max_cref is None: + self.max_cref = cp.array([1.9705, 1.0308]) + + def _deconvolution_extract_stain(self, img: cp.ndarray) -> cp.ndarray: + """Perform Stain Deconvolution using the Macenko Method, and return stain matrix for the image. + + Args: + img: RGB image to perform stain deconvolution of + + Return: + he: H&E absorbance matrix for the image (first column is H, second column is E, rows are RGB values) + """ + # reshape image + img = img.reshape((-1, 3)) + + # calculate absorbance + absorbance = -cp.log(cp.clip(img.astype(cp.float) + 1, a_max=self.tli) / self.tli) + + # remove transparent pixels + absorbance_hat = absorbance[cp.all(absorbance > self.beta, axis=1)] + + # compute eigenvectors + _, eigvecs = cp.linalg.eigh(cp.cov(absorbance_hat.T)) + + # project on the plane spanned by the eigenvectors corresponding to the two largest eigenvalues + t_hat = absorbance_hat.dot(eigvecs[:, 1:3]) + + # find the min and max vectors and project back to absorbance space + phi = cp.arctan2(t_hat[:, 1], t_hat[:, 0]) + min_phi = cp.percentile(phi, self.alpha) + max_phi = cp.percentile(phi, 100 - self.alpha) + v_min = eigvecs[:, 1:3].dot(cp.array([(cp.cos(min_phi), cp.sin(min_phi))]).T) + v_max = eigvecs[:, 1:3].dot(cp.array([(cp.cos(max_phi), cp.sin(max_phi))]).T) + + # a heuristic to make the vector corresponding to hematoxylin first and the one corresponding to eosin second + if v_min[0] > v_max[0]: + he = cp.array((v_min[:, 0], v_max[:, 0])).T + else: + he = cp.array((v_max[:, 0], v_min[:, 0])).T + + return he + + def __call__(self, image: cp.ndarray) -> cp.ndarray: + """Perform stain extraction. 
+ + Args: + image: RGB image to extract stain from + + return: + target_he: H&E absorbance matrix for the image (first column is H, second column is E, rows are RGB values) + """ + target_he = self._deconvolution_extract_stain(image) + return target_he + + +class NormalizeStainsMacenko(Transform): + """Class to normalize patches/images to a reference or target image stain, using the Macenko method. + + Performs stain deconvolution of the source image to obtain the stain concentration matrix + for the image. Then, performs the inverse Beer-Lambert transform to recreate the + patch using the target H&E stain matrix provided. If no target stain provided, a default + reference stain is used. Similarly, if no maximum stain concentrations are provided, a + reference maximum stain concentrations matrix is used. + + Args: + tli: transmitted light intensity. Defaults to 240. + alpha: tolerance in percentile for the pseudo-min (alpha percentile) and + pseudo-max (100 - alpha percentile). Defaults to 1. + beta: absorbance threshold for transparent pixels. Defaults to 0.15. + target_he: target stain matrix. Defaults to None. + max_cref: reference maximum stain concentrations for Hematoxylin & Eosin (H&E). + Defaults to None. + """ + + def __init__( + self, + tli: float = 240, + alpha: float = 1, + beta: float = 0.15, + target_he: cp.ndarray = None, + max_cref: cp.ndarray = None, + ) -> None: + self.tli = tli + self.alpha = alpha + self.beta = beta + + self.target_he = target_he + if self.target_he is None: + self.target_he = cp.array([[0.5626, 0.2159], [0.7201, 0.8012], [0.4062, 0.5581]]) + + self.max_cref = max_cref + if self.max_cref is None: + self.max_cref = cp.array([1.9705, 1.0308]) + + def _deconvolution_extract_conc(self, img: cp.ndarray) -> cp.ndarray: + """Perform Stain Deconvolution using the Macenko Method, and return stain concentration. 
+ + Args: + img: RGB image to perform stain deconvolution of + + Return: + conc_norm: stain concentration matrix for the input image + """ + # reshape image + img = img.reshape((-1, 3)) + + # calculate absorbance + absorbance = -cp.log(cp.clip(img.astype(cp.float) + 1, a_max=self.tli) / self.tli) + + # remove transparent pixels + absorbance_hat = absorbance[cp.all(absorbance > self.beta, axis=1)] + + # compute eigenvectors + _, eigvecs = cp.linalg.eigh(cp.cov(absorbance_hat.T)) + + # project on the plane spanned by the eigenvectors corresponding to the two largest eigenvalues + t_hat = absorbance_hat.dot(eigvecs[:, 1:3]) + + # find the min and max vectors and project back to absorbance space + phi = cp.arctan2(t_hat[:, 1], t_hat[:, 0]) + min_phi = cp.percentile(phi, self.alpha) + max_phi = cp.percentile(phi, 100 - self.alpha) + v_min = eigvecs[:, 1:3].dot(cp.array([(cp.cos(min_phi), cp.sin(min_phi))]).T) + v_max = eigvecs[:, 1:3].dot(cp.array([(cp.cos(max_phi), cp.sin(max_phi))]).T) + + # a heuristic to make the vector corresponding to hematoxylin first and the one corresponding to eosin second + if v_min[0] > v_max[0]: + he = cp.array((v_min[:, 0], v_max[:, 0])).T + else: + he = cp.array((v_max[:, 0], v_min[:, 0])).T + + # rows correspond to channels (RGB), columns to absorbance values + y = cp.reshape(absorbance, (-1, 3)).T + + # determine concentrations of the individual stains + conc = cp.linalg.lstsq(he, y, rcond=None)[0] + + # normalize stain concentrations + max_conc = cp.array([cp.percentile(conc[0, :], 99), cp.percentile(conc[1, :], 99)]) + tmp = cp.divide(max_conc, self.max_cref) + conc_norm = cp.divide(conc, tmp[:, cp.newaxis]) + return conc_norm + + def __call__(self, image: cp.ndarray) -> cp.ndarray: + """Perform stain normalization. 
+ + Args: + image: RGB image/patch to stain normalize + + Return: + image_norm: stain normalized image/patch + """ + h, w, _ = image.shape + image_c = self._deconvolution_extract_conc(image) + + image_norm = cp.multiply(self.tli, cp.exp(-self.target_he.dot(image_c))) + image_norm[image_norm > 255] = 254 + image_norm = cp.reshape(image_norm.T, (h, w, 3)).astype(cp.uint8) + return image_norm diff --git a/monai/apps/pathology/utils.py b/monai/apps/pathology/utils.py index ae77bfafd1..0d1f530bff 100644 --- a/monai/apps/pathology/utils.py +++ b/monai/apps/pathology/utils.py @@ -14,7 +14,8 @@ import numpy as np import torch -from monai.utils import ProbNMS, optional_import +from monai.transforms.post.array import ProbNMS +from monai.utils import optional_import measure, _ = optional_import("skimage.measure") ndimage, _ = optional_import("scipy.ndimage") diff --git a/monai/csrc/filtering/bilateral/bilateralfilter_cuda_phl.cu b/monai/csrc/filtering/bilateral/bilateralfilter_cuda_phl.cu index 603ab689cf..17dc9e7ebd 100644 --- a/monai/csrc/filtering/bilateral/bilateralfilter_cuda_phl.cu +++ b/monai/csrc/filtering/bilateral/bilateralfilter_cuda_phl.cu @@ -95,7 +95,7 @@ void BilateralFilterPHLCuda( cudaMalloc(&data, desc.batchCount * desc.channelStride * desc.channelCount * sizeof(scalar_t)); cudaMalloc(&features, desc.batchCount * desc.channelStride * featureChannelCount * sizeof(scalar_t)); - // Prparing constant memory + // Preparing constant memory cudaMemcpyToSymbol(cBatchStride, &desc.batchStride, sizeof(int)); cudaMemcpyToSymbol(cChannelStride, &desc.channelStride, sizeof(int)); cudaMemcpyToSymbol(cSpatialStrides, desc.strides, sizeof(int) * desc.dimensions); diff --git a/monai/csrc/filtering/permutohedral/hash_table.cuh b/monai/csrc/filtering/permutohedral/hash_table.cuh index 7d9d7eb163..f9893dffe2 100644 --- a/monai/csrc/filtering/permutohedral/hash_table.cuh +++ b/monai/csrc/filtering/permutohedral/hash_table.cuh @@ -15,7 +15,7 @@ limitations under the License. 
//#define USE_ADDITIVE_HASH -// turn this on if you want to get slighly less memory consumption and slightly longer run times. +// turn this on if you want to get slightly less memory consumption and slightly longer run times. //#define LINEAR_D_MEMORY #define USE_CUSTOM_MODULO diff --git a/monai/csrc/filtering/permutohedral/permutohedral.cpp b/monai/csrc/filtering/permutohedral/permutohedral.cpp index 5d6916b8f4..04ef6fa4da 100644 --- a/monai/csrc/filtering/permutohedral/permutohedral.cpp +++ b/monai/csrc/filtering/permutohedral/permutohedral.cpp @@ -1,3 +1,16 @@ +/* +Copyright 2020 - 2021 MONAI Consortium +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + #include "utils/common_utils.h" #include "utils/meta_macros.h" diff --git a/monai/data/dataset.py b/monai/data/dataset.py index 12403bbff1..a09050e5bc 100644 --- a/monai/data/dataset.py +++ b/monai/data/dataset.py @@ -29,7 +29,6 @@ from monai.data.utils import first, pickle_hashing from monai.transforms import Compose, Randomizable, Transform, apply_transform -from monai.transforms.transform import RandomizableTransform from monai.utils import MAX_SEED, get_seed, min_version, optional_import if TYPE_CHECKING: @@ -182,7 +181,7 @@ def _pre_transform(self, item_transformed): raise ValueError("transform must be an instance of monai.transforms.Compose.") for _transform in self.transform.transforms: # execute all the deterministic transforms - if isinstance(_transform, RandomizableTransform) or not isinstance(_transform, Transform): + if isinstance(_transform, Randomizable) or not isinstance(_transform, Transform): break item_transformed = apply_transform(_transform, item_transformed) return item_transformed @@ -204,7 +203,7 @@ def _post_transform(self, item_transformed): for _transform in self.transform.transforms: if ( start_post_randomize_run - or isinstance(_transform, RandomizableTransform) + or isinstance(_transform, Randomizable) or not isinstance(_transform, Transform) ): start_post_randomize_run = True @@ -547,7 +546,7 @@ def _load_cache_item(self, idx: int): raise ValueError("transform must be an instance of monai.transforms.Compose.") for _transform in self.transform.transforms: # execute all the deterministic transforms - if isinstance(_transform, RandomizableTransform) or not isinstance(_transform, Transform): + if isinstance(_transform, Randomizable) or not isinstance(_transform, Transform): break item = apply_transform(_transform, item) return item @@ -564,7 +563,7 @@ def _transform(self, index: int): if not isinstance(self.transform, Compose): raise ValueError("transform must be an instance of monai.transforms.Compose.") for _transform in 
self.transform.transforms: - if start_run or isinstance(_transform, RandomizableTransform) or not isinstance(_transform, Transform): + if start_run or isinstance(_transform, Randomizable) or not isinstance(_transform, Transform): start_run = True data = apply_transform(_transform, data) return data @@ -967,10 +966,10 @@ def __getitem__(self, index: int): # set transforms of each zip component for dataset in self.dataset.data: transform = getattr(dataset, "transform", None) - if isinstance(transform, RandomizableTransform): + if isinstance(transform, Randomizable): transform.set_random_state(seed=self._seed) transform = getattr(self.dataset, "transform", None) - if isinstance(transform, RandomizableTransform): + if isinstance(transform, Randomizable): transform.set_random_state(seed=self._seed) return self.dataset[index] diff --git a/monai/data/decathlon_datalist.py b/monai/data/decathlon_datalist.py index 6167e83e47..11fb5edd28 100644 --- a/monai/data/decathlon_datalist.py +++ b/monai/data/decathlon_datalist.py @@ -17,34 +17,43 @@ @overload -def _compute_path(base_dir: str, element: str) -> str: +def _compute_path(base_dir: str, element: str, check_path: bool = False) -> str: ... @overload -def _compute_path(base_dir: str, element: List[str]) -> List[str]: +def _compute_path(base_dir: str, element: List[str], check_path: bool = False) -> List[str]: ... -def _compute_path(base_dir, element): +def _compute_path(base_dir, element, check_path=False): """ Args: base_dir: the base directory of the dataset. element: file path(s) to append to directory. + check_path: if `True`, only compute when the result is an existing path. Raises: TypeError: When ``element`` contains a non ``str``. TypeError: When ``element`` type is not in ``Union[list, str]``. 
""" + + def _join_path(base_dir: str, item: str): + result = os.path.normpath(os.path.join(base_dir, item)) + if check_path and not os.path.exists(result): + # if not an existing path, don't join with base dir + return item + return result + if isinstance(element, str): - return os.path.normpath(os.path.join(base_dir, element)) + return _join_path(base_dir, element) if isinstance(element, list): for e in element: if not isinstance(e, str): - raise TypeError(f"Every file path in element must be a str but got {type(element).__name__}.") - return [os.path.normpath(os.path.join(base_dir, e)) for e in element] - raise TypeError(f"element must be one of (str, list) but is {type(element).__name__}.") + return element + return [_join_path(base_dir, e) for e in element] + return element def _append_paths(base_dir: str, is_segmentation: bool, items: List[Dict]) -> List[Dict]: @@ -63,9 +72,12 @@ def _append_paths(base_dir: str, is_segmentation: bool, items: List[Dict]) -> Li raise TypeError(f"Every item in items must be a dict but got {type(item).__name__}.") for k, v in item.items(): if k == "image": - item[k] = _compute_path(base_dir, v) + item[k] = _compute_path(base_dir, v, check_path=False) elif is_segmentation and k == "label": - item[k] = _compute_path(base_dir, v) + item[k] = _compute_path(base_dir, v, check_path=False) + else: + # for other items, auto detect whether it's a valid path + item[k] = _compute_path(base_dir, v, check_path=True) return items diff --git a/monai/data/image_dataset.py b/monai/data/image_dataset.py index 1074105508..1568e082ee 100644 --- a/monai/data/image_dataset.py +++ b/monai/data/image_dataset.py @@ -17,7 +17,6 @@ from monai.config import DtypeLike from monai.data.image_reader import ImageReader from monai.transforms import LoadImage, Randomizable, apply_transform -from monai.transforms.transform import RandomizableTransform from monai.utils import MAX_SEED, get_seed @@ -107,14 +106,14 @@ def __getitem__(self, index: int): label = 
self.labels[index] if self.transform is not None: - if isinstance(self.transform, RandomizableTransform): + if isinstance(self.transform, Randomizable): self.transform.set_random_state(seed=self._seed) img = apply_transform(self.transform, img) data = [img] if self.seg_transform is not None: - if isinstance(self.seg_transform, RandomizableTransform): + if isinstance(self.seg_transform, Randomizable): self.seg_transform.set_random_state(seed=self._seed) seg = apply_transform(self.seg_transform, seg) diff --git a/monai/data/inverse_batch_transform.py b/monai/data/inverse_batch_transform.py index fa88114c84..3035a1910d 100644 --- a/monai/data/inverse_batch_transform.py +++ b/monai/data/inverse_batch_transform.py @@ -9,6 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import warnings from typing import Any, Callable, Dict, Hashable, Optional, Sequence import numpy as np @@ -16,7 +17,7 @@ from monai.data.dataloader import DataLoader from monai.data.dataset import Dataset -from monai.data.utils import decollate_batch, pad_list_data_collate +from monai.data.utils import decollate_batch, no_collation, pad_list_data_collate from monai.transforms.croppad.batch import PadListDataCollate from monai.transforms.inverse import InvertibleTransform from monai.transforms.transform import Transform @@ -42,29 +43,39 @@ def _transform(self, index: int) -> Dict[Hashable, np.ndarray]: if self.pad_collation_used: data = PadListDataCollate.inverse(data) + if not isinstance(self.invertible_transform, InvertibleTransform): + warnings.warn("transform is not invertible, can't invert transform for the input data.") + return data return self.invertible_transform.inverse(data) -def no_collation(x): - return x - - class BatchInverseTransform(Transform): - """Perform inverse on a batch of data. This is useful if you have inferred a batch of images and want to invert them all.""" + """ + Perform inverse on a batch of data. 
This is useful if you have inferred a batch of images and want to invert + them all. + """ def __init__( - self, transform: InvertibleTransform, loader: TorchDataLoader, collate_fn: Optional[Callable] = no_collation + self, + transform: InvertibleTransform, + loader: TorchDataLoader, + collate_fn: Optional[Callable] = no_collation, + num_workers: Optional[int] = 0, ) -> None: """ Args: transform: a callable data transform on input data. - loader: data loader used to generate the batch of data. - collate_fn: how to collate data after inverse transformations. Default won't do any collation, so the output will be a - list of size batch size. + loader: data loader used to run `transforms` and generate the batch of data. + collate_fn: how to collate data after inverse transformations. + default won't do any collation, so the output will be a list of size batch size. + num_workers: number of workers when run data loader for inverse transforms, + default to 0 as only run 1 iteration and multi-processing may be even slower. + if the transforms are really slow, set num_workers for multi-processing. + if set to `None`, use the `num_workers` of the transform data loader. """ self.transform = transform self.batch_size = loader.batch_size - self.num_workers = loader.num_workers + self.num_workers = loader.num_workers if num_workers is None else num_workers self.collate_fn = collate_fn self.pad_collation_used = loader.collate_fn == pad_list_data_collate diff --git a/monai/data/nifti_saver.py b/monai/data/nifti_saver.py index 15e61c79e1..0ff719023c 100644 --- a/monai/data/nifti_saver.py +++ b/monai/data/nifti_saver.py @@ -102,6 +102,7 @@ def save(self, data: Union[torch.Tensor, np.ndarray], meta_data: Optional[Dict] - ``'original_affine'`` -- for data orientation handling, defaulting to an identity matrix. - ``'affine'`` -- for data output affine, defaulting to an identity matrix. - ``'spatial_shape'`` -- for data output shape. 
+ - ``'patch_index'`` -- if the data is a patch of big image, append the patch index to filename. When meta_data is specified, the saver will try to resample batch data from the space defined by "affine" to the space defined by "original_affine". @@ -121,12 +122,13 @@ def save(self, data: Union[torch.Tensor, np.ndarray], meta_data: Optional[Dict] original_affine = meta_data.get("original_affine", None) if meta_data else None affine = meta_data.get("affine", None) if meta_data else None spatial_shape = meta_data.get("spatial_shape", None) if meta_data else None + patch_index = meta_data.get(Key.PATCH_INDEX, None) if meta_data else None if isinstance(data, torch.Tensor): data = data.detach().cpu().numpy() - filename = create_file_basename(self.output_postfix, filename, self.output_dir, self.data_root_dir) - filename = f"{filename}{self.output_ext}" + path = create_file_basename(self.output_postfix, filename, self.output_dir, self.data_root_dir, patch_index) + path = f"{path}{self.output_ext}" # change data shape to be (channel, h, w, d) while len(data.shape) < 4: data = np.expand_dims(data, -1) @@ -140,7 +142,7 @@ def save(self, data: Union[torch.Tensor, np.ndarray], meta_data: Optional[Dict] write_nifti( data, - file_name=filename, + file_name=path, affine=affine, target_affine=original_affine, resample=self.resample, @@ -169,6 +171,7 @@ def save_batch(self, batch_data: Union[torch.Tensor, np.ndarray], meta_data: Opt Args: batch_data: target batch data content that save into NIfTI format. meta_data: every key-value in the meta_data is corresponding to a batch of data. 
+ """ for i, data in enumerate(batch_data): # save a batch of files - self.save(data, {k: meta_data[k][i] for k in meta_data} if meta_data else None) + self.save(data=data, meta_data={k: meta_data[k][i] for k in meta_data} if meta_data is not None else None) diff --git a/monai/data/png_saver.py b/monai/data/png_saver.py index a6cc0e89a2..880f6b204f 100644 --- a/monai/data/png_saver.py +++ b/monai/data/png_saver.py @@ -78,6 +78,7 @@ def save(self, data: Union[torch.Tensor, np.ndarray], meta_data: Optional[Dict] - ``'filename_or_obj'`` -- for output file name creation, corresponding to filename or object. - ``'spatial_shape'`` -- for data output shape. + - ``'patch_index'`` -- if the data is a patch of big image, append the patch index to filename. If meta_data is None, use the default index (starting from 0) as the filename. @@ -98,12 +99,13 @@ def save(self, data: Union[torch.Tensor, np.ndarray], meta_data: Optional[Dict] filename = meta_data[Key.FILENAME_OR_OBJ] if meta_data else str(self._data_index) self._data_index += 1 spatial_shape = meta_data.get("spatial_shape", None) if meta_data and self.resample else None + patch_index = meta_data.get(Key.PATCH_INDEX, None) if meta_data else None if isinstance(data, torch.Tensor): data = data.detach().cpu().numpy() - filename = create_file_basename(self.output_postfix, filename, self.output_dir, self.data_root_dir) - filename = f"{filename}{self.output_ext}" + path = create_file_basename(self.output_postfix, filename, self.output_dir, self.data_root_dir, patch_index) + path = f"{path}{self.output_ext}" if data.shape[0] == 1: data = data.squeeze(0) @@ -114,7 +116,7 @@ def save(self, data: Union[torch.Tensor, np.ndarray], meta_data: Optional[Dict] write_png( np.asarray(data), - file_name=filename, + file_name=path, output_spatial_shape=spatial_shape, mode=self.mode, scale=self.scale, @@ -126,6 +128,7 @@ def save_batch(self, batch_data: Union[torch.Tensor, np.ndarray], meta_data: Opt Args: batch_data: target batch data 
content that save into png format. meta_data: every key-value in the meta_data is corresponding to a batch of data. + """ for i, data in enumerate(batch_data): # save a batch of files - self.save(data, {k: meta_data[k][i] for k in meta_data} if meta_data else None) + self.save(data=data, meta_data={k: meta_data[k][i] for k in meta_data} if meta_data is not None else None) diff --git a/monai/data/test_time_augmentation.py b/monai/data/test_time_augmentation.py index 51b95adc58..06e1f63da5 100644 --- a/monai/data/test_time_augmentation.py +++ b/monai/data/test_time_augmentation.py @@ -20,7 +20,7 @@ from monai.data.utils import list_data_collate, pad_list_data_collate from monai.transforms.compose import Compose from monai.transforms.inverse import InvertibleTransform -from monai.transforms.transform import RandomizableTransform +from monai.transforms.transform import Randomizable from monai.transforms.utils import allow_missing_keys_mode from monai.utils.enums import CommonKeys, InverseKeys @@ -47,7 +47,7 @@ class TestTimeAugmentation: Args: transform: transform (or composed) to be applied to each realisation. At least one transform must be of type - `RandomizableTransform`. All random transforms must be of type `InvertibleTransform`. + `Randomizable`. All random transforms must be of type `InvertibleTransform`. batch_size: number of realisations to infer at once. num_workers: how many subprocesses to use for data. inferrer_fn: function to use to perform inference. 
@@ -96,7 +96,7 @@ def __init__( def _check_transforms(self): """Should be at least 1 random transform, and all random transforms should be invertible.""" ts = [self.transform] if not isinstance(self.transform, Compose) else self.transform.transforms - randoms = np.array([isinstance(t, RandomizableTransform) for t in ts]) + randoms = np.array([isinstance(t, Randomizable) for t in ts]) invertibles = np.array([isinstance(t, InvertibleTransform) for t in ts]) # check at least 1 random if sum(randoms) == 0: diff --git a/monai/data/thread_buffer.py b/monai/data/thread_buffer.py index da5f864900..8ea71e3555 100644 --- a/monai/data/thread_buffer.py +++ b/monai/data/thread_buffer.py @@ -88,7 +88,7 @@ def __init__(self, dataset: Dataset, num_workers: int = 0, **kwargs): super().__init__(dataset, num_workers, **kwargs) # ThreadBuffer will use the inherited __iter__ instead of the one defined below - self.buffer = ThreadBuffer(super()) + self.buffer = ThreadBuffer(super().__iter__()) def __iter__(self): yield from self.buffer diff --git a/monai/data/utils.py b/monai/data/utils.py index a3d8f3128e..d39f2702ff 100644 --- a/monai/data/utils.py +++ b/monai/data/utils.py @@ -65,6 +65,7 @@ "sorted_dict", "decollate_batch", "pad_list_data_collate", + "no_collation", ] @@ -379,6 +380,13 @@ def pad_list_data_collate( return PadListDataCollate(method, mode)(batch) +def no_collation(x): + """ + No any collation operation. + """ + return x + + def worker_init_fn(worker_id: int) -> None: """ Callback function for PyTorch DataLoader `worker_init_fn`. @@ -600,6 +608,7 @@ def create_file_basename( input_file_name: str, folder_path: str, data_root_dir: str = "", + patch_index: Optional[int] = None, ) -> str: """ Utility function to create the path to the output file based on the input @@ -623,6 +632,7 @@ def create_file_basename( absolute path. 
This is used to compute `input_file_rel_path`, the relative path to the file from `data_root_dir` to preserve folder structure when saving in case there are files in different folders with the same file names. + patch_index: if not None, append the patch index to filename. """ # get the filename and directory @@ -641,11 +651,15 @@ def create_file_basename( if not os.path.exists(subfolder_path): os.makedirs(subfolder_path) - if postfix: + if len(postfix) > 0: # add the sub-folder plus the postfix name to become the file basename in the output path output = os.path.join(subfolder_path, filename + "_" + postfix) else: output = os.path.join(subfolder_path, filename) + + if patch_index is not None: + output += f"_{patch_index}" + return os.path.abspath(output) diff --git a/monai/engines/evaluator.py b/monai/engines/evaluator.py index c1fe79c848..e1fecb745d 100644 --- a/monai/engines/evaluator.py +++ b/monai/engines/evaluator.py @@ -9,7 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import TYPE_CHECKING, Callable, Dict, Iterable, Optional, Sequence, Tuple, Union +from typing import TYPE_CHECKING, Callable, Dict, Iterable, List, Optional, Sequence, Tuple, Union import torch from torch.utils.data import DataLoader @@ -23,11 +23,12 @@ from monai.utils.enums import CommonKeys as Keys if TYPE_CHECKING: - from ignite.engine import Engine + from ignite.engine import Engine, EventEnum from ignite.metrics import Metric else: Engine, _ = optional_import("ignite.engine", "0.4.4", exact_version, "Engine") Metric, _ = optional_import("ignite.metrics", "0.4.4", exact_version, "Metric") + EventEnum, _ = optional_import("ignite.engine", "0.4.4", exact_version, "EventEnum") __all__ = ["Evaluator", "SupervisedEvaluator", "EnsembleEvaluator"] @@ -56,6 +57,10 @@ class Evaluator(Workflow): amp: whether to enable auto-mixed-precision evaluation, default is False. 
mode: model forward mode during evaluation, should be 'eval' or 'train', which maps to `model.eval()` or `model.train()`, default to 'eval'. + event_names: additional custom ignite events that will register to the engine. + new events can be a list of str or `ignite.engine.events.EventEnum`. + event_to_attr: a dictionary to map an event to a state attribute, then add to `engine.state`. + for more details, check: https://github.com/pytorch/ignite/blob/v0.4.4.post1/ignite/engine/engine.py#L160 """ @@ -73,6 +78,8 @@ def __init__( val_handlers: Optional[Sequence] = None, amp: bool = False, mode: Union[ForwardMode, str] = ForwardMode.EVAL, + event_names: Optional[List[Union[str, EventEnum]]] = None, + event_to_attr: Optional[dict] = None, ) -> None: super().__init__( device=device, @@ -87,6 +94,8 @@ def __init__( additional_metrics=additional_metrics, handlers=val_handlers, amp=amp, + event_names=event_names, + event_to_attr=event_to_attr, ) mode = ForwardMode(mode) if mode == ForwardMode.EVAL: @@ -140,6 +149,10 @@ class SupervisedEvaluator(Evaluator): amp: whether to enable auto-mixed-precision evaluation, default is False. mode: model forward mode during evaluation, should be 'eval' or 'train', which maps to `model.eval()` or `model.train()`, default to 'eval'. + event_names: additional custom ignite events that will register to the engine. + new events can be a list of str or `ignite.engine.events.EventEnum`. + event_to_attr: a dictionary to map an event to a state attribute, then add to `engine.state`. 
+ for more details, check: https://github.com/pytorch/ignite/blob/v0.4.4.post1/ignite/engine/engine.py#L160 """ @@ -159,6 +172,8 @@ def __init__( val_handlers: Optional[Sequence] = None, amp: bool = False, mode: Union[ForwardMode, str] = ForwardMode.EVAL, + event_names: Optional[List[Union[str, EventEnum]]] = None, + event_to_attr: Optional[dict] = None, ) -> None: super().__init__( device=device, @@ -173,15 +188,14 @@ def __init__( val_handlers=val_handlers, amp=amp, mode=mode, + # add the iteration events + event_names=[IterationEvents] if event_names is None else event_names + [IterationEvents], + event_to_attr=event_to_attr, ) self.network = network self.inferer = SimpleInferer() if inferer is None else inferer - def _register_additional_events(self): - super()._register_additional_events() - self.register_events(*IterationEvents) - def _iteration(self, engine: Engine, batchdata: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: """ callback function for the Supervised Evaluation processing logic of 1 iteration in Ignite Engine. 
@@ -209,17 +223,18 @@ def _iteration(self, engine: Engine, batchdata: Dict[str, torch.Tensor]) -> Dict inputs, targets, args, kwargs = batch # put iteration outputs into engine.state - engine.state.output = output = {Keys.IMAGE: inputs, Keys.LABEL: targets} + engine.state.output = {Keys.IMAGE: inputs, Keys.LABEL: targets} # execute forward computation with self.mode(self.network): if self.amp: with torch.cuda.amp.autocast(): - output[Keys.PRED] = self.inferer(inputs, self.network, *args, **kwargs) + engine.state.output[Keys.PRED] = self.inferer(inputs, self.network, *args, **kwargs) else: - output[Keys.PRED] = self.inferer(inputs, self.network, *args, **kwargs) + engine.state.output[Keys.PRED] = self.inferer(inputs, self.network, *args, **kwargs) engine.fire_event(IterationEvents.FORWARD_COMPLETED) + engine.fire_event(IterationEvents.MODEL_COMPLETED) - return output + return engine.state.output class EnsembleEvaluator(Evaluator): @@ -251,6 +266,10 @@ class EnsembleEvaluator(Evaluator): amp: whether to enable auto-mixed-precision evaluation, default is False. mode: model forward mode during evaluation, should be 'eval' or 'train', which maps to `model.eval()` or `model.train()`, default to 'eval'. + event_names: additional custom ignite events that will register to the engine. + new events can be a list of str or `ignite.engine.events.EventEnum`. + event_to_attr: a dictionary to map an event to a state attribute, then add to `engine.state`. 
+ for more details, check: https://github.com/pytorch/ignite/blob/v0.4.4.post1/ignite/engine/engine.py#L160 """ @@ -271,6 +290,8 @@ def __init__( val_handlers: Optional[Sequence] = None, amp: bool = False, mode: Union[ForwardMode, str] = ForwardMode.EVAL, + event_names: Optional[List[Union[str, EventEnum]]] = None, + event_to_attr: Optional[dict] = None, ) -> None: super().__init__( device=device, @@ -285,16 +306,15 @@ def __init__( val_handlers=val_handlers, amp=amp, mode=mode, + # add the iteration events + event_names=[IterationEvents] if event_names is None else event_names + [IterationEvents], + event_to_attr=event_to_attr, ) self.networks = ensure_tuple(networks) self.pred_keys = ensure_tuple(pred_keys) self.inferer = SimpleInferer() if inferer is None else inferer - def _register_additional_events(self): - super()._register_additional_events() - self.register_events(*IterationEvents) - def _iteration(self, engine: Engine, batchdata: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: """ callback function for the Supervised Evaluation processing logic of 1 iteration in Ignite Engine. 
@@ -325,14 +345,17 @@ def _iteration(self, engine: Engine, batchdata: Dict[str, torch.Tensor]) -> Dict inputs, targets, args, kwargs = batch # put iteration outputs into engine.state - engine.state.output = output = {Keys.IMAGE: inputs, Keys.LABEL: targets} + engine.state.output = {Keys.IMAGE: inputs, Keys.LABEL: targets} for idx, network in enumerate(self.networks): with self.mode(network): if self.amp: with torch.cuda.amp.autocast(): - output.update({self.pred_keys[idx]: self.inferer(inputs, network, *args, **kwargs)}) + engine.state.output.update( + {self.pred_keys[idx]: self.inferer(inputs, network, *args, **kwargs)} + ) else: - output.update({self.pred_keys[idx]: self.inferer(inputs, network, *args, **kwargs)}) + engine.state.output.update({self.pred_keys[idx]: self.inferer(inputs, network, *args, **kwargs)}) engine.fire_event(IterationEvents.FORWARD_COMPLETED) + engine.fire_event(IterationEvents.MODEL_COMPLETED) - return output + return engine.state.output diff --git a/monai/engines/trainer.py b/monai/engines/trainer.py index a7b1943211..e9e31a1b16 100644 --- a/monai/engines/trainer.py +++ b/monai/engines/trainer.py @@ -9,7 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import TYPE_CHECKING, Callable, Dict, Iterable, Optional, Sequence, Tuple, Union +from typing import TYPE_CHECKING, Callable, Dict, Iterable, List, Optional, Sequence, Tuple, Union import torch from torch.optim.optimizer import Optimizer @@ -23,11 +23,12 @@ from monai.utils.enums import CommonKeys as Keys if TYPE_CHECKING: - from ignite.engine import Engine + from ignite.engine import Engine, EventEnum from ignite.metrics import Metric else: Engine, _ = optional_import("ignite.engine", "0.4.4", exact_version, "Engine") Metric, _ = optional_import("ignite.metrics", "0.4.4", exact_version, "Metric") + EventEnum, _ = optional_import("ignite.engine", "0.4.4", exact_version, "EventEnum") __all__ = ["Trainer", "SupervisedTrainer", "GanTrainer"] @@ -78,6 +79,10 @@ class SupervisedTrainer(Trainer): train_handlers: every handler is a set of Ignite Event-Handlers, must have `attach` function, like: CheckpointHandler, StatsHandler, SegmentationSaver, etc. amp: whether to enable auto-mixed-precision training, default is False. + event_names: additional custom ignite events that will register to the engine. + new events can be a list of str or `ignite.engine.events.EventEnum`. + event_to_attr: a dictionary to map an event to a state attribute, then add to `engine.state`. 
+ for more details, check: https://github.com/pytorch/ignite/blob/v0.4.4.post1/ignite/engine/engine.py#L160 """ @@ -99,8 +104,9 @@ def __init__( additional_metrics: Optional[Dict[str, Metric]] = None, train_handlers: Optional[Sequence] = None, amp: bool = False, + event_names: Optional[List[Union[str, EventEnum]]] = None, + event_to_attr: Optional[dict] = None, ) -> None: - # set up Ignite engine and environments super().__init__( device=device, max_epochs=max_epochs, @@ -114,6 +120,9 @@ def __init__( additional_metrics=additional_metrics, handlers=train_handlers, amp=amp, + # add the iteration events + event_names=[IterationEvents] if event_names is None else event_names + [IterationEvents], + event_to_attr=event_to_attr, ) self.network = network @@ -121,10 +130,6 @@ def __init__( self.loss_function = loss_function self.inferer = SimpleInferer() if inferer is None else inferer - def _register_additional_events(self): - super()._register_additional_events() - self.register_events(*IterationEvents) - def _iteration(self, engine: Engine, batchdata: Dict[str, torch.Tensor]): """ Callback function for the Supervised Training processing logic of 1 iteration in Ignite Engine. 
@@ -152,12 +157,12 @@ def _iteration(self, engine: Engine, batchdata: Dict[str, torch.Tensor]): else: inputs, targets, args, kwargs = batch # put iteration outputs into engine.state - engine.state.output = output = {Keys.IMAGE: inputs, Keys.LABEL: targets} + engine.state.output = {Keys.IMAGE: inputs, Keys.LABEL: targets} def _compute_pred_loss(): - output[Keys.PRED] = self.inferer(inputs, self.network, *args, **kwargs) + engine.state.output[Keys.PRED] = self.inferer(inputs, self.network, *args, **kwargs) engine.fire_event(IterationEvents.FORWARD_COMPLETED) - output[Keys.LOSS] = self.loss_function(output[Keys.PRED], targets).mean() + engine.state.output[Keys.LOSS] = self.loss_function(engine.state.output[Keys.PRED], targets).mean() engine.fire_event(IterationEvents.LOSS_COMPLETED) self.network.train() @@ -165,18 +170,18 @@ def _compute_pred_loss(): if self.amp and self.scaler is not None: with torch.cuda.amp.autocast(): _compute_pred_loss() - self.scaler.scale(output[Keys.LOSS]).backward() + self.scaler.scale(engine.state.output[Keys.LOSS]).backward() engine.fire_event(IterationEvents.BACKWARD_COMPLETED) self.scaler.step(self.optimizer) self.scaler.update() else: _compute_pred_loss() - output[Keys.LOSS].backward() + engine.state.output[Keys.LOSS].backward() engine.fire_event(IterationEvents.BACKWARD_COMPLETED) self.optimizer.step() - engine.fire_event(IterationEvents.OPTIMIZER_COMPLETED) + engine.fire_event(IterationEvents.MODEL_COMPLETED) - return output + return engine.state.output class GanTrainer(Trainer): diff --git a/monai/engines/utils.py b/monai/engines/utils.py index 04237d0f4a..d16ab3cfbb 100644 --- a/monai/engines/utils.py +++ b/monai/engines/utils.py @@ -38,13 +38,14 @@ class IterationEvents(EventEnum): `FORWARD_COMPLETED` is the Event when `network(image, label)` completed. `LOSS_COMPLETED` is the Event when `loss(pred, label)` completed. `BACKWARD_COMPLETED` is the Event when `loss.backward()` completed. 
+ `MODEL_COMPLETED` is the Event when all the model related operations completed. """ FORWARD_COMPLETED = "forward_completed" LOSS_COMPLETED = "loss_completed" BACKWARD_COMPLETED = "backward_completed" - OPTIMIZER_COMPLETED = "optimizer_completed" + MODEL_COMPLETED = "model_completed" class GanKeys: diff --git a/monai/engines/workflow.py b/monai/engines/workflow.py index 61b92ac5dd..4018dabc40 100644 --- a/monai/engines/workflow.py +++ b/monai/engines/workflow.py @@ -9,26 +9,28 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import TYPE_CHECKING, Callable, Dict, Iterable, Optional, Sequence, Union +from typing import TYPE_CHECKING, Callable, Dict, Iterable, List, Optional, Sequence, Union import torch import torch.distributed as dist from torch.utils.data import DataLoader from torch.utils.data.distributed import DistributedSampler -from monai.engines.utils import default_prepare_batch +from monai.engines.utils import IterationEvents, default_prepare_batch from monai.transforms import apply_transform from monai.utils import ensure_tuple, exact_version, optional_import IgniteEngine, _ = optional_import("ignite.engine", "0.4.4", exact_version, "Engine") State, _ = optional_import("ignite.engine", "0.4.4", exact_version, "State") Events, _ = optional_import("ignite.engine", "0.4.4", exact_version, "Events") + if TYPE_CHECKING: - from ignite.engine import Engine + from ignite.engine import Engine, EventEnum from ignite.metrics import Metric else: Engine, _ = optional_import("ignite.engine", "0.4.4", exact_version, "Engine") Metric, _ = optional_import("ignite.metrics", "0.4.4", exact_version, "Metric") + EventEnum, _ = optional_import("ignite.engine", "0.4.4", exact_version, "EventEnum") class Workflow(IgniteEngine): # type: ignore[valid-type, misc] # due to optional_import @@ -60,6 +62,10 @@ class Workflow(IgniteEngine): # type: ignore[valid-type, misc] # due to optiona handlers: every handler is a 
set of Ignite Event-Handlers, must have `attach` function, like: CheckpointHandler, StatsHandler, SegmentationSaver, etc. amp: whether to enable auto-mixed-precision training or inference, default is False. + event_names: additional custom ignite events that will register to the engine. + new events can be a list of str or `ignite.engine.events.EventEnum`. + event_to_attr: a dictionary to map an event to a state attribute, then add to `engine.state`. + for more details, check: https://github.com/pytorch/ignite/blob/v0.4.4.post1/ignite/engine/engine.py#L160 Raises: TypeError: When ``device`` is not a ``torch.Device``. @@ -83,6 +89,8 @@ def __init__( additional_metrics: Optional[Dict[str, Metric]] = None, handlers: Optional[Sequence] = None, amp: bool = False, + event_names: Optional[List[Union[str, EventEnum]]] = None, + event_to_attr: Optional[dict] = None, ) -> None: if iteration_update is not None: super().__init__(iteration_update) @@ -128,7 +136,17 @@ def set_sampler_epoch(engine: Engine): self.prepare_batch = prepare_batch self.amp = amp - self._register_additional_events() + if event_names is not None: + if not isinstance(event_names, list): + raise ValueError("event_names must be a list or string or EventEnum.") + for name in event_names: + if isinstance(name, str): + self.register_events(name, event_to_attr=event_to_attr) + elif issubclass(name, EventEnum): + self.register_events(*name, event_to_attr=event_to_attr) + else: + raise ValueError("event_names must be a list or string or EventEnum.") + if post_transform is not None: self._register_post_transforms(post_transform) if key_metric is not None: @@ -136,26 +154,17 @@ def set_sampler_epoch(engine: Engine): if handlers is not None: self._register_handlers(handlers) - def _register_additional_events(self): - """ - Register more ignite Events to the engine. 
- - """ - pass - - def _register_post_transforms(self, posttrans): + def _register_post_transforms(self, posttrans: Callable): """ Register the post transforms to the engine, will execute them as a chain when iteration completed. """ - @self.on(Events.ITERATION_COMPLETED) + @self.on(IterationEvents.MODEL_COMPLETED) def run_post_transform(engine: Engine) -> None: - if posttrans is None: - raise AssertionError engine.state.output = apply_transform(posttrans, engine.state.output) - def _register_metrics(self, k_metric, add_metrics): + def _register_metrics(self, k_metric: Dict, add_metrics: Optional[Dict] = None): """ Register the key metric and additional metrics to the engine, supports ignite Metrics. @@ -180,7 +189,7 @@ def _compare_metrics(engine: Engine) -> None: engine.state.best_metric = current_val_metric engine.state.best_metric_epoch = engine.state.epoch - def _register_handlers(self, handlers): + def _register_handlers(self, handlers: Sequence): """ Register the handlers to the engine, supports ignite Handlers with `attach` API. 
diff --git a/monai/handlers/__init__.py b/monai/handlers/__init__.py index 5669e8a9ee..b0dbb82127 100644 --- a/monai/handlers/__init__.py +++ b/monai/handlers/__init__.py @@ -13,18 +13,22 @@ from .checkpoint_saver import CheckpointSaver from .classification_saver import ClassificationSaver from .confusion_matrix import ConfusionMatrix +from .earlystop_handler import EarlyStopHandler +from .garbage_collector import GarbageCollector from .hausdorff_distance import HausdorffDistance from .iteration_metric import IterationMetric from .lr_schedule_handler import LrScheduleHandler from .mean_dice import MeanDice from .metric_logger import MetricLogger, MetricLoggerKeys from .metrics_saver import MetricsSaver +from .parameter_scheduler import ParamSchedulerHandler from .roc_auc import ROCAUC from .segmentation_saver import SegmentationSaver from .smartcache_handler import SmartCacheHandler from .stats_handler import StatsHandler from .surface_distance import SurfaceDistance from .tensorboard_handlers import TensorBoardHandler, TensorBoardImageHandler, TensorBoardStatsHandler +from .transform_inverter import TransformInverter from .utils import ( evenly_divisible_all_gather, stopping_fn_from_loss, diff --git a/monai/handlers/checkpoint_loader.py b/monai/handlers/checkpoint_loader.py index bb67428bef..6d8f065f1e 100644 --- a/monai/handlers/checkpoint_loader.py +++ b/monai/handlers/checkpoint_loader.py @@ -13,6 +13,7 @@ from typing import TYPE_CHECKING, Dict, Optional import torch +import torch.nn as nn from monai.utils import exact_version, optional_import @@ -44,6 +45,12 @@ class CheckpointLoader: first load the module to CPU and then copy each parameter to where it was saved, which would result in all processes on the same machine using the same set of devices. + strict: whether to strictly enforce that the keys in `state_dict` match the keys + returned by `torch.nn.Module.state_dict` function. default to `True`. 
+ strict_shape: whether to enforce the data shape of the matched layers in the checkpoint, + `if `False`, it will skip the layers that have different data shape with checkpoint content. + This can be useful advanced feature for transfer learning. users should totally + understand which layers will have different shape. default to `True`. """ @@ -53,6 +60,8 @@ def __init__( load_dict: Dict, name: Optional[str] = None, map_location: Optional[Dict] = None, + strict: bool = True, + strict_shape: bool = True, ) -> None: if load_path is None: raise AssertionError("must provide clear path to load checkpoint.") @@ -63,6 +72,8 @@ def __init__( self.load_dict = load_dict self._name = name self.map_location = map_location + self.strict = strict + self.strict_shape = strict_shape def attach(self, engine: Engine) -> None: """ @@ -80,9 +91,23 @@ def __call__(self, engine: Engine) -> None: """ checkpoint = torch.load(self.load_path, map_location=self.map_location) + if not self.strict_shape: + k, _ = list(self.load_dict.items())[0] + # single object and checkpoint is directly a state_dict + if len(self.load_dict) == 1 and k not in checkpoint: + checkpoint = {k: checkpoint} + + # skip items that don't match data shape + for k, obj in self.load_dict.items(): + if isinstance(obj, (nn.DataParallel, nn.parallel.DistributedDataParallel)): + obj = obj.module + if isinstance(obj, torch.nn.Module): + d = obj.state_dict() + checkpoint[k] = {k: v for k, v in checkpoint[k].items() if k in d and v.shape == d[k].shape} + # save current max epochs setting in the engine, don't overwrite it if larger than max_epochs in checkpoint prior_max_epochs = engine.state.max_epochs - Checkpoint.load_objects(to_load=self.load_dict, checkpoint=checkpoint) + Checkpoint.load_objects(to_load=self.load_dict, checkpoint=checkpoint, strict=self.strict) if engine.state.epoch > prior_max_epochs: raise ValueError( f"Epoch count ({engine.state.epoch}) in checkpoint is larger than " diff --git 
a/monai/handlers/checkpoint_saver.py b/monai/handlers/checkpoint_saver.py index fd80182ba2..68857e17ff 100644 --- a/monai/handlers/checkpoint_saver.py +++ b/monai/handlers/checkpoint_saver.py @@ -17,7 +17,6 @@ Events, _ = optional_import("ignite.engine", "0.4.4", exact_version, "Events") Checkpoint, _ = optional_import("ignite.handlers", "0.4.4", exact_version, "Checkpoint") -BaseSaveHandler, _ = optional_import("ignite.handlers.checkpoint", "0.4.4", exact_version, "BaseSaveHandler") if TYPE_CHECKING: from ignite.engine import Engine diff --git a/monai/handlers/earlystop_handler.py b/monai/handlers/earlystop_handler.py new file mode 100644 index 0000000000..0d140a9994 --- /dev/null +++ b/monai/handlers/earlystop_handler.py @@ -0,0 +1,95 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import TYPE_CHECKING, Callable, Optional + +from monai.utils import exact_version, optional_import + +Events, _ = optional_import("ignite.engine", "0.4.4", exact_version, "Events") +EarlyStopping, _ = optional_import("ignite.handlers", "0.4.4", exact_version, "EarlyStopping") + +if TYPE_CHECKING: + from ignite.engine import Engine +else: + Engine, _ = optional_import("ignite.engine", "0.4.4", exact_version, "Engine") + + +class EarlyStopHandler: + """ + EarlyStopHandler acts as an Ignite handler to stop training if no improvement after a given number of events. + It‘s based on the `EarlyStopping` handler in ignite. 
+ + Args: + patience: number of events to wait if no improvement and then stop the training. + score_function: It should be a function taking a single argument, an :class:`~ignite.engine.engine.Engine` + object that the handler attached, can be a trainer or validator, and return a score `float`. + an improvement is considered if the score is higher. + trainer: trainer engine to stop the run if no improvement, if None, must call `set_trainer()` before training. + min_delta: a minimum increase in the score to qualify as an improvement, + i.e. an increase of less than or equal to `min_delta`, will count as no improvement. + cumulative_delta: if True, `min_delta` defines an increase since the last `patience` reset, otherwise, + it defines an increase after the last event, default to False. + epoch_level: check early stopping for every epoch or every iteration of the attached engine, + `True` is epoch level, `False` is iteration level, default to epoch level. + + Note: + If in distributed training and uses loss value of every iteration to detect early stopping, + the values may be different in different ranks. + User may attach this handler to validator engine to detect validation metrics and stop the training, + in this case, the `score_function` is executed on validator engine and `trainer` is the trainer engine. + + """ + + def __init__( + self, + patience: int, + score_function: Callable, + trainer: Optional[Engine] = None, + min_delta: float = 0.0, + cumulative_delta: bool = False, + epoch_level: bool = True, + ) -> None: + self.patience = patience + self.score_function = score_function + self.min_delta = min_delta + self.cumulative_delta = cumulative_delta + self.epoch_level = epoch_level + self._handler = None + + if trainer is not None: + self.set_trainer(trainer=trainer) + + def attach(self, engine: Engine) -> None: + """ + Args: + engine: Ignite Engine, it can be a trainer, validator or evaluator. 
+ """ + if self.epoch_level: + engine.add_event_handler(Events.EPOCH_COMPLETED, self) + else: + engine.add_event_handler(Events.ITERATION_COMPLETED, self) + + def set_trainer(self, trainer: Engine): + """ + Set trainer to execute early stop if not setting properly in `__init__()`. + """ + self._handler = EarlyStopping( + patience=self.patience, + score_function=self.score_function, + trainer=trainer, + min_delta=self.min_delta, + cumulative_delta=self.cumulative_delta, + ) + + def __call__(self, engine: Engine) -> None: + if self._handler is None: + raise RuntimeError("please set trainer in __init__() or call set_trainer() before training.") + self._handler(engine) diff --git a/monai/handlers/garbage_collector.py b/monai/handlers/garbage_collector.py new file mode 100644 index 0000000000..7bb59c9049 --- /dev/null +++ b/monai/handlers/garbage_collector.py @@ -0,0 +1,80 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import gc +from typing import TYPE_CHECKING + +from monai.utils import exact_version, optional_import + +if TYPE_CHECKING: + from ignite.engine import Engine, Events +else: + Engine, _ = optional_import("ignite.engine", "0.4.4", exact_version, "Engine") + Events, _ = optional_import("ignite.engine", "0.4.4", exact_version, "Events") + + +class GarbageCollector: + """ + Run garbage collector after each epoch + + Args: + trigger_event: the event that trigger a call to this handler. 
+ - "epoch", after completion of each epoch (equivalent of ignite.engine.Events.EPOCH_COMPLETED) + - "iteration", after completion of each iteration (equivalent of ignite.engine.Events.ITERATION_COMPLETED) + - any ignite built-in event from ignite.engine.Events. + Defaults to "epoch". + log_level: log level (integer) for some garbage collection information as below. Defaults to 10 (DEBUG). + - 50 (CRITICAL) + - 40 (ERROR) + - 30 (WARNING) + - 20 (INFO) + - 10 (DEBUG) + - 0 (NOTSET) + """ + + def __init__(self, trigger_event: str = "epoch", log_level: int = 10): + if isinstance(trigger_event, Events): + self.trigger_event = trigger_event + elif trigger_event.lower() == "epoch": + self.trigger_event = Events.EPOCH_COMPLETED + elif trigger_event.lower() == "iteration": + self.trigger_event = Events.ITERATION_COMPLETED + else: + raise ValueError( + f"'trigger_event' should be either epoch, iteration, or an ignite built-in event from" + f" ignite.engine.Events, '{trigger_event}' was given." + ) + + self.log_level = log_level + + def attach(self, engine: Engine) -> None: + if not engine.has_event_handler(self, self.trigger_event): + engine.add_event_handler(self.trigger_event, self) + + def __call__(self, engine: Engine) -> None: + """ + This method calls python garbage collector. + + Args: + engine: Ignite Engine, it should be either a trainer or validator. 
+ """ + # get count before garbage collection + pre_count = gc.get_count() + # fits call to garbage collector + gc.collect() + # second call to garbage collector + unreachable = gc.collect() + # get count after garbage collection + after_count = gc.get_count() + engine.logger.log( + self.log_level, + f"Garbage Count: [before: {pre_count}] -> [after: {after_count}] (unreachable : {unreachable})", + ) diff --git a/monai/handlers/iteration_metric.py b/monai/handlers/iteration_metric.py index f49c799a21..42f9828afd 100644 --- a/monai/handlers/iteration_metric.py +++ b/monai/handlers/iteration_metric.py @@ -73,10 +73,18 @@ def update(self, output: Sequence[torch.Tensor]) -> None: """ if len(output) != 2: raise ValueError(f"output must have length 2, got {len(output)}.") + y_pred, y = output - score = self.metric_fn(y_pred, y) - if isinstance(score, (tuple, list)): - score = score[0] + + def _compute(y_pred, y): + score = self.metric_fn(y_pred, y) + return score[0] if isinstance(score, (tuple, list)) else score + + if isinstance(y_pred, (list, tuple)) and isinstance(y, (list, tuple)): + # if a list of channel-first data, add batch dim and compute metric, then concat the scores + score = torch.cat([_compute(p_.unsqueeze(0), y_.unsqueeze(0)) for p_, y_ in zip(y_pred, y)], dim=0) + else: + score = _compute(y_pred, y) self._scores.append(score.to(self._device)) def compute(self) -> Any: diff --git a/monai/handlers/metric_logger.py b/monai/handlers/metric_logger.py index 778ec13900..f9a3913c56 100644 --- a/monai/handlers/metric_logger.py +++ b/monai/handlers/metric_logger.py @@ -48,7 +48,7 @@ class MetricLogger: logger = MetricLogger(evaluator=evaluator) # construct the trainer with the logger passed in as a handler so that it logs loss values - trainer = SupervisedTrainer(..., train_handlers=[logger, ValidationHandler(evaluator, 1)]) + trainer = SupervisedTrainer(..., train_handlers=[logger, ValidationHandler(1, evaluator)]) # run training, logger.loss will be a list of 
(iteration, loss) values, logger.metrics a dict with key # "val_mean_dice" storing a list of (iteration, metric) values diff --git a/monai/handlers/parameter_scheduler.py b/monai/handlers/parameter_scheduler.py new file mode 100644 index 0000000000..35ba044586 --- /dev/null +++ b/monai/handlers/parameter_scheduler.py @@ -0,0 +1,174 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +from bisect import bisect_right +from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Union + +from monai.utils import exact_version, optional_import + +if TYPE_CHECKING: + from ignite.engine import Engine, Events +else: + Engine, _ = optional_import("ignite.engine", "0.4.4", exact_version, "Engine") + Events, _ = optional_import("ignite.engine", "0.4.4", exact_version, "Events") + + +class ParamSchedulerHandler: + """ + General purpose scheduler for parameters values. By default it can schedule in a linear, exponential, step or + multistep function. One can also pass Callables to have customized scheduling logic. + + Args: + parameter_setter (Callable): Function that sets the required parameter + value_calculator (Union[str,Callable]): Either a string ('linear', 'exponential', 'step' or 'multistep') + or Callable for custom logic. + vc_kwargs (Dict): Dictionary that stores the required parameters for the value_calculator. + epoch_level (bool): Whether the the step is based on epoch or iteration. Defaults to False. 
+ name (Optional[str]): Identifier of logging.logger to use, if None, defaulting to ``engine.logger``. + event (Optional[str]): Event to which the handler attaches. Defaults to Events.ITERATION_COMPLETED. + """ + + def __init__( + self, + parameter_setter: Callable, + value_calculator: Union[str, Callable], + vc_kwargs: Dict, + epoch_level: bool = False, + name: Optional[str] = None, + event=Events.ITERATION_COMPLETED, + ): + self.epoch_level = epoch_level + self.event = event + + self._calculators = { + "linear": self._linear, + "exponential": self._exponential, + "step": self._step, + "multistep": self._multistep, + } + + self._parameter_setter = parameter_setter + self._vc_kwargs = vc_kwargs + self._value_calculator = self._get_value_calculator(value_calculator=value_calculator) + + self.logger = logging.getLogger(name) + self._name = name + + def _get_value_calculator(self, value_calculator): + if isinstance(value_calculator, str): + return self._calculators[value_calculator] + if callable(value_calculator): + return value_calculator + raise ValueError( + f"value_calculator must be either a string from {list(self._calculators.keys())} or a Callable." + ) + + def __call__(self, engine: Engine): + if self.epoch_level: + self._vc_kwargs["current_step"] = engine.state.epoch + else: + self._vc_kwargs["current_step"] = engine.state.iteration + + new_value = self._value_calculator(**self._vc_kwargs) + self._parameter_setter(new_value) + + def attach(self, engine: Engine) -> None: + """ + Args: + engine: Ignite Engine that is used for training. + """ + if self._name is None: + self.logger = engine.logger + engine.add_event_handler(self.event, self) + + @staticmethod + def _linear( + initial_value: float, step_constant: int, step_max_value: int, max_value: float, current_step: int + ) -> float: + """ + Keeps the parameter value to zero until step_zero steps passed and then linearly increases it to 1 until an + additional step_one steps passed. 
Continues the trend until it reaches max_value. + + Args: + initial_value (float): Starting value of the parameter. + step_constant (int): Step index until parameter's value is kept constant. + step_max_value (int): Step index at which parameter's value becomes max_value. + max_value (float): Max parameter value. + current_step (int): Current step index. + + Returns: + float: new parameter value + """ + if current_step <= step_constant: + delta = 0.0 + elif current_step > step_max_value: + delta = max_value - initial_value + else: + delta = (max_value - initial_value) / (step_max_value - step_constant) * (current_step - step_constant) + + return initial_value + delta + + @staticmethod + def _exponential(initial_value: float, gamma: float, current_step: int) -> float: + """ + Decays the parameter value by gamma every step. + + Based on the closed form of ExponentialLR from Pytorch + https://github.com/pytorch/pytorch/blob/master/torch/optim/lr_scheduler.py#L457 + + Args: + initial_value (float): Starting value of the parameter. + gamma (float): Multiplicative factor of parameter value decay. + current_step (int): Current step index. + + Returns: + float: new parameter value + """ + return initial_value * gamma ** current_step + + @staticmethod + def _step(initial_value: float, gamma: float, step_size: int, current_step: int) -> float: + """ + Decays the parameter value by gamma every step_size. + + Based on StepLR from Pytorch. + https://github.com/pytorch/pytorch/blob/master/torch/optim/lr_scheduler.py#L377 + + Args: + initial_value (float): Starting value of the parameter. + gamma (float): Multiplicative factor of parameter value decay. + step_size (int): Period of parameter value decay. + current_step (int): Current step index. 
+ + Returns + float: new parameter value + """ + return initial_value * gamma ** (current_step // step_size) + + @staticmethod + def _multistep(initial_value: float, gamma: float, milestones: List[int], current_step: int) -> float: + """ + Decays the parameter value by gamma once the number of steps reaches one of the milestones. + + Based on MultiStepLR from Pytorch. + https://github.com/pytorch/pytorch/blob/master/torch/optim/lr_scheduler.py#L424 + + Args: + initial_value (float): Starting value of the parameter. + gamma (float): Multiplicative factor of parameter value decay. + milestones (List[int]): List of step indices. Must be increasing. + current_step (int): Current step index. + + Returns: + float: new parameter value + """ + return initial_value * gamma ** bisect_right(milestones, current_step) diff --git a/monai/handlers/segmentation_saver.py b/monai/handlers/segmentation_saver.py index 9ee7ca67f9..279b514bd7 100644 --- a/monai/handlers/segmentation_saver.py +++ b/monai/handlers/segmentation_saver.py @@ -28,6 +28,9 @@ class SegmentationSaver: """ Event handler triggered on completing every iteration to save the segmentation predictions into files. + It can extract the input image meta data(filename, affine, original_shape, etc.) and resample the predictions + based on the meta data. + """ def __init__( @@ -96,6 +99,7 @@ def __init__( output will be: /output/test1/image/image_seg.nii.gz batch_transform: a callable that is used to transform the ignite.engine.batch into expected format to extract the meta_data dictionary. + it can be used to extract the input image meta data: filename, affine, original_shape, etc. output_transform: a callable that is used to transform the ignite.engine.output into the form expected image data. 
The first dimension of this transform's output will be treated as the @@ -115,7 +119,6 @@ def __init__( output_dtype=output_dtype, squeeze_end_dims=squeeze_end_dims, data_root_dir=data_root_dir, - save_batch=True, ) self.batch_transform = batch_transform self.output_transform = output_transform @@ -143,5 +146,13 @@ def __call__(self, engine: Engine) -> None: """ meta_data = self.batch_transform(engine.state.batch) engine_output = self.output_transform(engine.state.output) - self._saver(engine_output, meta_data) + if isinstance(engine_output, (tuple, list)): + # if a list of data in shape: [channel, H, W, [D]], save every item separately + self._saver.save_batch = False + for i, d in enumerate(engine_output): + self._saver(d, {k: meta_data[k][i] for k in meta_data} if meta_data is not None else None) + else: + # if the data is in shape: [batch, channel, H, W, [D]] + self._saver.save_batch = True + self._saver(engine_output, meta_data) self.logger.info("saved all the model outputs into files.") diff --git a/monai/handlers/transform_inverter.py b/monai/handlers/transform_inverter.py new file mode 100644 index 0000000000..64f5c37d78 --- /dev/null +++ b/monai/handlers/transform_inverter.py @@ -0,0 +1,110 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import warnings +from typing import TYPE_CHECKING, Callable, Optional, Sequence, Union + +from torch.utils.data import DataLoader as TorchDataLoader + +from monai.data import BatchInverseTransform +from monai.data.utils import no_collation +from monai.engines.utils import CommonKeys, IterationEvents +from monai.transforms import InvertibleTransform, ToTensor, allow_missing_keys_mode, convert_inverse_interp_mode +from monai.utils import InverseKeys, ensure_tuple, ensure_tuple_rep, exact_version, optional_import + +Events, _ = optional_import("ignite.engine", "0.4.4", exact_version, "Events") +if TYPE_CHECKING: + from ignite.engine import Engine +else: + Engine, _ = optional_import("ignite.engine", "0.4.4", exact_version, "Engine") + + +class TransformInverter: + """ + Ignite handler to automatically invert `transforms`. + It takes `engine.state.output` as the input data and uses the transforms information from `engine.state.batch`. + The outputs are stored in `engine.state.output` with the `output_keys`. + """ + + def __init__( + self, + transform: InvertibleTransform, + loader: TorchDataLoader, + output_keys: Union[str, Sequence[str]] = CommonKeys.PRED, + batch_keys: Union[str, Sequence[str]] = CommonKeys.IMAGE, + collate_fn: Optional[Callable] = no_collation, + postfix: str = "_inverted", + nearest_interp: Union[bool, Sequence[bool]] = True, + num_workers: Optional[int] = 0, + ) -> None: + """ + Args: + transform: a callable data transform on input data. + loader: data loader used to run transforms and generate the batch of data. + collate_fn: how to collate data after inverse transformations. + default won't do any collation, so the output will be a list of size batch size. + output_keys: the key of expected data in `ignite.engine.output`, invert transforms on it. + it also can be a list of keys, will invert transform for each of them. Default to "pred". + batch_keys: the key of input data in `ignite.engine.batch`. 
will get the applied transforms + for this input data, then invert them for the expected data with `output_keys`. + It can also be a list of keys, each matches to the `output_keys` data. default to "image". + postfix: will save the inverted result into `ignite.engine.output` with key `{output_key}{postfix}`. + nearest_interp: whether to use `nearest` interpolation mode when inverting the spatial transforms, + default to `True`. If `False`, use the same interpolation mode as the original transform. + it also can be a list of bool, each matches to the `output_keys` data. + num_workers: number of workers when run data loader for inverse transforms, + default to 0 as only run one iteration and multi-processing may be even slower. + Set to `None`, to use the `num_workers` of the input transform data loader. + + """ + self.transform = transform + self.inverter = BatchInverseTransform( + transform=transform, + loader=loader, + collate_fn=collate_fn, + num_workers=num_workers, + ) + self.output_keys = ensure_tuple(output_keys) + self.batch_keys = ensure_tuple_rep(batch_keys, len(self.output_keys)) + self.postfix = postfix + self.nearest_interp = ensure_tuple_rep(nearest_interp, len(self.output_keys)) + self._totensor = ToTensor() + + def attach(self, engine: Engine) -> None: + """ + Args: + engine: Ignite Engine, it can be a trainer, validator or evaluator. + """ + engine.add_event_handler(IterationEvents.MODEL_COMPLETED, self) + + def __call__(self, engine: Engine) -> None: + """ + Args: + engine: Ignite Engine, it can be a trainer, validator or evaluator. 
+ """ + for output_key, batch_key, nearest_interp in zip(self.output_keys, self.batch_keys, self.nearest_interp): + transform_key = batch_key + InverseKeys.KEY_SUFFIX + if transform_key not in engine.state.batch: + warnings.warn(f"all the transforms on `{batch_key}` are not InvertibleTransform.") + continue + + transform_info = engine.state.batch[transform_key] + if nearest_interp: + convert_inverse_interp_mode(trans_info=transform_info, mode="nearest", align_corners=None) + + segs_dict = { + batch_key: engine.state.output[output_key].detach().cpu(), + transform_key: transform_info, + } + + with allow_missing_keys_mode(self.transform): # type: ignore + inverted_key = f"{output_key}{self.postfix}" + engine.state.output[inverted_key] = [self._totensor(i[batch_key]) for i in self.inverter(segs_dict)] diff --git a/monai/handlers/utils.py b/monai/handlers/utils.py index 2eaf3ab932..4ae38b908a 100644 --- a/monai/handlers/utils.py +++ b/monai/handlers/utils.py @@ -11,7 +11,7 @@ import os from collections import OrderedDict -from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Sequence, Union +from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Union import numpy as np import torch @@ -33,7 +33,7 @@ ] -def stopping_fn_from_metric(metric_name: str) -> Callable[[Engine], Any]: +def stopping_fn_from_metric(metric_name: str): """ Returns a stopping function for ignite.handlers.EarlyStopping using the given metric name. """ @@ -44,7 +44,7 @@ def stopping_fn(engine: Engine): return stopping_fn -def stopping_fn_from_loss() -> Callable[[Engine], Any]: +def stopping_fn_from_loss(): """ Returns a stopping function for ignite.handlers.EarlyStopping using the loss value. 
""" diff --git a/monai/handlers/validation_handler.py b/monai/handlers/validation_handler.py index 4458a17380..fbd4b7862e 100644 --- a/monai/handlers/validation_handler.py +++ b/monai/handlers/validation_handler.py @@ -9,7 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Optional from monai.engines.evaluator import Evaluator from monai.utils import exact_version, optional_import @@ -28,11 +28,12 @@ class ValidationHandler: """ - def __init__(self, validator: Evaluator, interval: int, epoch_level: bool = True) -> None: + def __init__(self, interval: int, validator: Optional[Evaluator] = None, epoch_level: bool = True) -> None: """ Args: - validator: run the validator when trigger validation, suppose to be Evaluator. interval: do validation every N epochs or every N iterations during training. + validator: run the validator when trigger validation, suppose to be Evaluator. + if None, should call `set_validator()` before training. epoch_level: execute validation every N epochs or N iterations. `True` is epoch level, `False` is iteration level. @@ -40,12 +41,20 @@ def __init__(self, validator: Evaluator, interval: int, epoch_level: bool = True TypeError: When ``validator`` is not a ``monai.engines.evaluator.Evaluator``. """ - if not isinstance(validator, Evaluator): + if validator is not None and not isinstance(validator, Evaluator): raise TypeError(f"validator must be a monai.engines.evaluator.Evaluator but is {type(validator).__name__}.") self.validator = validator self.interval = interval self.epoch_level = epoch_level + def set_validator(self, validator: Evaluator): + """ + Set validator if not setting in the __init__(). 
+ """ + if not isinstance(validator, Evaluator): + raise TypeError(f"validator must be a monai.engines.evaluator.Evaluator but is {type(validator).__name__}.") + self.validator = validator + def attach(self, engine: Engine) -> None: """ Args: @@ -61,4 +70,6 @@ def __call__(self, engine: Engine) -> None: Args: engine: Ignite Engine, it can be a trainer, validator or evaluator. """ + if self.validator is None: + raise RuntimeError("please set validator in __init__() or call `set_validator()` before training.") self.validator.run(engine.state.epoch) diff --git a/monai/losses/__init__.py b/monai/losses/__init__.py index b9146a6962..78a0fbc191 100644 --- a/monai/losses/__init__.py +++ b/monai/losses/__init__.py @@ -13,11 +13,14 @@ from .dice import ( Dice, DiceCELoss, + DiceFocalLoss, DiceLoss, GeneralizedDiceLoss, GeneralizedWassersteinDiceLoss, MaskedDiceLoss, dice, + dice_ce, + dice_focal, generalized_dice, generalized_wasserstein_dice, ) diff --git a/monai/losses/dice.py b/monai/losses/dice.py index 65bf47f388..47af8ea171 100644 --- a/monai/losses/dice.py +++ b/monai/losses/dice.py @@ -10,7 +10,7 @@ # limitations under the License. import warnings -from typing import Callable, List, Optional, Union +from typing import Callable, List, Optional, Sequence, Union import numpy as np import torch @@ -18,6 +18,7 @@ import torch.nn.functional as F from torch.nn.modules.loss import _Loss +from monai.losses.focal_loss import FocalLoss from monai.networks import one_hot from monai.utils import LossReduction, Weight @@ -600,15 +601,12 @@ def _compute_alpha_generalized_true_positives(self, flat_target: torch.Tensor) - class DiceCELoss(_Loss): """ - Compute both Dice loss and Cross Entropy Loss, and return the sum of these two losses. - Input logits `input` (BNHW[D] where N is number of classes) is compared with ground truth `target` (BNHW[D]). 
- Axis N of `input` is expected to have logit predictions for each class rather than being image channels, - while the same axis of `target` can be 1 or N (one-hot format). The `smooth_nr` and `smooth_dr` parameters are - values added for dice loss part to the intersection and union components of the inter-over-union calculation - to smooth results respectively, these values should be small. The `include_background` class attribute can be - set to False for an instance of the loss to exclude the first category (channel index 0) which is by convention - assumed to be background. If the non-background segmentations are small compared to the total image size they can get - overwhelmed by the signal from the background so excluding it in such cases helps convergence. + Compute both Dice loss and Cross Entropy Loss, and return the weighted sum of these two losses. + The details of Dice loss is shown in ``monai.losses.DiceLoss``. + The details of Cross Entropy Loss is shown in ``torch.nn.CrossEntropyLoss``. In this implementation, + two deprecated parameters ``size_average`` and ``reduce``, and the parameter ``ignore_index`` are + not supported. + """ def __init__( @@ -625,11 +623,13 @@ def __init__( smooth_dr: float = 1e-5, batch: bool = False, ce_weight: Optional[torch.Tensor] = None, + lambda_dice: float = 1.0, + lambda_ce: float = 1.0, ) -> None: """ Args: - ``ce_weight`` is only used for cross entropy loss, ``reduction`` is used for both losses and other - parameters are only used for dice loss. + ``ce_weight`` and ``lambda_ce`` are only used for cross entropy loss. + ``reduction`` is used for both losses and other parameters are only used for dice loss. include_background: if False channel index 0 (background category) is excluded from the calculation. to_onehot_y: whether to convert `y` into the one-hot format. Defaults to False. @@ -655,6 +655,10 @@ def __init__( before any `reduction`. ce_weight: a rescaling weight given to each class for cross entropy loss. 
See ``torch.nn.CrossEntropyLoss()`` for more information. + lambda_dice: the trade-off weight value for dice loss. The value should be no less than 0.0. + Defaults to 1.0. + lambda_ce: the trade-off weight value for cross entropy loss. The value should be no less than 0.0. + Defaults to 1.0. """ super().__init__() @@ -675,6 +679,12 @@ def __init__( weight=ce_weight, reduction=reduction, ) + if lambda_dice < 0.0: + raise ValueError("lambda_dice should be no less than 0.0.") + if lambda_ce < 0.0: + raise ValueError("lambda_ce should be no less than 0.0.") + self.lambda_dice = lambda_dice + self.lambda_ce = lambda_ce def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor: """ @@ -684,7 +694,7 @@ def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor: Raises: ValueError: When number of dimensions for input and target are different. - ValueError: When number of channels for target is nither 1 or the same as input. + ValueError: When number of channels for target is neither 1 nor the same as input. """ if len(input.shape) != len(target.shape): @@ -700,11 +710,123 @@ def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor: target = torch.squeeze(target, dim=1) target = target.long() ce_loss = self.cross_entropy(input, target) - total_loss: torch.Tensor = dice_loss + ce_loss + total_loss: torch.Tensor = self.lambda_dice * dice_loss + self.lambda_ce * ce_loss + return total_loss + + +class DiceFocalLoss(_Loss): + """ + Compute both Dice loss and Focal Loss, and return the weighted sum of these two losses. + The details of Dice loss is shown in ``monai.losses.DiceLoss``. + The details of Focal Loss is shown in ``monai.losses.FocalLoss``. 
+ + """ + + def __init__( + self, + include_background: bool = True, + to_onehot_y: bool = False, + sigmoid: bool = False, + softmax: bool = False, + other_act: Optional[Callable] = None, + squared_pred: bool = False, + jaccard: bool = False, + reduction: str = "mean", + smooth_nr: float = 1e-5, + smooth_dr: float = 1e-5, + batch: bool = False, + gamma: float = 2.0, + focal_weight: Optional[Union[Sequence[float], float, int, torch.Tensor]] = None, + lambda_dice: float = 1.0, + lambda_focal: float = 1.0, + ) -> None: + """ + Args: + ``gamma``, ``focal_weight`` and ``lambda_focal`` are only used for focal loss. + ``include_background``, ``to_onehot_y``and ``reduction`` are used for both losses + and other parameters are only used for dice loss. + include_background: if False channel index 0 (background category) is excluded from the calculation. + to_onehot_y: whether to convert `y` into the one-hot format. Defaults to False. + sigmoid: if True, apply a sigmoid function to the prediction. + softmax: if True, apply a softmax function to the prediction. + other_act: if don't want to use `sigmoid` or `softmax`, use other callable function to execute + other activation layers, Defaults to ``None``. for example: + `other_act = torch.tanh`. + squared_pred: use squared versions of targets and predictions in the denominator or not. + jaccard: compute Jaccard Index (soft IoU) instead of dice or not. + reduction: {``"none"``, ``"mean"``, ``"sum"``} + Specifies the reduction to apply to the output. Defaults to ``"mean"``. + + - ``"none"``: no reduction will be applied. + - ``"mean"``: the sum of the output will be divided by the number of elements in the output. + - ``"sum"``: the output will be summed. + + smooth_nr: a small constant added to the numerator to avoid zero. + smooth_dr: a small constant added to the denominator to avoid nan. + batch: whether to sum the intersection and union areas over the batch dimension before the dividing. 
+ Defaults to False, a Dice loss value is computed independently from each item in the batch + before any `reduction`. + gamma: value of the exponent gamma in the definition of the Focal loss. + focal_weight: weights to apply to the voxels of each class. If None no weights are applied. + The input can be a single value (same weight for all classes), a sequence of values (the length + of the sequence should be the same as the number of classes). + lambda_dice: the trade-off weight value for dice loss. The value should be no less than 0.0. + Defaults to 1.0. + lambda_focal: the trade-off weight value for focal loss. The value should be no less than 0.0. + Defaults to 1.0. + + """ + super().__init__() + self.dice = DiceLoss( + include_background=include_background, + to_onehot_y=to_onehot_y, + sigmoid=sigmoid, + softmax=softmax, + other_act=other_act, + squared_pred=squared_pred, + jaccard=jaccard, + reduction=reduction, + smooth_nr=smooth_nr, + smooth_dr=smooth_dr, + batch=batch, + ) + self.focal = FocalLoss( + include_background=include_background, + to_onehot_y=to_onehot_y, + gamma=gamma, + weight=focal_weight, + reduction=reduction, + ) + if lambda_dice < 0.0: + raise ValueError("lambda_dice should be no less than 0.0.") + if lambda_focal < 0.0: + raise ValueError("lambda_focal should be no less than 0.0.") + self.lambda_dice = lambda_dice + self.lambda_focal = lambda_focal + + def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor: + """ + Args: + input: the shape should be BNH[WD]. The input should be the original logits + due to the restriction of ``monai.losses.FocalLoss``. + target: the shape should be BNH[WD] or B1H[WD]. + + Raises: + ValueError: When number of dimensions for input and target are different. + ValueError: When number of channels for target is neither 1 nor the same as input. 
+ + """ + if len(input.shape) != len(target.shape): + raise ValueError("the number of dimensions for input and target should be the same.") + + dice_loss = self.dice(input, target) + focal_loss = self.focal(input, target) + total_loss: torch.Tensor = self.lambda_dice * dice_loss + self.lambda_focal * focal_loss return total_loss dice = Dice = DiceLoss dice_ce = DiceCELoss +dice_focal = DiceFocalLoss generalized_dice = GeneralizedDiceLoss generalized_wasserstein_dice = GeneralizedWassersteinDiceLoss diff --git a/monai/losses/focal_loss.py b/monai/losses/focal_loss.py index 664e7673a4..5e0ccd3179 100644 --- a/monai/losses/focal_loss.py +++ b/monai/losses/focal_loss.py @@ -45,7 +45,9 @@ def __init__( weight: weights to apply to the voxels of each class. If None no weights are applied. This corresponds to the weights `\alpha` in [1]. The input can be a single value (same weight for all classes), a sequence of values (the length - of the sequence should be the same as the number of classes). + of the sequence should be the same as the number of classes, if not ``include_background``, the + number should not include class 0). + The value/values should be no less than 0. Defaults to None. reduction: {``"none"``, ``"mean"``, ``"sum"``} Specifies the reduction to apply to the output. Defaults to ``"mean"``. @@ -83,6 +85,9 @@ def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor: AssertionError: When input and target (after one hot transform if setted) have different shapes. ValueError: When ``self.reduction`` is not one of ["mean", "sum", "none"]. + ValueError: When ``self.weight`` is a sequence and the length is not equal to the + number of classes. + ValueError: When ``self.weight`` is/contains a value that is less than 0. 
""" n_pred_ch = input.shape[1] @@ -122,6 +127,13 @@ def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor: class_weight = torch.as_tensor([self.weight] * i.size(1)) else: class_weight = torch.as_tensor(self.weight) + if class_weight.size(0) != i.size(1): + raise ValueError( + "the length of the weight sequence should be the same as the number of classes. " + + "If `include_background=False`, the number should not include class 0." + ) + if class_weight.min() < 0: + raise ValueError("the value/values of weights should be no less than 0.") class_weight = class_weight.to(i) # Convert the weight to a map in which each voxel # has the weight associated with the ground-truth label diff --git a/monai/losses/image_dissimilarity.py b/monai/losses/image_dissimilarity.py index 67b2d177f6..eed5808aa3 100644 --- a/monai/losses/image_dissimilarity.py +++ b/monai/losses/image_dissimilarity.py @@ -65,8 +65,8 @@ def __init__( kernel_size: int = 3, kernel_type: str = "rectangular", reduction: Union[LossReduction, str] = LossReduction.MEAN, - smooth_nr: float = 1e-7, - smooth_dr: float = 1e-7, + smooth_nr: float = 1e-5, + smooth_dr: float = 1e-5, ) -> None: """ Args: @@ -146,6 +146,8 @@ def forward(self, pred: torch.Tensor, target: torch.Tensor) -> torch.Tensor: cross = tp_sum - p_avg * t_sum t_var = t2_sum - t_avg * t_sum # std[t] ** 2 p_var = p2_sum - p_avg * p_sum # std[p] ** 2 + t_var = torch.max(t_var, torch.zeros_like(t_var)) + p_var = torch.max(p_var, torch.zeros_like(p_var)) ncc: torch.Tensor = (cross * cross + self.smooth_nr) / (t_var * p_var + self.smooth_dr) # shape = (batch, 1, D, H, W) diff --git a/monai/networks/blocks/__init__.py b/monai/networks/blocks/__init__.py index cdf7bc3f6d..ed6ac12430 100644 --- a/monai/networks/blocks/__init__.py +++ b/monai/networks/blocks/__init__.py @@ -10,7 +10,7 @@ # limitations under the License. 
from .acti_norm import ADN -from .activation import Mish, Swish +from .activation import MemoryEfficientSwish, Mish, Swish from .aspp import SimpleASPP from .convolutions import Convolution, ResidualUnit from .crf import CRF diff --git a/monai/networks/blocks/activation.py b/monai/networks/blocks/activation.py index ef6c74f282..f6a04e830e 100644 --- a/monai/networks/blocks/activation.py +++ b/monai/networks/blocks/activation.py @@ -17,7 +17,7 @@ class Swish(nn.Module): r"""Applies the element-wise function: .. math:: - \text{Swish}(x) = x * \text{Sigmoid}(\alpha * x) for constant value alpha. + \text{Swish}(x) = x * \text{Sigmoid}(\alpha * x) ~~~~\text{for constant value}~ \alpha. Citation: Searching for Activation Functions, Ramachandran et al., 2017, https://arxiv.org/abs/1710.05941. @@ -43,6 +43,57 @@ def forward(self, input: torch.Tensor) -> torch.Tensor: return input * torch.sigmoid(self.alpha * input) +class SwishImplementation(torch.autograd.Function): + r"""Memory efficient implementation for training + Follows recommendation from: + https://github.com/lukemelas/EfficientNet-PyTorch/issues/18#issuecomment-511677853 + + Results in ~ 30% memory saving during training as compared to Swish() + """ + + @staticmethod + def forward(ctx, input): + result = input * torch.sigmoid(input) + ctx.save_for_backward(input) + return result + + @staticmethod + def backward(ctx, grad_output): + input = ctx.saved_tensors[0] + sigmoid_input = torch.sigmoid(input) + return grad_output * (sigmoid_input * (1 + input * (1 - sigmoid_input))) + + +class MemoryEfficientSwish(nn.Module): + r"""Applies the element-wise function: + + .. math:: + \text{Swish}(x) = x * \text{Sigmoid}(\alpha * x) ~~~~\text{for constant value}~ \alpha=1. 
+ + Memory efficient implementation for training following recommendation from: + https://github.com/lukemelas/EfficientNet-PyTorch/issues/18#issuecomment-511677853 + + Results in ~ 30% memory saving during training as compared to Swish() + + Citation: Searching for Activation Functions, Ramachandran et al., 2017, https://arxiv.org/abs/1710.05941. + + Shape: + - Input: :math:`(N, *)` where `*` means, any number of additional + dimensions + - Output: :math:`(N, *)`, same shape as the input + + + Examples:: + + >>> m = Act['memswish']() + >>> input = torch.randn(2) + >>> output = m(input) + """ + + def forward(self, input: torch.Tensor): + return SwishImplementation.apply(input) + + class Mish(nn.Module): r"""Applies the element-wise function: diff --git a/monai/networks/blocks/convolutions.py b/monai/networks/blocks/convolutions.py index 7bfb3b47e4..39ce60e3f8 100644 --- a/monai/networks/blocks/convolutions.py +++ b/monai/networks/blocks/convolutions.py @@ -30,6 +30,34 @@ class Convolution(nn.Sequential): -- (Conv|ConvTrans) -- + For example: + + .. code-block:: python + + from monai.networks.blocks import Convolution + + conv = Convolution( + dimensions=3, + in_channels=1, + out_channels=1, + adn_ordering="ADN", + act=("prelu", {"init": 0.2}), + dropout=0.1, + norm=("layer", {"normalized_shape": (10, 10, 10)}), + ) + print(conv) + + output:: + + Convolution( + (conv): Conv3d(1, 1, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1)) + (adn): ADN( + (A): PReLU(num_parameters=1) + (D): Dropout(p=0.1, inplace=False) + (N): LayerNorm((10, 10, 10), eps=1e-05, elementwise_affine=True) + ) + ) + Args: dimensions: number of spatial dimensions. in_channels: number of input channels. @@ -142,6 +170,44 @@ class ResidualUnit(nn.Module): """ Residual module with multiple convolutions and a residual connection. + For example: + + .. 
code-block:: python + + from monai.networks.blocks import ResidualUnit + + convs = ResidualUnit( + dimensions=3, + in_channels=1, + out_channels=1, + adn_ordering="AN", + act=("prelu", {"init": 0.2}), + norm=("layer", {"normalized_shape": (10, 10, 10)}), + ) + print(convs) + + output:: + + ResidualUnit( + (conv): Sequential( + (unit0): Convolution( + (conv): Conv3d(1, 1, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1)) + (adn): ADN( + (A): PReLU(num_parameters=1) + (N): LayerNorm((10, 10, 10), eps=1e-05, elementwise_affine=True) + ) + ) + (unit1): Convolution( + (conv): Conv3d(1, 1, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1)) + (adn): ADN( + (A): PReLU(num_parameters=1) + (N): LayerNorm((10, 10, 10), eps=1e-05, elementwise_affine=True) + ) + ) + ) + (residual): Identity() + ) + Args: dimensions: number of spatial dimensions. in_channels: number of input channels. diff --git a/monai/networks/blocks/crf.py b/monai/networks/blocks/crf.py index 27556a2c72..29d4ef4216 100644 --- a/monai/networks/blocks/crf.py +++ b/monai/networks/blocks/crf.py @@ -1,4 +1,4 @@ -# Copyright 2020 MONAI Consortium +# Copyright 2020 - 2021 MONAI Consortium # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -20,7 +20,7 @@ class CRF(torch.nn.Module): """ Conditional Random Field: Combines message passing with a class - compatability convolution into an iterative process designed + compatibility convolution into an iterative process designed to successively minimise the energy of the class labeling. 
In this implementation, the message passing step is a weighted @@ -40,7 +40,7 @@ def __init__( bilateral_color_sigma: float = 0.5, gaussian_spatial_sigma: float = 5.0, update_factor: float = 3.0, - compatability_kernel_range: int = 1, + compatibility_kernel_range: int = 1, iterations: int = 5, ): """ @@ -51,7 +51,7 @@ def __init__( bilateral_color_sigma: standard deviation in color space for the bilateral term. gaussian_spatial_sigma: standard deviation in spatial coordinates for the gaussian term. update_factor: determines the magnitude of each update. - compatability_kernel_range: the range of the kernel used in the compatability convolution. + compatibility_kernel_range: the range of the kernel used in the compatibility convolution. iterations: the number of iterations. """ super(CRF, self).__init__() @@ -61,14 +61,14 @@ def __init__( self.bilateral_color_sigma = bilateral_color_sigma self.gaussian_spatial_sigma = gaussian_spatial_sigma self.update_factor = update_factor - self.compatability_kernel_range = compatability_kernel_range + self.compatibility_kernel_range = compatibility_kernel_range self.iterations = iterations def forward(self, input_tensor: torch.Tensor, reference_tensor: torch.Tensor): """ Args: input_tensor: tensor containing initial class logits. - referenece_tensor: the reference tensor used to guide the message passing. + reference_tensor: the reference tensor used to guide the message passing. Returns: output (torch.Tensor): output tensor. 
@@ -77,7 +77,7 @@ def forward(self, input_tensor: torch.Tensor, reference_tensor: torch.Tensor): # useful values spatial_dim = input_tensor.dim() - 2 class_count = input_tensor.size(1) - padding = self.compatability_kernel_range + padding = self.compatibility_kernel_range # constructing spatial feature tensor spatial_features = _create_coordinate_tensor(reference_tensor) @@ -88,18 +88,18 @@ def forward(self, input_tensor: torch.Tensor, reference_tensor: torch.Tensor): ) gaussian_features = spatial_features / self.gaussian_spatial_sigma - # compatability matrix (potts model (1 - diag) for now) - compatability_matrix = _potts_model_weights(class_count).to(device=input_tensor.device) + # compatibility matrix (potts model (1 - diag) for now) + compatibility_matrix = _potts_model_weights(class_count).to(device=input_tensor.device) # expanding matrix to kernel - compatability_kernel = _expand_matrix_to_kernel( - compatability_matrix, spatial_dim, self.compatability_kernel_range + compatibility_kernel = _expand_matrix_to_kernel( + compatibility_matrix, spatial_dim, self.compatibility_kernel_range ) # choosing convolution function conv = [conv1d, conv2d, conv3d][spatial_dim - 1] - # seting up output tensor + # setting up output tensor output_tensor = softmax(input_tensor, dim=1) # mean field loop @@ -114,7 +114,7 @@ def forward(self, input_tensor: torch.Tensor, reference_tensor: torch.Tensor): # compatibility convolution combined_output = pad(combined_output, 2 * spatial_dim * [padding], mode="replicate") - compatibility_update = conv(combined_output, compatability_kernel) + compatibility_update = conv(combined_output, compatibility_kernel) # update and normalize output_tensor = softmax(input_tensor - self.update_factor * compatibility_update, dim=1) diff --git a/monai/networks/blocks/localnet_block.py b/monai/networks/blocks/localnet_block.py index 4166c08774..3997d42436 100644 --- a/monai/networks/blocks/localnet_block.py +++ b/monai/networks/blocks/localnet_block.py @@ 
-1,3 +1,14 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import Optional, Sequence, Tuple, Type, Union import torch @@ -249,7 +260,7 @@ def forward(self, x, mid) -> torch.Tensor: Args: x: feature to be up-sampled, in shape (batch, ``in_channels``, insize_1, insize_2, [insize_3]) mid: mid-level feature saved during down-sampling, - in shape (batch, ``out_channels``, midsize_1, midsize_2, [midnsize_3]) + in shape (batch, ``out_channels``, midsize_1, midsize_2, [midsize_3]) Raises: ValueError: when ``midsize != insize * 2`` diff --git a/monai/networks/blocks/regunet_block.py b/monai/networks/blocks/regunet_block.py index f4c2c1f3a7..d2cd3518b9 100644 --- a/monai/networks/blocks/regunet_block.py +++ b/monai/networks/blocks/regunet_block.py @@ -8,6 +8,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+ from typing import List, Optional, Sequence, Tuple, Type, Union import torch @@ -227,7 +228,7 @@ def __init__( spatial_dims: number of spatial dimensions extract_levels: spatial levels to extract feature from, 0 refers to the input scale num_channels: number of channels at each scale level, - List or Tuple of lenth equals to `depth` of the RegNet + List or Tuple of length equals to `depth` of the RegNet out_channels: number of output channels kernel_initializer: kernel initializer activation: kernel activation function diff --git a/monai/networks/blocks/warp.py b/monai/networks/blocks/warp.py index b9967f2b62..d916c026ff 100644 --- a/monai/networks/blocks/warp.py +++ b/monai/networks/blocks/warp.py @@ -1,3 +1,14 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import warnings from typing import List @@ -123,7 +134,7 @@ def forward(self, image: torch.Tensor, ddf: torch.Tensor): class DVF2DDF(nn.Module): """ - Layer calculates a dense velocity field (DVF) from a dense displacement field (DDF) + Layer calculates a dense displacement field (DDF) from a dense velocity field (DVF) with scaling and squaring. 
Adapted from: diff --git a/monai/networks/layers/factories.py b/monai/networks/layers/factories.py index ec36b2ed95..9165a8ebe4 100644 --- a/monai/networks/layers/factories.py +++ b/monai/networks/layers/factories.py @@ -256,6 +256,13 @@ def swish_factory(): return Swish +@Act.factory_function("memswish") +def memswish_factory(): + from monai.networks.blocks.activation import MemoryEfficientSwish + + return MemoryEfficientSwish + + @Act.factory_function("mish") def mish_factory(): from monai.networks.blocks.activation import Mish diff --git a/monai/networks/layers/filtering.py b/monai/networks/layers/filtering.py index fc6c0a38b5..3b2214d59a 100644 --- a/monai/networks/layers/filtering.py +++ b/monai/networks/layers/filtering.py @@ -32,7 +32,7 @@ class BilateralFilter(torch.autograd.Function): input: input tensor. spatial sigma: the standard deviation of the spatial blur. Higher values can - hurt performace when not using the approximate method (see fast approx). + hurt performance when not using the approximate method (see fast approx). color sigma: the standard deviation of the color blur. Lower values preserve edges better whilst higher values tend to a simple gaussian spatial blur. 
@@ -95,7 +95,7 @@ def forward(ctx, input, features, sigmas=None):
 
     @staticmethod
     def backward(ctx, grad_output):
-        raise NotImplementedError("PHLFilter does not currently support backpropergation")
+        raise NotImplementedError("PHLFilter does not currently support backpropagation")
         # scaled_features, = ctx.saved_variables
         # grad_input = _C.phl_filter(grad_output, scaled_features)
         # return grad_input
diff --git a/monai/networks/nets/__init__.py b/monai/networks/nets/__init__.py
index 6876293bdb..91f46debf6 100644
--- a/monai/networks/nets/__init__.py
+++ b/monai/networks/nets/__init__.py
@@ -15,6 +15,7 @@
 from .classifier import Classifier, Critic, Discriminator
 from .densenet import DenseNet, DenseNet121, DenseNet169, DenseNet201, DenseNet264
 from .dynunet import DynUNet, DynUnet, Dynunet
+from .efficientnet import EfficientNet, EfficientNetBN, drop_connect, get_efficientnet_image_size
 from .fullyconnectednet import FullyConnectedNet, VarFullyConnectedNet
 from .generator import Generator
 from .highresnet import HighResBlock, HighResNet
diff --git a/monai/networks/nets/dynunet.py b/monai/networks/nets/dynunet.py
index 7d0b3bff79..a69814f61c 100644
--- a/monai/networks/nets/dynunet.py
+++ b/monai/networks/nets/dynunet.py
@@ -91,7 +91,7 @@ class DynUNet(nn.Module):
             (1, 2, 8, 6). The last two will be interpolated into (1, 2, 32, 24), and the stacked tensor will has
             the shape (1, 3, 2, 8, 6). When calculating the loss, you can use torch.unbind to get all feature maps can compute the loss
-            one by one with the groud truth, then do a weighted average for all losses to achieve the final loss.
+            one by one with the ground truth, then do a weighted average for all losses to achieve the final loss.
             (To be added: a corresponding tutorial link)
 
         deep_supr_num: number of feature maps that will output during deep supervision head.
The diff --git a/monai/networks/nets/efficientnet.py b/monai/networks/nets/efficientnet.py new file mode 100644 index 0000000000..65054c870f --- /dev/null +++ b/monai/networks/nets/efficientnet.py @@ -0,0 +1,849 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +import operator +import re +from functools import reduce +from typing import List, NamedTuple, Optional, Tuple, Type, Union + +import torch +from torch import nn +from torch.utils import model_zoo + +from monai.networks.layers.factories import Act, Conv, Norm, Pad, Pool + +__all__ = ["EfficientNetBN", "get_efficientnet_image_size", "drop_connect"] + +efficientnet_params = { + # model_name: (width_mult, depth_mult, image_size, dropout_rate, dropconnect_rate) + "efficientnet-b0": (1.0, 1.0, 224, 0.2, 0.2), + "efficientnet-b1": (1.0, 1.1, 240, 0.2, 0.2), + "efficientnet-b2": (1.1, 1.2, 260, 0.3, 0.2), + "efficientnet-b3": (1.2, 1.4, 300, 0.3, 0.2), + "efficientnet-b4": (1.4, 1.8, 380, 0.4, 0.2), + "efficientnet-b5": (1.6, 2.2, 456, 0.4, 0.2), + "efficientnet-b6": (1.8, 2.6, 528, 0.5, 0.2), + "efficientnet-b7": (2.0, 3.1, 600, 0.5, 0.2), +} + + +class MBConvBlock(nn.Module): + def __init__( + self, + spatial_dims: int, + in_channels: int, + out_channels: int, + kernel_size: int, + stride: int, + image_size: List[int], + expand_ratio: int, + se_ratio: Optional[float], + id_skip: Optional[bool] = True, + batch_norm_momentum: float = 0.99, + batch_norm_epsilon: float = 
1e-3,
+        drop_connect_rate: Optional[float] = 0.2,
+    ) -> None:
+        """
+        Mobile Inverted Residual Bottleneck Block.
+
+        Args:
+            spatial_dims: number of spatial dimensions.
+            in_channels: number of input channels.
+            out_channels: number of output channels.
+            kernel_size: size of the kernel for conv ops.
+            stride: stride to use for conv ops.
+            image_size: input image resolution.
+            expand_ratio: expansion ratio for inverted bottleneck.
+            se_ratio: squeeze-excitation ratio for se layers.
+            id_skip: whether to use skip connection.
+            batch_norm_momentum: momentum for batch norm.
+            batch_norm_epsilon: epsilon for batch norm.
+            drop_connect_rate: dropconnect rate for drop connection (individual weights) layers.
+
+        References:
+            [1] https://arxiv.org/abs/1704.04861 (MobileNet v1)
+            [2] https://arxiv.org/abs/1801.04381 (MobileNet v2)
+            [3] https://arxiv.org/abs/1905.02244 (MobileNet v3)
+        """
+        super().__init__()
+
+        # select the type of N-Dimensional layers to use
+        # these are based on spatial dims and selected from MONAI factories
+        conv_type = Conv["conv", spatial_dims]
+        batchnorm_type = Norm["batch", spatial_dims]
+        adaptivepool_type = Pool["adaptiveavg", spatial_dims]
+
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.id_skip = id_skip
+        self.stride = stride
+        self.expand_ratio = expand_ratio
+        self.drop_connect_rate = drop_connect_rate
+
+        if (se_ratio is not None) and (0.0 < se_ratio <= 1.0):
+            self.has_se = True
+            self.se_ratio = se_ratio
+        else:
+            self.has_se = False
+
+        bn_mom = 1.0 - batch_norm_momentum  # pytorch's difference from tensorflow
+        bn_eps = batch_norm_epsilon
+
+        # Expansion phase (Inverted Bottleneck)
+        inp = in_channels  # number of input channels
+        oup = in_channels * expand_ratio  # number of output channels
+        if self.expand_ratio != 1:
+            self._expand_conv = conv_type(in_channels=inp, out_channels=oup, kernel_size=1, bias=False)
+            self._expand_conv_padding = _make_same_padder(self._expand_conv, image_size)
+
+            self._bn0 = batchnorm_type(num_features=oup, momentum=bn_mom, eps=bn_eps)
+        else:
+            # need to have the following to fix JIT error:
+            # "Module 'MBConvBlock' has no attribute '_expand_conv'"
+
+            # FIXME: find a better way to bypass JIT error
+            self._expand_conv = nn.Identity()
+            self._expand_conv_padding = nn.Identity()
+            self._bn0 = nn.Identity()
+
+        # Depthwise convolution phase
+        self._depthwise_conv = conv_type(
+            in_channels=oup,
+            out_channels=oup,
+            groups=oup,  # groups makes it depthwise
+            kernel_size=kernel_size,
+            stride=self.stride,
+            bias=False,
+        )
+        self._depthwise_conv_padding = _make_same_padder(self._depthwise_conv, image_size)
+        self._bn1 = batchnorm_type(num_features=oup, momentum=bn_mom, eps=bn_eps)
+        image_size = _calculate_output_image_size(image_size, self.stride)
+
+        # Squeeze and Excitation layer, if desired
+        if self.has_se:
+            self._se_adaptpool = adaptivepool_type(1)
+            num_squeezed_channels = max(1, int(in_channels * self.se_ratio))
+            self._se_reduce = conv_type(in_channels=oup, out_channels=num_squeezed_channels, kernel_size=1)
+            self._se_reduce_padding = _make_same_padder(self._se_reduce, [1, 1])
+            self._se_expand = conv_type(in_channels=num_squeezed_channels, out_channels=oup, kernel_size=1)
+            self._se_expand_padding = _make_same_padder(self._se_expand, [1, 1])
+
+        # Pointwise convolution phase
+        final_oup = out_channels
+        self._project_conv = conv_type(in_channels=oup, out_channels=final_oup, kernel_size=1, bias=False)
+        self._project_conv_padding = _make_same_padder(self._project_conv, image_size)
+        self._bn2 = batchnorm_type(num_features=final_oup, momentum=bn_mom, eps=bn_eps)
+
+        # swish activation to use - using memory efficient swish by default
+        # can be switched to normal swish using self.set_swish() function call
+        self._swish = Act["memswish"]()
+
+    def forward(self, inputs: torch.Tensor):
+        """MBConvBlock's forward function.
+
+        Args:
+            inputs: Input tensor.
+
+        Returns:
+            Output of this block after processing.
+ """ + # Expansion and Depthwise Convolution + x = inputs + if self.expand_ratio != 1: + x = self._expand_conv(self._expand_conv_padding(x)) + x = self._bn0(x) + x = self._swish(x) + + x = self._depthwise_conv(self._depthwise_conv_padding(x)) + x = self._bn1(x) + x = self._swish(x) + + # Squeeze and Excitation + if self.has_se: + x_squeezed = self._se_adaptpool(x) + x_squeezed = self._se_reduce(self._se_reduce_padding(x_squeezed)) + x_squeezed = self._swish(x_squeezed) + x_squeezed = self._se_expand(self._se_expand_padding(x_squeezed)) + x = torch.sigmoid(x_squeezed) * x + + # Pointwise Convolution + x = self._project_conv(self._project_conv_padding(x)) + x = self._bn2(x) + + # Skip connection and drop connect + if self.id_skip and self.stride == 1 and self.in_channels == self.out_channels: + # the combination of skip connection and drop connect brings about stochastic depth. + if self.drop_connect_rate: + x = drop_connect(x, p=self.drop_connect_rate, training=self.training) + x = x + inputs # skip connection + return x + + def set_swish(self, memory_efficient: bool = True) -> None: + """Sets swish function as memory efficient (for training) or standard (for export). + + Args: + memory_efficient (bool): Whether to use memory-efficient version of swish. + """ + self._swish = Act["memswish"]() if memory_efficient else Act["swish"](alpha=1.0) + + +class EfficientNet(nn.Module): + def __init__( + self, + blocks_args_str: List[str], + spatial_dims: int = 2, + in_channels: int = 3, + num_classes: int = 1000, + width_coefficient: float = 1.0, + depth_coefficient: float = 1.0, + dropout_rate: float = 0.2, + image_size: int = 224, + batch_norm_momentum: float = 0.99, + batch_norm_epsilon: float = 1e-3, + drop_connect_rate: float = 0.2, + depth_divisor: int = 8, + ) -> None: + """ + EfficientNet based on `Rethinking Model Scaling for Convolutional Neural Networks `_. + Adapted from `EfficientNet-PyTorch + `_. + + Args: + blocks_args_str: block definitions. 
+ spatial_dims: number of spatial dimensions. + in_channels: number of input channels. + num_classes: number of output classes. + width_coefficient: width multiplier coefficient (w in paper). + depth_coefficient: depth multiplier coefficient (d in paper). + dropout_rate: dropout rate for dropout layers. + image_size: input image resolution. + batch_norm_momentum: momentum for batch norm. + batch_norm_epsilon: epsilon for batch norm. + drop_connect_rate: dropconnect rate for drop connection (individual weights) layers. + depth_divisor: depth divisor for channel rounding. + + Examples:: + + # for pretrained spatial 2D ImageNet + >>> image_size = get_efficientnet_image_size("efficientnet-b0") + >>> inputs = torch.rand(1, 3, image_size, image_size) + >>> model = EfficientNetBN("efficientnet-b0", pretrained=True) + >>> model.eval() + >>> outputs = model(inputs) + + # create spatial 2D + >>> model = EfficientNetBN("efficientnet-b0", spatial_dims=2) + + # create spatial 3D + >>> model = EfficientNetBN("efficientnet-b0", spatial_dims=3) + + # create EfficientNetB7 for spatial 2D + >>> model = EfficientNetBN("efficientnet-b7", spatial_dims=2) + + """ + super().__init__() + + if spatial_dims not in (1, 2, 3): + raise ValueError("spatial_dims can only be 1, 2 or 3.") + + # select the type of N-Dimensional layers to use + # these are based on spatial dims and selected from MONAI factories + conv_type: Type[Union[nn.Conv1d, nn.Conv2d, nn.Conv3d]] = Conv["conv", spatial_dims] + batchnorm_type: Type[Union[nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d]] = Norm["batch", spatial_dims] + adaptivepool_type: Type[Union[nn.AdaptiveAvgPool1d, nn.AdaptiveAvgPool2d, nn.AdaptiveAvgPool3d]] = Pool[ + "adaptiveavg", spatial_dims + ] + + # decode blocks args into arguments for MBConvBlock + blocks_args = _decode_block_list(blocks_args_str) + + # checks for successful decoding of blocks_args_str + if not isinstance(blocks_args, list): + raise ValueError("blocks_args must be a list") + + if 
blocks_args == []: + raise ValueError("block_args must be non-empty") + + self._blocks_args = blocks_args + self.num_classes = num_classes + self.in_channels = in_channels + self.drop_connect_rate = drop_connect_rate + + # expand input image dimensions to list + current_image_size = [image_size] * spatial_dims + + # parameters for batch norm + bn_mom = 1 - batch_norm_momentum # 1 - bn_m to convert tensorflow's arg to pytorch bn compatible + bn_eps = batch_norm_epsilon + + # Stem + stride = 2 + out_channels = _round_filters(32, width_coefficient, depth_divisor) # number of output channels + self._conv_stem = conv_type(self.in_channels, out_channels, kernel_size=3, stride=stride, bias=False) + self._conv_stem_padding = _make_same_padder(self._conv_stem, current_image_size) + self._bn0 = batchnorm_type(num_features=out_channels, momentum=bn_mom, eps=bn_eps) + current_image_size = _calculate_output_image_size(current_image_size, stride) + + # build MBConv blocks + num_blocks = 0 + self._blocks = nn.Sequential() + + # update baseline blocks to input/output filters and number of repeats based on width and depth multipliers. + for idx, block_args in enumerate(self._blocks_args): + block_args = block_args._replace( + input_filters=_round_filters(block_args.input_filters, width_coefficient, depth_divisor), + output_filters=_round_filters(block_args.output_filters, width_coefficient, depth_divisor), + num_repeat=_round_repeats(block_args.num_repeat, depth_coefficient), + ) + self._blocks_args[idx] = block_args + + # calculate the total number of blocks - needed for drop_connect estimation + num_blocks += block_args.num_repeat + + # create and add MBConvBlocks to self._blocks + idx = 0 # block index counter + for block_args in self._blocks_args: + blk_drop_connect_rate = self.drop_connect_rate + + # scale drop connect_rate + if blk_drop_connect_rate: + blk_drop_connect_rate *= float(idx) / num_blocks + + # the first block needs to take care of stride and filter size increase. 
+ self._blocks.add_module( + str(idx), + MBConvBlock( + spatial_dims=spatial_dims, + in_channels=block_args.input_filters, + out_channels=block_args.output_filters, + kernel_size=block_args.kernel_size, + stride=block_args.stride, + image_size=current_image_size, + expand_ratio=block_args.expand_ratio, + se_ratio=block_args.se_ratio, + id_skip=block_args.id_skip, + batch_norm_momentum=batch_norm_momentum, + batch_norm_epsilon=batch_norm_epsilon, + drop_connect_rate=blk_drop_connect_rate, + ), + ) + idx += 1 # increment blocks index counter + + current_image_size = _calculate_output_image_size(current_image_size, block_args.stride) + if block_args.num_repeat > 1: # modify block_args to keep same output size + block_args = block_args._replace(input_filters=block_args.output_filters, stride=1) + + # add remaining block repeated num_repeat times + for _ in range(block_args.num_repeat - 1): + blk_drop_connect_rate = self.drop_connect_rate + + # scale drop connect_rate + if blk_drop_connect_rate: + blk_drop_connect_rate *= float(idx) / num_blocks + + # add blocks + self._blocks.add_module( + str(idx), + MBConvBlock( + spatial_dims=spatial_dims, + in_channels=block_args.input_filters, + out_channels=block_args.output_filters, + kernel_size=block_args.kernel_size, + stride=block_args.stride, + image_size=current_image_size, + expand_ratio=block_args.expand_ratio, + se_ratio=block_args.se_ratio, + id_skip=block_args.id_skip, + batch_norm_momentum=batch_norm_momentum, + batch_norm_epsilon=batch_norm_epsilon, + drop_connect_rate=blk_drop_connect_rate, + ), + ) + idx += 1 # increment blocks index counter + + # sanity check to see if len(self._blocks) equal expected num_blocks + if len(self._blocks) != num_blocks: + raise ValueError("number of blocks created != num_blocks") + + # Head + head_in_channels = block_args.output_filters + out_channels = _round_filters(1280, width_coefficient, depth_divisor) + self._conv_head = conv_type(head_in_channels, out_channels, kernel_size=1, 
bias=False) + self._conv_head_padding = _make_same_padder(self._conv_head, current_image_size) + self._bn1 = batchnorm_type(num_features=out_channels, momentum=bn_mom, eps=bn_eps) + + # final linear layer + self._avg_pooling = adaptivepool_type(1) + self._dropout = nn.Dropout(dropout_rate) + self._fc = nn.Linear(out_channels, self.num_classes) + + # swish activation to use - using memory efficient swish by default + # can be switched to normal swish using self.set_swish() function call + self._swish = Act["memswish"]() + + # initialize weights using Tensorflow's init method from official impl. + self._initialize_weights() + + def set_swish(self, memory_efficient: bool = True) -> None: + """ + Sets swish function as memory efficient (for training) or standard (for JIT export). + + Args: + memory_efficient: whether to use memory-efficient version of swish. + + """ + self._swish = Act["memswish"]() if memory_efficient else Act["swish"](alpha=1.0) + for block in self._blocks: + block.set_swish(memory_efficient) + + def forward(self, inputs: torch.Tensor): + """ + Args: + inputs: input should have spatially N dimensions + ``(Batch, in_channels, dim_0[, dim_1, ..., dim_N])``, N is defined by `dimensions`. + + Returns: + A torch Tensor of classification prediction in shape + ``(Batch, num_classes)``. + """ + # Stem + x = self._conv_stem(self._conv_stem_padding(inputs)) + x = self._swish(self._bn0(x)) + # Blocks + x = self._blocks(x) + # Head + x = self._conv_head(self._conv_head_padding(x)) + x = self._swish(self._bn1(x)) + + # Pooling and final linear layer + x = self._avg_pooling(x) + + x = x.flatten(start_dim=1) + x = self._dropout(x) + x = self._fc(x) + return x + + def _initialize_weights(self) -> None: + """ + Args: + None, initializes weights for conv/linear/batchnorm layers + following weight init methods from + `official Tensorflow EfficientNet implementation + `_. + Adapted from `EfficientNet-PyTorch's init method + `_. 
+ """ + for _, m in self.named_modules(): + if isinstance(m, (nn.Conv1d, nn.Conv2d, nn.Conv3d)): + fan_out = reduce(operator.mul, m.kernel_size, 1) * m.out_channels + m.weight.data.normal_(0, math.sqrt(2.0 / fan_out)) + if m.bias is not None: + m.bias.data.zero_() + elif isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)): + m.weight.data.fill_(1.0) + m.bias.data.zero_() + elif isinstance(m, nn.Linear): + fan_out = m.weight.size(0) + fan_in = 0 + init_range = 1.0 / math.sqrt(fan_in + fan_out) + m.weight.data.uniform_(-init_range, init_range) + m.bias.data.zero_() + + +class EfficientNetBN(EfficientNet): + def __init__( + self, + model_name: str, + pretrained: bool = True, + progress: bool = True, + spatial_dims: int = 2, + in_channels: int = 3, + num_classes: int = 1000, + ) -> None: + """ + Generic wrapper around EfficientNet, used to initialize EfficientNet-B0 to EfficientNet-B7 models + model_name is mandatory argument as there is no EfficientNetBN itself, + it needs the N in [0, 1, 2, 3, 4, 5, 6, 7] to be a model + + Args: + model_name: name of model to initialize, can be from [efficientnet-b0, ..., efficientnet-b7]. + pretrained: whether to initialize pretrained ImageNet weights, only available for spatial_dims=2. + progress: whether to show download progress for pretrained weights download. + spatial_dims: number of spatial dimensions. + in_channels: number of input channels. + num_classes: number of output classes. 
+ + """ + # block args for EfficientNet-B0 to EfficientNet-B7 + blocks_args_str = [ + "r1_k3_s11_e1_i32_o16_se0.25", + "r2_k3_s22_e6_i16_o24_se0.25", + "r2_k5_s22_e6_i24_o40_se0.25", + "r3_k3_s22_e6_i40_o80_se0.25", + "r3_k5_s11_e6_i80_o112_se0.25", + "r4_k5_s22_e6_i112_o192_se0.25", + "r1_k3_s11_e6_i192_o320_se0.25", + ] + + # check if model_name is valid model + if model_name not in efficientnet_params.keys(): + raise ValueError( + "invalid model_name {} found, must be one of {} ".format( + model_name, ", ".join(efficientnet_params.keys()) + ) + ) + + # get network parameters + weight_coeff, depth_coeff, image_size, dropout_rate, dropconnect_rate = efficientnet_params[model_name] + + # create model and initialize random weights + super(EfficientNetBN, self).__init__( + blocks_args_str=blocks_args_str, + spatial_dims=spatial_dims, + in_channels=in_channels, + num_classes=num_classes, + width_coefficient=weight_coeff, + depth_coefficient=depth_coeff, + dropout_rate=dropout_rate, + image_size=image_size, + drop_connect_rate=dropconnect_rate, + ) + + # attempt to load pretrained + is_default_model = (spatial_dims == 2) and (in_channels == 3) + loadable_from_file = pretrained and is_default_model + + if loadable_from_file: + # skip loading fc layers for transfer learning applications + load_fc = num_classes == 1000 + + # only pretrained for when `spatial_dims` is 2 + _load_state_dict(self, model_name, progress, load_fc) + else: + print( + "Skipping loading pretrained weights for non-default {}, pretrained={}, is_default_model={}".format( + model_name, pretrained, is_default_model + ) + ) + + +def get_efficientnet_image_size(model_name: str) -> int: + """ + Get the input image size for a given efficientnet model. + + Args: + model_name: name of model to initialize, can be from [efficientnet-b0, ..., efficientnet-b7]. + + Returns: + Image size for single spatial dimension as integer. 
+ + """ + # check if model_name is valid model + if model_name not in efficientnet_params.keys(): + raise ValueError( + "invalid model_name {} found, must be one of {} ".format(model_name, ", ".join(efficientnet_params.keys())) + ) + + # return input image size (all dims equal so only need to return for one dim) + _, _, res, _, _ = efficientnet_params[model_name] + return res + + +def drop_connect(inputs: torch.Tensor, p: float, training: bool) -> torch.Tensor: + """ + Drop connect layer that drops individual connections. + Differs from dropout as dropconnect drops connections instead of whole neurons as in dropout. + + Based on `Deep Networks with Stochastic Depth `_. + Adapted from `Official Tensorflow EfficientNet utils + `_. + + This function is generalized for MONAI's N-Dimensional spatial activations + e.g. 1D activations [B, C, H], 2D activations [B, C, H, W] and 3D activations [B, C, H, W, D] + + Args: + input: input tensor with [B, C, dim_1, dim_2, ..., dim_N] where N=spatial_dims. + p: probability to use for dropping connections. + training: whether in training or evaluation mode. + + Returns: + output: output tensor after applying drop connection. 
+ """ + if p < 0.0 or p > 1.0: + raise ValueError("p must be in range of [0, 1], found {}".format(p)) + + # eval mode: drop_connect is switched off - so return input without modifying + if not training: + return inputs + + # train mode: calculate and apply drop_connect + batch_size: int = inputs.shape[0] + keep_prob: float = 1 - p + num_dims: int = len(inputs.shape) - 2 + + # build dimensions for random tensor, use num_dims to populate appropriate spatial dims + random_tensor_shape: List[int] = [batch_size, 1] + [1] * num_dims + + # generate binary_tensor mask according to probability (p for 0, 1-p for 1) + random_tensor: torch.Tensor = torch.rand(random_tensor_shape, dtype=inputs.dtype, device=inputs.device) + random_tensor += keep_prob + + # round to form binary tensor + binary_tensor: torch.Tensor = torch.floor(random_tensor) + + # drop connect using binary tensor + output: torch.Tensor = inputs / keep_prob * binary_tensor + return output + + +def _load_state_dict(model: nn.Module, model_name: str, progress: bool, load_fc: bool) -> None: + url_map = { + "efficientnet-b0": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b0-355c32eb.pth", + "efficientnet-b1": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b1-f1951068.pth", + "efficientnet-b2": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b2-8bb594d6.pth", + "efficientnet-b3": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b3-5fb5a3c3.pth", + "efficientnet-b4": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b4-6ed6700e.pth", + "efficientnet-b5": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b5-b6417697.pth", + "efficientnet-b6": 
"https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b6-c76e70fd.pth", + "efficientnet-b7": "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b7-dcc49843.pth", + } + # load state dict from url + model_url = url_map[model_name] + state_dict = model_zoo.load_url(model_url, progress=progress) + + # load state dict into model parameters + if load_fc: # load everything + ret = model.load_state_dict(state_dict, strict=False) + if ret.missing_keys: + raise ValueError("Found missing keys when loading pretrained weights: {}".format(ret.missing_keys)) + else: # skip final FC layers, for transfer learning cases + state_dict.pop("_fc.weight") + state_dict.pop("_fc.bias") + ret = model.load_state_dict(state_dict, strict=False) + + # check if no other keys missing except FC layer parameters + if set(ret.missing_keys) != {"_fc.weight", "_fc.bias"}: + raise ValueError("Found missing keys when loading pretrained weights: {}".format(ret.missing_keys)) + + # check for any unexpected keys + if ret.unexpected_keys: + raise ValueError("Missing keys when loading pretrained weights: {}".format(ret.unexpected_keys)) + + +def _get_same_padding_conv_nd( + image_size: List[int], kernel_size: Tuple[int, ...], dilation: Tuple[int, ...], stride: Tuple[int, ...] +) -> List[int]: + """ + Helper for getting padding (nn.ConstantPadNd) to be used to get SAME padding + conv operations similar to Tensorflow's SAME padding. + + This function is generalized for MONAI's N-Dimensional spatial operations (e.g. Conv1D, Conv2D, Conv3D) + + Args: + image_size: input image/feature spatial size. + kernel_size: conv kernel's spatial size. + dilation: conv dilation rate for Atrous conv. + stride: stride for conv operation. + + Returns: + paddings for ConstantPadNd padder to be used on input tensor to conv op. 
+ """ + # get number of spatial dimensions, corresponds to kernel size length + num_dims = len(kernel_size) + + # additional checks to populate dilation and stride (in case they are single entry tuples) + if len(dilation) == 1: + dilation = dilation * num_dims + + if len(stride) == 1: + stride = stride * num_dims + + # equation to calculate (pad^+ + pad^-) size + _pad_size: List[int] = [ + max((math.ceil(_i_s / _s) - 1) * _s + (_k_s - 1) * _d + 1 - _i_s, 0) + for _i_s, _k_s, _d, _s in zip(image_size, kernel_size, dilation, stride) + ] + # distribute paddings into pad^+ and pad^- following Tensorflow's same padding strategy + _paddings: List[Tuple[int, int]] = [(_p // 2, _p - _p // 2) for _p in _pad_size] + + # unroll list of tuples to tuples, and then to list + # reversed as nn.ConstantPadNd expects paddings starting with last dimension + _paddings_ret: List[int] = [outer for inner in reversed(_paddings) for outer in inner] + return _paddings_ret + + +def _make_same_padder(conv_op: Union[nn.Conv1d, nn.Conv2d, nn.Conv3d], image_size: List[int]): + """ + Helper for initializing ConstantPadNd with SAME padding similar to Tensorflow. + Uses output of _get_same_padding_conv_nd() to get the padding size. + + This function is generalized for MONAI's N-Dimensional spatial operations (e.g. 
Conv1D, Conv2D, Conv3D) + + Args: + conv_op: nn.ConvNd operation to extract parameters for op from + image_size: input image/feature spatial size + + Returns: + If padding required then nn.ConstandNd() padder initialized to paddings otherwise nn.Identity() + """ + # calculate padding required + padding: List[int] = _get_same_padding_conv_nd(image_size, conv_op.kernel_size, conv_op.dilation, conv_op.stride) + + # initialize and return padder + padder = Pad["constantpad", len(padding) // 2] + if sum(padding) > 0: + return padder(padding=padding, value=0.0) + else: + return nn.Identity() + + +def _round_filters(filters: int, width_coefficient: Optional[float], depth_divisor: float) -> int: + """ + Calculate and round number of filters based on width coefficient multiplier and depth divisor. + + Args: + filters: number of input filters. + width_coefficient: width coefficient for model. + depth_divisor: depth divisor to use. + + Returns: + new_filters: new number of filters after calculation. + """ + + if not width_coefficient: + return filters + + multiplier: float = width_coefficient + divisor: float = depth_divisor + filters_float: float = filters * multiplier + + # follow the formula transferred from official TensorFlow implementation + new_filters: float = max(divisor, int(filters_float + divisor / 2) // divisor * divisor) + if new_filters < 0.9 * filters_float: # prevent rounding by more than 10% + new_filters += divisor + return int(new_filters) + + +def _round_repeats(repeats: int, depth_coefficient: Optional[float]) -> int: + """ + Re-calculate module's repeat number of a block based on depth coefficient multiplier. + + Args: + repeats: number of original repeats. + depth_coefficient: depth coefficient for model. + + Returns: + new repeat: new number of repeat after calculating. + """ + if not depth_coefficient: + return repeats + + # follow the formula transferred from official TensorFlow impl. 
+ return int(math.ceil(depth_coefficient * repeats)) + + +def _calculate_output_image_size(input_image_size: List[int], stride: Union[int, Tuple[int]]): + """ + Calculates the output image size when using _make_same_padder with a stride. + Required for static padding. + + Args: + input_image_size: input image/feature spatial size. + stride: Conv2d operation"s stride. + + Returns: + output_image_size: output image/feature spatial size. + """ + # get number of spatial dimensions, corresponds to image spatial size length + num_dims = len(input_image_size) + + # checks to extract integer stride in case tuple was received + if isinstance(stride, tuple): + all_strides_equal = all(stride[0] == s for s in stride) + if not all_strides_equal: + raise ValueError("unequal strides are not possible, got {}".format(stride)) + + stride = stride[0] + + # return output image size + return [int(math.ceil(im_sz / stride)) for im_sz in input_image_size] + + +def _decode_block_list(string_list: List[str]): + """ + Decode a list of string notations to specify blocks inside the network. + + Args: + string_list: a list of strings, each string is a notation of block. + + Returns: + blocks_args: a list of BlockArgs namedtuples of block args. + """ + # Parameters for an individual model block + # namedtuple with defaults for mypy help from: + # https://stackoverflow.com/a/53255358 + class BlockArgs(NamedTuple): + num_repeat: int + kernel_size: int + stride: int + expand_ratio: int + input_filters: int + output_filters: int + id_skip: bool + se_ratio: Optional[float] = None + + def _decode_block_string(block_string: str): + """ + Get a block through a string notation of arguments. + + Args: + block_string (str): A string notation of arguments. + Examples: "r1_k3_s11_e1_i32_o16_se0.25". + + Returns: + BlockArgs: namedtuple defined at the top of this function. 
+ """ + ops = block_string.split("_") + options = {} + for op in ops: + splits = re.split(r"(\d.*)", op) + if len(splits) >= 2: + key, value = splits[:2] + options[key] = value + + # check stride + stride_check = ( + ("s" in options and len(options["s"]) == 1) + or (len(options["s"]) == 2 and options["s"][0] == options["s"][1]) + or (len(options["s"]) == 3 and options["s"][0] == options["s"][1] and options["s"][0] == options["s"][2]) + ) + if not stride_check: + raise ValueError("invalid stride option received") + + return BlockArgs( + num_repeat=int(options["r"]), + kernel_size=int(options["k"]), + stride=int(options["s"][0]), + expand_ratio=int(options["e"]), + input_filters=int(options["i"]), + output_filters=int(options["o"]), + id_skip=("noskip" not in block_string), + se_ratio=float(options["se"]) if "se" in options else None, + ) + + # convert block strings into BlockArgs for each entry in string_list list + blocks_args: List[BlockArgs] = [] + for current_string in string_list: + blocks_args.append(_decode_block_string(current_string)) + + # return blocks_args list, to be used for arguments of MBConv layers in EfficientNet + return blocks_args diff --git a/monai/networks/nets/senet.py b/monai/networks/nets/senet.py index f5738edeeb..1e04e02973 100644 --- a/monai/networks/nets/senet.py +++ b/monai/networks/nets/senet.py @@ -263,8 +263,8 @@ def _load_state_dict(model, arch, progress): model_url = model_urls[arch] else: raise ValueError( - "only 'senet154', 'se_resnet50', 'se_resnet101', 'se_resnet152', 'se_resnext50_32x4d', \ - and se_resnext101_32x4d are supported to load pretrained weights." + "only 'senet154', 'se_resnet50', 'se_resnet101', 'se_resnet152', 'se_resnext50_32x4d', " + + "and se_resnext101_32x4d are supported to load pretrained weights." 
) pattern_conv = re.compile(r"^(layer[1-4]\.\d\.(?:conv)\d\.)(\w*)$") diff --git a/monai/networks/nets/torchvision_fc.py b/monai/networks/nets/torchvision_fc.py index 4fdd0d64ef..8b8a223b55 100644 --- a/monai/networks/nets/torchvision_fc.py +++ b/monai/networks/nets/torchvision_fc.py @@ -1,3 +1,14 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import Tuple, Union import torch @@ -13,15 +24,8 @@ class TorchVisionFullyConvModel(torch.nn.Module): Args: model_name: name of any torchvision with adaptive avg pooling and fully connected layer at the end. - - resnet18 (default) - - resnet34 - - resnet50 - - resnet101 - - resnet152 - - resnext50_32x4d - - resnext101_32x8d - - wide_resnet50_2 - - wide_resnet101_2 + ``resnet18`` (default), ``resnet34m``, ``resnet50``, ``resnet101``, ``resnet152``, + ``resnext50_32x4d``, ``resnext101_32x8d``, ``wide_resnet50_2``, ``wide_resnet101_2``. n_classes: number of classes for the last classification layer. Default to 1. pool_size: the kernel size for `AvgPool2d` to replace `AdaptiveAvgPool2d`. Default to (7, 7). pool_stride: the stride for `AvgPool2d` to replace `AdaptiveAvgPool2d`. Default to 1. 
diff --git a/monai/optimizers/lr_finder.py b/monai/optimizers/lr_finder.py index 9e753a1ced..49d4427b3d 100644 --- a/monai/optimizers/lr_finder.py +++ b/monai/optimizers/lr_finder.py @@ -1,3 +1,14 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import warnings from functools import partial from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Tuple, Type, Union diff --git a/monai/optimizers/lr_scheduler.py b/monai/optimizers/lr_scheduler.py index aa9bf2a89b..c4488f6e07 100644 --- a/monai/optimizers/lr_scheduler.py +++ b/monai/optimizers/lr_scheduler.py @@ -1,3 +1,14 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from torch.optim import Optimizer from torch.optim.lr_scheduler import _LRScheduler diff --git a/monai/transforms/__init__.py b/monai/transforms/__init__.py index b8cc832db1..f96194c262 100644 --- a/monai/transforms/__init__.py +++ b/monai/transforms/__init__.py @@ -160,6 +160,7 @@ KeepLargestConnectedComponent, LabelToContour, MeanEnsemble, + ProbNMS, VoteEnsemble, ) from .post.dictionary import ( @@ -182,6 +183,9 @@ MeanEnsembled, MeanEnsembleD, MeanEnsembleDict, + ProbNMSd, + ProbNMSD, + ProbNMSDict, VoteEnsembled, VoteEnsembleD, VoteEnsembleDict, @@ -367,6 +371,7 @@ ) from .utils import ( allow_missing_keys_mode, + convert_inverse_interp_mode, copypaste_arrays, create_control_grid, create_grid, diff --git a/monai/transforms/compose.py b/monai/transforms/compose.py index d509ea33a1..ce965b8b18 100644 --- a/monai/transforms/compose.py +++ b/monai/transforms/compose.py @@ -19,7 +19,7 @@ from monai.transforms.inverse import InvertibleTransform -# For backwards compatiblity (so this still works: from monai.transforms.compose import MapTransform) +# For backwards compatibility (so this still works: from monai.transforms.compose import MapTransform) from monai.transforms.transform import ( # noqa: F401 MapTransform, Randomizable, @@ -32,7 +32,7 @@ __all__ = ["Compose"] -class Compose(RandomizableTransform, InvertibleTransform): +class Compose(Randomizable, InvertibleTransform): """ ``Compose`` provides the ability to chain a series of calls together in a sequence. 
Each transform in the sequence must take a single argument and @@ -102,14 +102,14 @@ def __init__(self, transforms: Optional[Union[Sequence[Callable], Callable]] = N def set_random_state(self, seed: Optional[int] = None, state: Optional[np.random.RandomState] = None) -> "Compose": super().set_random_state(seed=seed, state=state) for _transform in self.transforms: - if not isinstance(_transform, RandomizableTransform): + if not isinstance(_transform, Randomizable): continue _transform.set_random_state(seed=self.R.randint(MAX_SEED, dtype="uint32")) return self def randomize(self, data: Optional[Any] = None) -> None: for _transform in self.transforms: - if not isinstance(_transform, RandomizableTransform): + if not isinstance(_transform, Randomizable): continue try: _transform.randomize(data) diff --git a/monai/transforms/croppad/array.py b/monai/transforms/croppad/array.py index 159fa1a5f4..c8f7136334 100644 --- a/monai/transforms/croppad/array.py +++ b/monai/transforms/croppad/array.py @@ -20,7 +20,7 @@ from monai.config import IndexSelection from monai.data.utils import get_random_patch, get_valid_patch_size -from monai.transforms.transform import Randomizable, RandomizableTransform, Transform +from monai.transforms.transform import Randomizable, Transform from monai.transforms.utils import ( generate_pos_neg_label_crop_centers, generate_spatial_bounding_box, @@ -279,7 +279,7 @@ def __call__(self, img: np.ndarray): return cropper(img) -class RandSpatialCrop(RandomizableTransform): +class RandSpatialCrop(Randomizable): """ Crop image with random size or specific size ROI. It can crop at a random position as center or at the image center. And allows to set the minimum size to limit the randomly generated ROI. @@ -324,7 +324,7 @@ def __call__(self, img: np.ndarray): return cropper(img) -class RandSpatialCropSamples(RandomizableTransform): +class RandSpatialCropSamples(Randomizable): """ Crop image with random size or specific size ROI to generate a list of N samples. 
It can crop at a random position as center or at the image center. And allows to set @@ -432,7 +432,7 @@ def __call__(self, img: np.ndarray): return cropped -class RandWeightedCrop(RandomizableTransform): +class RandWeightedCrop(Randomizable): """ Samples a list of `num_samples` image patches according to the provided `weight_map`. @@ -484,7 +484,7 @@ def __call__(self, img: np.ndarray, weight_map: Optional[np.ndarray] = None) -> return results -class RandCropByPosNegLabel(RandomizableTransform): +class RandCropByPosNegLabel(Randomizable): """ Crop random fixed sized regions with the center being a foreground or background voxel based on the Pos Neg Ratio. diff --git a/monai/transforms/croppad/dictionary.py b/monai/transforms/croppad/dictionary.py index 64e9f862f9..c4ef659c69 100644 --- a/monai/transforms/croppad/dictionary.py +++ b/monai/transforms/croppad/dictionary.py @@ -16,6 +16,7 @@ """ from copy import deepcopy +from enum import Enum from itertools import chain from math import floor from typing import Any, Callable, Dict, Hashable, List, Mapping, Optional, Sequence, Tuple, Union @@ -34,13 +35,14 @@ SpatialPad, ) from monai.transforms.inverse import InvertibleTransform -from monai.transforms.transform import MapTransform, Randomizable, RandomizableTransform +from monai.transforms.transform import MapTransform, Randomizable from monai.transforms.utils import ( generate_pos_neg_label_crop_centers, generate_spatial_bounding_box, map_binary_to_indices, weighted_patch_samples, ) +from monai.utils import ImageMetaKey as Key from monai.utils import Method, NumpyPadMode, ensure_tuple, ensure_tuple_rep, fall_back_tuple from monai.utils.enums import InverseKeys @@ -124,7 +126,7 @@ def __init__( def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: d = dict(data) for key, m in self.key_iterator(d, self.mode): - self.push_transform(d, key) + self.push_transform(d, key, extra_info={"mode": m.value if isinstance(m, Enum) else m}) d[key] = 
self.padder(d[key], mode=m) return d @@ -192,7 +194,7 @@ def __init__( def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: d = dict(data) for key, m in self.key_iterator(d, self.mode): - self.push_transform(d, key) + self.push_transform(d, key, extra_info={"mode": m.value if isinstance(m, Enum) else m}) d[key] = self.padder(d[key], mode=m) return d @@ -258,7 +260,7 @@ def __init__( def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: d = dict(data) for key, m in self.key_iterator(d, self.mode): - self.push_transform(d, key) + self.push_transform(d, key, extra_info={"mode": m.value if isinstance(m, Enum) else m}) d[key] = self.padder(d[key], mode=m) return d @@ -385,7 +387,7 @@ def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndar return d -class RandSpatialCropd(RandomizableTransform, MapTransform, InvertibleTransform): +class RandSpatialCropd(Randomizable, MapTransform, InvertibleTransform): """ Dictionary-based version :py:class:`monai.transforms.RandSpatialCrop`. Crop image with random size or specific size ROI. It can crop at a random position as @@ -412,7 +414,6 @@ def __init__( random_size: bool = True, allow_missing_keys: bool = False, ) -> None: - RandomizableTransform.__init__(self, prob=1.0, do_transform=True) MapTransform.__init__(self, keys, allow_missing_keys) self.roi_size = roi_size self.random_center = random_center @@ -476,13 +477,13 @@ def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndar return d -class RandSpatialCropSamplesd(RandomizableTransform, MapTransform): +class RandSpatialCropSamplesd(Randomizable, MapTransform): """ Dictionary-based version :py:class:`monai.transforms.RandSpatialCropSamples`. Crop image with random size or specific size ROI to generate a list of N samples. It can crop at a random position as center or at the image center. And allows to set the minimum size to limit the randomly generated ROI. 
Suppose all the expected fields - specified by `keys` have same shape. + specified by `keys` have same shape, and add `patch_index` to the corresponding meta data. It will return a list of dictionaries for all the cropped images. Args: @@ -494,6 +495,9 @@ class RandSpatialCropSamplesd(RandomizableTransform, MapTransform): random_center: crop at random position as center or the image center. random_size: crop with random size or specific size ROI. The actual size is sampled from `randint(roi_size, img_size)`. + meta_key_postfix: use `key_{postfix}` to to fetch the meta data according to the key data, + default is `meta_dict`, the meta data is a dictionary object. + used to add `patch_index` to the meta dict. allow_missing_keys: don't raise exception if key is missing. Raises: @@ -508,14 +512,15 @@ def __init__( num_samples: int, random_center: bool = True, random_size: bool = True, + meta_key_postfix: str = "meta_dict", allow_missing_keys: bool = False, ) -> None: - RandomizableTransform.__init__(self, prob=1.0, do_transform=True) MapTransform.__init__(self, keys, allow_missing_keys) if num_samples < 1: raise ValueError(f"num_samples must be positive, got {num_samples}.") self.num_samples = num_samples self.cropper = RandSpatialCropd(keys, roi_size, random_center, random_size, allow_missing_keys) + self.meta_key_postfix = meta_key_postfix def set_random_state( self, seed: Optional[int] = None, state: Optional[np.random.RandomState] = None @@ -528,7 +533,18 @@ def randomize(self, data: Optional[Any] = None) -> None: pass def __call__(self, data: Mapping[Hashable, np.ndarray]) -> List[Dict[Hashable, np.ndarray]]: - return [self.cropper(data) for _ in range(self.num_samples)] + ret = [] + d = dict(data) + for i in range(self.num_samples): + cropped = self.cropper(d) + # add `patch_index` to the meta data + for key in self.key_iterator(d): + meta_data_key = f"{key}_{self.meta_key_postfix}" + if meta_data_key not in cropped: + cropped[meta_data_key] = {} # type: ignore + 
cropped[meta_data_key][Key.PATCH_INDEX] = i + ret.append(cropped) + return ret class CropForegroundd(MapTransform, InvertibleTransform): @@ -609,7 +625,7 @@ def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndar return d -class RandWeightedCropd(RandomizableTransform, MapTransform): +class RandWeightedCropd(Randomizable, MapTransform): """ Samples a list of `num_samples` image patches according to the provided `weight_map`. @@ -637,7 +653,6 @@ def __init__( center_coord_key: Optional[str] = None, allow_missing_keys: bool = False, ): - RandomizableTransform.__init__(self, prob=1.0, do_transform=True) MapTransform.__init__(self, keys, allow_missing_keys) self.spatial_size = ensure_tuple(spatial_size) self.w_key = w_key @@ -676,11 +691,13 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> List[Dict[Hashable, n return results -class RandCropByPosNegLabeld(RandomizableTransform, MapTransform): +class RandCropByPosNegLabeld(Randomizable, MapTransform): """ Dictionary-based version :py:class:`monai.transforms.RandCropByPosNegLabel`. Crop random fixed sized regions with the center being a foreground or background voxel based on the Pos Neg Ratio. + Suppose all the expected fields specified by `keys` have same shape, + and add `patch_index` to the corresponding meta data. And will return a list of dictionaries for all the cropped images. Args: @@ -706,6 +723,9 @@ class RandCropByPosNegLabeld(RandomizableTransform, MapTransform): `image_threshold`, and randomly select crop centers based on them, need to provide `fg_indices_key` and `bg_indices_key` together, expect to be 1 dim array of spatial indices after flattening. a typical usage is to call `FgBgToIndicesd` transform first and cache the results. + meta_key_postfix: use `key_{postfix}` to fetch the meta data according to the key data, + default is `meta_dict`, the meta data is a dictionary object. + used to add `patch_index` to the meta dict. 
allow_missing_keys: don't raise exception if key is missing. Raises: @@ -726,9 +746,9 @@ def __init__( image_threshold: float = 0.0, fg_indices_key: Optional[str] = None, bg_indices_key: Optional[str] = None, + meta_key_postfix: str = "meta_dict", allow_missing_keys: bool = False, ) -> None: - RandomizableTransform.__init__(self) MapTransform.__init__(self, keys, allow_missing_keys) self.label_key = label_key self.spatial_size: Union[Tuple[int, ...], Sequence[int], int] = spatial_size @@ -742,6 +762,7 @@ def __init__( self.image_threshold = image_threshold self.fg_indices_key = fg_indices_key self.bg_indices_key = bg_indices_key + self.meta_key_postfix = meta_key_postfix self.centers: Optional[List[List[np.ndarray]]] = None def randomize( @@ -783,6 +804,12 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> List[Dict[Hashable, n # fill in the extra keys with unmodified data for key in set(data.keys()).difference(set(self.keys)): results[i][key] = data[key] + # add `patch_index` to the meta data + for key in self.key_iterator(d): + meta_data_key = f"{key}_{self.meta_key_postfix}" + if meta_data_key not in results[i]: + results[i][meta_data_key] = {} # type: ignore + results[i][meta_data_key][Key.PATCH_INDEX] = i return results @@ -800,6 +827,7 @@ class ResizeWithPadOrCropd(MapTransform, InvertibleTransform): ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``} One of the listed string values or a user supplied function for padding. Defaults to ``"constant"``. See also: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html + It also can be a sequence of string, each element corresponds to a key in ``keys``. allow_missing_keys: don't raise exception if key is missing. 
""" @@ -808,18 +836,26 @@ def __init__( self, keys: KeysCollection, spatial_size: Union[Sequence[int], int], - mode: Union[NumpyPadMode, str] = NumpyPadMode.CONSTANT, + mode: NumpyPadModeSequence = NumpyPadMode.CONSTANT, allow_missing_keys: bool = False, ) -> None: super().__init__(keys, allow_missing_keys) - self.padcropper = ResizeWithPadOrCrop(spatial_size=spatial_size, mode=mode) + self.mode = ensure_tuple_rep(mode, len(self.keys)) + self.padcropper = ResizeWithPadOrCrop(spatial_size=spatial_size) def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: d = dict(data) - for key in self.key_iterator(d): + for key, m in self.key_iterator(d, self.mode): orig_size = d[key].shape[1:] - d[key] = self.padcropper(d[key]) - self.push_transform(d, key, orig_size=orig_size) + d[key] = self.padcropper(d[key], mode=m) + self.push_transform( + d, + key, + orig_size=orig_size, + extra_info={ + "mode": m.value if isinstance(m, Enum) else m, + }, + ) return d def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: diff --git a/monai/transforms/intensity/array.py b/monai/transforms/intensity/array.py index f89e381daa..62350d4ab0 100644 --- a/monai/transforms/intensity/array.py +++ b/monai/transforms/intensity/array.py @@ -122,6 +122,7 @@ def __init__(self, offsets: Union[Tuple[float, float], float], prob: float = 0.1 if len(offsets) != 2: raise AssertionError("offsets should be a number or pair of numbers.") self.offsets = (min(offsets), max(offsets)) + self._offset = self.offsets[0] def randomize(self, data: Optional[Any] = None) -> None: self._offset = self.R.uniform(low=self.offsets[0], high=self.offsets[1]) @@ -217,6 +218,7 @@ def __init__( if len(factors) != 2: raise AssertionError("factors should be a number or pair of numbers.") self.factors = (min(factors), max(factors)) + self.factor = self.factors[0] self.nonzero = nonzero self.channel_wise = channel_wise self.dtype = dtype @@ -294,6 +296,7 @@ def 
__init__(self, factors: Union[Tuple[float, float], float], prob: float = 0.1 if len(factors) != 2: raise AssertionError("factors should be a number or pair of numbers.") self.factors = (min(factors), max(factors)) + self.factor = self.factors[0] def randomize(self, data: Optional[Any] = None) -> None: self.factor = self.R.uniform(low=self.factors[0], high=self.factors[1]) @@ -874,6 +877,10 @@ def __init__( self.sigma_z = sigma_z self.approx = approx + self.x = self.sigma_x[0] + self.y = self.sigma_y[0] + self.z = self.sigma_z[0] + def randomize(self, data: Optional[Any] = None) -> None: super().randomize(None) self.x = self.R.uniform(low=self.sigma_x[0], high=self.sigma_x[1]) diff --git a/monai/transforms/intensity/dictionary.py b/monai/transforms/intensity/dictionary.py index 517c34cbf2..a35e5c8ea6 100644 --- a/monai/transforms/intensity/dictionary.py +++ b/monai/transforms/intensity/dictionary.py @@ -206,6 +206,7 @@ def __init__( if len(offsets) != 2: raise AssertionError("offsets should be a number or pair of numbers.") self.offsets = (min(offsets), max(offsets)) + self._offset = self.offsets[0] def randomize(self, data: Optional[Any] = None) -> None: self._offset = self.R.uniform(low=self.offsets[0], high=self.offsets[1]) @@ -293,6 +294,7 @@ def __init__( if len(factors) != 2: raise AssertionError("factors should be a number or pair of numbers.") self.factors = (min(factors), max(factors)) + self.factor = self.factors[0] self.nonzero = nonzero self.channel_wise = channel_wise self.dtype = dtype @@ -380,6 +382,7 @@ def __init__( if len(factors) != 2: raise AssertionError("factors should be a number or pair of numbers.") self.factors = (min(factors), max(factors)) + self.factor = self.factors[0] def randomize(self, data: Optional[Any] = None) -> None: self.factor = self.R.uniform(low=self.factors[0], high=self.factors[1]) @@ -760,11 +763,11 @@ def __init__( ) -> None: MapTransform.__init__(self, keys, allow_missing_keys) RandomizableTransform.__init__(self, prob) 
- self.sigma_x = sigma_x - self.sigma_y = sigma_y - self.sigma_z = sigma_z + self.sigma_x, self.sigma_y, self.sigma_z = sigma_x, sigma_y, sigma_z self.approx = approx + self.x, self.y, self.z = self.sigma_x[0], self.sigma_y[0], self.sigma_z[0] + def randomize(self, data: Optional[Any] = None) -> None: super().randomize(None) self.x = self.R.uniform(low=self.sigma_x[0], high=self.sigma_x[1]) diff --git a/monai/transforms/inverse.py b/monai/transforms/inverse.py index 3e5b68e8e4..3baef91717 100644 --- a/monai/transforms/inverse.py +++ b/monai/transforms/inverse.py @@ -76,7 +76,7 @@ def push_transform( info = { InverseKeys.CLASS_NAME: self.__class__.__name__, InverseKeys.ID: id(self), - InverseKeys.ORIG_SIZE: orig_size or data[key].shape[1:], + InverseKeys.ORIG_SIZE: orig_size or (data[key].shape[1:] if hasattr(data[key], "shape") else None), } if extra_info is not None: info[InverseKeys.EXTRA_INFO] = extra_info diff --git a/monai/transforms/io/array.py b/monai/transforms/io/array.py index 61439c0355..7a7fcb8cda 100644 --- a/monai/transforms/io/array.py +++ b/monai/transforms/io/array.py @@ -270,6 +270,12 @@ def __init__( self.save_batch = save_batch def __call__(self, img: Union[torch.Tensor, np.ndarray], meta_data: Optional[Dict] = None): + """ + Args: + img: target data content that save into file. + meta_data: key-value pairs of meta_data corresponding to the data. + + """ if self.save_batch: self.saver.save_batch(img, meta_data) else: diff --git a/monai/transforms/io/dictionary.py b/monai/transforms/io/dictionary.py index 6a82ff2267..413f83b62d 100644 --- a/monai/transforms/io/dictionary.py +++ b/monai/transforms/io/dictionary.py @@ -124,15 +124,18 @@ class SaveImaged(MapTransform): """ Dictionary-based wrapper of :py:class:`monai.transforms.SaveImage`. - NB: image should include channel dimension: [B],C,H,W,[D]. + Note: + Image should include channel dimension: [B],C,H,W,[D]. + If the data is a patch of big image, will append the patch index to filename. 
Args: keys: keys of the corresponding items to be transformed. See also: :py:class:`monai.transforms.compose.MapTransform` meta_key_postfix: `key_{postfix}` was used to store the metadata in `LoadImaged`. - So need the key to extract metadata to save images, default is `meta_dict`. - The meta data is a dictionary object, if no corresponding metadata, set to `None`. - For example, for data with key `image`, the metadata by default is in `image_meta_dict`. + so need the key to extract metadata to save images, default is `meta_dict`. + for example, for data with key `image`, the metadata by default is in `image_meta_dict`. + the meta data is a dictionary object which contains: filename, affine, original_shape, etc. + if no corresponding metadata, set to `None`. output_dir: output image directory. output_postfix: a string appended to all output file names, default to `trans`. output_ext: output file extension name, available extensions: `.nii.gz`, `.nii`, `.png`. diff --git a/monai/transforms/post/array.py b/monai/transforms/post/array.py index 6462753cf9..7ac0e6799c 100644 --- a/monai/transforms/post/array.py +++ b/monai/transforms/post/array.py @@ -21,6 +21,7 @@ import torch.nn.functional as F from monai.networks import one_hot +from monai.networks.layers import GaussianFilter from monai.transforms.transform import Transform from monai.transforms.utils import get_largest_connected_component_mask from monai.utils import ensure_tuple @@ -422,3 +423,97 @@ def __call__(self, img: Union[Sequence[torch.Tensor], torch.Tensor]) -> torch.Te return torch.argmax(img_, dim=1, keepdim=has_ch_dim) # for One-Hot data, round the float number to 0 or 1 return torch.round(img_) + + +class ProbNMS(Transform): + """ + Performs probability based non-maximum suppression (NMS) on the probabilities map via + iteratively selecting the coordinate with highest probability and then move it as well + as its surrounding values. The remove range is determined by the parameter `box_size`. 
+ If multiple coordinates have the same highest probability, only one of them will be + selected. + + Args: + spatial_dims: number of spatial dimensions of the input probabilities map. + Defaults to 2. + sigma: the standard deviation for gaussian filter. + It could be a single value, or `spatial_dims` number of values. Defaults to 0.0. + prob_threshold: the probability threshold, the function will stop searching if + the highest probability is no larger than the threshold. The value should be + no less than 0.0. Defaults to 0.5. + box_size: the box size (in pixel) to be removed around the pixel with the maximum probability. + It can be an integer that defines the size of a square or cube, + or a list containing different values for each dimension. Defaults to 48. + + Return: + a list of selected lists, where inner lists contain probability and coordinates. + For example, for 3D input, the inner lists are in the form of [probability, x, y, z]. + + Raises: + ValueError: When ``prob_threshold`` is less than 0.0. + ValueError: When ``box_size`` is a list or tuple, and its length is not equal to `spatial_dims`. + ValueError: When ``box_size`` contains a value less than 1. 
+ + """ + + def __init__( + self, + spatial_dims: int = 2, + sigma: Union[Sequence[float], float, Sequence[torch.Tensor], torch.Tensor] = 0.0, + prob_threshold: float = 0.5, + box_size: Union[int, Sequence[int]] = 48, + ) -> None: + self.sigma = sigma + self.spatial_dims = spatial_dims + if self.sigma != 0: + self.filter = GaussianFilter(spatial_dims=spatial_dims, sigma=sigma) + if prob_threshold < 0: + raise ValueError("prob_threshold should be no less than 0.0.") + self.prob_threshold = prob_threshold + if isinstance(box_size, int): + self.box_size = np.asarray([box_size] * spatial_dims) + else: + if len(box_size) != spatial_dims: + raise ValueError("the sequence length of box_size should be the same as spatial_dims.") + self.box_size = np.asarray(box_size) + if self.box_size.min() <= 0: + raise ValueError("box_size should be larger than 0.") + + self.box_lower_bd = self.box_size // 2 + self.box_upper_bd = self.box_size - self.box_lower_bd + + def __call__( + self, + prob_map: Union[np.ndarray, torch.Tensor], + ): + """ + prob_map: the input probabilities map, it must have shape (H[, W, ...]). 
+ """ + if self.sigma != 0: + if not isinstance(prob_map, torch.Tensor): + prob_map = torch.as_tensor(prob_map, dtype=torch.float) + self.filter.to(prob_map) + prob_map = self.filter(prob_map) + else: + if not isinstance(prob_map, torch.Tensor): + prob_map = prob_map.copy() + + if isinstance(prob_map, torch.Tensor): + prob_map = prob_map.detach().cpu().numpy() + + prob_map_shape = prob_map.shape + + outputs = [] + while np.max(prob_map) > self.prob_threshold: + max_idx = np.unravel_index(prob_map.argmax(), prob_map_shape) + prob_max = prob_map[max_idx] + max_idx_arr = np.asarray(max_idx) + outputs.append([prob_max] + list(max_idx_arr)) + + idx_min_range = (max_idx_arr - self.box_lower_bd).clip(0, None) + idx_max_range = (max_idx_arr + self.box_upper_bd).clip(None, prob_map_shape) + # for each dimension, set values during index ranges to 0 + slices = tuple(slice(idx_min_range[i], idx_max_range[i]) for i in range(self.spatial_dims)) + prob_map[slices] = 0 + + return outputs diff --git a/monai/transforms/post/dictionary.py b/monai/transforms/post/dictionary.py index 6d28f780d4..52bde4ab79 100644 --- a/monai/transforms/post/dictionary.py +++ b/monai/transforms/post/dictionary.py @@ -28,6 +28,7 @@ KeepLargestConnectedComponent, LabelToContour, MeanEnsemble, + ProbNMS, VoteEnsemble, ) from monai.transforms.transform import MapTransform @@ -340,10 +341,66 @@ def __call__(self, data: dict) -> List[dict]: return monai.data.decollate_batch(data, self.batch_size) +class ProbNMSd(MapTransform): + """ + Performs probability based non-maximum suppression (NMS) on the probabilities map via + iteratively selecting the coordinate with highest probability and then move it as well + as its surrounding values. The remove range is determined by the parameter `box_size`. + If multiple coordinates have the same highest probability, only one of them will be + selected. + + Args: + spatial_dims: number of spatial dimensions of the input probabilities map. + Defaults to 2. 
+ sigma: the standard deviation for gaussian filter. + It could be a single value, or `spatial_dims` number of values. Defaults to 0.0. + prob_threshold: the probability threshold, the function will stop searching if + the highest probability is no larger than the threshold. The value should be + no less than 0.0. Defaults to 0.5. + box_size: the box size (in pixel) to be removed around the pixel with the maximum probability. + It can be an integer that defines the size of a square or cube, + or a list containing different values for each dimension. Defaults to 48. + + Return: + a list of selected lists, where inner lists contain probability and coordinates. + For example, for 3D input, the inner lists are in the form of [probability, x, y, z]. + + Raises: + ValueError: When ``prob_threshold`` is less than 0.0. + ValueError: When ``box_size`` is a list or tuple, and its length is not equal to `spatial_dims`. + ValueError: When ``box_size`` contains a value less than 1. + + """ + + def __init__( + self, + keys: KeysCollection, + spatial_dims: int = 2, + sigma: Union[Sequence[float], float, Sequence[torch.Tensor], torch.Tensor] = 0.0, + prob_threshold: float = 0.5, + box_size: Union[int, Sequence[int]] = 48, + allow_missing_keys: bool = False, + ) -> None: + super().__init__(keys, allow_missing_keys) + self.prob_nms = ProbNMS( + spatial_dims=spatial_dims, + sigma=sigma, + prob_threshold=prob_threshold, + box_size=box_size, + ) + + def __call__(self, data: Mapping[Hashable, Union[np.ndarray, torch.Tensor]]): + d = dict(data) + for key in self.key_iterator(d): + d[key] = self.prob_nms(d[key]) + return d + + ActivationsD = ActivationsDict = Activationsd AsDiscreteD = AsDiscreteDict = AsDiscreted KeepLargestConnectedComponentD = KeepLargestConnectedComponentDict = KeepLargestConnectedComponentd LabelToContourD = LabelToContourDict = LabelToContourd MeanEnsembleD = MeanEnsembleDict = MeanEnsembled +ProbNMSD = ProbNMSDict = ProbNMSd VoteEnsembleD = VoteEnsembleDict = 
VoteEnsembled DecollateD = DecollateDict = Decollated diff --git a/monai/transforms/spatial/array.py b/monai/transforms/spatial/array.py index 1c096ba743..a3eb055f7e 100644 --- a/monai/transforms/spatial/array.py +++ b/monai/transforms/spatial/array.py @@ -23,7 +23,7 @@ from monai.data.utils import compute_shape_offset, to_affine_nd, zoom_affine from monai.networks.layers import AffineTransform, GaussianFilter, grid_pull from monai.transforms.croppad.array import CenterSpatialCrop -from monai.transforms.transform import RandomizableTransform, Transform +from monai.transforms.transform import Randomizable, RandomizableTransform, Transform from monai.transforms.utils import ( create_control_grid, create_grid, @@ -790,7 +790,7 @@ class RandAxisFlip(RandomizableTransform): """ def __init__(self, prob: float = 0.1) -> None: - RandomizableTransform.__init__(self, min(max(prob, 0.0), 1.0)) + RandomizableTransform.__init__(self, prob) self._axis: Optional[int] = None def randomize(self, data: np.ndarray) -> None: @@ -1004,7 +1004,7 @@ def __call__( return grid if self.as_tensor_output else np.asarray(grid.cpu().numpy()), affine -class RandAffineGrid(RandomizableTransform): +class RandAffineGrid(Randomizable): """ Generate randomised affine grid. """ @@ -1101,7 +1101,7 @@ def get_transformation_matrix(self) -> Optional[Union[np.ndarray, torch.Tensor]] return self.affine -class RandDeformGrid(RandomizableTransform): +class RandDeformGrid(Randomizable): """ Generate random deformation grid. 
""" diff --git a/monai/transforms/spatial/dictionary.py b/monai/transforms/spatial/dictionary.py index 86c94302a1..9f782bf8fc 100644 --- a/monai/transforms/spatial/dictionary.py +++ b/monai/transforms/spatial/dictionary.py @@ -16,6 +16,7 @@ """ from copy import deepcopy +from enum import Enum from typing import Any, Dict, Hashable, Mapping, Optional, Sequence, Tuple, Union import numpy as np @@ -208,16 +209,24 @@ def __call__( align_corners=align_corners, dtype=dtype, ) - self.push_transform(d, key, extra_info={"meta_data_key": meta_data_key, "old_affine": old_affine}) + self.push_transform( + d, + key, + extra_info={ + "meta_data_key": meta_data_key, + "old_affine": old_affine, + "mode": mode.value if isinstance(mode, Enum) else mode, + "padding_mode": padding_mode.value if isinstance(padding_mode, Enum) else padding_mode, + "align_corners": align_corners if align_corners is not None else "none", + }, + ) # set the 'affine' key meta_data["affine"] = new_affine return d def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: d = deepcopy(dict(data)) - for key, mode, padding_mode, align_corners, dtype in self.key_iterator( - d, self.mode, self.padding_mode, self.align_corners, self.dtype - ): + for key, dtype in self.key_iterator(d, self.dtype): transform = self.get_most_recent_transform(d, key) if self.spacing_transform.diagonal: raise RuntimeError( @@ -227,6 +236,9 @@ def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndar # Create inverse transform meta_data = d[transform[InverseKeys.EXTRA_INFO]["meta_data_key"]] old_affine = np.array(transform[InverseKeys.EXTRA_INFO]["old_affine"]) + mode = transform[InverseKeys.EXTRA_INFO]["mode"] + padding_mode = transform[InverseKeys.EXTRA_INFO]["padding_mode"] + align_corners = transform[InverseKeys.EXTRA_INFO]["align_corners"] orig_pixdim = np.sqrt(np.sum(np.square(old_affine), 0))[:-1] inverse_transform = Spacing(orig_pixdim, diagonal=self.spacing_transform.diagonal) # 
Apply inverse @@ -235,7 +247,7 @@ def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndar affine=meta_data["affine"], mode=mode, padding_mode=padding_mode, - align_corners=align_corners, + align_corners=False if align_corners == "none" else align_corners, dtype=dtype, ) meta_data["affine"] = new_affine @@ -483,17 +495,26 @@ def __init__( def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: d = dict(data) for key, mode, align_corners in self.key_iterator(d, self.mode, self.align_corners): - self.push_transform(d, key) + self.push_transform( + d, + key, + extra_info={ + "mode": mode.value if isinstance(mode, Enum) else mode, + "align_corners": align_corners if align_corners is not None else "none", + }, + ) d[key] = self.resizer(d[key], mode=mode, align_corners=align_corners) return d def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: d = deepcopy(dict(data)) - for key, mode, align_corners in self.key_iterator(d, self.mode, self.align_corners): + for key in self.key_iterator(d): transform = self.get_most_recent_transform(d, key) orig_size = transform[InverseKeys.ORIG_SIZE] + mode = transform[InverseKeys.EXTRA_INFO]["mode"] + align_corners = transform[InverseKeys.EXTRA_INFO]["align_corners"] # Create inverse transform - inverse_transform = Resize(orig_size, mode, align_corners) + inverse_transform = Resize(orig_size, mode, None if align_corners == "none" else align_corners) # Apply inverse transform d[key] = inverse_transform(d[key]) # Remove the applied transform @@ -573,17 +594,28 @@ def __call__( for key, mode, padding_mode in self.key_iterator(d, self.mode, self.padding_mode): orig_size = d[key].shape[1:] d[key], affine = self.affine(d[key], mode=mode, padding_mode=padding_mode) - self.push_transform(d, key, orig_size=orig_size, extra_info={"affine": affine}) + self.push_transform( + d, + key, + orig_size=orig_size, + extra_info={ + "affine": affine, + "mode": mode.value 
if isinstance(mode, Enum) else mode, + "padding_mode": padding_mode.value if isinstance(padding_mode, Enum) else padding_mode, + }, + ) return d def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: d = deepcopy(dict(data)) - for key, mode, padding_mode in self.key_iterator(d, self.mode, self.padding_mode): + for key in self.key_iterator(d): transform = self.get_most_recent_transform(d, key) orig_size = transform[InverseKeys.ORIG_SIZE] # Create inverse transform fwd_affine = transform[InverseKeys.EXTRA_INFO]["affine"] + mode = transform[InverseKeys.EXTRA_INFO]["mode"] + padding_mode = transform[InverseKeys.EXTRA_INFO]["padding_mode"] inv_affine = np.linalg.inv(fwd_affine) affine_grid = AffineGrid(affine=inv_affine) @@ -701,18 +733,28 @@ def __call__( affine = torch.as_tensor(np.eye(len(sp_size) + 1), device=self.rand_affine.rand_affine_grid.device) for key, mode, padding_mode in self.key_iterator(d, self.mode, self.padding_mode): - self.push_transform(d, key, extra_info={"affine": affine}) + self.push_transform( + d, + key, + extra_info={ + "affine": affine, + "mode": mode.value if isinstance(mode, Enum) else mode, + "padding_mode": padding_mode.value if isinstance(padding_mode, Enum) else padding_mode, + }, + ) d[key] = self.rand_affine.resampler(d[key], grid, mode=mode, padding_mode=padding_mode) return d def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: d = deepcopy(dict(data)) - for key, mode, padding_mode in self.key_iterator(d, self.mode, self.padding_mode): + for key in self.key_iterator(d): transform = self.get_most_recent_transform(d, key) orig_size = transform[InverseKeys.ORIG_SIZE] # Create inverse transform fwd_affine = transform[InverseKeys.EXTRA_INFO]["affine"] + mode = transform[InverseKeys.EXTRA_INFO]["mode"] + padding_mode = transform[InverseKeys.EXTRA_INFO]["padding_mode"] inv_affine = np.linalg.inv(fwd_affine) affine_grid = AffineGrid(affine=inv_affine) @@ -1171,24 +1213,35 @@ 
def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.nda dtype=dtype, ) rot_mat = self.rotator.get_rotation_matrix() - self.push_transform(d, key, orig_size=orig_size, extra_info={"rot_mat": rot_mat}) + self.push_transform( + d, + key, + orig_size=orig_size, + extra_info={ + "rot_mat": rot_mat, + "mode": mode.value if isinstance(mode, Enum) else mode, + "padding_mode": padding_mode.value if isinstance(padding_mode, Enum) else padding_mode, + "align_corners": align_corners if align_corners is not None else "none", + }, + ) return d def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: d = deepcopy(dict(data)) - for key, mode, padding_mode, align_corners, dtype in self.key_iterator( - d, self.mode, self.padding_mode, self.align_corners, self.dtype - ): + for key, dtype in self.key_iterator(d, self.dtype): transform = self.get_most_recent_transform(d, key) # Create inverse transform fwd_rot_mat = transform[InverseKeys.EXTRA_INFO]["rot_mat"] + mode = transform[InverseKeys.EXTRA_INFO]["mode"] + padding_mode = transform[InverseKeys.EXTRA_INFO]["padding_mode"] + align_corners = transform[InverseKeys.EXTRA_INFO]["align_corners"] inv_rot_mat = np.linalg.inv(fwd_rot_mat) xform = AffineTransform( normalized=False, mode=mode, padding_mode=padding_mode, - align_corners=align_corners, + align_corners=False if align_corners == "none" else align_corners, reverse_indexing=True, ) output = xform( @@ -1283,10 +1336,6 @@ def randomize(self, data: Optional[Any] = None) -> None: def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: self.randomize() d = dict(data) - if not self._do_transform: - for key in self.keys: - self.push_transform(d, key, extra_info={"rot_mat": np.eye(d[key].ndim)}) - return d angle: Union[Sequence[float], float] = self.x if d[self.keys[0]].ndim == 3 else (self.x, self.y, self.z) rotator = Rotate( angle=angle, @@ -1296,34 +1345,48 @@ def __call__(self, data: Mapping[Hashable, 
np.ndarray]) -> Dict[Hashable, np.nda d, self.mode, self.padding_mode, self.align_corners, self.dtype ): orig_size = d[key].shape[1:] - d[key] = rotator( - d[key], - mode=mode, - padding_mode=padding_mode, - align_corners=align_corners, - dtype=dtype, + if self._do_transform: + d[key] = rotator( + d[key], + mode=mode, + padding_mode=padding_mode, + align_corners=align_corners, + dtype=dtype, + ) + rot_mat = rotator.get_rotation_matrix() + else: + rot_mat = np.eye(d[key].ndim) + self.push_transform( + d, + key, + orig_size=orig_size, + extra_info={ + "rot_mat": rot_mat, + "mode": mode.value if isinstance(mode, Enum) else mode, + "padding_mode": padding_mode.value if isinstance(padding_mode, Enum) else padding_mode, + "align_corners": align_corners if align_corners is not None else "none", + }, ) - rot_mat = rotator.get_rotation_matrix() - self.push_transform(d, key, orig_size=orig_size, extra_info={"rot_mat": rot_mat}) return d def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: d = deepcopy(dict(data)) - for key, mode, padding_mode, align_corners, dtype in self.key_iterator( - d, self.mode, self.padding_mode, self.align_corners, self.dtype - ): + for key, dtype in self.key_iterator(d, self.dtype): transform = self.get_most_recent_transform(d, key) # Check if random transform was actually performed (based on `prob`) if transform[InverseKeys.DO_TRANSFORM]: # Create inverse transform fwd_rot_mat = transform[InverseKeys.EXTRA_INFO]["rot_mat"] + mode = transform[InverseKeys.EXTRA_INFO]["mode"] + padding_mode = transform[InverseKeys.EXTRA_INFO]["padding_mode"] + align_corners = transform[InverseKeys.EXTRA_INFO]["align_corners"] inv_rot_mat = np.linalg.inv(fwd_rot_mat) xform = AffineTransform( normalized=False, mode=mode, padding_mode=padding_mode, - align_corners=align_corners, + align_corners=False if align_corners == "none" else align_corners, reverse_indexing=True, ) output = xform( @@ -1384,7 +1447,15 @@ def __call__(self, data: 
Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.nda for key, mode, padding_mode, align_corners in self.key_iterator( d, self.mode, self.padding_mode, self.align_corners ): - self.push_transform(d, key) + self.push_transform( + d, + key, + extra_info={ + "mode": mode.value if isinstance(mode, Enum) else mode, + "padding_mode": padding_mode.value if isinstance(padding_mode, Enum) else padding_mode, + "align_corners": align_corners if align_corners is not None else "none", + }, + ) d[key] = self.zoomer( d[key], mode=mode, @@ -1395,19 +1466,20 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.nda def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: d = deepcopy(dict(data)) - for key, mode, padding_mode, align_corners in self.key_iterator( - d, self.mode, self.padding_mode, self.align_corners - ): + for key in self.key_iterator(d): transform = self.get_most_recent_transform(d, key) # Create inverse transform zoom = np.array(self.zoomer.zoom) inverse_transform = Zoom(zoom=1 / zoom, keep_size=self.zoomer.keep_size) + mode = transform[InverseKeys.EXTRA_INFO]["mode"] + padding_mode = transform[InverseKeys.EXTRA_INFO]["padding_mode"] + align_corners = transform[InverseKeys.EXTRA_INFO]["align_corners"] # Apply inverse d[key] = inverse_transform( d[key], mode=mode, padding_mode=padding_mode, - align_corners=align_corners, + align_corners=None if align_corners == "none" else align_corners, ) # Size might be out by 1 voxel so pad d[key] = SpatialPad(transform[InverseKeys.ORIG_SIZE])(d[key]) @@ -1496,7 +1568,16 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.nda for key, mode, padding_mode, align_corners in self.key_iterator( d, self.mode, self.padding_mode, self.align_corners ): - self.push_transform(d, key, extra_info={"zoom": self._zoom}) + self.push_transform( + d, + key, + extra_info={ + "zoom": self._zoom, + "mode": mode.value if isinstance(mode, Enum) else mode, + 
"padding_mode": padding_mode.value if isinstance(padding_mode, Enum) else padding_mode, + "align_corners": align_corners if align_corners is not None else "none", + }, + ) if self._do_transform: d[key] = zoomer( d[key], @@ -1508,21 +1589,22 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.nda def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: d = deepcopy(dict(data)) - for key, mode, padding_mode, align_corners in self.key_iterator( - d, self.mode, self.padding_mode, self.align_corners - ): + for key in self.key_iterator(d): transform = self.get_most_recent_transform(d, key) # Check if random transform was actually performed (based on `prob`) if transform[InverseKeys.DO_TRANSFORM]: # Create inverse transform zoom = np.array(transform[InverseKeys.EXTRA_INFO]["zoom"]) + mode = transform[InverseKeys.EXTRA_INFO]["mode"] + padding_mode = transform[InverseKeys.EXTRA_INFO]["padding_mode"] + align_corners = transform[InverseKeys.EXTRA_INFO]["align_corners"] inverse_transform = Zoom(zoom=1 / zoom, keep_size=self.keep_size) # Apply inverse d[key] = inverse_transform( d[key], mode=mode, padding_mode=padding_mode, - align_corners=align_corners, + align_corners=None if align_corners == "none" else align_corners, ) # Size might be out by 1 voxel so pad d[key] = SpatialPad(transform[InverseKeys.ORIG_SIZE])(d[key]) diff --git a/monai/transforms/transform.py b/monai/transforms/transform.py index 6a22db1076..ff5f021739 100644 --- a/monai/transforms/transform.py +++ b/monai/transforms/transform.py @@ -180,17 +180,27 @@ class RandomizableTransform(Randomizable, Transform): """ An interface for handling random state locally, currently based on a class variable `R`, which is an instance of `np.random.RandomState`. - This is mainly for randomized data augmentation transforms. For example:: + This class introduces a randomized flag `_do_transform`, is mainly for randomized data augmentation transforms. 
+ For example: - class RandShiftIntensity(RandomizableTransform): - def randomize(): + .. code-block:: python + + from monai.transforms import RandomizableTransform + + class RandShiftIntensity100(RandomizableTransform): + def randomize(self): + super().randomize(None) self._offset = self.R.uniform(low=0, high=100) + def __call__(self, img): self.randomize() + if not self._do_transform: + return img return img + self._offset transform = RandShiftIntensity() transform.set_random_state(seed=0) + print(transform(10)) """ diff --git a/monai/transforms/utility/array.py b/monai/transforms/utility/array.py index 4ad0676fba..8f060eed13 100644 --- a/monai/transforms/utility/array.py +++ b/monai/transforms/utility/array.py @@ -16,24 +16,18 @@ import logging import sys import time -from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Sequence, Tuple, Union +from typing import Callable, Dict, List, Optional, Sequence, Tuple, Union import numpy as np import torch from monai.config import DtypeLike, NdarrayTensor -from monai.transforms.transform import RandomizableTransform, Transform +from monai.transforms.transform import Randomizable, Transform from monai.transforms.utils import extreme_points_to_image, get_extreme_points, map_binary_to_indices from monai.utils import ensure_tuple, min_version, optional_import -if TYPE_CHECKING: - from PIL.Image import Image as PILImageImage - from PIL.Image import fromarray as pil_image_fromarray - - has_pil = True -else: - PILImageImage, has_pil = optional_import("PIL.Image", name="Image") - pil_image_fromarray, _ = optional_import("PIL.Image", name="fromarray") +PILImageImage, has_pil = optional_import("PIL.Image", name="Image") +pil_image_fromarray, _ = optional_import("PIL.Image", name="fromarray") __all__ = [ "Identity", @@ -166,7 +160,7 @@ def __call__(self, img: np.ndarray, meta_dict: Optional[Dict] = None): Apply the transform to `img`. 
""" if not isinstance(meta_dict, dict): - raise ValueError("meta_dict must be a dictionay data.") + raise ValueError("meta_dict must be a dictionary data.") channel_dim = meta_dict.get("original_channel_dim", None) @@ -302,7 +296,7 @@ class ToTensor(Transform): Converts the input image to a tensor without applying any other transformations. """ - def __call__(self, img: Union[np.ndarray, torch.Tensor, PILImageImage]) -> torch.Tensor: + def __call__(self, img) -> torch.Tensor: """ Apply the transform to `img` and make it contiguous. """ @@ -316,7 +310,7 @@ class ToNumpy(Transform): Converts the input data to numpy array, can support list or tuple of numbers and PyTorch Tensor. """ - def __call__(self, img: Union[List, Tuple, np.ndarray, torch.Tensor, PILImageImage]) -> np.ndarray: + def __call__(self, img) -> np.ndarray: """ Apply the transform to `img` and make it contiguous. """ @@ -330,7 +324,7 @@ class ToPIL(Transform): Converts the input image (in the form of NumPy array or PyTorch Tensor) to PIL image """ - def __call__(self, img: Union[np.ndarray, torch.Tensor, PILImageImage]) -> PILImageImage: + def __call__(self, img): """ Apply the transform to `img` and make it contiguous. """ @@ -673,7 +667,7 @@ def __call__(self, img: np.ndarray) -> np.ndarray: return np.stack(result, axis=0) -class AddExtremePointsChannel(RandomizableTransform): +class AddExtremePointsChannel(Randomizable): """ Add extreme points of label to the image as a new channel. This transform generates extreme point from label and applies a gaussian filter. 
The pixel values in points image are rescaled diff --git a/monai/transforms/utility/dictionary.py b/monai/transforms/utility/dictionary.py index 63ed6ec305..67da9ceb35 100644 --- a/monai/transforms/utility/dictionary.py +++ b/monai/transforms/utility/dictionary.py @@ -17,13 +17,15 @@ import copy import logging -from typing import TYPE_CHECKING, Any, Callable, Dict, Hashable, List, Mapping, Optional, Sequence, Tuple, Union +from copy import deepcopy +from typing import Any, Callable, Dict, Hashable, List, Mapping, Optional, Sequence, Tuple, Union import numpy as np import torch from monai.config import DtypeLike, KeysCollection, NdarrayTensor -from monai.transforms.transform import MapTransform, RandomizableTransform +from monai.transforms.inverse import InvertibleTransform +from monai.transforms.transform import MapTransform, Randomizable from monai.transforms.utility.array import ( AddChannel, AsChannelFirst, @@ -48,14 +50,7 @@ ToTensor, ) from monai.transforms.utils import extreme_points_to_image, get_extreme_points -from monai.utils import ensure_tuple, ensure_tuple_rep, optional_import - -if TYPE_CHECKING: - from PIL.Image import Image as PILImageImage - - has_pil = True -else: - PILImageImage, has_pil = optional_import("PIL.Image", name="Image") +from monai.utils import ensure_tuple, ensure_tuple_rep __all__ = [ "Identityd", @@ -386,7 +381,7 @@ def __call__( return d -class ToTensord(MapTransform): +class ToTensord(MapTransform, InvertibleTransform): """ Dictionary-based wrapper of :py:class:`monai.transforms.ToTensor`. 
""" @@ -401,14 +396,24 @@ def __init__(self, keys: KeysCollection, allow_missing_keys: bool = False) -> No super().__init__(keys, allow_missing_keys) self.converter = ToTensor() - def __call__( - self, data: Mapping[Hashable, Union[np.ndarray, torch.Tensor, PILImageImage]] - ) -> Dict[Hashable, Union[np.ndarray, torch.Tensor, PILImageImage]]: + def __call__(self, data: Mapping[Hashable, Any]) -> Dict[Hashable, Any]: d = dict(data) for key in self.key_iterator(d): + self.push_transform(d, key) d[key] = self.converter(d[key]) return d + def inverse(self, data: Mapping[Hashable, Any]) -> Dict[Hashable, Any]: + d = deepcopy(dict(data)) + for key in self.key_iterator(d): + # Create inverse transform + inverse_transform = ToNumpy() + # Apply inverse + d[key] = inverse_transform(d[key]) + # Remove the applied transform + self.pop_transform(d, key) + return d + class ToNumpyd(MapTransform): """ @@ -425,9 +430,7 @@ def __init__(self, keys: KeysCollection, allow_missing_keys: bool = False) -> No super().__init__(keys, allow_missing_keys) self.converter = ToNumpy() - def __call__( - self, data: Mapping[Hashable, Union[np.ndarray, torch.Tensor, PILImageImage]] - ) -> Dict[Hashable, Union[np.ndarray, torch.Tensor, PILImageImage]]: + def __call__(self, data: Mapping[Hashable, Any]) -> Dict[Hashable, Any]: d = dict(data) for key in self.key_iterator(d): d[key] = self.converter(d[key]) @@ -449,9 +452,7 @@ def __init__(self, keys: KeysCollection, allow_missing_keys: bool = False) -> No super().__init__(keys, allow_missing_keys) self.converter = ToPIL() - def __call__( - self, data: Mapping[Hashable, Union[np.ndarray, torch.Tensor, PILImageImage]] - ) -> Dict[Hashable, Union[np.ndarray, torch.Tensor, PILImageImage]]: + def __call__(self, data: Mapping[Hashable, Any]) -> Dict[Hashable, Any]: d = dict(data) for key in self.key_iterator(d): d[key] = self.converter(d[key]) @@ -669,10 +670,6 @@ def __init__(self, keys: KeysCollection, name: str, dim: int = 0, allow_missing_ name: the 
name corresponding to the key to store the concatenated data. dim: on which dimension to concatenate the items, default is 0. allow_missing_keys: don't raise exception if key is missing. - - Raises: - ValueError: When insufficient keys are given (``len(self.keys) < 2``). - """ super().__init__(keys, allow_missing_keys) self.name = name @@ -748,9 +745,9 @@ def __call__(self, data): return d -class RandLambdad(Lambdad, RandomizableTransform): +class RandLambdad(Lambdad, Randomizable): """ - RandomizableTransform version :py:class:`monai.transforms.Lambdad`, the input `func` contains random logic. + Randomizable version :py:class:`monai.transforms.Lambdad`, the input `func` contains random logic. It's a randomizable transform so `CacheDataset` will not execute it and cache the results. Args: @@ -870,7 +867,7 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.nda return d -class AddExtremePointsChanneld(RandomizableTransform, MapTransform): +class AddExtremePointsChanneld(Randomizable, MapTransform): """ Dictionary-based wrapper of :py:class:`monai.transforms.AddExtremePointsChannel`. 
diff --git a/monai/transforms/utils.py b/monai/transforms/utils.py index eb1b194c96..c08b786e98 100644 --- a/monai/transforms/utils.py +++ b/monai/transforms/utils.py @@ -22,8 +22,18 @@ from monai.networks.layers import GaussianFilter from monai.transforms.compose import Compose from monai.transforms.transform import MapTransform -from monai.utils import ensure_tuple, ensure_tuple_rep, ensure_tuple_size, fall_back_tuple, min_version, optional_import -from monai.utils.misc import issequenceiterable +from monai.utils import ( + GridSampleMode, + InterpolateMode, + InverseKeys, + ensure_tuple, + ensure_tuple_rep, + ensure_tuple_size, + fall_back_tuple, + issequenceiterable, + min_version, + optional_import, +) measure, _ = optional_import("skimage.measure", "0.14.2", min_version) @@ -53,6 +63,7 @@ "extreme_points_to_image", "map_spatial_axes", "allow_missing_keys_mode", + "convert_inverse_interp_mode", ] @@ -695,7 +706,7 @@ def map_spatial_axes( The default `None` will convert to all the spatial axes of the image. If axis is negative it counts from the last to the first axis. If axis is a tuple of ints. - channel_first: the image data is channel first or channel last, defaut to channel first. + channel_first: the image data is channel first or channel last, default to channel first. """ if spatial_axes is None: @@ -756,3 +767,36 @@ def allow_missing_keys_mode(transform: Union[MapTransform, Compose, Tuple[MapTra # Revert for t, o_s in zip(transforms, orig_states): t.allow_missing_keys = o_s + + +def convert_inverse_interp_mode(trans_info: List, mode: str = "nearest", align_corners: Optional[bool] = None): + """ + Change the interpolation mode when inverting spatial transforms, default to "nearest". + This function modifies trans_info's `InverseKeys.EXTRA_INFO`. + + See also: :py:class:`monai.transform.inverse.InvertibleTransform` + + Args: + trans_info: transforms inverse information list, contains context of every invertible transform. 
+ mode: target interpolation mode to convert, default to "nearest" as it's usually used to save the mode output. + align_corners: target align corner value in PyTorch interpolation API, need to align with the `mode`. + + """ + interp_modes = [i.value for i in InterpolateMode] + [i.value for i in GridSampleMode] + + # set to string for DataLoader collation + align_corners_ = "none" if align_corners is None else align_corners + + for item in ensure_tuple(trans_info): + if InverseKeys.EXTRA_INFO in item: + orig_mode = item[InverseKeys.EXTRA_INFO].get("mode", None) + if orig_mode is not None: + if orig_mode[0] in interp_modes: + item[InverseKeys.EXTRA_INFO]["mode"] = [mode for _ in range(len(mode))] + elif orig_mode in interp_modes: + item[InverseKeys.EXTRA_INFO]["mode"] = mode + if "align_corners" in item[InverseKeys.EXTRA_INFO]: + if issequenceiterable(item[InverseKeys.EXTRA_INFO]["align_corners"]): + item[InverseKeys.EXTRA_INFO]["align_corners"] = [align_corners_ for _ in range(len(mode))] + else: + item[InverseKeys.EXTRA_INFO]["align_corners"] = align_corners_ diff --git a/monai/utils/__init__.py b/monai/utils/__init__.py index f6a137f47d..d622ce96ae 100644 --- a/monai/utils/__init__.py +++ b/monai/utils/__init__.py @@ -69,6 +69,5 @@ min_version, optional_import, ) -from .prob_nms import ProbNMS from .profiling import PerfContext, torch_profiler_full, torch_profiler_time_cpu_gpu, torch_profiler_time_end_to_end from .state_cacher import StateCacher diff --git a/monai/utils/misc.py b/monai/utils/misc.py index ee0963548c..bd8e46d8b5 100644 --- a/monai/utils/misc.py +++ b/monai/utils/misc.py @@ -358,3 +358,4 @@ class ImageMetaKey: """ FILENAME_OR_OBJ = "filename_or_obj" + PATCH_INDEX = "patch_index" diff --git a/monai/utils/module.py b/monai/utils/module.py index 0e11a6531d..b51b2820a8 100644 --- a/monai/utils/module.py +++ b/monai/utils/module.py @@ -11,6 +11,7 @@ import inspect import sys +import warnings from importlib import import_module from pkgutil import 
walk_packages from re import match @@ -95,17 +96,21 @@ def min_version(the_module, min_version_str: str = "") -> bool: Returns True if the module's version is greater or equal to the 'min_version'. When min_version_str is not provided, it always returns True. """ - if min_version_str: - mod_version = tuple(int(x) for x in the_module.__version__.split(".")[:2]) - required = tuple(int(x) for x in min_version_str.split(".")[:2]) - return mod_version >= required - return True # always valid version + if not min_version_str or not hasattr(the_module, "__version__"): + return True # always valid version + + mod_version = tuple(int(x) for x in the_module.__version__.split(".")[:2]) + required = tuple(int(x) for x in min_version_str.split(".")[:2]) + return mod_version >= required def exact_version(the_module, version_str: str = "") -> bool: """ Returns True if the module's __version__ matches version_str """ + if not hasattr(the_module, "__version__"): + warnings.warn(f"{the_module} has no attribute __version__ in exact_version check.") + return False return bool(the_module.__version__ == version_str) @@ -250,21 +255,11 @@ def has_option(obj, keywords: Union[str, Sequence[str]]) -> bool: def get_package_version(dep_name, default="NOT INSTALLED or UNKNOWN VERSION."): """ Try to load package and get version. If not found, return `default`. - - If the package was already loaded, leave it. If wasn't previously loaded, unload it. 
""" - dep_ver = default - dep_already_loaded = dep_name not in sys.modules - dep, has_dep = optional_import(dep_name) - if has_dep: - if hasattr(dep, "__version__"): - dep_ver = dep.__version__ - # if not previously loaded, unload it - if not dep_already_loaded: - del dep - del sys.modules[dep_name] - return dep_ver + if has_dep and hasattr(dep, "__version__"): + return dep.__version__ + return default def get_torch_version_tuple(): diff --git a/monai/utils/prob_nms.py b/monai/utils/prob_nms.py deleted file mode 100644 index c789dab0bb..0000000000 --- a/monai/utils/prob_nms.py +++ /dev/null @@ -1,100 +0,0 @@ -from typing import List, Sequence, Tuple, Union - -import numpy as np -import torch - -from monai.networks.layers import GaussianFilter - - -class ProbNMS: - """ - Performs probability based non-maximum suppression (NMS) on the probabilities map via - iteratively selecting the coordinate with highest probability and then move it as well - as its surrounding values. The remove range is determined by the parameter `box_size`. - If multiple coordinates have the same highest probability, only one of them will be - selected. - - Args: - spatial_dims: number of spatial dimensions of the input probabilities map. - Defaults to 2. - sigma: the standard deviation for gaussian filter. - It could be a single value, or `spatial_dims` number of values. Defaults to 0.0. - prob_threshold: the probability threshold, the function will stop searching if - the highest probability is no larger than the threshold. The value should be - no less than 0.0. Defaults to 0.5. - box_size: the box size (in pixel) to be removed around the the pixel with the maximum probability. - It can be an integer that defines the size of a square or cube, - or a list containing different values for each dimensions. Defaults to 48. - - Return: - a list of selected lists, where inner lists contain probability and coordinates. 
- For example, for 3D input, the inner lists are in the form of [probability, x, y, z]. - - Raises: - ValueError: When ``prob_threshold`` is less than 0.0. - ValueError: When ``box_size`` is a list or tuple, and its length is not equal to `spatial_dims`. - ValueError: When ``box_size`` has a less than 1 value. - - """ - - def __init__( - self, - spatial_dims: int = 2, - sigma: Union[Sequence[float], float, Sequence[torch.Tensor], torch.Tensor] = 0.0, - prob_threshold: float = 0.5, - box_size: Union[int, List[int], Tuple[int]] = 48, - ) -> None: - self.sigma = sigma - self.spatial_dims = spatial_dims - if self.sigma != 0: - self.filter = GaussianFilter(spatial_dims=spatial_dims, sigma=sigma) - if prob_threshold < 0: - raise ValueError("prob_threshold should be no less than 0.0.") - self.prob_threshold = prob_threshold - if isinstance(box_size, int): - self.box_size = np.asarray([box_size] * spatial_dims) - else: - if len(box_size) != spatial_dims: - raise ValueError("the sequence length of box_size should be the same as spatial_dims.") - self.box_size = np.asarray(box_size) - if self.box_size.min() <= 0: - raise ValueError("box_size should be larger than 0.") - - self.box_lower_bd = self.box_size // 2 - self.box_upper_bd = self.box_size - self.box_lower_bd - - def __call__( - self, - probs_map: Union[np.ndarray, torch.Tensor], - ): - """ - probs_map: the input probabilities map, it must have shape (H[, W, ...]). 
- """ - if self.sigma != 0: - if not isinstance(probs_map, torch.Tensor): - probs_map = torch.as_tensor(probs_map, dtype=torch.float) - self.filter.to(probs_map) - probs_map = self.filter(probs_map) - else: - if not isinstance(probs_map, torch.Tensor): - probs_map = probs_map.copy() - - if isinstance(probs_map, torch.Tensor): - probs_map = probs_map.detach().cpu().numpy() - - probs_map_shape = probs_map.shape - - outputs = [] - while np.max(probs_map) > self.prob_threshold: - max_idx = np.unravel_index(probs_map.argmax(), probs_map_shape) - prob_max = probs_map[max_idx] - max_idx_arr = np.asarray(max_idx) - outputs.append([prob_max] + list(max_idx_arr)) - - idx_min_range = (max_idx_arr - self.box_lower_bd).clip(0, None) - idx_max_range = (max_idx_arr + self.box_upper_bd).clip(None, probs_map_shape) - # for each dimension, set values during index ranges to 0 - slices = tuple(slice(idx_min_range[i], idx_max_range[i]) for i in range(self.spatial_dims)) - probs_map[slices] = 0 - - return outputs diff --git a/monai/utils/state_cacher.py b/monai/utils/state_cacher.py index 66e9080724..65a6118670 100644 --- a/monai/utils/state_cacher.py +++ b/monai/utils/state_cacher.py @@ -1,3 +1,14 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import copy import os import tempfile diff --git a/monai/visualize/class_activation_maps.py b/monai/visualize/class_activation_maps.py index b310ec0834..c63e8e51d9 100644 --- a/monai/visualize/class_activation_maps.py +++ b/monai/visualize/class_activation_maps.py @@ -18,7 +18,7 @@ import torch.nn.functional as F from monai.transforms import ScaleIntensity -from monai.utils import ensure_tuple +from monai.utils import ensure_tuple, get_torch_version_tuple from monai.visualize.visualizer import default_upsampler __all__ = ["CAM", "GradCAM", "GradCAMpp", "ModelWithHooks", "default_normalizer"] @@ -73,7 +73,13 @@ def __init__( continue _registered.append(name) if self.register_backward: - mod.register_backward_hook(self.backward_hook(name)) + if get_torch_version_tuple() < (1, 8): + mod.register_backward_hook(self.backward_hook(name)) + else: + if "inplace" in mod.__dict__ and mod.__dict__["inplace"]: + # inplace=True causes errors for register_full_backward_hook + mod.__dict__["inplace"] = False + mod.register_full_backward_hook(self.backward_hook(name)) if self.register_forward: mod.register_forward_hook(self.forward_hook(name)) if len(_registered) != len(self.target_layers): diff --git a/monai/visualize/occlusion_sensitivity.py b/monai/visualize/occlusion_sensitivity.py index ee9a967da1..46b9115c7a 100644 --- a/monai/visualize/occlusion_sensitivity.py +++ b/monai/visualize/occlusion_sensitivity.py @@ -152,7 +152,7 @@ def __init__( upsampler: Optional[Callable] = default_upsampler, verbose: bool = True, ) -> None: - """Occlusion sensitivitiy constructor. + """Occlusion sensitivity constructor. 
Args: nn_module: Classification model to use for inference diff --git a/requirements-dev.txt b/requirements-dev.txt index dc4181b310..9ba05fb769 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -3,7 +3,7 @@ pytorch-ignite==0.4.4 gdown>=3.6.4 scipy -itk>=5.0 +itk>=5.0, <=5.1.2 nibabel pillow tensorboard @@ -26,9 +26,9 @@ mypy>=0.790 ninja torchvision psutil -Sphinx==3.3.0 +Sphinx==3.5.3 recommonmark==0.6.0 sphinx-autodoc-typehints==1.11.1 -sphinx-rtd-theme==0.5.0 -cucim==0.18.1 +sphinx-rtd-theme==0.5.2 +cucim==0.18.2 openslide-python==1.1.2 diff --git a/setup.cfg b/setup.cfg index f06c56d001..702c8638c1 100644 --- a/setup.cfg +++ b/setup.cfg @@ -36,9 +36,9 @@ all = pytorch-ignite==0.4.4 gdown>=3.6.4 torchvision - itk>=5.0 + itk>=5.0, <=5.1.2 tqdm>=4.47.0 - cucim==0.18.1 + cucim==0.18.2 openslide-python==1.1.2 nibabel = nibabel @@ -55,7 +55,7 @@ ignite = torchvision = torchvision itk = - itk>=5.0 + itk>=5.0, <=5.1.2 tqdm = tqdm>=4.47.0 lmdb = @@ -63,7 +63,7 @@ lmdb = psutil = psutil cucim = - cucim==0.18.1 + cucim==0.18.2 openslide = openslide-python==1.1.2 diff --git a/tests/min_tests.py b/tests/min_tests.py index e896e81c70..47892a143e 100644 --- a/tests/min_tests.py +++ b/tests/min_tests.py @@ -33,6 +33,7 @@ def run_testsuit(): "test_cachedataset_parallel", "test_dataset", "test_detect_envelope", + "test_efficientnet", "test_iterable_dataset", "test_ensemble_evaluator", "test_handler_checkpoint_loader", @@ -42,10 +43,12 @@ def run_testsuit(): "test_handler_confusion_matrix", "test_handler_confusion_matrix_dist", "test_handler_hausdorff_distance", + "test_handler_garbage_collector", "test_handler_mean_dice", - "test_handler_prob_map_generator", + "test_handler_prob_map_producer", "test_handler_rocauc", "test_handler_rocauc_dist", + "test_handler_parameter_scheduler", "test_handler_segmentation_saver", "test_handler_smartcache", "test_handler_stats", @@ -94,6 +97,7 @@ def run_testsuit(): "test_smartcachedataset", "test_spacing", "test_spacingd", + 
"test_senet", "test_surface_distance", "test_zoom", "test_zoom_affine", @@ -112,6 +116,8 @@ def run_testsuit(): "test_save_imaged", "test_ensure_channel_first", "test_ensure_channel_firstd", + "test_handler_early_stop", + "test_handler_transform_inverter", ] assert sorted(exclude_cases) == sorted(set(exclude_cases)), f"Duplicated items in {exclude_cases}" diff --git a/tests/test_activations.py b/tests/test_activations.py index 1614642d6d..5ed9ec2046 100644 --- a/tests/test_activations.py +++ b/tests/test_activations.py @@ -48,6 +48,15 @@ ] TEST_CASE_5 = [ + "memswish", + torch.tensor([[[[-10, -8, -6, -4, -2], [0, 2, 4, 6, 8]]]], dtype=torch.float32), + torch.tensor( + [[[[-4.54e-04, -2.68e-03, -1.48e-02, -7.19e-02, -2.38e-01], [0.00e00, 1.76e00, 3.93e00, 5.99e00, 8.00e00]]]] + ), + (1, 1, 2, 5), +] + +TEST_CASE_6 = [ "mish", torch.tensor([[[[-10, -8, -6, -4, -2], [0, 2, 4, 6, 8]]]], dtype=torch.float32), torch.tensor( @@ -64,7 +73,7 @@ def test_value_shape(self, input_param, img, out, expected_shape): torch.testing.assert_allclose(result, out) self.assertTupleEqual(result.shape, expected_shape) - @parameterized.expand([TEST_CASE_4, TEST_CASE_5]) + @parameterized.expand([TEST_CASE_4, TEST_CASE_5, TEST_CASE_6]) def test_monai_activations_value_shape(self, input_param, img, out, expected_shape): act = Act[input_param]() result = act(img) diff --git a/tests/test_compose.py b/tests/test_compose.py index bb8a5f08c5..97b044af8f 100644 --- a/tests/test_compose.py +++ b/tests/test_compose.py @@ -14,11 +14,11 @@ from monai.data import DataLoader, Dataset from monai.transforms import AddChannel, Compose -from monai.transforms.transform import RandomizableTransform +from monai.transforms.transform import Randomizable from monai.utils import set_determinism -class _RandXform(RandomizableTransform): +class _RandXform(Randomizable): def randomize(self): self.val = self.R.random_sample() @@ -80,7 +80,7 @@ def c(d): # transform to handle dict data self.assertDictEqual(item, {"a": 
2, "b": 1, "c": 2}) def test_random_compose(self): - class _Acc(RandomizableTransform): + class _Acc(Randomizable): self.rand = 0.0 def randomize(self, data=None): @@ -99,7 +99,7 @@ def __call__(self, data): self.assertAlmostEqual(c(1), 1.90734751) def test_randomize_warn(self): - class _RandomClass(RandomizableTransform): + class _RandomClass(Randomizable): def randomize(self, foo1, foo2): pass diff --git a/tests/test_crf_cpu.py b/tests/test_crf_cpu.py index f6e82d16a5..41ae75f4b4 100644 --- a/tests/test_crf_cpu.py +++ b/tests/test_crf_cpu.py @@ -30,7 +30,7 @@ 0.5, # bilateral_color_sigma 5.0, # gaussian_spatial_sigma 1.0, # update_factor - 1, # compatability_kernel_range + 1, # compatibility_kernel_range 5, # iterations ], # Input @@ -92,7 +92,7 @@ 0.5, # bilateral_color_sigma 5.0, # gaussian_spatial_sigma 1.0, # update_factor - 1, # compatability_kernel_range + 1, # compatibility_kernel_range 5, # iterations ], # Input @@ -189,7 +189,7 @@ 0.1, # bilateral_color_sigma 5.0, # gaussian_spatial_sigma 1.0, # update_factor - 1, # compatability_kernel_range + 1, # compatibility_kernel_range 2, # iterations ], # Input diff --git a/tests/test_crf_cuda.py b/tests/test_crf_cuda.py index 55d57d67bf..90fe64cd4e 100644 --- a/tests/test_crf_cuda.py +++ b/tests/test_crf_cuda.py @@ -30,7 +30,7 @@ 0.5, # bilateral_color_sigma 5.0, # gaussian_spatial_sigma 1.0, # update_factor - 1, # compatability_kernel_range + 1, # compatibility_kernel_range 5, # iterations ], # Input @@ -92,7 +92,7 @@ 0.5, # bilateral_color_sigma 5.0, # gaussian_spatial_sigma 1.0, # update_factor - 1, # compatability_kernel_range + 1, # compatibility_kernel_range 5, # iterations ], # Input @@ -189,7 +189,7 @@ 0.1, # bilateral_color_sigma 5.0, # gaussian_spatial_sigma 1.0, # update_factor - 1, # compatability_kernel_range + 1, # compatibility_kernel_range 2, # iterations ], # Input @@ -444,7 +444,7 @@ def test(self, test_case_description, params, input, features, expected): output = crf(input_tensor, 
feature_tensor).cpu().numpy() # Ensure result are as expected - np.testing.assert_allclose(output, expected, atol=1e-4, rtol=1e-4) + np.testing.assert_allclose(output, expected, atol=1e-3, rtol=1e-3) if __name__ == "__main__": diff --git a/tests/test_cuimage_reader.py b/tests/test_cuimage_reader.py index 1b0293f159..2cbfaec113 100644 --- a/tests/test_cuimage_reader.py +++ b/tests/test_cuimage_reader.py @@ -1,3 +1,14 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import os import unittest from unittest import skipUnless @@ -14,7 +25,7 @@ PILImage, has_pil = optional_import("PIL.Image") FILE_URL = "http://openslide.cs.cmu.edu/download/openslide-testdata/Generic-TIFF/CMU-1.tiff" -FILE_PATH = os.path.join(os.path.dirname(__file__), "testing_data", os.path.basename(FILE_URL)) +FILE_PATH = os.path.join(os.path.dirname(__file__), "testing_data", "temp_" + os.path.basename(FILE_URL)) HEIGHT = 32914 WIDTH = 46000 @@ -105,7 +116,7 @@ def test_read_rgba(self, img_expected): image = {} reader = WSIReader("cuCIM") for mode in ["RGB", "RGBA"]: - file_path = self.create_rgba_image(img_expected, "test_cu_tiff_image", mode=mode) + file_path = self.create_rgba_image(img_expected, "temp_cu_tiff_image", mode=mode) img_obj = reader.read(file_path) image[mode], _ = reader.get_data(img_obj) diff --git a/tests/test_dice_ce_loss.py b/tests/test_dice_ce_loss.py index 8627c6d130..3423e1425b 100644 --- a/tests/test_dice_ce_loss.py +++ b/tests/test_dice_ce_loss.py @@ -43,6 +43,20 @@ }, 0.2088, ], + [ # shape: (2, 2, 3), (2, 1, 3) lambda_dice: 1.0, lambda_ce: 2.0 + { + "include_background": False, + "to_onehot_y": True, + "ce_weight": torch.tensor([1.0, 1.0]), + "lambda_dice": 1.0, + "lambda_ce": 2.0, + }, + { + "input": torch.tensor([[[100.0, 100.0, 0.0], [0.0, 0.0, 1.0]], [[1.0, 0.0, 1.0], [0.0, 1.0, 0.0]]]), + "target": torch.tensor([[[0.0, 0.0, 1.0]], [[0.0, 1.0, 0.0]]]), + }, + 0.4176, + ], [ # shape: (2, 2, 3), (2, 1, 3), do not include class 0 {"include_background": False, "to_onehot_y": True, "ce_weight": torch.tensor([0.0, 1.0])}, { diff --git a/tests/test_dice_focal_loss.py b/tests/test_dice_focal_loss.py new file mode 100644 index 0000000000..4bab68131c --- /dev/null +++ b/tests/test_dice_focal_loss.py @@ -0,0 +1,80 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import numpy as np +import torch + +from monai.losses import DiceFocalLoss, DiceLoss, FocalLoss +from tests.utils import SkipIfBeforePyTorchVersion, test_script_save + + +class TestDiceFocalLoss(unittest.TestCase): + def test_result_onehot_target_include_bg(self): + size = [3, 3, 5, 5] + label = torch.randint(low=0, high=2, size=size) + pred = torch.randn(size) + for reduction in ["sum", "mean", "none"]: + common_params = { + "include_background": True, + "to_onehot_y": False, + "reduction": reduction, + } + for focal_weight in [None, torch.tensor([1.0, 1.0, 2.0]), (3, 2.0, 1)]: + for lambda_focal in [0.5, 1.0, 1.5]: + dice_focal = DiceFocalLoss( + focal_weight=focal_weight, gamma=1.0, lambda_focal=lambda_focal, **common_params + ) + dice = DiceLoss(**common_params) + focal = FocalLoss(weight=focal_weight, gamma=1.0, **common_params) + result = dice_focal(pred, label) + expected_val = dice(pred, label) + lambda_focal * focal(pred, label) + np.testing.assert_allclose(result, expected_val) + + def test_result_no_onehot_no_bg(self): + size = [3, 3, 5, 5] + label = torch.randint(low=0, high=2, size=size) + label = torch.argmax(label, dim=1, keepdim=True) + pred = torch.randn(size) + for reduction in ["sum", "mean", "none"]: + common_params = { + "include_background": False, + "to_onehot_y": True, + "reduction": reduction, + } + for focal_weight in [2.0, torch.tensor([1.0, 2.0]), (2.0, 1)]: + for lambda_focal in [0.5, 1.0, 1.5]: + dice_focal = DiceFocalLoss(focal_weight=focal_weight, lambda_focal=lambda_focal, **common_params) + dice = 
DiceLoss(**common_params) + focal = FocalLoss(weight=focal_weight, **common_params) + result = dice_focal(pred, label) + expected_val = dice(pred, label) + lambda_focal * focal(pred, label) + np.testing.assert_allclose(result, expected_val) + + def test_ill_shape(self): + loss = DiceFocalLoss() + with self.assertRaisesRegex(ValueError, ""): + loss(torch.ones((1, 2, 3)), torch.ones((1, 1, 2, 3))) + + def test_ill_lambda(self): + with self.assertRaisesRegex(ValueError, ""): + loss = DiceFocalLoss(lambda_dice=-1.0) + + @SkipIfBeforePyTorchVersion((1, 7, 0)) + def test_script(self): + loss = DiceFocalLoss() + test_input = torch.ones(2, 1, 8, 8) + test_script_save(loss, test_input, test_input) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_dvf2ddf.py b/tests/test_dvf2ddf.py index bf04fed8b6..cc3323cf13 100644 --- a/tests/test_dvf2ddf.py +++ b/tests/test_dvf2ddf.py @@ -1,3 +1,14 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import unittest import numpy as np diff --git a/tests/test_efficientnet.py b/tests/test_efficientnet.py new file mode 100644 index 0000000000..7ef56c52a9 --- /dev/null +++ b/tests/test_efficientnet.py @@ -0,0 +1,308 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import unittest +from typing import TYPE_CHECKING +from unittest import skipUnless + +import torch +from parameterized import parameterized + +from monai.networks import eval_mode +from monai.networks.nets import EfficientNetBN, drop_connect, get_efficientnet_image_size +from monai.utils import optional_import +from tests.utils import skip_if_quick, test_pretrained_networks, test_script_save + +if TYPE_CHECKING: + import torchvision + + has_torchvision = True +else: + torchvision, has_torchvision = optional_import("torchvision") + +if TYPE_CHECKING: + import PIL + + has_pil = True +else: + PIL, has_pil = optional_import("PIL") + + +def get_model_names(): + return ["efficientnet-b{}".format(d) for d in range(8)] + + +def get_expected_model_shape(model_name): + model_input_shapes = { + "efficientnet-b0": 224, + "efficientnet-b1": 240, + "efficientnet-b2": 260, + "efficientnet-b3": 300, + "efficientnet-b4": 380, + "efficientnet-b5": 456, + "efficientnet-b6": 528, + "efficientnet-b7": 600, + } + return model_input_shapes[model_name] + + +def make_shape_cases(models, spatial_dims, batches, pretrained, in_channels=3, num_classes=1000): + ret_tests = [] + for spatial_dim in spatial_dims: # selected spatial_dims + for batch in batches: # check single batch as well as multiple batch input + for model in models: # selected models + for is_pretrained in pretrained: # pretrained or not pretrained + kwargs = { + "model_name": model, + "pretrained": is_pretrained, + "progress": False, + "spatial_dims": spatial_dim, + "in_channels": in_channels, + "num_classes": 
num_classes, + } + ret_tests.append( + [ + kwargs, + ( + batch, + in_channels, + ) + + (get_expected_model_shape(model),) * spatial_dim, + (batch, num_classes), + ] + ) + return ret_tests + + +# create list of selected models to speed up redundant tests +# only test the models B0, B3, B7 +SEL_MODELS = [get_model_names()[i] for i in [0, 3, 7]] + +# pretrained=False cases +# 1D models are cheap so do test for all models in 1D +CASES_1D = make_shape_cases( + models=get_model_names(), spatial_dims=[1], batches=[1, 4], pretrained=[False], in_channels=3, num_classes=1000 +) + +# 2D and 3D models are expensive so use selected models +CASES_2D = make_shape_cases( + models=SEL_MODELS, spatial_dims=[2], batches=[1, 4], pretrained=[False], in_channels=3, num_classes=1000 +) +CASES_3D = make_shape_cases( + models=[SEL_MODELS[0]], spatial_dims=[3], batches=[1], pretrained=[False], in_channels=3, num_classes=1000 +) + +# pretrained=True cases +# tabby kitty test with pretrained model +# needs 'testing_data/kitty_test.jpg' +# image from: https://commons.wikimedia.org/wiki/File:Tabby_cat_with_blue_eyes-3336579.jpg +CASES_KITTY_TRAINED = [ + ( + { + "model_name": "efficientnet-b0", + "pretrained": True, + "progress": False, + "spatial_dims": 2, + "in_channels": 3, + "num_classes": 1000, + }, + os.path.join(os.path.dirname(__file__), "testing_data", "kitty_test.jpg"), + 282, # ~ tiger cat + ), + ( + { + "model_name": "efficientnet-b3", + "pretrained": True, + "progress": False, + "spatial_dims": 2, + "in_channels": 3, + "num_classes": 1000, + }, + os.path.join(os.path.dirname(__file__), "testing_data", "kitty_test.jpg"), + 282, # ~ tiger cat + ), + ( + { + "model_name": "efficientnet-b7", + "pretrained": True, + "progress": False, + "spatial_dims": 2, + "in_channels": 3, + "num_classes": 1000, + }, + os.path.join(os.path.dirname(__file__), "testing_data", "kitty_test.jpg"), + 282, # ~ tiger cat + ), +] + +# varying num_classes and in_channels +CASES_VARIATIONS = [] + +# change 
num_classes test +# 10 classes +# 2D +CASES_VARIATIONS.extend( + make_shape_cases( + models=SEL_MODELS, spatial_dims=[2], batches=[1], pretrained=[False, True], in_channels=3, num_classes=10 + ) +) +# 3D +CASES_VARIATIONS.extend( + make_shape_cases( + models=[SEL_MODELS[0]], spatial_dims=[3], batches=[1], pretrained=[False], in_channels=3, num_classes=10 + ) +) + +# change in_channels test +# 1 channel +# 2D +CASES_VARIATIONS.extend( + make_shape_cases( + models=SEL_MODELS, spatial_dims=[2], batches=[1], pretrained=[False, True], in_channels=1, num_classes=1000 + ) +) +# 8 channel +# 2D +CASES_VARIATIONS.extend( + make_shape_cases( + models=SEL_MODELS, spatial_dims=[2], batches=[1], pretrained=[False, True], in_channels=8, num_classes=1000 + ) +) +# 3D +CASES_VARIATIONS.extend( + make_shape_cases( + models=[SEL_MODELS[0]], spatial_dims=[3], batches=[1], pretrained=[False], in_channels=1, num_classes=1000 + ) +) + + +class TestEFFICIENTNET(unittest.TestCase): + @parameterized.expand(CASES_1D + CASES_2D + CASES_3D + CASES_VARIATIONS) + def test_shape(self, input_param, input_shape, expected_shape): + device = "cuda" if torch.cuda.is_available() else "cpu" + print(input_param) + + # initialize model + net = EfficientNetBN(**input_param).to(device) + + # run inference with random tensor + with eval_mode(net): + result = net(torch.randn(input_shape).to(device)) + + # check output shape + self.assertEqual(result.shape, expected_shape) + + @parameterized.expand(CASES_1D + CASES_2D) + def test_non_default_shapes(self, input_param, input_shape, expected_shape): + device = "cuda" if torch.cuda.is_available() else "cpu" + print(input_param) + + # initialize model + net = EfficientNetBN(**input_param).to(device) + + # override input shape with different variations + num_dims = len(input_shape) - 2 + non_default_sizes = [128, 256, 512] + for candidate_size in non_default_sizes: + input_shape = input_shape[0:2] + (candidate_size,) * num_dims + print(input_shape) + # run 
inference with random tensor + with eval_mode(net): + result = net(torch.randn(input_shape).to(device)) + + # check output shape + self.assertEqual(result.shape, expected_shape) + + @parameterized.expand(CASES_KITTY_TRAINED) + @skip_if_quick + @skipUnless(has_torchvision, "Requires `torchvision` package.") + @skipUnless(has_pil, "Requires `pillow` package.") + def test_kitty_pretrained(self, input_param, image_path, expected_label): + device = "cuda" if torch.cuda.is_available() else "cpu" + + # open image + image_size = get_efficientnet_image_size(input_param["model_name"]) + img = PIL.Image.open(image_path) + + # define ImageNet transform + tfms = torchvision.transforms.Compose( + [ + torchvision.transforms.Resize(image_size), + torchvision.transforms.CenterCrop(image_size), + torchvision.transforms.ToTensor(), + torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), + ] + ) + + # preprocess and prepare image tensor + img = tfms(img).unsqueeze(0).to(device) + + # initialize a pretrained model + net = test_pretrained_networks(EfficientNetBN, input_param, device) + + # run inference + with eval_mode(net): + result = net(img) + pred_label = torch.argmax(result, dim=-1) + + # check output + self.assertEqual(pred_label, expected_label) + + def test_drop_connect_layer(self): + p_list = [float(d + 1) / 10.0 for d in range(9)] + # testing 1D, 2D and 3D shape + for rand_tensor_shape in [(512, 16, 4), (384, 16, 4, 4), (256, 16, 4, 4, 4)]: + + # test validation mode, out tensor == in tensor + training = False + for p in p_list: + in_tensor = torch.rand(rand_tensor_shape) + 0.1 + out_tensor = drop_connect(in_tensor, p, training=training) + self.assertTrue(torch.equal(out_tensor, in_tensor)) + + # test training mode, sum((out tensor * (1.0 - p)) != in tensor)/out_tensor.size() == p + # use tolerance of 0.175 to account for rounding errors due to finite set in/out + tol = 0.175 + training = True + for p in p_list: + in_tensor = 
torch.rand(rand_tensor_shape) + 0.1 + out_tensor = drop_connect(in_tensor, p, training=training) + + p_calculated = 1.0 - torch.sum(torch.isclose(in_tensor, out_tensor * (1.0 - p))) / float( + in_tensor.numel() + ) + p_calculated = p_calculated.cpu().numpy() + + self.assertTrue(abs(p_calculated - p) < tol) + + def test_ill_arg(self): + with self.assertRaises(ValueError): + # wrong spatial_dims + EfficientNetBN(model_name="efficientnet-b0", spatial_dims=4) + # wrong model_name + EfficientNetBN(model_name="efficientnet-b10", spatial_dims=3) + + def test_func_get_efficientnet_input_shape(self): + for model in get_model_names(): + result_shape = get_efficientnet_image_size(model_name=model) + expected_shape = get_expected_model_shape(model) + self.assertEqual(result_shape, expected_shape) + + def test_script(self): + net = EfficientNetBN(model_name="efficientnet-b0", spatial_dims=2, in_channels=3, num_classes=1000) + net.set_swish(memory_efficient=False) # at the moment custom memory efficient swish is not exportable with jit + test_data = torch.randn(1, 3, 224, 224) + test_script_save(net, test_data) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_ensemble_evaluator.py b/tests/test_ensemble_evaluator.py index 9cc977d876..28a2d4f941 100644 --- a/tests/test_ensemble_evaluator.py +++ b/tests/test_ensemble_evaluator.py @@ -12,7 +12,7 @@ import unittest import torch -from ignite.engine import Events +from ignite.engine import EventEnum, Events from monai.engines import EnsembleEvaluator @@ -44,11 +44,17 @@ def forward(self, x): net3 = TestNet(lambda x: x + 4) net4 = TestNet(lambda x: x + 5) + class CustomEvents(EventEnum): + FOO_EVENT = "foo_event" + BAR_EVENT = "bar_event" + val_engine = EnsembleEvaluator( device=device, val_data_loader=val_loader, networks=[net0, net1, net2, net3, net4], pred_keys=["pred0", "pred1", "pred2", "pred3", "pred4"], + event_names=["bwd_event", "opt_event", CustomEvents], + event_to_attr={CustomEvents.FOO_EVENT: "foo", 
"opt_event": "opt"}, ) @val_engine.on(Events.ITERATION_COMPLETED) @@ -57,6 +63,21 @@ def run_post_transform(engine): expected_value = engine.state.iteration + i torch.testing.assert_allclose(engine.state.output[f"pred{i}"], torch.tensor([[expected_value]])) + @val_engine.on(Events.EPOCH_COMPLETED) + def trigger_custom_event(): + val_engine.fire_event(CustomEvents.FOO_EVENT) + val_engine.fire_event(CustomEvents.BAR_EVENT) + val_engine.fire_event("bwd_event") + val_engine.fire_event("opt_event") + + @val_engine.on(CustomEvents.FOO_EVENT) + def do_foo_op(): + self.assertEqual(val_engine.state.foo, 0) + + @val_engine.on("opt_event") + def do_bar_op(): + self.assertEqual(val_engine.state.opt, 0) + val_engine.run() diff --git a/tests/test_file_basename.py b/tests/test_file_basename.py index 1b67baea8c..cb7ee77e62 100644 --- a/tests/test_file_basename.py +++ b/tests/test_file_basename.py @@ -57,10 +57,18 @@ def test_value(self): expected = os.path.join(output_tmp, "test", "test") self.assertEqual(result, expected) + result = create_file_basename("", "test.txt", output_tmp, "foo", 5) + expected = os.path.join(output_tmp, "test", "test_5") + self.assertEqual(result, expected) + result = create_file_basename("post", "test.tar.gz", output_tmp, "foo") expected = os.path.join(output_tmp, "test", "test_post") self.assertEqual(result, expected) + result = create_file_basename("post", "test.tar.gz", output_tmp, "foo", 8) + expected = os.path.join(output_tmp, "test", "test_post_8") + self.assertEqual(result, expected) + if __name__ == "__main__": unittest.main() diff --git a/tests/test_focal_loss.py b/tests/test_focal_loss.py index 4512dac4b9..66665774ef 100644 --- a/tests/test_focal_loss.py +++ b/tests/test_focal_loss.py @@ -187,6 +187,16 @@ def test_ill_shape(self): with self.assertRaisesRegex(AssertionError, ""): FocalLoss(reduction="mean")(chn_input, chn_target) + def test_ill_class_weight(self): + chn_input = torch.ones((1, 4, 3, 3)) + chn_target = torch.ones((1, 4, 3, 3)) + 
with self.assertRaisesRegex(ValueError, ""): + FocalLoss(include_background=True, weight=(1.0, 1.0, 2.0))(chn_input, chn_target) + with self.assertRaisesRegex(ValueError, ""): + FocalLoss(include_background=False, weight=(1.0, 1.0, 1.0, 1.0))(chn_input, chn_target) + with self.assertRaisesRegex(ValueError, ""): + FocalLoss(include_background=False, weight=(1.0, 1.0, -1.0))(chn_input, chn_target) + @SkipIfBeforePyTorchVersion((1, 7, 0)) def test_script(self): loss = FocalLoss() diff --git a/tests/test_get_package_version.py b/tests/test_get_package_version.py new file mode 100644 index 0000000000..beddb340ab --- /dev/null +++ b/tests/test_get_package_version.py @@ -0,0 +1,31 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + +from monai.utils.module import get_package_version + + +class TestGetVersion(unittest.TestCase): + def test_default(self): + output = get_package_version("42foobarnoexist") + self.assertTrue("UNKNOWN" in output) + + output = get_package_version("numpy") + self.assertFalse("UNKNOWN" in output) + + def test_msg(self): + output = get_package_version("42foobarnoexist", "test") + self.assertTrue("test" in output) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_globalnet.py b/tests/test_globalnet.py index 0aab57d272..32bc58f610 100644 --- a/tests/test_globalnet.py +++ b/tests/test_globalnet.py @@ -1,3 +1,14 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import unittest import numpy as np diff --git a/tests/test_handler_checkpoint_loader.py b/tests/test_handler_checkpoint_loader.py index 838cc3f4dd..a69193c98c 100644 --- a/tests/test_handler_checkpoint_loader.py +++ b/tests/test_handler_checkpoint_loader.py @@ -38,7 +38,7 @@ def test_one_save_one_load(self): engine1.run([0] * 8, max_epochs=5) path = tempdir + "/checkpoint_final_iteration=40.pt" engine2 = Engine(lambda e, b: None) - CheckpointLoader(load_path=path, load_dict={"net": net2, "eng": engine2}).attach(engine2) + CheckpointLoader(load_path=path, load_dict={"net": net2, "eng": engine2}, strict=True).attach(engine2) @engine2.on(Events.STARTED) def check_epoch(engine: Engine): @@ -49,7 +49,7 @@ def check_epoch(engine: Engine): # test bad case with max_epochs smaller than current epoch engine3 = Engine(lambda e, b: None) - CheckpointLoader(load_path=path, load_dict={"net": net2, "eng": engine3}).attach(engine3) + CheckpointLoader(load_path=path, load_dict={"net": net2, "eng": engine3}, strict=True).attach(engine3) try: engine3.run([0] * 8, max_epochs=3) @@ -75,7 +75,7 @@ def test_two_save_one_load(self): engine.run([0] * 8, max_epochs=5) path = tempdir + "/checkpoint_final_iteration=40.pt" engine = Engine(lambda e, b: None) - CheckpointLoader(load_path=path, load_dict={"net": net2}).attach(engine) + CheckpointLoader(load_path=path, load_dict={"net": net2}, strict=True).attach(engine) engine.run([0] * 8, max_epochs=1) torch.testing.assert_allclose(net2.state_dict()["weight"], torch.tensor([0.1])) @@ -96,10 +96,80 @@ def test_save_single_device_load_multi_devices(self): engine.run([0] * 8, max_epochs=5) path = tempdir + "/net_final_iteration=40.pt" engine = Engine(lambda e, b: None) - CheckpointLoader(load_path=path, load_dict={"net": net2}).attach(engine) + CheckpointLoader(load_path=path, load_dict={"net": net2}, strict=True).attach(engine) engine.run([0] * 8, max_epochs=1) torch.testing.assert_allclose(net2.state_dict()["module.weight"].cpu(), 
torch.tensor([0.1])) + def test_partial_under_load(self): + logging.basicConfig(stream=sys.stdout, level=logging.INFO) + net1 = torch.nn.Sequential(*[torch.nn.PReLU(), torch.nn.PReLU()]) + data1 = net1.state_dict() + data1["0.weight"] = torch.tensor([0.1]) + data1["1.weight"] = torch.tensor([0.2]) + net1.load_state_dict(data1) + + net2 = torch.nn.Sequential(*[torch.nn.PReLU()]) + data2 = net2.state_dict() + data2["0.weight"] = torch.tensor([0.3]) + net2.load_state_dict(data2) + + with tempfile.TemporaryDirectory() as tempdir: + engine = Engine(lambda e, b: None) + CheckpointSaver(save_dir=tempdir, save_dict={"net": net1}, save_final=True).attach(engine) + engine.run([0] * 8, max_epochs=5) + path = tempdir + "/net_final_iteration=40.pt" + engine = Engine(lambda e, b: None) + CheckpointLoader(load_path=path, load_dict={"net": net2}, strict=False).attach(engine) + engine.run([0] * 8, max_epochs=1) + torch.testing.assert_allclose(net2.state_dict()["0.weight"].cpu(), torch.tensor([0.1])) + + def test_partial_over_load(self): + logging.basicConfig(stream=sys.stdout, level=logging.INFO) + net1 = torch.nn.Sequential(*[torch.nn.PReLU()]) + data1 = net1.state_dict() + data1["0.weight"] = torch.tensor([0.1]) + net1.load_state_dict(data1) + + net2 = torch.nn.Sequential(*[torch.nn.PReLU(), torch.nn.PReLU()]) + data2 = net2.state_dict() + data2["0.weight"] = torch.tensor([0.2]) + data2["1.weight"] = torch.tensor([0.3]) + net2.load_state_dict(data2) + + with tempfile.TemporaryDirectory() as tempdir: + engine = Engine(lambda e, b: None) + CheckpointSaver(save_dir=tempdir, save_dict={"net": net1}, save_final=True).attach(engine) + engine.run([0] * 8, max_epochs=5) + path = tempdir + "/net_final_iteration=40.pt" + engine = Engine(lambda e, b: None) + CheckpointLoader(load_path=path, load_dict={"net": net2}, strict=False).attach(engine) + engine.run([0] * 8, max_epochs=1) + torch.testing.assert_allclose(net2.state_dict()["0.weight"].cpu(), torch.tensor([0.1])) + + def 
test_strict_shape(self): + logging.basicConfig(stream=sys.stdout, level=logging.INFO) + net1 = torch.nn.Sequential(*[torch.nn.PReLU(num_parameters=5)]) + data1 = net1.state_dict() + data1["0.weight"] = torch.tensor([1, 2, 3, 4, 5]) + data1["new"] = torch.tensor(0.1) + net1.load_state_dict(data1, strict=False) + + net2 = torch.nn.Sequential(*[torch.nn.PReLU(), torch.nn.PReLU()]) + data2 = net2.state_dict() + data2["0.weight"] = torch.tensor([0.2]) + data2["1.weight"] = torch.tensor([0.3]) + net2.load_state_dict(data2) + + with tempfile.TemporaryDirectory() as tempdir: + engine = Engine(lambda e, b: None) + CheckpointSaver(save_dir=tempdir, save_dict={"net": net1}, save_final=True).attach(engine) + engine.run([0] * 8, max_epochs=5) + path = tempdir + "/net_final_iteration=40.pt" + engine = Engine(lambda e, b: None) + CheckpointLoader(load_path=path, load_dict={"net": net2}, strict=False, strict_shape=False).attach(engine) + engine.run([0] * 8, max_epochs=1) + torch.testing.assert_allclose(net2.state_dict()["0.weight"].cpu(), torch.tensor([0.2])) + if __name__ == "__main__": unittest.main() diff --git a/tests/test_handler_early_stop.py b/tests/test_handler_early_stop.py new file mode 100644 index 0000000000..efe8e89825 --- /dev/null +++ b/tests/test_handler_early_stop.py @@ -0,0 +1,66 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + +from ignite.engine import Engine, Events + +from monai.handlers import EarlyStopHandler + + +class TestHandlerEarlyStop(unittest.TestCase): + def test_early_stop_train_loss(self): + def _train_func(engine, batch): + return {"loss": 1.5} + + trainer = Engine(_train_func) + EarlyStopHandler( + patience=5, + score_function=lambda x: x.state.output["loss"], + trainer=trainer, + epoch_level=False, + ).attach(trainer) + + trainer.run(range(4), max_epochs=2) + self.assertEqual(trainer.state.iteration, 6) + self.assertEqual(trainer.state.epoch, 2) + + def test_early_stop_val_metric(self): + def _train_func(engine, batch): + pass + + trainer = Engine(_train_func) + validator = Engine(_train_func) + validator.state.metrics["val_acc"] = 0.90 + + @trainer.on(Events.EPOCH_COMPLETED) + def run_validation(engine): + validator.state.metrics["val_acc"] += 0.01 + validator.run(range(3)) + + handler = EarlyStopHandler( + patience=3, + score_function=lambda x: x.state.metrics["val_acc"], + trainer=None, + min_delta=0.1, + cumulative_delta=True, + epoch_level=True, + ) + handler.attach(validator) + handler.set_trainer(trainer=trainer) + + trainer.run(range(3), max_epochs=5) + self.assertEqual(trainer.state.iteration, 12) + self.assertEqual(trainer.state.epoch, 4) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_handler_garbage_collector.py b/tests/test_handler_garbage_collector.py new file mode 100644 index 0000000000..3766283f40 --- /dev/null +++ b/tests/test_handler_garbage_collector.py @@ -0,0 +1,77 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import gc +import unittest +from unittest import skipUnless + +import torch +from ignite.engine import Engine +from parameterized import parameterized + +from monai.data import Dataset +from monai.handlers import GarbageCollector +from monai.utils import exact_version, optional_import + +Events, has_ignite = optional_import("ignite.engine", "0.4.4", exact_version, "Events") + + +TEST_CASE_0 = [[0, 1, 2], "epoch"] + +TEST_CASE_1 = [[0, 1, 2], "iteration"] + +TEST_CASE_2 = [[0, 1, 2], Events.EPOCH_COMPLETED] + + +class TestHandlerGarbageCollector(unittest.TestCase): + @skipUnless(has_ignite, "Requires ignite") + @parameterized.expand( + [ + TEST_CASE_0, + TEST_CASE_1, + TEST_CASE_2, + ] + ) + def test_content(self, data, trigger_event): + # set up engine + gb_count_dict = {} + + def _train_func(engine, batch): + # store garbage collection counts + if trigger_event == Events.EPOCH_COMPLETED or trigger_event.lower() == "epoch": + if engine.state.iteration % engine.state.epoch_length == 1: + gb_count_dict[engine.state.epoch] = gc.get_count() + elif trigger_event.lower() == "iteration": + gb_count_dict[engine.state.iteration] = gc.get_count() + + engine = Engine(_train_func) + + # set up testing handler + dataset = Dataset(data, transform=None) + data_loader = torch.utils.data.DataLoader(dataset, batch_size=1) + GarbageCollector(trigger_event=trigger_event, log_level=30).attach(engine) + + engine.run(data_loader, max_epochs=5) + + first_count = 0 + for iter, gb_count in gb_count_dict.items(): + # At least one zero-generation object is collected + # 
self.assertGreaterEqual(gb_count[0], 0) + if iter > 1: + # Since we are collecting all objects from all generations manually at each call, + # starting from the second call, there shouldn't be any 1st and 2nd + # generation objects available to collect. + self.assertEqual(gb_count[1], first_count) + self.assertEqual(gb_count[2], first_count) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_handler_mean_dice.py b/tests/test_handler_mean_dice.py index d15b549d86..648ffe91ae 100644 --- a/tests/test_handler_mean_dice.py +++ b/tests/test_handler_mean_dice.py @@ -39,8 +39,8 @@ def _val_func(engine, batch): y = torch.Tensor([[[0], [1]], [[0], [1]]]) dice_metric.update([y_pred, y]) - y_pred = torch.Tensor([[[0], [1]], [[1], [0]]]) - y = torch.Tensor([[[0], [1]], [[1], [0]]]) + y_pred = [torch.Tensor([[0], [1]]), torch.Tensor([[1], [0]])] + y = [torch.Tensor([[0], [1]]), torch.Tensor([[1], [0]])] dice_metric.update([y_pred, y]) avg_dice = dice_metric.compute() diff --git a/tests/test_handler_parameter_scheduler.py b/tests/test_handler_parameter_scheduler.py new file mode 100644 index 0000000000..5b3e845ace --- /dev/null +++ b/tests/test_handler_parameter_scheduler.py @@ -0,0 +1,123 @@ +import unittest + +import torch +from ignite.engine import Engine, Events +from torch.nn import Module + +from monai.handlers.parameter_scheduler import ParamSchedulerHandler + + +class ToyNet(Module): + def __init__(self, value): + super(ToyNet, self).__init__() + self.value = value + + def forward(self, input): + return input + + def get_value(self): + return self.value + + def set_value(self, value): + self.value = value + + +class TestHandlerParameterScheduler(unittest.TestCase): + def test_linear_scheduler(self): + # Testing step_constant + net = ToyNet(value=-1) + engine = Engine(lambda e, b: None) + ParamSchedulerHandler( + parameter_setter=net.set_value, + value_calculator="linear", + vc_kwargs={"initial_value": 0, "step_constant": 2, "step_max_value": 5, 
"max_value": 10}, + epoch_level=True, + event=Events.EPOCH_COMPLETED, + ).attach(engine) + engine.run([0] * 8, max_epochs=2) + torch.testing.assert_allclose(net.get_value(), 0) + + # Testing linear increase + net = ToyNet(value=-1) + engine = Engine(lambda e, b: None) + ParamSchedulerHandler( + parameter_setter=net.set_value, + value_calculator="linear", + vc_kwargs={"initial_value": 0, "step_constant": 2, "step_max_value": 5, "max_value": 10}, + epoch_level=True, + event=Events.EPOCH_COMPLETED, + ).attach(engine) + engine.run([0] * 8, max_epochs=3) + torch.testing.assert_allclose(net.get_value(), 3.333333, atol=0.001, rtol=0.0) + + # Testing max_value + net = ToyNet(value=-1) + engine = Engine(lambda e, b: None) + ParamSchedulerHandler( + parameter_setter=net.set_value, + value_calculator="linear", + vc_kwargs={"initial_value": 0, "step_constant": 2, "step_max_value": 5, "max_value": 10}, + epoch_level=True, + event=Events.EPOCH_COMPLETED, + ).attach(engine) + engine.run([0] * 8, max_epochs=10) + torch.testing.assert_allclose(net.get_value(), 10) + + def test_exponential_scheduler(self): + net = ToyNet(value=-1) + engine = Engine(lambda e, b: None) + ParamSchedulerHandler( + parameter_setter=net.set_value, + value_calculator="exponential", + vc_kwargs={"initial_value": 10, "gamma": 0.99}, + epoch_level=True, + event=Events.EPOCH_COMPLETED, + ).attach(engine) + engine.run([0] * 8, max_epochs=2) + torch.testing.assert_allclose(net.get_value(), 10 * 0.99 * 0.99) + + def test_step_scheduler(self): + net = ToyNet(value=-1) + engine = Engine(lambda e, b: None) + ParamSchedulerHandler( + parameter_setter=net.set_value, + value_calculator="step", + vc_kwargs={"initial_value": 10, "gamma": 0.99, "step_size": 5}, + epoch_level=True, + event=Events.EPOCH_COMPLETED, + ).attach(engine) + engine.run([0] * 8, max_epochs=10) + torch.testing.assert_allclose(net.get_value(), 10 * 0.99 * 0.99) + + def test_multistep_scheduler(self): + net = ToyNet(value=-1) + engine = Engine(lambda 
e, b: None) + ParamSchedulerHandler( + parameter_setter=net.set_value, + value_calculator="multistep", + vc_kwargs={"initial_value": 10, "gamma": 0.99, "milestones": [3, 6]}, + epoch_level=True, + event=Events.EPOCH_COMPLETED, + ).attach(engine) + engine.run([0] * 8, max_epochs=10) + torch.testing.assert_allclose(net.get_value(), 10 * 0.99 * 0.99) + + def test_custom_scheduler(self): + def custom_logic(initial_value, gamma, current_step): + return initial_value * gamma ** (current_step % 9) + + net = ToyNet(value=-1) + engine = Engine(lambda e, b: None) + ParamSchedulerHandler( + parameter_setter=net.set_value, + value_calculator=custom_logic, + vc_kwargs={"initial_value": 10, "gamma": 0.99}, + epoch_level=True, + event=Events.EPOCH_COMPLETED, + ).attach(engine) + engine.run([0] * 8, max_epochs=2) + torch.testing.assert_allclose(net.get_value(), 10 * 0.99 * 0.99) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_handler_prob_map_generator.py b/tests/test_handler_prob_map_producer.py similarity index 90% rename from tests/test_handler_prob_map_generator.py rename to tests/test_handler_prob_map_producer.py index 4882060be9..4f719fccc0 100644 --- a/tests/test_handler_prob_map_generator.py +++ b/tests/test_handler_prob_map_producer.py @@ -23,9 +23,9 @@ from monai.engines import Evaluator from monai.handlers import ValidationHandler -TEST_CASE_0 = ["image_inference_output_1", 2] -TEST_CASE_1 = ["image_inference_output_2", 9] -TEST_CASE_2 = ["image_inference_output_3", 1000] +TEST_CASE_0 = ["temp_image_inference_output_1", 2] +TEST_CASE_1 = ["temp_image_inference_output_2", 9] +TEST_CASE_2 = ["temp_image_inference_output_3", 1000] class TestDataset(Dataset): @@ -82,8 +82,9 @@ def inference(enging, batch): evaluator = TestEvaluator(torch.device("cpu:0"), data_loader, size, val_handlers=[prob_map_gen]) # set up validation handler - validation = ValidationHandler(evaluator, interval=1) + validation = ValidationHandler(interval=1, validator=None) 
validation.attach(engine) + validation.set_validator(validator=evaluator) engine.run(data_loader) diff --git a/tests/test_handler_segmentation_saver.py b/tests/test_handler_segmentation_saver.py index 1a2bbb7fbd..5449530b50 100644 --- a/tests/test_handler_segmentation_saver.py +++ b/tests/test_handler_segmentation_saver.py @@ -40,10 +40,15 @@ def _train_func(engine, batch): saver = SegmentationSaver(output_dir=tempdir, output_postfix="seg", output_ext=output_ext, scale=255) saver.attach(engine) - data = [{"filename_or_obj": ["testfile" + str(i) + ".nii.gz" for i in range(8)]}] + data = [ + { + "filename_or_obj": ["testfile" + str(i) + ".nii.gz" for i in range(8)], + "patch_index": list(range(8)), + } + ] engine.run(data, max_epochs=1) for i in range(8): - filepath = os.path.join("testfile" + str(i), "testfile" + str(i) + "_seg" + output_ext) + filepath = os.path.join("testfile" + str(i), "testfile" + str(i) + "_seg" + f"_{i}" + output_ext) self.assertTrue(os.path.exists(os.path.join(tempdir, filepath))) @parameterized.expand([TEST_CASE_0, TEST_CASE_1]) diff --git a/tests/test_handler_transform_inverter.py b/tests/test_handler_transform_inverter.py new file mode 100644 index 0000000000..087839a75e --- /dev/null +++ b/tests/test_handler_transform_inverter.py @@ -0,0 +1,109 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import sys +import unittest + +import numpy as np +import torch +from ignite.engine import Engine + +from monai.data import CacheDataset, DataLoader, create_test_image_3d +from monai.engines.utils import IterationEvents +from monai.handlers import TransformInverter +from monai.transforms import ( + AddChanneld, + CastToTyped, + Compose, + LoadImaged, + RandAffined, + RandAxisFlipd, + RandFlipd, + RandRotate90d, + RandRotated, + RandZoomd, + ResizeWithPadOrCropd, + ScaleIntensityd, + ToTensord, +) +from monai.utils.misc import set_determinism +from tests.utils import make_nifti_image + +KEYS = ["image", "label"] + + +class TestTransformInverter(unittest.TestCase): + def test_invert(self): + set_determinism(seed=0) + im_fname, seg_fname = [make_nifti_image(i) for i in create_test_image_3d(101, 100, 107, noise_max=100)] + transform = Compose( + [ + LoadImaged(KEYS), + AddChanneld(KEYS), + ScaleIntensityd("image", minv=1, maxv=10), + RandFlipd(KEYS, prob=0.5, spatial_axis=[1, 2]), + RandAxisFlipd(KEYS, prob=0.5), + RandRotate90d(KEYS, spatial_axes=(1, 2)), + RandZoomd(KEYS, prob=0.5, min_zoom=0.5, max_zoom=1.1, keep_size=True), + RandRotated(KEYS, prob=0.5, range_x=np.pi, mode="bilinear", align_corners=True), + RandAffined(KEYS, prob=0.5, rotate_range=np.pi, mode="nearest"), + ResizeWithPadOrCropd(KEYS, 100), + ToTensord(KEYS), + CastToTyped(KEYS, dtype=torch.uint8), + ] + ) + data = [{"image": im_fname, "label": seg_fname} for _ in range(12)] + + # num workers = 0 for mac or gpu transforms + num_workers = 0 if sys.platform == "darwin" or torch.cuda.is_available() else 2 + + dataset = CacheDataset(data, transform=transform, progress=False) + loader = DataLoader(dataset, num_workers=num_workers, batch_size=5) + + # set up engine + def _train_func(engine, batch): + self.assertTupleEqual(batch["image"].shape[1:], (1, 100, 100, 100)) + engine.state.output = batch + engine.fire_event(IterationEvents.MODEL_COMPLETED) + return engine.state.output + + engine = 
Engine(_train_func) + engine.register_events(*IterationEvents) + + # set up testing handler + TransformInverter( + transform=transform, + loader=loader, + output_keys=["image", "label"], + batch_keys="label", + nearest_interp=True, + num_workers=0 if sys.platform == "darwin" or torch.cuda.is_available() else 2, + ).attach(engine) + + engine.run(loader, max_epochs=1) + set_determinism(seed=None) + self.assertTupleEqual(engine.state.output["image"].shape, (2, 1, 100, 100, 100)) + self.assertTupleEqual(engine.state.output["label"].shape, (2, 1, 100, 100, 100)) + for i in engine.state.output["image_inverted"] + engine.state.output["label_inverted"]: + torch.testing.assert_allclose(i.to(torch.uint8).to(torch.float), i.to(torch.float)) + self.assertTupleEqual(i.shape, (1, 100, 101, 107)) + # check labels match + reverted = engine.state.output["label_inverted"][-1].detach().cpu().numpy()[0].astype(np.int32) + original = LoadImaged(KEYS)(data[-1])["label"] + n_good = np.sum(np.isclose(reverted, original, atol=1e-3)) + reverted_name = engine.state.output["label_meta_dict"]["filename_or_obj"][-1] + original_name = data[-1]["label"] + self.assertEqual(reverted_name, original_name) + self.assertTrue((reverted.size - n_good) in (0, 23641), "diff. 
in two possible values") + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_handler_validation.py b/tests/test_handler_validation.py index 11a51c7213..06f400109d 100644 --- a/tests/test_handler_validation.py +++ b/tests/test_handler_validation.py @@ -37,7 +37,7 @@ def _train_func(engine, batch): # set up testing handler val_data_loader = torch.utils.data.DataLoader(Dataset(data)) evaluator = TestEvaluator(torch.device("cpu:0"), val_data_loader) - saver = ValidationHandler(evaluator, interval=2) + saver = ValidationHandler(interval=2, validator=evaluator) saver.attach(engine) engine.run(data, max_epochs=5) diff --git a/tests/test_integration_workflows.py b/tests/test_integration_workflows.py index db7580bf86..00d097b2b6 100644 --- a/tests/test_integration_workflows.py +++ b/tests/test_integration_workflows.py @@ -160,7 +160,7 @@ def attach(self, engine): engine.add_event_handler(IterationEvents.FORWARD_COMPLETED, self._forward_completed) engine.add_event_handler(IterationEvents.LOSS_COMPLETED, self._loss_completed) engine.add_event_handler(IterationEvents.BACKWARD_COMPLETED, self._backward_completed) - engine.add_event_handler(IterationEvents.OPTIMIZER_COMPLETED, self._optimizer_completed) + engine.add_event_handler(IterationEvents.MODEL_COMPLETED, self._model_completed) def _forward_completed(self, engine): pass @@ -171,7 +171,7 @@ def _loss_completed(self, engine): def _backward_completed(self, engine): pass - def _optimizer_completed(self, engine): + def _model_completed(self, engine): pass train_handlers = [ diff --git a/tests/test_integration_workflows_gan.py b/tests/test_integration_workflows_gan.py index 73a9e69370..c54e8b01f2 100644 --- a/tests/test_integration_workflows_gan.py +++ b/tests/test_integration_workflows_gan.py @@ -145,7 +145,7 @@ def tearDown(self): set_determinism(seed=None) shutil.rmtree(self.data_dir) - @TimedCall(seconds=100, daemon=False) + @TimedCall(seconds=200, daemon=False) def test_training(self): 
torch.manual_seed(0) diff --git a/tests/test_inverse.py b/tests/test_inverse.py index ccc4f366c2..358bf0176a 100644 --- a/tests/test_inverse.py +++ b/tests/test_inverse.py @@ -54,6 +54,7 @@ SpatialPadd, Zoomd, allow_missing_keys_mode, + convert_inverse_interp_mode, ) from monai.utils import first, get_seed, optional_import, set_determinism from monai.utils.enums import InverseKeys @@ -572,9 +573,11 @@ def test_inverse_inferred_seg(self): segs_dict = {"label": segs, label_transform_key: data[label_transform_key]} segs_dict_decollated = decollate_batch(segs_dict) - # inverse of individual segmentation seg_dict = first(segs_dict_decollated) + # test to convert interpolation mode for 1 data of model output batch + convert_inverse_interp_mode(seg_dict, mode="nearest", align_corners=None) + with allow_missing_keys_mode(transforms): inv_seg = transforms.inverse(seg_dict)["label"] self.assertEqual(len(data["label_transforms"]), num_invertible_transforms) diff --git a/tests/test_inverse_collation.py b/tests/test_inverse_collation.py index 3e07a8f0e2..c302e04017 100644 --- a/tests/test_inverse_collation.py +++ b/tests/test_inverse_collation.py @@ -29,6 +29,7 @@ RandRotated, RandZoomd, ResizeWithPadOrCropd, + ToTensord, ) from monai.utils import optional_import, set_determinism from tests.utils import make_nifti_image @@ -113,7 +114,7 @@ def test_collation(self, _, transform, collate_fn, ndim): if collate_fn: modified_transform = transform else: - modified_transform = Compose([transform, ResizeWithPadOrCropd(KEYS, 100)]) + modified_transform = Compose([transform, ResizeWithPadOrCropd(KEYS, 100), ToTensord(KEYS)]) # num workers = 0 for mac or gpu transforms num_workers = 0 if sys.platform == "darwin" or torch.cuda.is_available() else 2 diff --git a/tests/test_lesion_froc.py b/tests/test_lesion_froc.py index 6702997c64..2454de88fa 100644 --- a/tests/test_lesion_froc.py +++ b/tests/test_lesion_froc.py @@ -1,3 +1,14 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under 
the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os import unittest from unittest import skipUnless @@ -185,7 +196,7 @@ def prepare_test_data(): ] -TEST_CASE_5 = [ +TEST_CASE_6 = [ { "data": [ { @@ -207,7 +218,7 @@ def prepare_test_data(): 2.0 / 3.0, ] -TEST_CASE_6 = [ +TEST_CASE_7 = [ { "data": [ { @@ -229,7 +240,7 @@ def prepare_test_data(): 0.4, ] -TEST_CASE_7 = [ +TEST_CASE_8 = [ { "data": [ { @@ -257,7 +268,7 @@ def prepare_test_data(): 1.0 / 3.0, ] -TEST_CASE_8 = [ +TEST_CASE_9 = [ { "data": [ { @@ -305,6 +316,7 @@ def setUp(self): TEST_CASE_6, TEST_CASE_7, TEST_CASE_8, + TEST_CASE_9, ] ) def test_read_patches_cucim(self, input_parameters, expected): diff --git a/tests/test_load_decathlon_datalist.py b/tests/test_load_decathlon_datalist.py index 90b9d3ab03..fe7ff6f8a2 100644 --- a/tests/test_load_decathlon_datalist.py +++ b/tests/test_load_decathlon_datalist.py @@ -96,6 +96,31 @@ def test_seg_no_labels(self): result = load_decathlon_datalist(file_path, True, "test", tempdir) self.assertEqual(result[0]["image"], os.path.join(tempdir, "spleen_15.nii.gz")) + def test_additional_items(self): + with tempfile.TemporaryDirectory() as tempdir: + with open(os.path.join(tempdir, "mask31.txt"), "w") as f: + f.write("spleen31 mask") + + test_data = { + "name": "Spleen", + "description": "Spleen Segmentation", + "labels": {"0": "background", "1": "spleen"}, + "training": [ + {"image": "spleen_19.nii.gz", "label": "spleen_19.nii.gz", "mask": "spleen mask"}, + {"image": "spleen_31.nii.gz", 
"label": "spleen_31.nii.gz", "mask": "mask31.txt"}, + ], + "test": ["spleen_15.nii.gz", "spleen_23.nii.gz"], + } + json_str = json.dumps(test_data) + file_path = os.path.join(tempdir, "test_data.json") + with open(file_path, "w") as json_file: + json_file.write(json_str) + result = load_decathlon_datalist(file_path, True, "training", tempdir) + self.assertEqual(result[0]["image"], os.path.join(tempdir, "spleen_19.nii.gz")) + self.assertEqual(result[0]["label"], os.path.join(tempdir, "spleen_19.nii.gz")) + self.assertEqual(result[1]["mask"], os.path.join(tempdir, "mask31.txt")) + self.assertEqual(result[0]["mask"], "spleen mask") + if __name__ == "__main__": unittest.main() diff --git a/tests/test_local_normalized_cross_correlation_loss.py b/tests/test_local_normalized_cross_correlation_loss.py index bb0bd7b642..31954e727b 100644 --- a/tests/test_local_normalized_cross_correlation_loss.py +++ b/tests/test_local_normalized_cross_correlation_loss.py @@ -1,4 +1,4 @@ -# Copyright 2020 MONAI Consortium +# Copyright 2020 - 2021 MONAI Consortium # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at diff --git a/tests/test_localnet.py b/tests/test_localnet.py index df1d9f61cb..dc680f15f9 100644 --- a/tests/test_localnet.py +++ b/tests/test_localnet.py @@ -1,3 +1,14 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import unittest import torch diff --git a/tests/test_localnet_block.py b/tests/test_localnet_block.py index e6171aeae9..f4e857a0fa 100644 --- a/tests/test_localnet_block.py +++ b/tests/test_localnet_block.py @@ -1,3 +1,14 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import unittest import torch diff --git a/tests/test_masked_inference_wsi_dataset.py b/tests/test_masked_inference_wsi_dataset.py index 7c8a815c2e..ed79b4f3a7 100644 --- a/tests/test_masked_inference_wsi_dataset.py +++ b/tests/test_masked_inference_wsi_dataset.py @@ -1,3 +1,14 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import os import unittest from unittest import skipUnless @@ -15,11 +26,13 @@ _, has_osl = optional_import("openslide") FILE_URL = "http://openslide.cs.cmu.edu/download/openslide-testdata/Generic-TIFF/CMU-1.tiff" -FILE_PATH = os.path.join(os.path.dirname(__file__), "testing_data", os.path.basename(FILE_URL)) +base_name, extension = os.path.splitext(os.path.basename(FILE_URL)) +FILE_NAME = "temp_" + base_name +FILE_PATH = os.path.join(os.path.dirname(__file__), "testing_data", FILE_NAME + extension) -MASK1 = os.path.join(os.path.dirname(__file__), "testing_data", "tissue_mask1.npy") -MASK2 = os.path.join(os.path.dirname(__file__), "testing_data", "tissue_mask2.npy") -MASK4 = os.path.join(os.path.dirname(__file__), "testing_data", "tissue_mask4.npy") +MASK1 = os.path.join(os.path.dirname(__file__), "testing_data", "temp_tissue_mask1.npy") +MASK2 = os.path.join(os.path.dirname(__file__), "testing_data", "temp_tissue_mask2.npy") +MASK4 = os.path.join(os.path.dirname(__file__), "testing_data", "temp_tissue_mask4.npy") HEIGHT = 32914 WIDTH = 46000 @@ -47,7 +60,7 @@ def prepare_data(): [ { "image": np.array([[[243]], [[243]], [[243]]], dtype=np.uint8), - "name": "CMU-1", + "name": FILE_NAME, "mask_location": [100, 100], }, ], @@ -62,12 +75,12 @@ def prepare_data(): [ { "image": np.array([[[243]], [[243]], [[243]]], dtype=np.uint8), - "name": "CMU-1", + "name": FILE_NAME, "mask_location": [100, 100], }, { "image": np.array([[[243]], [[243]], [[243]]], dtype=np.uint8), - "name": "CMU-1", + "name": FILE_NAME, "mask_location": [101, 100], }, ], @@ -82,22 +95,22 @@ def prepare_data(): [ { "image": np.array([[[243]], [[243]], [[243]]], dtype=np.uint8), - "name": "CMU-1", + "name": FILE_NAME, "mask_location": [100, 100], }, { "image": np.array([[[243]], [[243]], [[243]]], dtype=np.uint8), - "name": "CMU-1", + "name": FILE_NAME, "mask_location": [100, 101], }, { "image": np.array([[[243]], [[243]], [[243]]], dtype=np.uint8), - "name": "CMU-1", + "name": FILE_NAME, 
"mask_location": [101, 100], }, { "image": np.array([[[243]], [[243]], [[243]]], dtype=np.uint8), - "name": "CMU-1", + "name": FILE_NAME, "mask_location": [101, 101], }, ], @@ -121,7 +134,7 @@ def prepare_data(): ], dtype=np.uint8, ), - "name": "CMU-1", + "name": FILE_NAME, "mask_location": [100, 100], }, ], @@ -139,17 +152,17 @@ def prepare_data(): [ { "image": np.array([[[243]], [[243]], [[243]]], dtype=np.uint8), - "name": "CMU-1", + "name": FILE_NAME, "mask_location": [100, 100], }, { "image": np.array([[[243]], [[243]], [[243]]], dtype=np.uint8), - "name": "CMU-1", + "name": FILE_NAME, "mask_location": [100, 100], }, { "image": np.array([[[243]], [[243]], [[243]]], dtype=np.uint8), - "name": "CMU-1", + "name": FILE_NAME, "mask_location": [101, 100], }, ], @@ -167,7 +180,7 @@ def prepare_data(): [ { "image": np.array([[[243]], [[243]], [[243]]], dtype=np.uint8), - "name": "CMU-1", + "name": FILE_NAME, "mask_location": [100, 100], }, ], @@ -182,12 +195,12 @@ def prepare_data(): [ { "image": np.array([[[243]], [[243]], [[243]]], dtype=np.uint8), - "name": "CMU-1", + "name": FILE_NAME, "mask_location": [100, 100], }, { "image": np.array([[[243]], [[243]], [[243]]], dtype=np.uint8), - "name": "CMU-1", + "name": FILE_NAME, "mask_location": [101, 100], }, ], diff --git a/tests/test_nifti_endianness.py b/tests/test_nifti_endianness.py index b725e2462c..57f26d2247 100644 --- a/tests/test_nifti_endianness.py +++ b/tests/test_nifti_endianness.py @@ -1,3 +1,14 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + import os import tempfile import unittest diff --git a/tests/test_openslide_reader.py b/tests/test_openslide_reader.py index ca50cec4de..c0b395fd02 100644 --- a/tests/test_openslide_reader.py +++ b/tests/test_openslide_reader.py @@ -1,3 +1,14 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os import unittest from unittest import skipUnless @@ -14,7 +25,7 @@ FILE_URL = "http://openslide.cs.cmu.edu/download/openslide-testdata/Generic-TIFF/CMU-1.tiff" -FILE_PATH = os.path.join(os.path.dirname(__file__), "testing_data", os.path.basename(FILE_URL)) +FILE_PATH = os.path.join(os.path.dirname(__file__), "testing_data", "temp_" + os.path.basename(FILE_URL)) HEIGHT = 32914 WIDTH = 46000 diff --git a/tests/test_patch_wsi_dataset.py b/tests/test_patch_wsi_dataset.py index d030671d06..7c34997872 100644 --- a/tests/test_patch_wsi_dataset.py +++ b/tests/test_patch_wsi_dataset.py @@ -1,3 +1,14 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os import unittest from unittest import skipUnless @@ -14,7 +25,7 @@ _, has_osl = optional_import("openslide") FILE_URL = "http://openslide.cs.cmu.edu/download/openslide-testdata/Generic-TIFF/CMU-1.tiff" -FILE_PATH = os.path.join(os.path.dirname(__file__), "testing_data", os.path.basename(FILE_URL)) +FILE_PATH = os.path.join(os.path.dirname(__file__), "testing_data", "temp_" + os.path.basename(FILE_URL)) TEST_CASE_0 = [ { diff --git a/tests/test_pathology_transforms.py b/tests/test_pathology_transforms.py new file mode 100644 index 0000000000..eabcc3d9e1 --- /dev/null +++ b/tests/test_pathology_transforms.py @@ -0,0 +1,74 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest +from typing import TYPE_CHECKING + +from parameterized import parameterized + +from monai.apps.pathology.transforms import ExtractStainsMacenko, NormalizeStainsMacenko +from monai.utils import exact_version, optional_import + +if TYPE_CHECKING: + import cupy as cp +else: + cp, _ = optional_import("cupy", "8.6.0", exact_version) + +# input pixels are all transparent and below the beta absorbance threshold +EXTRACT_STAINS_TEST_CASE_1 = [ + cp.zeros((3, 2, 3)), + cp.array([[0.0, 0.0], [0.70710678, 0.70710678], [0.70710678, 0.70710678]]), +] + +# input pixels are all the same, but above beta absorbance threshold +EXTRACT_STAINS_TEST_CASE_2 = [ + cp.full((3, 2, 3), 200), + cp.array([[0.57735027, 0.57735027], [0.57735027, 0.57735027], [0.57735027, 0.57735027]]), +] + +# input pixels are all transparent and below the beta absorbance threshold +NORMALIZE_STAINS_TEST_CASE_1 = [ + {}, + cp.zeros((3, 2, 3)), + cp.array([[[63, 25, 60], [63, 25, 60]], [[63, 25, 60], [63, 25, 60]], [[63, 25, 60], [63, 25, 60]]]), +] + +# input pixels are all the same, but above beta absorbance threshold +NORMALIZE_STAINS_TEST_CASE_2 = [ + {}, + cp.full((3, 2, 3), 200), + cp.array([[[63, 25, 60], [63, 25, 60]], [[63, 25, 60], [63, 25, 60]], [[63, 25, 60], [63, 25, 60]]]), +] + +# with a custom target_he, which is the same as the image's stain matrix +NORMALIZE_STAINS_TEST_CASE_3 = [ + {"target_he": cp.full((3, 2), 0.57735027)}, + cp.full((3, 2, 3), 200), + cp.full((3, 2, 3), 42), +] + + +class TestExtractStainsMacenko(unittest.TestCase): + @parameterized.expand([EXTRACT_STAINS_TEST_CASE_1, EXTRACT_STAINS_TEST_CASE_2]) + def test_value(self, image, expected_data): + result = ExtractStainsMacenko()(image) + cp.testing.assert_allclose(result, expected_data) + + +class TestNormalizeStainsMacenko(unittest.TestCase): + @parameterized.expand([NORMALIZE_STAINS_TEST_CASE_1, NORMALIZE_STAINS_TEST_CASE_2, NORMALIZE_STAINS_TEST_CASE_3]) + def test_value(self, argments, image, expected_data): + result = 
NormalizeStainsMacenko(**argments)(image) + cp.testing.assert_allclose(result, expected_data) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_prob_nms.py b/tests/test_probnms.py similarity index 98% rename from tests/test_prob_nms.py rename to tests/test_probnms.py index fb88d9cfb4..e51d1017d8 100644 --- a/tests/test_prob_nms.py +++ b/tests/test_probnms.py @@ -15,7 +15,7 @@ import torch from parameterized import parameterized -from monai.utils import ProbNMS +from monai.transforms.post.array import ProbNMS probs_map_1 = np.random.rand(100, 100).clip(0, 0.5) TEST_CASES_2D_1 = [{"spatial_dims": 2, "prob_threshold": 0.5, "box_size": 10}, probs_map_1, []] diff --git a/tests/test_probnmsd.py b/tests/test_probnmsd.py new file mode 100644 index 0000000000..5b75d4310f --- /dev/null +++ b/tests/test_probnmsd.py @@ -0,0 +1,103 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + +import numpy as np +import torch +from parameterized import parameterized + +from monai.transforms.post.dictionary import ProbNMSD + +probs_map_1 = np.random.rand(100, 100).clip(0, 0.5) +TEST_CASES_2D_1 = [{"spatial_dims": 2, "prob_threshold": 0.5, "box_size": 10}, {"prob_map": probs_map_1}, []] + +probs_map_2 = np.random.rand(100, 100).clip(0, 0.5) +probs_map_2[33, 33] = 0.7 +probs_map_2[66, 66] = 0.9 +expected_2 = [[0.9, 66, 66], [0.7, 33, 33]] +TEST_CASES_2D_2 = [ + {"spatial_dims": 2, "prob_threshold": 0.5, "box_size": [10, 10]}, + {"prob_map": probs_map_2}, + expected_2, +] + +probs_map_3 = np.random.rand(100, 100).clip(0, 0.5) +probs_map_3[56, 58] = 0.7 +probs_map_3[60, 66] = 0.8 +probs_map_3[66, 66] = 0.9 +expected_3 = [[0.9, 66, 66], [0.8, 60, 66]] +TEST_CASES_2D_3 = [ + {"spatial_dims": 2, "prob_threshold": 0.5, "box_size": (10, 20)}, + {"prob_map": probs_map_3}, + expected_3, +] + +probs_map_4 = np.random.rand(100, 100).clip(0, 0.5) +probs_map_4[33, 33] = 0.7 +probs_map_4[66, 66] = 0.9 +expected_4 = [[0.9, 66, 66]] +TEST_CASES_2D_4 = [ + {"spatial_dims": 2, "prob_threshold": 0.8, "box_size": 10}, + {"prob_map": probs_map_4}, + expected_4, +] + +probs_map_5 = np.random.rand(100, 100).clip(0, 0.5) +TEST_CASES_2D_5 = [{"spatial_dims": 2, "prob_threshold": 0.5, "sigma": 0.1}, {"prob_map": probs_map_5}, []] + +probs_map_6 = torch.as_tensor(np.random.rand(100, 100).clip(0, 0.5)) +TEST_CASES_2D_6 = [{"spatial_dims": 2, "prob_threshold": 0.5, "sigma": 0.1}, {"prob_map": probs_map_6}, []] + +probs_map_7 = torch.as_tensor(np.random.rand(100, 100).clip(0, 0.5)) +probs_map_7[33, 33] = 0.7 +probs_map_7[66, 66] = 0.9 +if torch.cuda.is_available(): + probs_map_7 = probs_map_7.cuda() +expected_7 = [[0.9, 66, 66], [0.7, 33, 33]] +TEST_CASES_2D_7 = [ + {"spatial_dims": 2, "prob_threshold": 0.5, "sigma": 0.1}, + {"prob_map": probs_map_7}, + expected_7, +] + +probs_map_3d = torch.rand([50, 50, 50]).uniform_(0, 0.5) +probs_map_3d[25, 25, 25] = 0.7 
+probs_map_3d[45, 45, 45] = 0.9 +expected_3d = [[0.9, 45, 45, 45], [0.7, 25, 25, 25]] +TEST_CASES_3D = [ + {"spatial_dims": 3, "prob_threshold": 0.5, "box_size": (10, 10, 10)}, + {"prob_map": probs_map_3d}, + expected_3d, +] + + +class TestProbNMS(unittest.TestCase): + @parameterized.expand( + [ + TEST_CASES_2D_1, + TEST_CASES_2D_2, + TEST_CASES_2D_3, + TEST_CASES_2D_4, + TEST_CASES_2D_5, + TEST_CASES_2D_6, + TEST_CASES_2D_7, + TEST_CASES_3D, + ] + ) + def test_output(self, class_args, probs_map, expected): + nms = ProbNMSD(keys="prob_map", **class_args) + output = nms(probs_map) + np.testing.assert_allclose(output["prob_map"], expected) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_rand_crop_by_pos_neg_labeld.py b/tests/test_rand_crop_by_pos_neg_labeld.py index 06e63c14e8..d52ba900ac 100644 --- a/tests/test_rand_crop_by_pos_neg_labeld.py +++ b/tests/test_rand_crop_by_pos_neg_labeld.py @@ -91,6 +91,8 @@ def test_type_shape(self, input_param, input_data, expected_type, expected_shape self.assertTupleEqual(result[0]["image"].shape, expected_shape) self.assertTupleEqual(result[0]["extral"].shape, expected_shape) self.assertTupleEqual(result[0]["label"].shape, expected_shape) + for i, item in enumerate(result): + self.assertEqual(item["image_meta_dict"]["patch_index"], i) if __name__ == "__main__": diff --git a/tests/test_rand_lambdad.py b/tests/test_rand_lambdad.py index 2ddfeefae0..a450b67413 100644 --- a/tests/test_rand_lambdad.py +++ b/tests/test_rand_lambdad.py @@ -13,11 +13,11 @@ import numpy as np -from monai.transforms.transform import RandomizableTransform +from monai.transforms.transform import Randomizable from monai.transforms.utility.dictionary import RandLambdad -class RandTest(RandomizableTransform): +class RandTest(Randomizable): """ randomisable transform for testing. 
""" diff --git a/tests/test_rand_spatial_crop_samplesd.py b/tests/test_rand_spatial_crop_samplesd.py index afd7ab602c..09688f44b7 100644 --- a/tests/test_rand_spatial_crop_samplesd.py +++ b/tests/test_rand_spatial_crop_samplesd.py @@ -70,6 +70,9 @@ def test_shape(self, input_param, input_data, expected_shape, expected_last): for item, expected in zip(result, expected_shape): self.assertTupleEqual(item["img"].shape, expected) self.assertTupleEqual(item["seg"].shape, expected) + for i, item in enumerate(result): + self.assertEqual(item["img_meta_dict"]["patch_index"], i) + self.assertEqual(item["seg_meta_dict"]["patch_index"], i) np.testing.assert_allclose(item["img"], expected_last["img"]) np.testing.assert_allclose(item["seg"], expected_last["seg"]) diff --git a/tests/test_save_imaged.py b/tests/test_save_imaged.py index a6ebfe0d8d..d6536b3d51 100644 --- a/tests/test_save_imaged.py +++ b/tests/test_save_imaged.py @@ -87,9 +87,20 @@ False, ] +TEST_CASE_6 = [ + { + "img": torch.randint(0, 255, (1, 2, 3, 4)), + "img_meta_dict": {"filename_or_obj": "testfile0.nii.gz"}, + "patch_index": 6, + }, + ".nii.gz", + False, + False, +] + class TestSaveImaged(unittest.TestCase): - @parameterized.expand([TEST_CASE_0, TEST_CASE_1, TEST_CASE_2, TEST_CASE_3, TEST_CASE_4, TEST_CASE_5]) + @parameterized.expand([TEST_CASE_0, TEST_CASE_1, TEST_CASE_2, TEST_CASE_3, TEST_CASE_4, TEST_CASE_5, TEST_CASE_6]) def test_saved_content(self, test_data, output_ext, resample, save_batch): with tempfile.TemporaryDirectory() as tempdir: trans = SaveImaged( @@ -106,7 +117,9 @@ def test_saved_content(self, test_data, output_ext, resample, save_batch): filepath = os.path.join("testfile" + str(i), "testfile" + str(i) + "_trans" + output_ext) self.assertTrue(os.path.exists(os.path.join(tempdir, filepath))) else: - filepath = os.path.join("testfile0", "testfile0" + "_trans" + output_ext) + patch_index = test_data["img_meta_dict"].get("patch_index", None) + patch_index = f"_{patch_index}" if patch_index is 
not None else "" + filepath = os.path.join("testfile0", "testfile0" + "_trans" + patch_index + output_ext) self.assertTrue(os.path.exists(os.path.join(tempdir, filepath))) diff --git a/tests/test_smartcache_patch_wsi_dataset.py b/tests/test_smartcache_patch_wsi_dataset.py index a7c90b5205..876a30a3b8 100644 --- a/tests/test_smartcache_patch_wsi_dataset.py +++ b/tests/test_smartcache_patch_wsi_dataset.py @@ -1,3 +1,14 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os import unittest from unittest import skipUnless @@ -13,7 +24,7 @@ _, has_cim = optional_import("cucim") FILE_URL = "http://openslide.cs.cmu.edu/download/openslide-testdata/Generic-TIFF/CMU-1.tiff" -FILE_PATH = os.path.join(os.path.dirname(__file__), "testing_data", os.path.basename(FILE_URL)) +FILE_PATH = os.path.join(os.path.dirname(__file__), "testing_data", "temp_" + os.path.basename(FILE_URL)) TEST_CASE_0 = [ { diff --git a/tests/test_warp.py b/tests/test_warp.py index 4ed1562b29..c6c79a369a 100644 --- a/tests/test_warp.py +++ b/tests/test_warp.py @@ -1,3 +1,14 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import unittest import numpy as np @@ -8,6 +19,7 @@ from monai.config.deviceconfig import USE_COMPILED from monai.networks.blocks.warp import Warp from monai.utils import GridSampleMode, GridSamplePadMode +from tests.utils import SkipIfBeforePyTorchVersion LOW_POWER_TEST_CASES = [ # run with BUILD_MONAI=1 to test csrc/resample, BUILD_MONAI=0 to test native grid_sample [ @@ -103,6 +115,7 @@ def test_ill_shape(self): with self.assertRaisesRegex(ValueError, ""): warp_layer(image=torch.arange(4).reshape((1, 1, 2, 2)).to(dtype=torch.float), ddf=torch.zeros(1, 2, 3, 3)) + @SkipIfBeforePyTorchVersion((1, 8)) def test_grad(self): for b in GridSampleMode: for p in GridSamplePadMode: diff --git a/tests/testing_data/kitty_test.jpg b/tests/testing_data/kitty_test.jpg new file mode 100644 index 0000000000..f103760de5 Binary files /dev/null and b/tests/testing_data/kitty_test.jpg differ diff --git a/tests/utils.py b/tests/utils.py index 20f94cd1eb..5fa67f3e49 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -569,13 +569,14 @@ def query_memory(n=2): """ Find best n idle devices and return a string of device ids. 
""" - bash_string = "nvidia-smi --query-gpu=utilization.gpu,power.draw,memory.used --format=csv,noheader,nounits" + bash_string = "nvidia-smi --query-gpu=power.draw,temperature.gpu,memory.used --format=csv,noheader,nounits" try: p1 = Popen(bash_string.split(), stdout=PIPE) output, error = p1.communicate() free_memory = [x.split(",") for x in output.decode("utf-8").split("\n")[:-1]] free_memory = np.asarray(free_memory, dtype=float).T + free_memory[1] += free_memory[0] # combine 0/1 column measures ids = np.lexsort(free_memory)[:n] except (FileNotFoundError, TypeError, IndexError): ids = range(n) if isinstance(n, int) else [] diff --git a/versioneer.py b/versioneer.py index 441b3d4c2d..9112ac66a5 100644 --- a/versioneer.py +++ b/versioneer.py @@ -1,4 +1,4 @@ -# Version: 0.18 +# Version: 0.19 """The Versioneer - like a rocketeer, but for versions. @@ -6,16 +6,12 @@ ============== * like a rocketeer, but for versions! -* https://github.com/warner/python-versioneer +* https://github.com/python-versioneer/python-versioneer * Brian Warner * License: Public Domain -* Compatible With: python2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6, and pypy -* [![Latest Version] -(https://pypip.in/version/versioneer/badge.svg?style=flat) -](https://pypi.python.org/pypi/versioneer/) -* [![Build Status] -(https://travis-ci.org/warner/python-versioneer.png?branch=master) -](https://travis-ci.org/warner/python-versioneer) +* Compatible with: Python 3.6, 3.7, 3.8, 3.9 and pypy3 +* [![Latest Version][pypi-image]][pypi-url] +* [![Build Status][travis-image]][travis-url] This is a tool for managing a recorded version number in distutils-based python projects. 
The goal is to remove the tedious and error-prone "update @@ -26,9 +22,10 @@ ## Quick Install -* `pip install versioneer` to somewhere to your $PATH -* add a `[versioneer]` section to your setup.cfg (see below) +* `pip install versioneer` to somewhere in your $PATH +* add a `[versioneer]` section to your setup.cfg (see [Install](INSTALL.md)) * run `versioneer install` in your source tree, commit the results +* Verify version information with `python setup.py version` ## Version Identifiers @@ -60,7 +57,7 @@ for example `git describe --tags --dirty --always` reports things like "0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the 0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has -uncommitted changes. +uncommitted changes). The version identifier is used for multiple purposes: @@ -165,7 +162,7 @@ Some situations are known to cause problems for Versioneer. This details the most significant ones. More can be found on Github -[issues page](https://github.com/warner/python-versioneer/issues). +[issues page](https://github.com/python-versioneer/python-versioneer/issues). ### Subprojects @@ -193,9 +190,9 @@ Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in some later version. -[Bug #38](https://github.com/warner/python-versioneer/issues/38) is tracking +[Bug #38](https://github.com/python-versioneer/python-versioneer/issues/38) is tracking this issue. The discussion in -[PR #61](https://github.com/warner/python-versioneer/pull/61) describes the +[PR #61](https://github.com/python-versioneer/python-versioneer/pull/61) describes the issue from the Versioneer side in more detail. 
[pip PR#3176](https://github.com/pypa/pip/pull/3176) and [pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve @@ -223,22 +220,10 @@ cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into a different virtualenv), so this can be surprising. -[Bug #83](https://github.com/warner/python-versioneer/issues/83) describes +[Bug #83](https://github.com/python-versioneer/python-versioneer/issues/83) describes this one, but upgrading to a newer version of setuptools should probably resolve it. -### Unicode version strings - -While Versioneer works (and is continually tested) with both Python 2 and -Python 3, it is not entirely consistent with bytes-vs-unicode distinctions. -Newer releases probably generate unicode version strings on py2. It's not -clear that this is wrong, but it may be surprising for applications when then -write these strings to a network connection or include them in bytes-oriented -APIs like cryptographic checksums. - -[Bug #71](https://github.com/warner/python-versioneer/issues/71) investigates -this question. - ## Updating Versioneer @@ -264,6 +249,12 @@ direction and include code from all supported VCS systems, reducing the number of intermediate scripts. +## Similar projects + +* [setuptools_scm](https://github.com/pypa/setuptools_scm/) - a non-vendored build-time + dependency +* [minver](https://github.com/jbweston/miniver) - a lightweight reimplementation of + versioneer ## License @@ -273,14 +264,15 @@ Dedication" license (CC0-1.0), as described in https://creativecommons.org/publicdomain/zero/1.0/ . 
-""" +[pypi-image]: https://img.shields.io/pypi/v/versioneer.svg +[pypi-url]: https://pypi.python.org/pypi/versioneer/ +[travis-image]: +https://img.shields.io/travis/com/python-versioneer/python-versioneer.svg +[travis-url]: https://travis-ci.com/github/python-versioneer/python-versioneer -from __future__ import print_function +""" -try: - import configparser -except ImportError: - import ConfigParser as configparser +import configparser import errno import json import os @@ -340,9 +332,9 @@ def get_config_from_root(root): # configparser.NoOptionError (if it lacks "VCS="). See the docstring at # the top of versioneer.py for instructions on writing your setup.cfg . setup_cfg = os.path.join(root, "setup.cfg") - parser = configparser.SafeConfigParser() + parser = configparser.ConfigParser() with open(setup_cfg, "r") as f: - parser.readfp(f) + parser.read_file(f) VCS = parser.get("versioneer", "VCS") # mandatory def get(parser, name): @@ -373,7 +365,7 @@ class NotThisMethod(Exception): def register_vcs_handler(vcs, method): # decorator - """Decorator to mark a method as the handler for a particular VCS.""" + """Create decorator to mark a method as the handler of a VCS.""" def decorate(f): """Store f in HANDLERS[vcs][method].""" @@ -409,9 +401,7 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env= if verbose: print("unable to find command, tried %s" % (commands,)) return None, None - stdout = p.communicate()[0].strip() - if sys.version_info[0] >= 3: - stdout = stdout.decode() + stdout = p.communicate()[0].strip().decode() if p.returncode != 0: if verbose: print("unable to run %s (error)" % dispcmd) @@ -422,7 +412,7 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env= LONG_VERSION_PY[ "git" -] = ''' +] = r''' # This file helps to compute a version number in source trees obtained from # git-archive tarball (such as those provided by githubs download-from-tag # feature). 
Distribution tarballs (built by setup.py sdist) and build @@ -430,7 +420,7 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env= # that just contains the computed version number. # This file is released into the public domain. Generated by -# versioneer-0.18 (https://github.com/warner/python-versioneer) +# versioneer-0.19 (https://github.com/python-versioneer/python-versioneer) """Git implementation of _version.py.""" @@ -481,7 +471,7 @@ class NotThisMethod(Exception): def register_vcs_handler(vcs, method): # decorator - """Decorator to mark a method as the handler for a particular VCS.""" + """Create decorator to mark a method as the handler of a VCS.""" def decorate(f): """Store f in HANDLERS[vcs][method].""" if vcs not in HANDLERS: @@ -517,9 +507,7 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, if verbose: print("unable to find command, tried %%s" %% (commands,)) return None, None - stdout = p.communicate()[0].strip() - if sys.version_info[0] >= 3: - stdout = stdout.decode() + stdout = p.communicate()[0].strip().decode() if p.returncode != 0: if verbose: print("unable to run %%s (error)" %% dispcmd) @@ -589,6 +577,10 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): raise NotThisMethod("no keywords at all, weird") date = keywords.get("date") if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 # -like" string, which we must then edit to make compliant), because @@ -724,6 +716,9 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): # commit date: see ISO-8601 comment in git_versions_from_keywords() date = run_command(GITS, ["show", "-s", "--format=%%ci", "HEAD"], cwd=root)[0].strip() + # Use only the last line. 
Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) return pieces @@ -762,18 +757,18 @@ def render_pep440(pieces): def render_pep440_pre(pieces): - """TAG[.post.devDISTANCE] -- No -dirty. + """TAG[.post0.devDISTANCE] -- No -dirty. Exceptions: - 1: no tags. 0.post.devDISTANCE + 1: no tags. 0.post0.devDISTANCE """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"]: - rendered += ".post.dev%%d" %% pieces["distance"] + rendered += ".post0.dev%%d" %% pieces["distance"] else: # exception #1 - rendered = "0.post.dev%%d" %% pieces["distance"] + rendered = "0.post0.dev%%d" %% pieces["distance"] return rendered @@ -981,6 +976,10 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): raise NotThisMethod("no keywords at all, weird") date = keywords.get("date") if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 # -like" string, which we must then edit to make compliant), because @@ -1117,6 +1116,9 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): # commit date: see ISO-8601 comment in git_versions_from_keywords() date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. 
+ date = date.splitlines()[-1] pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) return pieces @@ -1189,7 +1191,7 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): SHORT_VERSION_PY = """ -# This file was generated by 'versioneer.py' (0.18) from +# This file was generated by 'versioneer.py' (0.19) from # revision-control system data, or from the parent directory name of an # unpacked source archive. Distribution tarballs contain a pre-generated copy # of this file. @@ -1263,18 +1265,18 @@ def render_pep440(pieces): def render_pep440_pre(pieces): - """TAG[.post.devDISTANCE] -- No -dirty. + """TAG[.post0.devDISTANCE] -- No -dirty. Exceptions: - 1: no tags. 0.post.devDISTANCE + 1: no tags. 0.post0.devDISTANCE """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"]: - rendered += ".post.dev%d" % pieces["distance"] + rendered += ".post0.dev%d" % pieces["distance"] else: # exception #1 - rendered = "0.post.dev%d" % pieces["distance"] + rendered = "0.post0.dev%d" % pieces["distance"] return rendered @@ -1310,7 +1312,7 @@ def render_pep440_old(pieces): The ".dev0" means dirty. - Eexceptions: + Exceptions: 1: no tags. 0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: @@ -1493,8 +1495,12 @@ def get_version(): return get_versions()["version"] -def get_cmdclass(): - """Get the custom setuptools/distutils subclasses used by Versioneer.""" +def get_cmdclass(cmdclass=None): + """Get the custom setuptools/distutils subclasses used by Versioneer. + + If the package uses a different cmdclass (e.g. one from numpy), it + should be provide as an argument. + """ if "versioneer" in sys.modules: del sys.modules["versioneer"] # this fixes the "python setup.py develop" case (also 'install' and @@ -1508,9 +1514,9 @@ def get_cmdclass(): # parent is protected against the child's "import versioneer". 
By # removing ourselves from sys.modules here, before the child build # happens, we protect the child from the parent's versioneer too. - # Also see https://github.com/warner/python-versioneer/issues/52 + # Also see https://github.com/python-versioneer/python-versioneer/issues/52 - cmds = {} + cmds = {} if cmdclass is None else cmdclass.copy() # we add "version" to both distutils and setuptools from distutils.core import Command @@ -1553,7 +1559,9 @@ def run(self): # setup.py egg_info -> ? # we override different "build_py" commands for both environments - if "setuptools" in sys.modules: + if "build_py" in cmds: + _build_py = cmds["build_py"] + elif "setuptools" in sys.modules: from setuptools.command.build_py import build_py as _build_py else: from distutils.command.build_py import build_py as _build_py @@ -1573,6 +1581,31 @@ def run(self): cmds["build_py"] = cmd_build_py + if "setuptools" in sys.modules: + from setuptools.command.build_ext import build_ext as _build_ext + else: + from distutils.command.build_ext import build_ext as _build_ext + + class cmd_build_ext(_build_ext): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + _build_ext.run(self) + if self.inplace: + # build_ext --inplace will only build extensions in + # build/lib<..> dir with no _version.py to write to. + # As in place builds will already have a _version.py + # in the module dir, we do not need to write one. + return + # now locate _version.py in the new build/ directory and replace + # it with an updated value + target_versionfile = os.path.join(self.build_lib, cfg.versionfile_source) + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + + cmds["build_ext"] = cmd_build_ext + if "cx_Freeze" in sys.modules: # cx_freeze enabled? 
from cx_Freeze.dist import build_exe as _build_exe @@ -1611,10 +1644,7 @@ def run(self): del cmds["build_py"] if "py2exe" in sys.modules: # py2exe enabled? - try: - from py2exe.distutils_buildexe import py2exe as _py2exe # py3 - except ImportError: - from py2exe.build_exe import py2exe as _py2exe # py2 + from py2exe.distutils_buildexe import py2exe as _py2exe class cmd_py2exe(_py2exe): def run(self): @@ -1643,7 +1673,9 @@ def run(self): cmds["py2exe"] = cmd_py2exe # we override different "sdist" commands for both environments - if "setuptools" in sys.modules: + if "sdist" in cmds: + _sdist = cmds["sdist"] + elif "setuptools" in sys.modules: from setuptools.command.sdist import sdist as _sdist else: from distutils.command.sdist import sdist as _sdist @@ -1718,7 +1750,7 @@ def make_release_tree(self, base_dir, files): def do_setup(): - """Main VCS-independent setup function for installing Versioneer.""" + """Do main VCS-independent setup function for installing Versioneer.""" root = get_root() try: cfg = get_config_from_root(root)