From 0beaf6904700692f5ae959fd0ec0d4675396e4aa Mon Sep 17 00:00:00 2001
From: Yinlei Sun
Date: Wed, 7 Jun 2023 15:30:24 +0800
Subject: [PATCH] [Enhance] Enable full precision training on Ascend NPU.
 (#3085)

## Motivation

We will support full precision training on the next-generation Ascend NPU, so
there is no need to enable mixed precision by default.

## Modification

Determine whether the current chip supports full precision training, and
enable mixed precision automatically only when it does not.

## BC-breaking (Optional)

Not affected.

## Use cases (Optional)

We have verified the correctness on the Ascend NPU.
---
 mmseg/apis/train.py              | 4 ++--
 mmseg/utils/__init__.py          | 6 ++++--
 mmseg/utils/util_distribution.py | 9 +++++++++
 3 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/mmseg/apis/train.py b/mmseg/apis/train.py
index 1ada647288..d823412dbc 100644
--- a/mmseg/apis/train.py
+++ b/mmseg/apis/train.py
@@ -15,7 +15,7 @@
 from mmseg.core import DistEvalHook, EvalHook, build_optimizer
 from mmseg.datasets import build_dataloader, build_dataset
 from mmseg.utils import (build_ddp, build_dp, find_latest_checkpoint,
-                         get_root_logger)
+                         get_root_logger, is_npu_support_full_precision)
 
 
 def init_random_seed(seed=None, device='cuda'):
@@ -136,7 +136,7 @@ def train_segmentor(model,
             logger=logger,
             meta=meta))
 
-    if cfg.device == 'npu':
+    if cfg.device == 'npu' and not is_npu_support_full_precision():
         optimiter_config = dict(type='Fp16OptimizerHook', loss_scale='dynamic')
         cfg.optimizer_config = optimiter_config if \
             not cfg.optimizer_config else cfg.optimizer_config
diff --git a/mmseg/utils/__init__.py b/mmseg/utils/__init__.py
index e3ef4b355c..5322ee90fc 100644
--- a/mmseg/utils/__init__.py
+++ b/mmseg/utils/__init__.py
@@ -3,9 +3,11 @@
 from .logger import get_root_logger
 from .misc import find_latest_checkpoint
 from .set_env import setup_multi_processes
-from .util_distribution import build_ddp, build_dp, get_device
+from .util_distribution import (build_ddp, build_dp, get_device,
+                                is_npu_support_full_precision)
 
 __all__ = [
     'get_root_logger', 'collect_env', 'find_latest_checkpoint',
-    'setup_multi_processes', 'build_ddp', 'build_dp', 'get_device'
+    'setup_multi_processes', 'build_ddp', 'build_dp', 'get_device',
+    'is_npu_support_full_precision'
 ]
diff --git a/mmseg/utils/util_distribution.py b/mmseg/utils/util_distribution.py
index a99939ed91..8d9fd77169 100644
--- a/mmseg/utils/util_distribution.py
+++ b/mmseg/utils/util_distribution.py
@@ -94,6 +94,15 @@ def is_npu_available():
     return hasattr(torch, 'npu') and torch.npu.is_available()
 
 
+def is_npu_support_full_precision() -> bool:
+    """Returns True if npu devices support full precision training."""
+    if not is_npu_available():
+        return False
+    import torch_npu.npu.utils as npu_utils
+    version_of_support_full_precision = 220
+    return npu_utils.get_soc_version() >= version_of_support_full_precision
+
+
 def get_device():
     """Returns an available device, cpu, npu, cuda or mlu."""
     is_device_available = {