From 1767fee69881ab53a071d5e8fd07cafc00a3f7a0 Mon Sep 17 00:00:00 2001
From: wejoncy
Date: Tue, 10 Sep 2024 17:03:22 +0800
Subject: [PATCH 1/6] support coreml fp16

---
 .../coreml/builders/impl/base_op_builder.cc   | 24 +++++++++++++++----
 .../coreml/builders/impl/base_op_builder.h    |  2 +-
 .../coreml/builders/impl/binary_op_builder.cc |  4 ++--
 .../coreml/builders/impl/builder_utils.cc     | 16 +++++++++++++
 .../coreml/builders/impl/builder_utils.h      |  3 +++
 .../coreml/builders/model_builder.cc          |  3 +++
 .../core/providers/coreml/model/model.mm      | 16 +++++++++++++
 7 files changed, 60 insertions(+), 8 deletions(-)

diff --git a/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc
index 2cae85a0a1c8..9de6e2c20c97 100644
--- a/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc
+++ b/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc
@@ -1,6 +1,7 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.

+#include <set>
 #include "core/providers/common.h"
 #include "core/providers/coreml/builders/helper.h"
 #include "core/providers/coreml/builders/impl/base_op_builder.h"
@@ -12,6 +13,10 @@ using namespace CoreML::Specification;
 namespace onnxruntime {
 namespace coreml {

+static std::set<std::string> Float16Ops = {
+    "Add",
+};
+
 namespace {
 // TODO, move this to shared_library
 bool HasExternalInitializer(const InitializedTensorSet& initializers, const Node& node,
@@ -83,7 +88,7 @@ bool BaseOpBuilder::HasSupportedInputs(const Node& node, const OpBuilderInputPar
 }

 /* static */
-bool BaseOpBuilder::IsInputFloat(const Node& node, size_t idx, const OpBuilderInputParams& /*input_params*/,
+bool BaseOpBuilder::IsInputDtypeSupport(const Node& node, size_t idx, const OpBuilderInputParams& /*input_params*/,
                                  const logging::Logger& logger) {
   if (idx >= node.InputDefs().size()) {
     LOGS(logger, VERBOSE) << "Input index [" << idx << "] is out of range";
@@ -94,12 +99,21 @@ bool BaseOpBuilder::IsInputFloat(const Node& node, size_t idx, const OpBuilderIn

   int32_t input_type = ONNX_NAMESPACE::TensorProto_DataType_UNDEFINED;

-  // currently only float is supported
-  if (!GetType(input, input_type, logger) || input_type != ONNX_NAMESPACE::TensorProto_DataType_FLOAT) {
-    LOGS(logger, VERBOSE) << "[" << node.OpType() << "] Input type: [" << input_type << "] is not currently supported";
+  if (!GetType(input, input_type, logger)) {
+    LOGS(logger, VERBOSE) << "[" << node.OpType() << "] Get Input type failed";
     return false;
   }

+  // float is supported
+  if (input_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT){
+    return true;
+  }
+
+  if (input_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT16 && Float16Ops.count(node.OpType())) {
+    return true;
+  }
+
+  LOGS(logger, VERBOSE) << "[" << node.OpType() << "] Input type: [" << input_type << "] is not currently supported";
   return true;
 }

@@ -107,7 +121,7 @@ bool BaseOpBuilder::HasSupportedInputsImpl(const Node& node, const OpBuilderInpu
                                            const logging::Logger& logger) const {
   // We only check the type of input 0 by default
   // specific op builder can override this
-  return IsInputFloat(node, 0, input_params, logger);
+  return IsInputDtypeSupport(node, 0, input_params, logger);
 }

 bool BaseOpBuilder::HasSupportedOpSet(const Node& node, const logging::Logger& logger) const {
diff --git a/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.h b/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.h
index 071008520fbd..6bd3c43f373c 100644
--- a/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.h
+++ b/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.h
@@ -33,7 +33,7 @@ class BaseOpBuilder : public IOpBuilder {
   }

   // currently we only support float
-  static bool IsInputFloat(const Node& node, size_t idx, const OpBuilderInputParams& input_params,
+  static bool IsInputDtypeSupport(const Node& node, size_t idx, const OpBuilderInputParams& input_params,
                            const logging::Logger& logger);

 private:
diff --git a/onnxruntime/core/providers/coreml/builders/impl/binary_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/binary_op_builder.cc
index fb8e07633621..3ecea9c3770f 100644
--- a/onnxruntime/core/providers/coreml/builders/impl/binary_op_builder.cc
+++ b/onnxruntime/core/providers/coreml/builders/impl/binary_op_builder.cc
@@ -139,8 +139,8 @@ bool BinaryOpBuilder::HasSupportedInputsImpl(const Node& node, const OpBuilderIn
   // Add/Sub/Mul/Div spec says inputs must be of the same type.
   // Pow spec says inputs can be different types.
   // We only support float for all of these inputs.
-  if (!IsInputFloat(node, 0, input_params, logger) ||
-      ((node.OpType() == "Pow") && !IsInputFloat(node, 1, input_params, logger))) {
+  if (!IsInputDtypeSupport(node, 0, input_params, logger) ||
+      ((node.OpType() == "Pow") && !IsInputDtypeSupport(node, 1, input_params, logger))) {
     return false;
   }

diff --git a/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc b/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc
index e02186d3aee8..328f8b327992 100644
--- a/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc
+++ b/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc
@@ -96,6 +96,9 @@ Status CreateCoreMLWeight(CoreML::Specification::WeightParams& weight,
     case ONNX_NAMESPACE::TensorProto_DataType_FLOAT:
       CreateCoreMLWeight(weight, unpacked_tensor.DataAsSpan<float>());
       break;
+    case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16:
+      CreateCoreMLWeight(weight, unpacked_tensor.DataAsSpan<MLFloat16>());
+      break;
     case ONNX_NAMESPACE::TensorProto_DataType_INT32:
       CreateCoreMLWeight(weight, unpacked_tensor.DataAsSpan<int32_t>());
       break;
@@ -114,6 +117,11 @@ void CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, gsl::span<const float> data) {
   weight.mutable_floatvalue()->Assign(data.begin(), data.end());
 }

+void CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, gsl::span<const MLFloat16> data) {
+  const char* data_byte_ptr = (const char*)(data.data());
+  weight.mutable_float16value()->assign(data_byte_ptr, data_byte_ptr+data.size_bytes());
+}
+
 namespace {
 template <typename T>
 void CreateCoreMLWeightConvertingDataToFloats(CoreML::Specification::WeightParams& weight, gsl::span<const T> data) {
@@ -133,6 +141,8 @@ void CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, gsl::span<const int64_t>
 template MILSpec::Value CreateTensorValue<float, float>(gsl::span<const float> data,
                                                         std::optional<gsl::span<const int64_t>> shape);
+template MILSpec::Value CreateTensorValue(gsl::span data,
+                                        std::optional<gsl::span<const int64_t>> shape);
+template MILSpec::Value CreateTensorValue(gsl::span data,
+                                        std::optional<gsl::span<const int64_t>> shape);
+template MILSpec::Value CreateTensorValue(gsl::span data,
+                                        std::optional<gsl::span<const int64_t>> shape);

 template MILSpec::Value CreateScalarTensorValue<float>(const float& data);
 template MILSpec::Value CreateScalarTensorValue<int32_t>(const int32_t& data);

diff --git a/onnxruntime/core/providers/coreml/builders/impl/builder_utils.h b/onnxruntime/core/providers/coreml/builders/impl/builder_utils.h
index 475ce79b0a81..f25936e25a17 100644
--- a/onnxruntime/core/providers/coreml/builders/impl/builder_utils.h
+++ b/onnxruntime/core/providers/coreml/builders/impl/builder_utils.h
@@ -41,6 +41,9 @@ Status CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, const ONNX_NAMESPACE::TensorProto& tensor);
 // Copy the float array to a coreml weight
 void CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, gsl::span<const float> data);

+// Copy the float16 array to a coreml weight
+void CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, gsl::span<const MLFloat16> data);
+
 // Copy the int32_t array to a coreml weight
 void CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, gsl::span<const int32_t> data);

diff --git a/onnxruntime/core/providers/coreml/builders/model_builder.cc b/onnxruntime/core/providers/coreml/builders/model_builder.cc
index 9668bfcd09ad..7ecfad8493ea 100644
--- a/onnxruntime/core/providers/coreml/builders/model_builder.cc
+++ b/onnxruntime/core/providers/coreml/builders/model_builder.cc
@@ -811,6 +811,9 @@ Status ModelBuilder::RegisterModelInputOutput(const NodeArg& node_arg, bool is_i
     case ONNX_NAMESPACE::TensorProto_DataType_FLOAT:
       multi_array->set_datatype(ArrayFeatureType::FLOAT32);
       break;
+    case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16:
+      multi_array->set_datatype(ArrayFeatureType::FLOAT16);
+      break;
     case ONNX_NAMESPACE::TensorProto_DataType_INT32:
       multi_array->set_datatype(ArrayFeatureType::INT32);
       break;
diff --git a/onnxruntime/core/providers/coreml/model/model.mm b/onnxruntime/core/providers/coreml/model/model.mm
index 68460ff7c9b3..60c93aa60162 100644
--- a/onnxruntime/core/providers/coreml/model/model.mm
+++ b/onnxruntime/core/providers/coreml/model/model.mm
@@ -120,6 +120,10 @@ Status CreateInputFeatureProvider(const std::unordered_map<std::string, OnnxTensorData>& inputs,
+    case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16: {
+      const auto* src_buffer = static_cast<const uint16_t*>(mlmultiarray_buffer);
+      auto* dst_buffer = static_cast<uint16_t*>(tensor_buffer);
+      const auto block_byte_size = block_size * sizeof(uint16_t);
+
+      for (int64_t idx = 0; idx < num_blocks; ++idx) {
+        memcpy(dst_buffer, src_buffer, block_byte_size);
+        src_buffer += stride;
+        dst_buffer += block_size;
+      }
+      break;
+    }
     case ONNX_NAMESPACE::TensorProto_DataType_INT32: {
       const auto* src_buffer = static_cast<const int32_t*>(mlmultiarray_buffer);
       auto* dst_buffer = static_cast<int32_t*>(tensor_buffer);

From bb9900882c08a16a16cb27e2555fd1b3c29bebbf Mon Sep 17 00:00:00 2001
From: wejoncy
Date: Wed, 11 Sep 2024 03:49:00 -0700
Subject: [PATCH 2/6] support unary and binary ops

---
 .../coreml/builders/impl/base_op_builder.cc   | 11 +--
 .../coreml/builders/impl/binary_op_builder.cc |  4 +-
 .../coreml/builders/impl/unary_op_builder.cc  | 32 +++++++
 .../providers/coreml/coreml_basic_test.cc     | 87 +++++++++++++++++++
 onnxruntime/test/util/test_utils.cc           |  5 ++
 5 files changed, 132 insertions(+), 7 deletions(-)

diff --git a/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc
index 9de6e2c20c97..cc6f2d796c5e 100644
--- a/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc
+++ b/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc
@@ -14,7 +14,7 @@ namespace onnxruntime {
 namespace coreml {

 static std::set<std::string> Float16Ops = {
-    "Add",
+    "Add", "Mul", "Sub", "Div", "Pow", "Sqrt", "Reciprocal"
 };

 namespace {
@@ -88,7 +88,7 @@ bool BaseOpBuilder::HasSupportedInputs(const Node& node, const OpBuilderInputPar
 }

 /* static */
-bool BaseOpBuilder::IsInputDtypeSupport(const Node& node, size_t idx, const OpBuilderInputParams& /*input_params*/,
+bool BaseOpBuilder::IsInputDtypeSupport(const Node& node, size_t idx, const OpBuilderInputParams& input_params,
                                  const logging::Logger& logger) {
   if (idx >= node.InputDefs().size()) {
     LOGS(logger, VERBOSE) << "Input index [" << idx << "] is out of range";
@@ -109,12 +109,13 @@ bool BaseOpBuilder::IsInputDtypeSupport(const Node& node, size_t idx, const OpBuilderInputParams&
     return true;
   }

-  if (input_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT16 && Float16Ops.count(node.OpType())) {
+#if defined(COREML_ENABLE_MLPROGRAM)
+  if (input_params.create_mlprogram && input_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT16 && Float16Ops.count(node.OpType())) {
     return true;
   }
-
+#endif
   LOGS(logger, VERBOSE) << "[" << node.OpType() << "] Input type: [" << input_type << "] is not currently supported";
-  return true;
+  return false;
 }

 bool BaseOpBuilder::HasSupportedInputsImpl(const Node& node, const OpBuilderInputParams& input_params,
diff --git a/onnxruntime/core/providers/coreml/builders/impl/binary_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/binary_op_builder.cc
index 3ecea9c3770f..bc1eed8c1920 100644
--- a/onnxruntime/core/providers/coreml/builders/impl/binary_op_builder.cc
+++ b/onnxruntime/core/providers/coreml/builders/impl/binary_op_builder.cc
@@ -73,7 +73,7 @@ Status BinaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
   } else if (op_type == "Sub") {
     coreml_op_type = "sub";
   } else if (op_type == "Div") {
-    // we only support fp32 currently. when we add support for integers we need to check the type and use
+    // we support fp32/fp16 currently. when we add support for integers we need to check the type and use
     // "floor_div" or "real_div" accordingly
     coreml_op_type = "real_div";
   } else if (op_type == "Pow") {
@@ -138,7 +138,7 @@ bool BinaryOpBuilder::HasSupportedInputsImpl(const Node& node, const OpBuilderIn
                                              const logging::Logger& logger) const {
   // Add/Sub/Mul/Div spec says inputs must be of the same type.
   // Pow spec says inputs can be different types.
-  // We only support float for all of these inputs.
+  // We support float/float16 for all of these inputs.
   if (!IsInputDtypeSupport(node, 0, input_params, logger) ||
       ((node.OpType() == "Pow") && !IsInputDtypeSupport(node, 1, input_params, logger))) {
     return false;
   }

diff --git a/onnxruntime/core/providers/coreml/builders/impl/unary_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/unary_op_builder.cc
index 3403378d5911..595e08d1d771 100644
--- a/onnxruntime/core/providers/coreml/builders/impl/unary_op_builder.cc
+++ b/onnxruntime/core/providers/coreml/builders/impl/unary_op_builder.cc
@@ -3,6 +3,7 @@

 #include "core/providers/common.h"

+#include "core/providers/coreml/builders/impl/builder_utils.h"
 #include "core/providers/coreml/builders/helper.h"
 #include "core/providers/coreml/builders/impl/base_op_builder.h"
 #include "core/providers/coreml/builders/model_builder.h"
@@ -14,6 +15,7 @@ namespace coreml {
 class UnaryOpBuilder : public BaseOpBuilder {
   Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node,
                                const logging::Logger& logger) const override;
+  bool SupportsMLProgram() const override { return true; }
 };

 Status UnaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node,
@@ -21,6 +23,35 @@ Status UnaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
   const auto& op_type(node.OpType());
   const auto& input_defs(node.InputDefs());

+
+#if defined(COREML_ENABLE_MLPROGRAM)
+  if (model_builder.CreateMLProgram()) {
+    using namespace CoreML::Specification::MILSpec;
+
+    // https://apple.github.io/coremltools/source/coremltools.converters.mil.mil.ops.defs.html#module-coremltools.converters.mil.mil.ops.defs.iOS15.elementwise_binary
+    std::string_view coreml_op_type;
+    if (op_type == "Sqrt") {
+      coreml_op_type = "sqrt";
+    } else if (op_type == "Reciprocal") {
+      coreml_op_type = "inverse";
+    } else {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                             "UnaryOpBuilder::AddToModelBuilderImpl, unexpected op: ", op_type);
+    }
+
+    std::unique_ptr<Operation> op = model_builder.CreateOperation(node, coreml_op_type);
+    AddOperationInput(*op, "x", input_defs[0]->Name());
+    if (op_type == "Reciprocal") {
+      float epsilon = 1e-4;  //epsilon: const T (Optional, default=1e-4)
+      AddOperationInput(*op, "epsilon", model_builder.AddScalarConstant(op->type(), "epsilon", epsilon));
+    }
+
+    AddOperationOutput(*op, *node.OutputDefs()[0]);
+
+    model_builder.AddOperation(std::move(op));
+  } else
+#endif  // defined (COREML_ENABLE_MLPROGRAM)
+  {
   std::unique_ptr<COREML_SPEC::NeuralNetworkLayer> layer = model_builder.CreateNNLayer(node);

   if (op_type == "Sqrt") {
     layer->mutable_unary()->set_type(COREML_SPEC::UnaryFunctionLayerParams::SQRT);
   } else if (op_type == "Reciprocal") {
     layer->mutable_unary()->set_type(COREML_SPEC::UnaryFunctionLayerParams::INVERSE);
   } else {
     return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
                            "UnaryOpBuilder::AddToModelBuilderImpl, unknown op: ", op_type);
   }
@@ -36,6 +67,7 @@ Status UnaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
   *layer->mutable_input()->Add() = input_defs[0]->Name();
   *layer->mutable_output()->Add() = node.OutputDefs()[0]->Name();

   model_builder.AddLayer(std::move(layer));
+  }
   return Status::OK();
 }
diff --git a/onnxruntime/test/providers/coreml/coreml_basic_test.cc b/onnxruntime/test/providers/coreml/coreml_basic_test.cc
index daa24db13411..c9d8a605678b 100644
--- a/onnxruntime/test/providers/coreml/coreml_basic_test.cc
+++ b/onnxruntime/test/providers/coreml/coreml_basic_test.cc
@@ -257,6 +257,93 @@ TEST(CoreMLExecutionProviderTest, TestNameSanitization) {
   // TensorRT does not support Clip opset 11 yet.
   test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
 }
+
+TEST(CoreMLExecutionProviderTest, TestBinaryFp16) {
+  auto test_binary_op = [](std::string op){
+    OpTester test(op, 11);
+
+    std::vector<int64_t> dims{3, 3};
+    std::vector<float> input1 = {-1.0f, 0.0f, 1.0f,
+                          -6.0f, 0.0f, 6.0f,
+                          -5.4f, 2.0f, 6.0f};
+    std::vector<MLFloat16> input1_fp16(9);
+    ConvertFloatToMLFloat16(input1.data(), input1_fp16.data(), 9);
+    std::vector<float> input2 = {-1.0f, 0.0f, 1.0f,
+                          -5.0f, 0.0f, 5.0f,
+                          -5.0f, 2.0f, 5.0f};
+    std::vector<MLFloat16> input2_fp16(9);
+    ConvertFloatToMLFloat16(input2.data(), input2_fp16.data(), 9);
+    std::vector<float> output(9);
+    if (op == "Add"){
+      for(int i = 0; i < 9; i++){
+        output[i] = input1_fp16[i] + input2_fp16[i];
+      }
+    } else if (op == "Sub") {
+      for(int i = 0; i < 9; i++){
+        output[i] = input1_fp16[i] - input2_fp16[i];
+      }
+    } else if (op == "Mul") {
+      for(int i = 0; i < 9; i++){
+        output[i] = input1_fp16[i] * input2_fp16[i];
+      }
+    } else if (op == "Div") {
+      for(int i = 0; i < 9; i++){
+        output[i] = input1_fp16[i] / input2_fp16[i];
+      }
+    }
+    std::vector<MLFloat16> output_fp16(9);
+    ConvertFloatToMLFloat16(output.data(), output_fp16.data(), 9);
+
+    test.AddInput<MLFloat16>("0", dims, input1_fp16);
+    test.AddInput<MLFloat16>("1.min", dims, input2_fp16);
+    test.AddOutput<MLFloat16>("3", dims, output_fp16);
+
+    // TensorRT does not support Clip opset 11 yet.
+    std::vector<std::unique_ptr<IExecutionProvider>> coreml_ep;
+    coreml_ep.emplace_back(MakeCoreMLExecutionProvider(COREML_FLAG_CREATE_MLPROGRAM));
+    test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &coreml_ep);
+  };
+  test_binary_op("Add");
+  test_binary_op("Sub");
+  test_binary_op("Div");
+  test_binary_op("Mul");
+}
+
+TEST(CoreMLExecutionProviderTest, TestUnaryFp16) {
+  auto test_binary_op = [](std::string op){
+    OpTester test(op, 11);
+
+    std::vector<int64_t> dims{3, 3};
+    std::vector<float> input1 = {-1.0f, 0.0f, 1.0f,
+                          -6.0f, 0.2f, 6.0f,
+                          -5.4f, 2.0f, 6.0f};
+    std::vector<MLFloat16> input1_fp16(9);
+    ConvertFloatToMLFloat16(input1.data(), input1_fp16.data(), 9);
+
+    std::vector<float> output(9);
+    if (op == "Sqrt"){
+      for(int i = 0; i < 9; i++){
+        output[i] = sqrt(input1_fp16[i]);
+      }
+    } else if (op == "Reciprocal") {
+      for(int i = 0; i < 9; i++){
+        output[i] = 1.0f/(1e-4+input1_fp16[i]);
+      }
+    }
+    std::vector<MLFloat16> output_fp16(9);
+    ConvertFloatToMLFloat16(output.data(), output_fp16.data(), 9);
+
+    test.AddInput<MLFloat16>("0", dims, input1_fp16);
+    test.AddOutput<MLFloat16>("3", dims, output_fp16);
+
+    // TensorRT does not support Clip opset 11 yet.
+    std::vector<std::unique_ptr<IExecutionProvider>> coreml_ep;
+    coreml_ep.emplace_back(MakeCoreMLExecutionProvider(COREML_FLAG_CREATE_MLPROGRAM));
+    test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &coreml_ep);
+  };
+  test_binary_op("Sqrt");
+  test_binary_op("Reciprocal");
+}
 #endif

 }  // namespace test
diff --git a/onnxruntime/test/util/test_utils.cc b/onnxruntime/test/util/test_utils.cc
index 6bc0f8d10549..606b8d580fa3 100644
--- a/onnxruntime/test/util/test_utils.cc
+++ b/onnxruntime/test/util/test_utils.cc
@@ -55,6 +55,11 @@ void VerifyOutput(const std::string& output_name,
                   ::testing::Pointwise(::testing::FloatNear(fp32_abs_err), tensor.DataAsSpan<float>()));
       break;
     }
+    case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16: {
+      EXPECT_THAT(expected_tensor.DataAsSpan<MLFloat16>(),
+                  ::testing::Pointwise(::testing::FloatNear(fp32_abs_err), tensor.DataAsSpan<MLFloat16>()));
+      break;
+    }
     default:
       ORT_THROW("Unhandled data type. Please add 'case' statement for ", element_type);
   }

From 4e866d1dbab6dc41af3db414b79fc2523c20ca46 Mon Sep 17 00:00:00 2001
From: wejoncy
Date: Wed, 11 Sep 2024 03:57:27 -0700
Subject: [PATCH 3/6] format

---
 .../coreml/builders/impl/base_op_builder.cc   |  7 ++--
 .../coreml/builders/impl/base_op_builder.h    |  2 +-
 .../coreml/builders/impl/builder_utils.cc     | 10 +++---
 .../coreml/builders/impl/unary_op_builder.cc  | 27 +++++++--------
 .../providers/coreml/coreml_basic_test.cc     | 34 +++++++++----------
 5 files changed, 38 insertions(+), 42 deletions(-)

diff --git a/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc
index cc6f2d796c5e..a261dbb63d07 100644
--- a/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc
+++ b/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc
@@ -14,8 +14,7 @@ namespace onnxruntime {
 namespace coreml {

 static std::set<std::string> Float16Ops = {
-    "Add", "Mul", "Sub", "Div", "Pow", "Sqrt", "Reciprocal"
-};
+    "Add", "Mul", "Sub", "Div", "Pow", "Sqrt", "Reciprocal"};

 namespace {
 // TODO, move this to shared_library
@@ -89,7 +88,7 @@ bool BaseOpBuilder::HasSupportedInputs(const Node& node, const OpBuilderInputPar

 /* static */
 bool BaseOpBuilder::IsInputDtypeSupport(const Node& node, size_t idx, const OpBuilderInputParams& input_params,
-                                 const logging::Logger& logger) {
+                                        const logging::Logger& logger) {
   if (idx >= node.InputDefs().size()) {
     LOGS(logger, VERBOSE) << "Input index [" << idx << "] is out of range";
     return false;
@@ -105,7 +104,7 @@ bool BaseOpBuilder::IsInputDtypeSupport(const Node& node, size_t idx, const OpBu
   }

   // float is supported
-  if (input_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT){
+  if (input_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT) {
     return true;
   }

diff --git a/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.h b/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.h
index 6bd3c43f373c..a2cbef6dd57d 100644
--- a/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.h
+++ b/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.h
@@ -34,7 +34,7 @@ class BaseOpBuilder : public IOpBuilder {

   // currently we only support float
   static bool IsInputDtypeSupport(const Node& node, size_t idx, const OpBuilderInputParams& input_params,
-                          const logging::Logger& logger);
+                                  const logging::Logger& logger);

 private:
   virtual bool IsOpSupportedImpl(const Node& /*node*/, const OpBuilderInputParams& /*input_params*/,
diff --git a/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc b/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc
index 328f8b327992..fc6b5792f364 100644
--- a/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc
+++ b/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc
@@ -119,7 +119,7 @@ void CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, gsl::span<const MLFloat16> data) {
   const char* data_byte_ptr = (const char*)(data.data());
-  weight.mutable_float16value()->assign(data_byte_ptr, data_byte_ptr+data.size_bytes());
+  weight.mutable_float16value()->assign(data_byte_ptr, data_byte_ptr + data.size_bytes());
 }

 namespace {
@@ -141,8 +141,6 @@ void CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, gsl::span<const int64_t>
 template MILSpec::Value CreateTensorValue<float, float>(gsl::span<const float> data, std::optional<gsl::span<const int64_t>> shape);
 template MILSpec::Value CreateTensorValue(gsl::span data,
-                                        std::optional<gsl::span<const int64_t>> shape);
+                                          std::optional<gsl::span<const int64_t>> shape);
 template MILSpec::Value CreateTensorValue(gsl::span data,
-                                        std::optional<gsl::span<const int64_t>> shape);
+                                          std::optional<gsl::span<const int64_t>> shape);
 template MILSpec::Value CreateTensorValue(gsl::span data,
-                                        std::optional<gsl::span<const int64_t>> shape);
+                                          std::optional<gsl::span<const int64_t>> shape);

 template MILSpec::Value CreateScalarTensorValue<float>(const float& data);
 template MILSpec::Value CreateScalarTensorValue<int32_t>(const int32_t& data);

diff --git a/onnxruntime/core/providers/coreml/builders/impl/unary_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/unary_op_builder.cc
index 595e08d1d771..6d46c3789dec 100644
--- a/onnxruntime/core/providers/coreml/builders/impl/unary_op_builder.cc
+++ b/onnxruntime/core/providers/coreml/builders/impl/unary_op_builder.cc
@@ -23,7 +23,6 @@ Status UnaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
   const auto& op_type(node.OpType());
   const auto& input_defs(node.InputDefs());

-
 #if defined(COREML_ENABLE_MLPROGRAM)
   if (model_builder.CreateMLProgram()) {
     using namespace CoreML::Specification::MILSpec;
@@ -42,7 +41,7 @@ Status UnaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
     std::unique_ptr<Operation> op = model_builder.CreateOperation(node, coreml_op_type);
     AddOperationInput(*op, "x", input_defs[0]->Name());
     if (op_type == "Reciprocal") {
-      float epsilon = 1e-4;  //epsilon: const T (Optional, default=1e-4)
+      float epsilon = 1e-4;  // epsilon: const T (Optional, default=1e-4)
       AddOperationInput(*op, "epsilon", model_builder.AddScalarConstant(op->type(), "epsilon", epsilon));
     }

@@ -52,21 +51,21 @@ Status UnaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
   } else
 #endif  // defined (COREML_ENABLE_MLPROGRAM)
   {
-  std::unique_ptr<COREML_SPEC::NeuralNetworkLayer> layer = model_builder.CreateNNLayer(node);
+    std::unique_ptr<COREML_SPEC::NeuralNetworkLayer> layer = model_builder.CreateNNLayer(node);

-  if (op_type == "Sqrt") {
-    layer->mutable_unary()->set_type(COREML_SPEC::UnaryFunctionLayerParams::SQRT);
-  } else if (op_type == "Reciprocal") {
-    layer->mutable_unary()->set_type(COREML_SPEC::UnaryFunctionLayerParams::INVERSE);
-  } else {
-    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
-                           "UnaryOpBuilder::AddToModelBuilderImpl, unknown op: ", op_type);
-  }
+    if (op_type == "Sqrt") {
+      layer->mutable_unary()->set_type(COREML_SPEC::UnaryFunctionLayerParams::SQRT);
+    } else if (op_type == "Reciprocal") {
+      layer->mutable_unary()->set_type(COREML_SPEC::UnaryFunctionLayerParams::INVERSE);
+    } else {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                             "UnaryOpBuilder::AddToModelBuilderImpl, unknown op: ", op_type);
+    }

-  *layer->mutable_input()->Add() = input_defs[0]->Name();
-  *layer->mutable_output()->Add() = node.OutputDefs()[0]->Name();
+    *layer->mutable_input()->Add() = input_defs[0]->Name();
+    *layer->mutable_output()->Add() = node.OutputDefs()[0]->Name();

-  model_builder.AddLayer(std::move(layer));
+    model_builder.AddLayer(std::move(layer));
   }
   return Status::OK();
 }
diff --git a/onnxruntime/test/providers/coreml/coreml_basic_test.cc b/onnxruntime/test/providers/coreml/coreml_basic_test.cc
index c9d8a605678b..4fdfd2dc1be2 100644
--- a/onnxruntime/test/providers/coreml/coreml_basic_test.cc
+++ b/onnxruntime/test/providers/coreml/coreml_basic_test.cc
@@ -259,35 +259,35 @@ TEST(CoreMLExecutionProviderTest, TestNameSanitization) {
 }

 TEST(CoreMLExecutionProviderTest, TestBinaryFp16) {
-  auto test_binary_op = [](std::string op){
+  auto test_binary_op = [](std::string op) {
     OpTester test(op, 11);

     std::vector<int64_t> dims{3, 3};
     std::vector<float> input1 = {-1.0f, 0.0f, 1.0f,
-                          -6.0f, 0.0f, 6.0f,
-                          -5.4f, 2.0f, 6.0f};
+                                 -6.0f, 0.0f, 6.0f,
+                                 -5.4f, 2.0f, 6.0f};
     std::vector<MLFloat16> input1_fp16(9);
     ConvertFloatToMLFloat16(input1.data(), input1_fp16.data(), 9);
     std::vector<float> input2 = {-1.0f, 0.0f, 1.0f,
-                          -5.0f, 0.0f, 5.0f,
-                          -5.0f, 2.0f, 5.0f};
+                                 -5.0f, 0.0f, 5.0f,
+                                 -5.0f, 2.0f, 5.0f};
     std::vector<MLFloat16> input2_fp16(9);
     ConvertFloatToMLFloat16(input2.data(), input2_fp16.data(), 9);
     std::vector<float> output(9);
-    if (op == "Add"){
-      for(int i = 0; i < 9; i++){
+    if (op == "Add") {
+      for (int i = 0; i < 9; i++) {
         output[i] = input1_fp16[i] + input2_fp16[i];
       }
     } else if (op == "Sub") {
-      for(int i = 0; i < 9; i++){
+      for (int i = 0; i < 9; i++) {
         output[i] = input1_fp16[i] - input2_fp16[i];
       }
     } else if (op == "Mul") {
-      for(int i = 0; i < 9; i++){
+      for (int i = 0; i < 9; i++) {
         output[i] = input1_fp16[i] * input2_fp16[i];
       }
     } else if (op == "Div") {
-      for(int i = 0; i < 9; i++){
+      for (int i = 0; i < 9; i++) {
         output[i] = input1_fp16[i] / input2_fp16[i];
       }
     }
@@ -310,24 +310,24 @@ TEST(CoreMLExecutionProviderTest, TestBinaryFp16) {
 }

 TEST(CoreMLExecutionProviderTest, TestUnaryFp16) {
-  auto test_binary_op = [](std::string op){
+  auto test_binary_op = [](std::string op) {
     OpTester test(op, 11);

     std::vector<int64_t> dims{3, 3};
     std::vector<float> input1 = {-1.0f, 0.0f, 1.0f,
-                          -6.0f, 0.2f, 6.0f,
-                          -5.4f, 2.0f, 6.0f};
+                                 -6.0f, 0.2f, 6.0f,
+                                 -5.4f, 2.0f, 6.0f};
     std::vector<MLFloat16> input1_fp16(9);
     ConvertFloatToMLFloat16(input1.data(), input1_fp16.data(), 9);

     std::vector<float> output(9);
-    if (op == "Sqrt"){
-      for(int i = 0; i < 9; i++){
+    if (op == "Sqrt") {
+      for (int i = 0; i < 9; i++) {
         output[i] = sqrt(input1_fp16[i]);
       }
     } else if (op == "Reciprocal") {
-      for(int i = 0; i < 9; i++){
-        output[i] = 1.0f/(1e-4+input1_fp16[i]);
+      for (int i = 0; i < 9; i++) {
+        output[i] = 1.0f / (1e-4 + input1_fp16[i]);
       }
     }
     std::vector<MLFloat16> output_fp16(9);

From 0611bf5dc0355a076766c1ed5ef748a43b5fcafa Mon Sep 17 00:00:00 2001
From: wejoncy
Date: Wed, 11 Sep 2024 04:24:31 -0700
Subject: [PATCH 4/6] more ops

---
 .../core/providers/coreml/builders/impl/base_op_builder.cc  | 4 +++-
 .../core/providers/coreml/builders/impl/unary_op_builder.cc | 4 ++--
 onnxruntime/test/providers/coreml/coreml_basic_test.cc      | 5 +++++
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc
index a261dbb63d07..f267dc755135 100644
--- a/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc
+++ b/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc
@@ -14,7 +14,9 @@ namespace onnxruntime {
 namespace coreml {

 static std::set<std::string> Float16Ops = {
-    "Add", "Mul", "Sub", "Div", "Pow", "Sqrt", "Reciprocal"};
+    "Add", "Mul", "Sub", "Div", "Pow", "Sqrt", "Reciprocal",
+    "Sigmoid", "Tanh", "Relu", "LeakyRelu", "Concat", "GridSample", "GlobalAveragePool",
+    "GlobalMaxPool", "AveragePool", "MaxPool", "Reshape", "Split", "Transpose"};

 namespace {
 // TODO, move this to shared_library
diff --git a/onnxruntime/core/providers/coreml/builders/impl/unary_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/unary_op_builder.cc
index 6d46c3789dec..aa3060d62686 100644
--- a/onnxruntime/core/providers/coreml/builders/impl/unary_op_builder.cc
+++ b/onnxruntime/core/providers/coreml/builders/impl/unary_op_builder.cc
@@ -48,8 +48,8 @@ Status UnaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
     AddOperationOutput(*op, *node.OutputDefs()[0]);

     model_builder.AddOperation(std::move(op));
-  } else
-#endif  // defined (COREML_ENABLE_MLPROGRAM)
+  } else  // NOLINT
+#endif  // defined (COREML_ENABLE_MLPROGRAM)
   {
     std::unique_ptr<COREML_SPEC::NeuralNetworkLayer> layer = model_builder.CreateNNLayer(node);

diff --git a/onnxruntime/test/providers/coreml/coreml_basic_test.cc b/onnxruntime/test/providers/coreml/coreml_basic_test.cc
index 4fdfd2dc1be2..6da88a24bd45 100644
--- a/onnxruntime/test/providers/coreml/coreml_basic_test.cc
+++ b/onnxruntime/test/providers/coreml/coreml_basic_test.cc
@@ -329,6 +329,10 @@ TEST(CoreMLExecutionProviderTest, TestUnaryFp16) {
       for (int i = 0; i < 9; i++) {
         output[i] = 1.0f / (1e-4 + input1_fp16[i]);
       }
+    } else if (op == "Relu") {
+      for (int i = 0; i < 9; i++) {
+        output[i] = fmax(0.0f, input1_fp16[i]);
+      }
     }
     std::vector<MLFloat16> output_fp16(9);
     ConvertFloatToMLFloat16(output.data(), output_fp16.data(), 9);
@@ -343,6 +347,7 @@ TEST(CoreMLExecutionProviderTest, TestUnaryFp16) {
   };
   test_binary_op("Sqrt");
   test_binary_op("Reciprocal");
+  test_binary_op("Relu");
 }
 #endif

From 3944fd606ea96254d64f09b40e6b918f07f777f7 Mon Sep 17 00:00:00 2001
From: wejoncy
Date: Wed, 11 Sep 2024 20:24:31 -0700
Subject: [PATCH 5/6] fix

---
 .../core/providers/coreml/builders/impl/base_op_builder.cc  | 4 ++++
 .../core/providers/coreml/builders/impl/base_op_builder.h   | 2 +-
 .../core/providers/coreml/builders/impl/unary_op_builder.cc | 1 -
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc
index f267dc755135..25d7890faeba 100644
--- a/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc
+++ b/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc
@@ -13,6 +13,8 @@ using namespace CoreML::Specification;
 namespace onnxruntime {
 namespace coreml {

+// Once all ops support FP16, we can remove it. Before that, we keep a set of ops to
+// filter supported ones.
 static std::set<std::string> Float16Ops = {
     "Add", "Mul", "Sub", "Div", "Pow", "Sqrt", "Reciprocal",
     "Sigmoid", "Tanh", "Relu", "LeakyRelu", "Concat", "GridSample", "GlobalAveragePool",
@@ -110,11 +112,13 @@ bool BaseOpBuilder::IsInputDtypeSupport(const Node& node, size_t idx, const OpBu
     return true;
   }

+// only support MLProgram for FP16
 #if defined(COREML_ENABLE_MLPROGRAM)
   if (input_params.create_mlprogram && input_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT16 && Float16Ops.count(node.OpType())) {
     return true;
   }
 #endif
+
   LOGS(logger, VERBOSE) << "[" << node.OpType() << "] Input type: [" << input_type << "] is not currently supported";
   return false;
 }
diff --git a/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.h b/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.h
index a2cbef6dd57d..153ae841b238 100644
--- a/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.h
+++ b/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.h
@@ -32,7 +32,7 @@ class BaseOpBuilder : public IOpBuilder {
       : allow_empty_tensor_as_input_(allow_empty_tensor_as_input) {
   }

-  // currently we only support float
+  // currently we support float/float16
   static bool IsInputDtypeSupport(const Node& node, size_t idx, const OpBuilderInputParams& input_params,
                                   const logging::Logger& logger);

diff --git a/onnxruntime/core/providers/coreml/builders/impl/unary_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/unary_op_builder.cc
index aa3060d62686..e8a138aa4979 100644
--- a/onnxruntime/core/providers/coreml/builders/impl/unary_op_builder.cc
+++ b/onnxruntime/core/providers/coreml/builders/impl/unary_op_builder.cc
@@ -27,7 +27,6 @@ Status UnaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
   if (model_builder.CreateMLProgram()) {
     using namespace CoreML::Specification::MILSpec;
-    // https://apple.github.io/coremltools/source/coremltools.converters.mil.mil.ops.defs.html#module-coremltools.converters.mil.mil.ops.defs.iOS15.elementwise_binary
     std::string_view coreml_op_type;
     if (op_type == "Sqrt") {
       coreml_op_type = "sqrt";

From 4f935e765e1a6d5ce1945f3daeab7e1a7bd519dc Mon Sep 17 00:00:00 2001
From: wejoncy
Date: Wed, 18 Sep 2024 01:07:49 -0700
Subject: [PATCH 6/6] unify UT

---
 .../coreml/builders/impl/base_op_builder.cc   |   7 ++--
 .../coreml/builders/impl/builder_utils.cc     |  18 +++
 .../coreml/builders/impl/unary_op_builder.cc  |   7 +-
 .../coreml/builders/model_builder.cc          |   8 +
 .../providers/coreml/builders/model_builder.h |   3 +-
 .../providers/coreml/coreml_basic_test.cc     |  92 ------------
 .../cpu/math/element_wise_ops_test.cc         | 137 +++++++++++-------
 .../apple/coreml_supported_mlprogram_ops.md   |   2 +
 8 files changed, 123 insertions(+), 151 deletions(-)

diff --git a/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc
index 25d7890faeba..748fe1dad226 100644
--- a/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc
+++ b/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc
@@ -15,9 +15,9 @@ namespace coreml {

 // Once all ops support FP16, we can remove it. Before that, we keep a set of ops to
 // filter supported ones.
-static std::set<std::string> Float16Ops = {
+static std::set<std::string_view> Float16Ops = {
     "Add", "Mul", "Sub", "Div", "Pow", "Sqrt", "Reciprocal",
-    "Sigmoid", "Tanh", "Relu", "LeakyRelu", "Concat", "GridSample", "GlobalAveragePool",
+    "Sigmoid", "Tanh", "Relu", "LeakyRelu", "Concat", "GridSample", "GlobalAveragePool", "Clip", "DepthToSpace", "Resize", "Slice",
     "GlobalMaxPool", "AveragePool", "MaxPool", "Reshape", "Split", "Transpose"};

 namespace {
 // TODO, move this to shared_library
@@ -91,7 +91,8 @@ bool BaseOpBuilder::HasSupportedInputs(const Node& node, const OpBuilderInputPar
 }

 /* static */
-bool BaseOpBuilder::IsInputDtypeSupport(const Node& node, size_t idx, const OpBuilderInputParams& input_params,
+bool BaseOpBuilder::IsInputDtypeSupport(const Node& node, size_t idx,
+                                        [[maybe_unused]] const OpBuilderInputParams& input_params,
                                         const logging::Logger& logger) {
   if (idx >= node.InputDefs().size()) {
     LOGS(logger, VERBOSE) << "Input index [" << idx << "] is out of range";
diff --git a/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc b/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc
index fc6b5792f364..a27895b6e37f 100644
--- a/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc
+++ b/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc
@@ -131,6 +131,15 @@ void CreateCoreMLWeightConvertingDataToFloats(CoreML::Specification::WeightParam
                  [](T v) { return narrow<float>(v); });
   *weight.mutable_floatvalue() = std::move(weight_floats);
 }
+
+template <typename T>
+void CreateCoreMLWeightConvertingDataToFloat16s(CoreML::Specification::WeightParams& weight, gsl::span<const T> data) {
+  std::vector<MLFloat16> weight_float16s{};
+  weight_float16s.reserve(data.size());
+  std::transform(data.begin(), data.end(), std::back_inserter(weight_float16s),
+                 [](T v) { return MLFloat16(narrow<float>(v)); });
+  CreateCoreMLWeight(weight, weight_float16s);
+}
 }  // namespace

 void CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, gsl::span<const float> data) {
@@ -203,6 +212,13 @@ void CopyDataToTensorValue<float>(MILSpec::TensorValue& tensor_value, gsl::span<const float> data) {
   tensor_value.mutable_floats()->mutable_values()->Add(data.begin(), data.end());
 }

+template <>
+void CopyDataToTensorValue<MLFloat16>(MILSpec::TensorValue& tensor_value, gsl::span<const MLFloat16> data) {
+  const char* begin = (const char*)(data.data());
+  const char* end = (const char*)(data.data()) + data.size() * sizeof(MLFloat16);
+  tensor_value.mutable_bytes()->mutable_values()->assign(begin, end);
+}
+
 template <>
 void CopyDataToTensorValue<int32_t>(MILSpec::TensorValue& tensor_value, gsl::span<const int32_t> data) {
   tensor_value.mutable_ints()->mutable_values()->Add(data.begin(), data.end());
 }
@@ -300,6 +316,8 @@ template MILSpec::Value CreateTensorValue(gsl::span<const int64_t>
 template MILSpec::Value CreateTensorValue(gsl::span data,
                                           std::optional<gsl::span<const int64_t>> shape);
+template MILSpec::Value CreateTensorValue<MLFloat16, MLFloat16>(gsl::span<const MLFloat16> data,
+                                                                std::optional<gsl::span<const int64_t>> shape);
 template MILSpec::Value CreateTensorValue(gsl::span data,
                                           std::optional<gsl::span<const int64_t>> shape);
 template MILSpec::Value CreateTensorValue(gsl::span data,
diff --git a/onnxruntime/core/providers/coreml/builders/impl/unary_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/unary_op_builder.cc
index e8a138aa4979..335ca737081b 100644
--- a/onnxruntime/core/providers/coreml/builders/impl/unary_op_builder.cc
+++ b/onnxruntime/core/providers/coreml/builders/impl/unary_op_builder.cc
@@ -41,7 +41,12 @@ Status UnaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
     AddOperationInput(*op, "x", input_defs[0]->Name());
     if (op_type == "Reciprocal") {
       float epsilon = 1e-4;  // epsilon: const T (Optional, default=1e-4)
-      AddOperationInput(*op, "epsilon", model_builder.AddScalarConstant(op->type(), "epsilon", epsilon));
+      auto dtype = node.InputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
+      if (dtype == ONNX_NAMESPACE::TensorProto_DataType_FLOAT) {
+        AddOperationInput(*op, "epsilon", model_builder.AddScalarConstant(op->type(), "epsilon", epsilon));
+      } else if (dtype == ONNX_NAMESPACE::TensorProto_DataType_FLOAT16) {
+        AddOperationInput(*op, "epsilon", model_builder.AddScalarConstant(op->type(), "epsilon", MLFloat16(epsilon)));
+      }
     }

     AddOperationOutput(*op, *node.OutputDefs()[0]);
diff --git a/onnxruntime/core/providers/coreml/builders/model_builder.cc b/onnxruntime/core/providers/coreml/builders/model_builder.cc
index 7ecfad8493ea..50faebf06875 100644
--- a/onnxruntime/core/providers/coreml/builders/model_builder.cc
+++ b/onnxruntime/core/providers/coreml/builders/model_builder.cc
@@ -639,6 +639,14 @@ std::string_view ModelBuilder::AddConstantImpl(std::string_view op_type, std::st
   return AddTensorValueAsConstantOperation(op_type, value_type, std::move(input_value));
 }

+template <>
+std::string_view ModelBuilder::AddConstantImpl(std::string_view op_type, std::string_view value_type,
+                                               gsl::span<const MLFloat16> value,
+                                               std::optional<gsl::span<const int64_t>> shape) {
+  auto input_value = CreateTensorValue<MLFloat16>(value, shape);
+  return AddTensorValueAsConstantOperation(op_type, value_type, std::move(input_value));
+}
+
 template <>
 std::string_view ModelBuilder::AddConstantImpl(std::string_view op_type, std::string_view value_type,
                                                gsl::span value,
diff --git a/onnxruntime/core/providers/coreml/builders/model_builder.h b/onnxruntime/core/providers/coreml/builders/model_builder.h
index bb791fb90290..688dccfc3530 100644
--- a/onnxruntime/core/providers/coreml/builders/model_builder.h
+++ b/onnxruntime/core/providers/coreml/builders/model_builder.h
@@ -107,11 +107,12 @@ class ModelBuilder {
   std::string_view AddConstant(std::string_view op_type, std::string_view value_type, gsl::span<const T> value,
                                std::optional<gsl::span<const int64_t>> shape = std::nullopt) {
     static_assert(std::is_same_v<float, T> ||
+                      std::is_same_v<MLFloat16, T> ||
                       std::is_same_v<int64_t, T> ||
                       std::is_same_v<std::string, T> ||
                       std::is_same_v<bool, T>,
                   // add specialization in AddConstantImpl for new types if needed
-                  "AddConstant currently supports float, int64_t, std::string and bool.");
+                  "AddConstant currently supports float/MLFloat16, int64_t, std::string and bool.");
     return AddConstantImpl(op_type, value_type, value, shape);
   }

diff --git a/onnxruntime/test/providers/coreml/coreml_basic_test.cc b/onnxruntime/test/providers/coreml/coreml_basic_test.cc
index 6da88a24bd45..daa24db13411 100644
--- a/onnxruntime/test/providers/coreml/coreml_basic_test.cc
+++ b/onnxruntime/test/providers/coreml/coreml_basic_test.cc
@@ -257,93 +257,6 @@ TEST(CoreMLExecutionProviderTest, TestNameSanitization) {
   // TensorRT does not support Clip opset 11 yet.
   test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
 }
-
-TEST(CoreMLExecutionProviderTest, TestBinaryFp16) {
-  auto test_binary_op = [](std::string op) {
-    OpTester test(op, 11);
-
-    std::vector<int64_t> dims{3, 3};
-    std::vector<float> input1 = {-1.0f, 0.0f, 1.0f,
-                                 -6.0f, 0.0f, 6.0f,
-                                 -5.4f, 2.0f, 6.0f};
-    std::vector<MLFloat16> input1_fp16(9);
-    ConvertFloatToMLFloat16(input1.data(), input1_fp16.data(), 9);
-    std::vector<float> input2 = {-1.0f, 0.0f, 1.0f,
-                                 -5.0f, 0.0f, 5.0f,
-                                 -5.0f, 2.0f, 5.0f};
-    std::vector<MLFloat16> input2_fp16(9);
-    ConvertFloatToMLFloat16(input2.data(), input2_fp16.data(), 9);
-    std::vector<float> output(9);
-    if (op == "Add") {
-      for (int i = 0; i < 9; i++) {
-        output[i] = input1_fp16[i] + input2_fp16[i];
-      }
-    } else if (op == "Sub") {
-      for (int i = 0; i < 9; i++) {
-        output[i] = input1_fp16[i] - input2_fp16[i];
-      }
-    } else if (op == "Mul") {
-      for (int i = 0; i < 9; i++) {
-        output[i] = input1_fp16[i] * input2_fp16[i];
-      }
-    } else if (op == "Div") {
-      for (int i = 0; i < 9; i++) {
-        output[i] = input1_fp16[i] / input2_fp16[i];
-      }
-    }
-    std::vector<MLFloat16> output_fp16(9);
-    ConvertFloatToMLFloat16(output.data(), output_fp16.data(), 9);
-
-    test.AddInput<MLFloat16>("0", dims, input1_fp16);
-    test.AddInput<MLFloat16>("1.min", dims, input2_fp16);
-    test.AddOutput<MLFloat16>("3", dims, output_fp16);
-
-    // TensorRT does not support Clip opset 11 yet.
-    std::vector<std::unique_ptr<IExecutionProvider>> coreml_ep;
-    coreml_ep.emplace_back(MakeCoreMLExecutionProvider(COREML_FLAG_CREATE_MLPROGRAM));
-    test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &coreml_ep);
-  };
-  test_binary_op("Add");
-  test_binary_op("Sub");
-  test_binary_op("Div");
-  test_binary_op("Mul");
-}
-
-TEST(CoreMLExecutionProviderTest, TestUnaryFp16) {
-  auto test_binary_op = [](std::string op) {
-    OpTester test(op, 11);
-
-    std::vector<int64_t> dims{3, 3};
-    std::vector<float> input1 = {-1.0f, 0.0f, 1.0f,
-                                 -6.0f, 0.2f, 6.0f,
-                                 -5.4f, 2.0f, 6.0f};
-    std::vector<MLFloat16> input1_fp16(9);
-    ConvertFloatToMLFloat16(input1.data(), input1_fp16.data(), 9);
-
-    std::vector<float> output(9);
-    if (op == "Sqrt") {
-      for (int i = 0; i < 9; i++) {
-        output[i] = sqrt(input1_fp16[i]);
-      }
-    } else if (op == "Reciprocal") {
-      for (int i = 0; i < 9; i++) {
-        output[i] = 1.0f / (1e-4 + input1_fp16[i]);
-      }
-    } else if (op == "Relu") {
-      for (int i = 0; i < 9; i++) {
-        output[i] = fmax(0.0f, input1_fp16[i]);
-      }
-    }
-    std::vector<MLFloat16> output_fp16(9);
-    ConvertFloatToMLFloat16(output.data(), output_fp16.data(), 9);
-
-    test.AddInput<MLFloat16>("0", dims, input1_fp16);
-    test.AddOutput<MLFloat16>("3", dims, output_fp16);
-
-    // TensorRT does not support Clip opset 11 yet.
-    std::vector<std::unique_ptr<IExecutionProvider>> coreml_ep;
-    coreml_ep.emplace_back(MakeCoreMLExecutionProvider(COREML_FLAG_CREATE_MLPROGRAM));
-    test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &coreml_ep);
-  };
-  test_binary_op("Sqrt");
-  test_binary_op("Reciprocal");
-  test_binary_op("Relu");
-}
 #endif

 }  // namespace test
diff --git a/onnxruntime/test/providers/cpu/math/element_wise_ops_test.cc b/onnxruntime/test/providers/cpu/math/element_wise_ops_test.cc
index bd3d21d4929f..659622a70e4c 100644
--- a/onnxruntime/test/providers/cpu/math/element_wise_ops_test.cc
+++ b/onnxruntime/test/providers/cpu/math/element_wise_ops_test.cc
@@ -22,26 +22,38 @@ std::vector<MLFloat16> MakeMLFloat16(const std::initializer_list<float>& input)
   return output;
 }

-#if defined(USE_CUDA) || defined(USE_ROCM)
-void TestFloat16(const char* op_name, const std::vector<int64_t>& lhs_dim,
-                 const std::initializer_list<float>& lhs_values, const std::vector<int64_t>& rhs_dim,
-                 const std::initializer_list<float>& rhs_values, const std::vector<int64_t>& out_dim,
-                 const std::initializer_list<float>& out_values) {
+void TestBinaryFloat16(const char* op_name, const std::vector<int64_t>& lhs_dim,
+                       const std::initializer_list<float>& lhs_values, const std::vector<int64_t>& rhs_dim,
+                       const std::initializer_list<float>& rhs_values, const std::vector<int64_t>& out_dim,
+                       const std::initializer_list<float>& out_values, bool enable_bf16 = true) {
+  ORT_UNUSED_PARAMETER(op_name);
+  ORT_UNUSED_PARAMETER(lhs_dim);
+  ORT_UNUSED_PARAMETER(lhs_values);
+  ORT_UNUSED_PARAMETER(rhs_dim);
+  ORT_UNUSED_PARAMETER(rhs_values);
+  ORT_UNUSED_PARAMETER(out_dim);
+  ORT_UNUSED_PARAMETER(out_values);
+  ORT_UNUSED_PARAMETER(enable_bf16);
+#if defined(USE_CUDA) || defined(USE_ROCM) || defined(COREML_ENABLE_MLPROGRAM)
   {
     OpTester tester(op_name, 14);
    tester.AddInput<MLFloat16>("A", lhs_dim, MakeMLFloat16(lhs_values));
     tester.AddInput<MLFloat16>("B", rhs_dim, MakeMLFloat16(rhs_values));
     tester.AddOutput<MLFloat16>("C", out_dim, MakeMLFloat16(out_values));
     std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
-#ifdef USE_CUDA
+#ifdef COREML_ENABLE_MLPROGRAM
+    execution_providers.push_back(DefaultCoreMLExecutionProvider(true));
+#elif USE_CUDA
     execution_providers.push_back(DefaultCudaExecutionProvider());
 #elif USE_ROCM
     execution_providers.push_back(DefaultRocmExecutionProvider());
 #endif
     tester.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
   }
+#endif

-  {
+#if defined(USE_CUDA) || defined(USE_ROCM)
+  if (enable_bf16) {
     OpTester tester(op_name, 14);
     tester.AddInput<BFloat16>("A", lhs_dim, MakeBFloat16(lhs_values));
     tester.AddInput<BFloat16>("B", rhs_dim, MakeBFloat16(rhs_values));
     tester.AddOutput<BFloat16>("C", out_dim, MakeBFloat16(out_values));
     std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
 #ifdef USE_CUDA
     execution_providers.push_back(DefaultCudaExecutionProvider());
 #elif USE_ROCM
     execution_providers.push_back(DefaultRocmExecutionProvider());
 #endif
     tester.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
   }
+#endif
 }

+void TestUnaryFloat16(const char* op_name, const std::vector<int64_t>& lhs_dim,
+                      const std::initializer_list<float>& lhs_values, const std::vector<int64_t>& out_dim,
+                      const std::initializer_list<float>& out_values, int opset = 14) {
+  ORT_UNUSED_PARAMETER(op_name);
+  ORT_UNUSED_PARAMETER(lhs_dim);
+  ORT_UNUSED_PARAMETER(lhs_values);
+  ORT_UNUSED_PARAMETER(out_dim);
+  ORT_UNUSED_PARAMETER(out_values);
+  ORT_UNUSED_PARAMETER(opset);
+#if defined(USE_CUDA) || defined(USE_ROCM) || defined(COREML_ENABLE_MLPROGRAM)
+  {
+    OpTester tester(op_name, opset);
+    tester.AddInput<MLFloat16>("A", lhs_dim, MakeMLFloat16(lhs_values));
+    tester.AddOutput<MLFloat16>("C", out_dim, MakeMLFloat16(out_values));
+    std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
+#ifdef COREML_ENABLE_MLPROGRAM
+    execution_providers.push_back(DefaultCoreMLExecutionProvider(true));
+#elif USE_CUDA
+    execution_providers.push_back(DefaultCudaExecutionProvider());
+#elif USE_ROCM
+    execution_providers.push_back(DefaultRocmExecutionProvider());
+#endif
+    tester.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
+  }
+#endif
+
+#if defined(USE_CUDA) || defined(USE_ROCM)
+  {
+    OpTester tester(op_name, opset);
+    tester.AddInput<BFloat16>("A", lhs_dim, MakeBFloat16(lhs_values));
+    tester.AddOutput<BFloat16>("C", out_dim, MakeBFloat16(out_values));
+    std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
+#ifdef USE_CUDA
+    execution_providers.push_back(DefaultCudaExecutionProvider());
+#elif USE_ROCM
+    execution_providers.push_back(DefaultRocmExecutionProvider());
+#endif
+    tester.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
+  }
+#endif
+}
+
 void TestBFloat16(const char* op_name, const std::vector<int64_t>& lhs_dim,
                   const std::initializer_list<float>& lhs_values, const std::vector<int64_t>& rhs_dim,
                   const std::initializer_list<float>& rhs_values, const std::vector<int64_t>& out_dim,
@@ -163,9 +218,7 @@ TEST(MathOpTest, Add_float) {
   test.Run();
 #endif

-#if defined(USE_CUDA) || defined(USE_ROCM)
-  TestFloat16("Add", dims, lhs_values, dims, rhs_values, dims, out_values);
-#endif
+  TestBinaryFloat16("Add", dims, lhs_values, dims, rhs_values, dims, out_values);

 #if defined(USE_DNNL)
   TestBFloat16("Add", dims, lhs_values, dims, rhs_values, dims, out_values);
@@ -202,9 +255,7 @@ TEST(MathOpTest, Add_Broadcast_Axis) {
   test.AddOutput("C", dims, out_values);
   test.Run(OpTester::ExpectResult::kExpectSuccess, "");

-#if defined(USE_CUDA) || defined(USE_ROCM)
-  TestFloat16("Add", dims, lhs_values, {3, 1}, rhs_values, dims, out_values);
-#endif
+  TestBinaryFloat16("Add", dims, lhs_values, {3, 1}, rhs_values, dims, out_values);

 #if defined(USE_DNNL)
   TestBFloat16("Add", dims, lhs_values, {3, 1}, rhs_values, dims, out_values);
@@ -228,9 +279,7 @@ TEST(MathOpTest, Add_Broadcast_MultidirectionalAB) {
                 {kTensorrtExecutionProvider});  // TensorRT: got C with shape [3, 1]
 #endif

-#if defined(USE_CUDA) || defined(USE_ROCM)
-  TestFloat16("Add", {3, 1}, lhs_values, {3}, rhs_values, {3, 3}, out_values);
-#endif
+  TestBinaryFloat16("Add", {3, 1}, lhs_values, {3}, rhs_values, {3, 3}, out_values);

 #if defined(USE_DNNL)
   TestBFloat16("Add", {3, 1}, lhs_values, {3}, rhs_values, {3, 3}, out_values);
@@ -254,9 +303,7 @@ TEST(MathOpTest, Add_Broadcast_MultidirectionalBA) {
                 {kTensorrtExecutionProvider});  // TensorRT: got C with shape [3, 1]
 #endif

-#if defined(USE_CUDA) || defined(USE_ROCM)
-  TestFloat16("Add", {3}, lhs_values, {3, 1}, rhs_values, {3, 3}, out_values);
-#endif
+  TestBinaryFloat16("Add", {3}, lhs_values, {3, 1}, rhs_values, {3, 3}, out_values);

 #if defined(USE_DNNL)
   TestBFloat16("Add", {3}, lhs_values, {3, 1}, rhs_values, {3, 3}, out_values);
@@ -527,9 +574,7 @@ TEST(MathOpTest, Sub) {
   test.AddOutput("C", dims, out_values);
   test.Run();

-#if defined(USE_CUDA) || defined(USE_ROCM)
-  TestFloat16("Sub", dims, lhs_values, dims, rhs_values, dims, out_values);
-#endif
+  TestBinaryFloat16("Sub", dims, lhs_values, dims, rhs_values, dims, out_values);

 #if defined(USE_DNNL)
   TestBFloat16("Sub", dims, lhs_values, dims, rhs_values, dims, out_values);
@@ -584,9 +629,7 @@ TEST(MathOpTest, Mul) {

   test.Run();

-#if defined(USE_CUDA) || defined(USE_ROCM)
-  TestFloat16("Mul", dims, lhs_values, dims, rhs_values, dims, out_values);
-#endif
+  TestBinaryFloat16("Mul", dims, lhs_values, dims, rhs_values, dims, out_values);

 #if defined(USE_DNNL)
   TestBFloat16("Mul", dims, lhs_values, dims, rhs_values, dims, out_values);
@@ -622,9 +665,7 @@ TEST(MathOpTest, Div) {
   test.AddOutput("C", dims, out_values);
   test.Run();

-#if defined(USE_CUDA) || defined(USE_ROCM)
-  TestFloat16("Div", dims, lhs_values, dims, rhs_values, dims, out_values);
-#endif
+  TestBinaryFloat16("Div", dims, lhs_values, dims, rhs_values, dims, out_values);

 #if defined(USE_DNNL)
   TestBFloat16("Div", dims, lhs_values, dims, rhs_values, dims, out_values);
@@ -772,13 +813,12 @@ TEST(MathOpTest, Ceil_double) {
 TEST(MathOpTest, Reciprocal) {
   OpTester test("Reciprocal");
   std::vector<int64_t> dims{2, 2};
-  test.AddInput<float>("X", dims,
-                       {1.0f, 2.0f,
-                        -1.0f, -2.0f});
-  test.AddOutput<float>("Y", dims,
-                        {1.0f, 0.5f,
-                         -1.0f, -0.5f});
+  std::initializer_list<float> inputs = {1.0f, 2.0f, -1.0f, -2.0f};
+  std::initializer_list<float> outputs = {1.0f, 0.5f, -1.0f, -0.5f};
+  test.AddInput<float>("X", dims, inputs);
+  test.AddOutput<float>("Y", dims, outputs);
   test.Run();
+  TestUnaryFloat16("Reciprocal", dims, inputs, dims, outputs, 12);
 }

 TEST(MathOpTest, Reciprocal_double) {
@@ -795,14 +835,13 @@ TEST(MathOpTest, Reciprocal_double) {

 TEST(MathOpTest, Sqrt_Float) {
   OpTester test("Sqrt");
+  std::initializer_list<float> inputs = {1.0f, 4.0f, 0.0f, 9.0f};
+  std::initializer_list<float> outputs = {1.0f, 2.0f, 0.0f, 3.0f};
   std::vector<int64_t> dims{2, 2};
-  test.AddInput<float>("X", dims,
-                       {1.0f, 4.0f,
-                        0.0f, 9.0f});
-  test.AddOutput<float>("Y", dims,
-                        {1.0f, 2.0f,
-                         0.0f, 3.0f});
+  test.AddInput<float>("X", dims, inputs);
+  test.AddOutput<float>("Y", dims, outputs);
   test.Run();
+  TestUnaryFloat16("Sqrt", dims, inputs, dims, outputs);
 }

 #if defined(USE_DNNL) || defined(USE_CUDA)
@@ -1056,24 +1095,13 @@ TEST(MathOpTest, Pow_double_int64) {
   test.Run();
 }

-#if defined(USE_CUDA) || defined(USE_ROCM)
 TEST(MathOpTest, Pow_float16_float16) {
-  OpTester test("Pow", 12);
   std::vector<int64_t> dims{4};
-
-  test.AddInput<MLFloat16>("X", dims, MakeMLFloat16({2.0f, 2.0f, std::sqrt(2.0f), 1.0f}));
-  test.AddInput<MLFloat16>("Y", dims, MakeMLFloat16({0.0f, 8.0f, 2.0f, 9.0f}));
-  test.AddOutput<MLFloat16>("Z", dims, MakeMLFloat16({1.0f, 256.0f, 2.0f, 1.0f}));
-
-  std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
-#ifdef USE_CUDA
-  execution_providers.push_back(DefaultCudaExecutionProvider());
-#elif USE_ROCM
-  execution_providers.push_back(DefaultRocmExecutionProvider());
-#endif
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
+  TestBinaryFloat16("Pow", dims, {2.0f, 2.0f, std::sqrt(2.0f), 1.0f}, dims, {0.0f, 8.0f, 2.0f, 9.0f},
+                    dims, {1.0f, 256.0f, 2.0f, 1.0f}, false);
 }

+#if defined(USE_CUDA) || defined(USE_ROCM)
 TEST(MathOpTest, Pow_float_float16) {
   OpTester test("Pow", 12);
   std::vector<int64_t> dims{4};
@@ -3660,5 +3688,6 @@ TEST(MathOpTest, BitwiseNot_uint8) {
   test.AddOutput<uint8_t>("Y", dims, {254, 251, 250, 252});
   test.Run();
 }
+
 }  // namespace test
 }  // namespace onnxruntime
diff --git a/tools/ci_build/github/apple/coreml_supported_mlprogram_ops.md b/tools/ci_build/github/apple/coreml_supported_mlprogram_ops.md
index bb4cfb2e09dc..0b51311e2271 100644
--- a/tools/ci_build/github/apple/coreml_supported_mlprogram_ops.md
+++ b/tools/ci_build/github/apple/coreml_supported_mlprogram_ops.md
@@ -29,3 +29,5 @@ Keep in sync with doco generated from /docs/execution-providers/CoreML-Execution
 |ai.onnx:Sigmoid||
 |ai.onnx:Tanh||
 |ai.onnx:Transpose||
+|ai.onnx:Sqrt||
+|ai.onnx:Reciprocal|this asks for an `epsilon` (default 1e-4) that ONNX does not provide|
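
Usage note (not part of the patch series): a minimal sketch of how an application might exercise the fp16 paths added above. It assumes a build with COREML_ENABLE_MLPROGRAM and uses the C API factory declared in include/onnxruntime/core/providers/coreml/coreml_provider_factory.h; the model path is a hypothetical placeholder. COREML_FLAG_CREATE_MLPROGRAM is the same flag the new tests pass to MakeCoreMLExecutionProvider, and only this ML Program path accepts float16 per the Float16Ops allowlist in base_op_builder.cc.

  // Enable the CoreML EP as an ML Program so float16 nodes can be taken by CoreML.
  #include <onnxruntime_cxx_api.h>
  #include <coreml_provider_factory.h>

  int main() {
    Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "coreml_fp16");
    Ort::SessionOptions so;
    // Request the ML Program path; without this flag the NeuralNetwork path is used,
    // which remains float32-only in this patch set.
    Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CoreML(
        so, COREML_FLAG_CREATE_MLPROGRAM));
    Ort::Session session(env, "model_fp16.onnx", so);  // hypothetical fp16 model
    return 0;
  }

Nodes whose op types are not in the Float16Ops allowlist are simply not claimed by the CoreML EP and fall back to the CPU EP, so partial fp16 coverage degrades gracefully rather than failing session creation.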