[CoreML MLProgram] Support Float16 (1/N) #22068

Draft · wants to merge 6 commits into base: main
35 changes: 28 additions & 7 deletions onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc
@@ -1,6 +1,7 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#include <set>
#include <string>
#include "core/providers/common.h"
#include "core/providers/coreml/builders/helper.h"
#include "core/providers/coreml/builders/impl/base_op_builder.h"
@@ -12,6 +13,13 @@
namespace onnxruntime {
namespace coreml {

// Once all ops support FP16 we can remove this. Until then, keep a set of op types to
// filter the supported ones.
static std::set<std::string> Float16Ops = {
"Add", "Mul", "Sub", "Div", "Pow", "Sqrt", "Reciprocal",
"Sigmoid", "Tanh", "Relu", "LeakyRelu", "Concat", "GridSample", "GlobalAveragePool", "Clip", "DepthToSpace", "Resize", "Slice",

Check warning on line 20 in onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Lines should be <= 120 characters long [whitespace/line_length] [2] Raw Output: onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc:20: Lines should be <= 120 characters long [whitespace/line_length] [2]
"GlobalMaxPool", "AveragePool", "MaxPool", "Reshape", "Split", "Transpose"};

namespace {
// TODO, move this to shared_library
bool HasExternalInitializer(const InitializedTensorSet& initializers, const Node& node,
@@ -83,8 +91,9 @@
}

/* static */
bool BaseOpBuilder::IsInputFloat(const Node& node, size_t idx, const OpBuilderInputParams& /*input_params*/,
const logging::Logger& logger) {
bool BaseOpBuilder::IsInputDtypeSupport(const Node& node, size_t idx,
[[maybe_unused]] const OpBuilderInputParams& input_params,
const logging::Logger& logger) {
if (idx >= node.InputDefs().size()) {
LOGS(logger, VERBOSE) << "Input index [" << idx << "] is out of range";
return false;
@@ -94,20 +103,32 @@

int32_t input_type = ONNX_NAMESPACE::TensorProto_DataType_UNDEFINED;

// currently only float is supported
if (!GetType(input, input_type, logger) || input_type != ONNX_NAMESPACE::TensorProto_DataType_FLOAT) {
LOGS(logger, VERBOSE) << "[" << node.OpType() << "] Input type: [" << input_type << "] is not currently supported";
if (!GetType(input, input_type, logger)) {
LOGS(logger, VERBOSE) << "[" << node.OpType() << "] Get Input type failed";
return false;
}

return true;
// float is supported
if (input_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT) {
return true;
}

// FP16 is only supported when creating an MLProgram
#if defined(COREML_ENABLE_MLPROGRAM)
if (input_params.create_mlprogram && input_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT16 &&
    Float16Ops.count(node.OpType())) {
return true;
}
#endif

LOGS(logger, VERBOSE) << "[" << node.OpType() << "] Input type: [" << input_type << "] is not currently supported";
return false;
}

bool BaseOpBuilder::HasSupportedInputsImpl(const Node& node, const OpBuilderInputParams& input_params,
const logging::Logger& logger) const {
// We only check the type of input 0 by default
// specific op builder can override this
return IsInputFloat(node, 0, input_params, logger);
return IsInputDtypeSupport(node, 0, input_params, logger);
}

bool BaseOpBuilder::HasSupportedOpSet(const Node& node, const logging::Logger& logger) const {
onnxruntime/core/providers/coreml/builders/impl/base_op_builder.h
@@ -32,9 +32,9 @@ class BaseOpBuilder : public IOpBuilder {
: allow_empty_tensor_as_input_(allow_empty_tensor_as_input) {
}

// currently we only support float
static bool IsInputFloat(const Node& node, size_t idx, const OpBuilderInputParams& input_params,
const logging::Logger& logger);
// currently we support float/float16
static bool IsInputDtypeSupport(const Node& node, size_t idx, const OpBuilderInputParams& input_params,
const logging::Logger& logger);

private:
virtual bool IsOpSupportedImpl(const Node& /*node*/, const OpBuilderInputParams& /*input_params*/,
onnxruntime/core/providers/coreml/builders/impl/binary_op_builder.cc
@@ -73,7 +73,7 @@ Status BinaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
} else if (op_type == "Sub") {
coreml_op_type = "sub";
} else if (op_type == "Div") {
// we only support fp32 currently. when we add support for integers we need to check the type and use
// we support fp32/fp16 currently. when we add support for integers we need to check the type and use
// "floor_div" or "real_div" accordingly
coreml_op_type = "real_div";
} else if (op_type == "Pow") {
@@ -138,9 +138,9 @@ bool BinaryOpBuilder::HasSupportedInputsImpl(const Node& node, const OpBuilderIn
const logging::Logger& logger) const {
// Add/Sub/Mul/Div spec says inputs must be of the same type.
// Pow spec says inputs can be different types.
// We only support float for all of these inputs.
if (!IsInputFloat(node, 0, input_params, logger) ||
((node.OpType() == "Pow") && !IsInputFloat(node, 1, input_params, logger))) {
// We support float/float16 for all of these inputs.
if (!IsInputDtypeSupport(node, 0, input_params, logger) ||
((node.OpType() == "Pow") && !IsInputDtypeSupport(node, 1, input_params, logger))) {
return false;
}

32 changes: 32 additions & 0 deletions onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc
@@ -96,6 +96,9 @@
case ONNX_NAMESPACE::TensorProto_DataType_FLOAT:
CreateCoreMLWeight(weight, unpacked_tensor.DataAsSpan<float>());
break;
case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16:
CreateCoreMLWeight(weight, unpacked_tensor.DataAsSpan<MLFloat16>());
break;
case ONNX_NAMESPACE::TensorProto_DataType_INT32:
CreateCoreMLWeight(weight, unpacked_tensor.DataAsSpan<int32_t>());
break;
@@ -114,6 +117,11 @@
weight.mutable_floatvalue()->Assign(data.begin(), data.end());
}

void CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, gsl::span<const MLFloat16> data) {
Contributor: Do we need CreateCoreMLWeightConvertingDataToFloats to also support converting to fp16? Maybe not until we add fp16 support to operators that have initializers as input.

Contributor (author): CreateCoreMLWeightConvertingDataToFloat16s is already supported (it is added below in this file).

const char* data_byte_ptr = reinterpret_cast<const char*>(data.data());
weight.mutable_float16value()->assign(data_byte_ptr, data_byte_ptr + data.size_bytes());
}

namespace {
template <typename T>
void CreateCoreMLWeightConvertingDataToFloats(CoreML::Specification::WeightParams& weight, gsl::span<const T> data) {
Expand All @@ -123,6 +131,15 @@
[](T v) { return narrow<float>(v); });
*weight.mutable_floatvalue() = std::move(weight_floats);
}

template <typename T>
void CreateCoreMLWeightConvertingDataToFloat16s(CoreML::Specification::WeightParams& weight, gsl::span<const T> data) {
std::vector<MLFloat16> weight_float16s{};
weight_float16s.reserve(data.size());
std::transform(data.begin(), data.end(), std::back_inserter(weight_float16s),
[](T v) { return MLFloat16(narrow<float>(v)); });
CreateCoreMLWeight(weight, weight_float16s);
}
} // namespace
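To illustrate the distinction raised in the review thread above, here is a minimal, hypothetical call-site sketch (not part of this diff; it assumes it lives inside builder_utils.cc next to the helpers it calls). CreateCoreMLWeight stores fp16 initializer data byte-for-byte, while CreateCoreMLWeightConvertingDataToFloat16s narrows other element types to float and wraps each value in MLFloat16 first.

// Hypothetical usage sketch (illustrative only, not part of this PR); assumes an
// already-unpacked initializer and the helpers defined above in this file.
void ExampleFp16WeightUsage(CoreML::Specification::WeightParams& weight) {
  // fp16 initializer data: the raw 2-byte payloads are copied into weight.float16value().
  const std::vector<MLFloat16> fp16_values = {MLFloat16(1.0f), MLFloat16(0.5f)};
  CreateCoreMLWeight(weight, gsl::span<const MLFloat16>(fp16_values.data(), fp16_values.size()));

  // non-fp16 initializer data: each element is narrowed to float and re-wrapped as MLFloat16.
  const std::vector<int64_t> int_values = {1, 2, 3};
  CreateCoreMLWeightConvertingDataToFloat16s(weight, gsl::span<const int64_t>(int_values.data(), int_values.size()));
}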

void CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, gsl::span<const int32_t> data) {
Expand Down Expand Up @@ -195,6 +212,13 @@
tensor_value.mutable_floats()->mutable_values()->Add(data.begin(), data.end());
}

template <>
void CopyDataToTensorValue<MLFloat16>(MILSpec::TensorValue& tensor_value, gsl::span<const MLFloat16> data) {
const char* begin = reinterpret_cast<const char*>(data.data());
const char* end = begin + data.size() * sizeof(MLFloat16);
tensor_value.mutable_bytes()->mutable_values()->assign(begin, end);
}

template <>
void CopyDataToTensorValue<int32_t>(MILSpec::TensorValue& tensor_value, gsl::span<const int32_t> data) {
tensor_value.mutable_ints()->mutable_values()->Add(data.begin(), data.end());
@@ -290,6 +314,14 @@
// explicit specializations for types we handle so the implementation can be in the .cc file
template MILSpec::Value CreateTensorValue<int64_t, int32_t>(gsl::span<const int64_t> data,
std::optional<gsl::span<const int64_t>> shape);
template MILSpec::Value CreateTensorValue<float, float>(gsl::span<const float> data,
std::optional<gsl::span<const int64_t>> shape);
template MILSpec::Value CreateTensorValue<MLFloat16, MLFloat16>(gsl::span<const MLFloat16> data,
std::optional<gsl::span<const int64_t>> shape);
template MILSpec::Value CreateTensorValue<bool, bool>(gsl::span<const bool> data,
std::optional<gsl::span<const int64_t>> shape);
template MILSpec::Value CreateTensorValue<std::string, std::string>(gsl::span<const std::string> data,
std::optional<gsl::span<const int64_t>> shape);

template MILSpec::Value CreateScalarTensorValue(const float& data);
template MILSpec::Value CreateScalarTensorValue(const int32_t& data);
onnxruntime/core/providers/coreml/builders/impl/builder_utils.h
@@ -41,6 +41,9 @@ Status CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, const ONN
// Copy the float array to a coreml weight
void CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, gsl::span<const float> data);

// Copy the MLFloat16 array to a coreml weight
void CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, gsl::span<const MLFloat16> data);

// Copy the int32_t array to a coreml weight
void CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, gsl::span<const int32_t> data);

59 changes: 47 additions & 12 deletions onnxruntime/core/providers/coreml/builders/impl/unary_op_builder.cc
@@ -3,6 +3,7 @@

#include "core/providers/common.h"

#include "core/providers/coreml/builders/impl/builder_utils.h"
#include "core/providers/coreml/builders/helper.h"
#include "core/providers/coreml/builders/impl/base_op_builder.h"
#include "core/providers/coreml/builders/model_builder.h"
@@ -14,28 +15,62 @@
class UnaryOpBuilder : public BaseOpBuilder {
Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node,
const logging::Logger& logger) const override;
bool SupportsMLProgram() const override { return true; }
};

Status UnaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node,
const logging::Logger& /* logger */) const {
const auto& op_type(node.OpType());
const auto& input_defs(node.InputDefs());

std::unique_ptr<COREML_SPEC::NeuralNetworkLayer> layer = model_builder.CreateNNLayer(node);
#if defined(COREML_ENABLE_MLPROGRAM)
if (model_builder.CreateMLProgram()) {
using namespace CoreML::Specification::MILSpec;  // NOLINT

if (op_type == "Sqrt") {
layer->mutable_unary()->set_type(COREML_SPEC::UnaryFunctionLayerParams::SQRT);
} else if (op_type == "Reciprocal") {
layer->mutable_unary()->set_type(COREML_SPEC::UnaryFunctionLayerParams::INVERSE);
} else {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"UnaryOpBuilder::AddToModelBuilderImpl, unknown op: ", op_type);
}
std::string_view coreml_op_type;
if (op_type == "Sqrt") {
coreml_op_type = "sqrt";
} else if (op_type == "Reciprocal") {
coreml_op_type = "inverse";
} else {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"UnaryOpBuilder::AddToModelBuilderImpl, unexpected op: ", op_type);
}

std::unique_ptr<Operation> op = model_builder.CreateOperation(node, coreml_op_type);
AddOperationInput(*op, "x", input_defs[0]->Name());
if (op_type == "Reciprocal") {
float epsilon = 1e-4f;  // epsilon: const T (Optional, default=1e-4)
auto dtype = node.InputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
if (dtype == ONNX_NAMESPACE::TensorProto_DataType_FLOAT) {
AddOperationInput(*op, "epsilon", model_builder.AddScalarConstant(op->type(), "epsilon", epsilon));
} else if (dtype == ONNX_NAMESPACE::TensorProto_DataType_FLOAT16) {
AddOperationInput(*op, "epsilon", model_builder.AddScalarConstant(op->type(), "epsilon", MLFloat16(epsilon)));
}
}

AddOperationOutput(*op, *node.OutputDefs()[0]);

*layer->mutable_input()->Add() = input_defs[0]->Name();
*layer->mutable_output()->Add() = node.OutputDefs()[0]->Name();
model_builder.AddOperation(std::move(op));
} else // NOLINT
#endif // defined (COREML_ENABLE_MLPROGRAM)
{
std::unique_ptr<COREML_SPEC::NeuralNetworkLayer> layer = model_builder.CreateNNLayer(node);

model_builder.AddLayer(std::move(layer));
if (op_type == "Sqrt") {
layer->mutable_unary()->set_type(COREML_SPEC::UnaryFunctionLayerParams::SQRT);
} else if (op_type == "Reciprocal") {
layer->mutable_unary()->set_type(COREML_SPEC::UnaryFunctionLayerParams::INVERSE);
} else {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"UnaryOpBuilder::AddToModelBuilderImpl, unknown op: ", op_type);
}

*layer->mutable_input()->Add() = input_defs[0]->Name();
*layer->mutable_output()->Add() = node.OutputDefs()[0]->Name();

model_builder.AddLayer(std::move(layer));
}
return Status::OK();
}

11 changes: 11 additions & 0 deletions onnxruntime/core/providers/coreml/builders/model_builder.cc
@@ -639,6 +639,14 @@ std::string_view ModelBuilder::AddConstantImpl(std::string_view op_type, std::st
return AddTensorValueAsConstantOperation(op_type, value_type, std::move(input_value));
}

template <>
std::string_view ModelBuilder::AddConstantImpl(std::string_view op_type, std::string_view value_type,
gsl::span<const MLFloat16> value,
std::optional<gsl::span<const int64_t>> shape) {
auto input_value = CreateTensorValue<MLFloat16>(value, shape);
return AddTensorValueAsConstantOperation(op_type, value_type, std::move(input_value));
}

template <>
std::string_view ModelBuilder::AddConstantImpl(std::string_view op_type, std::string_view value_type,
gsl::span<const int64_t> value,
@@ -811,6 +819,9 @@ Status ModelBuilder::RegisterModelInputOutput(const NodeArg& node_arg, bool is_i
case ONNX_NAMESPACE::TensorProto_DataType_FLOAT:
multi_array->set_datatype(ArrayFeatureType::FLOAT32);
break;
case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16:
multi_array->set_datatype(ArrayFeatureType::FLOAT16);
break;
case ONNX_NAMESPACE::TensorProto_DataType_INT32:
multi_array->set_datatype(ArrayFeatureType::INT32);
break;
3 changes: 2 additions & 1 deletion onnxruntime/core/providers/coreml/builders/model_builder.h
@@ -107,11 +107,12 @@ class ModelBuilder {
std::string_view AddConstant(std::string_view op_type, std::string_view value_type, gsl::span<const T> value,
std::optional<gsl::span<const int64_t>> shape = std::nullopt) {
static_assert(std::is_same_v<T, float> ||
std::is_same_v<T, MLFloat16> ||
std::is_same_v<T, int64_t> ||
std::is_same_v<T, std::string> ||
std::is_same_v<T, bool>,
// add specialization in AddConstantImpl for new types if needed
"AddConstant currently supports float, int64_t, std::string and bool.");
"AddConstant currently supports float/MLFloat16, int64_t, std::string and bool.");
return AddConstantImpl(op_type, value_type, value, shape);
}
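As a usage sketch of how this template might now be reached with fp16 data (the op-builder call site, tensor name and values below are hypothetical; AddConstant, AddScalarConstant, AddOperationInput and MLFloat16 are the real pieces this PR touches):

// Hypothetical MLProgram op-builder snippet (illustrative only, not part of this diff);
// assumes `op` and `model_builder` are in scope as in the unary/binary builders above.
std::vector<MLFloat16> scale_values = {MLFloat16(0.5f), MLFloat16(2.0f)};
std::vector<int64_t> scale_shape = {2};
AddOperationInput(*op, "scale",
                  model_builder.AddConstant(op->type(), "scale",
                                            gsl::span<const MLFloat16>(scale_values.data(), scale_values.size()),
                                            gsl::span<const int64_t>(scale_shape.data(), scale_shape.size())));

// Scalars follow the same pattern via AddScalarConstant, as the Reciprocal epsilon above does:
AddOperationInput(*op, "epsilon", model_builder.AddScalarConstant(op->type(), "epsilon", MLFloat16(1e-4f)));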

16 changes: 16 additions & 0 deletions onnxruntime/core/providers/coreml/model/model.mm
@@ -120,6 +120,10 @@ Status CreateInputFeatureProvider(const std::unordered_map<std::string, OnnxTens
data_type = MLMultiArrayDataTypeFloat32;
break;
}
case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16: {
data_type = MLMultiArrayDataTypeFloat16;
break;
}
case ONNX_NAMESPACE::TensorProto_DataType_INT32: {
data_type = MLMultiArrayDataTypeInt32;
break;
@@ -205,6 +209,18 @@ Status CopyMLMultiArrayBuffer(const void* mlmultiarray_buffer, void* tensor_buff
}
break;
}
case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16: {
// fp16 elements are copied as opaque 16-bit values; no numeric conversion is needed.
const auto* src_buffer = static_cast<const uint16_t*>(mlmultiarray_buffer);
auto* dst_buffer = static_cast<uint16_t*>(tensor_buffer);
const auto block_byte_size = block_size * sizeof(uint16_t);

for (int64_t idx = 0; idx < num_blocks; ++idx) {
memcpy(dst_buffer, src_buffer, block_byte_size);
src_buffer += stride;
dst_buffer += block_size;
}
break;
}
case ONNX_NAMESPACE::TensorProto_DataType_INT32: {
const auto* src_buffer = static_cast<const int32_t*>(mlmultiarray_buffer);
auto* dst_buffer = static_cast<int32_t*>(tensor_buffer);