diff --git a/docs/2024.html b/docs/2024.html index 67d3818699..5277d041c4 100644 --- a/docs/2024.html +++ b/docs/2024.html @@ -42,6 +42,12 @@
New features
  • API of SynetInnerProduct16b framework.
  • +

    Test framework

    +
    New features
    + + Home

    June 3, 2024 (version 6.1.138)

    diff --git a/docs/help/group__synet__inner__product__bf16.html b/docs/help/group__synet__inner__product__bf16.html index d739c04efa..4ab5740f0a 100644 --- a/docs/help/group__synet__inner__product__bf16.html +++ b/docs/help/group__synet__inner__product__bf16.html @@ -50,9 +50,9 @@

    Simd Library Documentation.

    - - - + + + @@ -72,8 +72,8 @@

    Simd Library Documentation.

    Detailed Description

    Functions to accelerate InnerProduct16bLayer in Synet Framework.

    Function Documentation

    - -

    ◆ SimdSynetInnerProduct16bInit()

    + +

    ◆ SimdSynetInnerProduct16bInit()

    @@ -118,7 +118,7 @@

    - + @@ -156,7 +156,7 @@

    [in]

    - +

    Functions

    SIMD_API void * SimdSynetInnerProduct16bInit (size_t M, size_t N, size_t K, SimdTensorDataType typeA, SimdTensorDataType typeB, SimdTensorDataType typeC, SimdBool transpA, SimdBool constB, SimdBool bias)
     Initializes BF16 inner product (matrix multiplication) algorithm. More...
     
    SIMD_API void * SimdSynetInnerProduct16bInit (size_t M, size_t N, size_t K, SimdTensorDataType typeA, SimdTensorDataType typeB, SimdTensorDataType typeC, SimdBool transB, SimdBool constB, SimdBool bias)
     Initializes BF16 inner product (matrix multiplication) algorithm. More...
     
    SIMD_API size_t SimdSynetInnerProduct16bInternalBufferSize (const void *context)
     Gets size in bytes of internal buffer used inside BF16 inner product algorithm. More...
     
    SimdBool transpA, transB,
    typeA- a type of A matrix. It can be FP32 or BF16.
    [in]typeB- a type of B matrix. It can be FP32 or BF16.
    [in]typeC- a type of C matrix. It can be FP32 or BF16.
    [in]transpA- a transpose matrix A before multiplication.
    [in]transB- a transpose matrix B before multiplication.
    [in]constB- a matrix B is constant.
    [in]bias- a flag to add bias to output matrix C.
    @@ -185,7 +185,7 @@

    Parameters
    - +
    [in]context- a pointer to BF16 inner product context. It must be created by function SimdSynetInnerProduct16bInit and released by function SimdRelease.
    [in]context- a pointer to BF16 inner product context. It must be created by function SimdSynetInnerProduct16bInit and released by function SimdRelease.
    @@ -212,7 +212,7 @@

    Parameters
    - +
    [in]context- a pointer to BF16 inner product context. It must be created by function SimdSynetInnerProduct16bInit and released by function SimdRelease.
    [in]context- a pointer to BF16 inner product context. It must be created by function SimdSynetInnerProduct16bInit and released by function SimdRelease.
    @@ -239,7 +239,7 @@

    Parameters
    - +
    [in]context- a pointer to BF16 inner product context. It must be created by function SimdSynetInnerProduct16bInit and released by function SimdRelease.
    [in]context- a pointer to BF16 inner product context. It must be created by function SimdSynetInnerProduct16bInit and released by function SimdRelease.
    @@ -288,7 +288,7 @@

    Parameters
    - + @@ -345,7 +345,7 @@

    Parameters

    [in,out]context- a pointer to BF16 inner product context. It must be created by function SimdSynetInnerProduct16bInit and released by function SimdRelease.
    [in,out]context- a pointer to BF16 inner product context. It must be created by function SimdSynetInnerProduct16bInit and released by function SimdRelease.
    [in]weight- a pointer to inner product weights. Can be NULL.
    [out]internal- a flag signaling that weight is stored in the internal buffer. Can be NULL.
    [in]bias- a pointer to bias. Can be NULL.
    - + diff --git a/prj/vs2019/Base.vcxproj b/prj/vs2019/Base.vcxproj index bd3556e261..74830e3a82 100644 --- a/prj/vs2019/Base.vcxproj +++ b/prj/vs2019/Base.vcxproj @@ -87,6 +87,7 @@ + @@ -195,6 +196,7 @@ + diff --git a/prj/vs2019/Base.vcxproj.filters b/prj/vs2019/Base.vcxproj.filters index f5682d25ca..3d999d3cc4 100644 --- a/prj/vs2019/Base.vcxproj.filters +++ b/prj/vs2019/Base.vcxproj.filters @@ -346,6 +346,9 @@ Base + + Base + @@ -600,6 +603,9 @@ Inc + + Inc + diff --git a/prj/vs2019/Test.vcxproj b/prj/vs2019/Test.vcxproj index fd8c79db66..1538d6c3f2 100644 --- a/prj/vs2019/Test.vcxproj +++ b/prj/vs2019/Test.vcxproj @@ -115,6 +115,7 @@ + diff --git a/prj/vs2019/Test.vcxproj.filters b/prj/vs2019/Test.vcxproj.filters index 2daa4ab7ce..82f73cf51b 100644 --- a/prj/vs2019/Test.vcxproj.filters +++ b/prj/vs2019/Test.vcxproj.filters @@ -253,6 +253,9 @@ Test + + Test + diff --git a/prj/vs2022/Base.vcxproj b/prj/vs2022/Base.vcxproj index bd3556e261..74830e3a82 100644 --- a/prj/vs2022/Base.vcxproj +++ b/prj/vs2022/Base.vcxproj @@ -87,6 +87,7 @@ + @@ -195,6 +196,7 @@ + diff --git a/prj/vs2022/Base.vcxproj.filters b/prj/vs2022/Base.vcxproj.filters index f5682d25ca..3d999d3cc4 100644 --- a/prj/vs2022/Base.vcxproj.filters +++ b/prj/vs2022/Base.vcxproj.filters @@ -346,6 +346,9 @@ Base + + Base + @@ -600,6 +603,9 @@ Inc + + Inc + diff --git a/prj/vs2022/Test.vcxproj b/prj/vs2022/Test.vcxproj index fd8c79db66..1538d6c3f2 100644 --- a/prj/vs2022/Test.vcxproj +++ b/prj/vs2022/Test.vcxproj @@ -115,6 +115,7 @@ + diff --git a/prj/vs2022/Test.vcxproj.filters b/prj/vs2022/Test.vcxproj.filters index 2daa4ab7ce..82f73cf51b 100644 --- a/prj/vs2022/Test.vcxproj.filters +++ b/prj/vs2022/Test.vcxproj.filters @@ -253,6 +253,9 @@ Test + + Test + diff --git a/src/Simd/SimdBaseSynetInnerProduct16b.cpp b/src/Simd/SimdBaseSynetInnerProduct16b.cpp new file mode 100644 index 0000000000..3c7bfd722b --- /dev/null +++ b/src/Simd/SimdBaseSynetInnerProduct16b.cpp @@ -0,0 +1,44 @@ +/* +* Simd Library 
(http://ermig1979.github.io/Simd). +* +* Copyright (c) 2011-2024 Yermalayeu Ihar. +* +* Permission is hereby granted, free of charge, to any person obtaining a copy +* of this software and associated documentation files (the "Software"), to deal +* in the Software without restriction, including without limitation the rights +* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the Software is +* furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +* SOFTWARE. 
+*/ +#include "Simd/SimdSynetInnerProduct16b.h" +#include "Simd/SimdCpu.h" +#include "Simd/SimdBase.h" + +namespace Simd +{ +#if defined(SIMD_SYNET_ENABLE) + namespace Base + { + //------------------------------------------------------------------------------------------------- + + void* SynetInnerProduct16bInit(size_t M, size_t N, size_t K, SimdTensorDataType typeA, SimdTensorDataType typeB, SimdTensorDataType typeC, SimdBool transB, SimdBool constB, SimdBool bias) + { + InnerProductParam16b param(M, N, K, typeA, typeB, typeC, transB, constB, bias); + if (!param.Valid()) + return NULL; + return NULL; + } + } +#endif +} diff --git a/src/Simd/SimdLib.cpp b/src/Simd/SimdLib.cpp index a1f955f455..d75b112347 100644 --- a/src/Simd/SimdLib.cpp +++ b/src/Simd/SimdLib.cpp @@ -75,6 +75,7 @@ BOOL APIENTRY DllMain(HMODULE hModule, DWORD dwReasonForCall, LPVOID lpReserved) #include "Simd/SimdSynetDeconvolution32f.h" #include "Simd/SimdSynetGridSample.h" #include "Simd/SimdSynetInnerProduct32f.h" +#include "Simd/SimdSynetInnerProduct16b.h" #include "Simd/SimdSynetMergedConvolution32f.h" #include "Simd/SimdSynetMergedConvolution16b.h" #include "Simd/SimdSynetMergedConvolution8i.h" @@ -5261,14 +5262,14 @@ SIMD_API void SimdSynetInnerProductLayerForward(const float * src, const float * #endif } -SIMD_API void* SimdSynetInnerProduct16bInit(size_t M, size_t N, size_t K, SimdTensorDataType typeA, SimdTensorDataType typeB, SimdTensorDataType typeC, SimdBool transpA, SimdBool constB, SimdBool bias) +SIMD_API void* SimdSynetInnerProduct16bInit(size_t M, size_t N, size_t K, SimdTensorDataType typeA, SimdTensorDataType typeB, SimdTensorDataType typeC, SimdBool transB, SimdBool constB, SimdBool bias) { SIMD_EMPTY(); #if defined(SIMD_SYNET_ENABLE) - typedef void* (*SimdSynetInnerProduct16bInitPtr) (size_t M, size_t N, size_t K, SimdTensorDataType typeA, SimdTensorDataType typeB, SimdTensorDataType typeC, SimdBool transpA, SimdBool constB, SimdBool bias); - const static 
SimdSynetInnerProduct16bInitPtr simdSynetInnerProduct32fInit = NULL;// SIMD_FUNC4(SynetInnerProduct32fInit, SIMD_AMXBF16_FUNC, SIMD_AVX512BW_FUNC, SIMD_AVX2_FUNC, SIMD_SSE41_FUNC); + typedef void* (*SimdSynetInnerProduct16bInitPtr) (size_t M, size_t N, size_t K, SimdTensorDataType typeA, SimdTensorDataType typeB, SimdTensorDataType typeC, SimdBool transB, SimdBool constB, SimdBool bias); + const static SimdSynetInnerProduct16bInitPtr simdSynetInnerProduct32fInit = SIMD_FUNC0(SynetInnerProduct16bInit);// , SIMD_AMXBF16_FUNC, SIMD_AVX512BW_FUNC, SIMD_AVX2_FUNC, SIMD_SSE41_FUNC); - return 0;// simdSynetInnerProduct16bInit(M, N, K, typeA, typeB, typeC, transpA, constB, bias); + return 0;// simdSynetInnerProduct16bInit(M, N, K, typeA, typeB, typeC, transB, constB, bias); #else assert(0); return 0; @@ -5279,7 +5280,7 @@ SIMD_API size_t SimdSynetInnerProduct16bInternalBufferSize(const void* context) { SIMD_EMPTY(); #if defined(SIMD_SYNET_ENABLE) - return 0;// ((SynetInnerProduct16b*)context)->InternalBufferSize(); + return ((SynetInnerProduct16b*)context)->InternalBufferSize(); #else assert(0); return 0; @@ -5290,7 +5291,7 @@ SIMD_API size_t SimdSynetInnerProduct16bExternalBufferSize(const void* context) { SIMD_EMPTY(); #if defined(SIMD_SYNET_ENABLE) - return 0;// ((SynetInnerProduct16b*)context)->ExternalBufferSize(); + return ((SynetInnerProduct16b*)context)->ExternalBufferSize(); #else assert(0); return 0; @@ -5301,7 +5302,7 @@ SIMD_API const char* SimdSynetInnerProduct16bInfo(const void* context) { SIMD_EMPTY(); #if defined(SIMD_SYNET_ENABLE) - return 0;// ((SynetInnerProduct16b*)context)->Info(); + return ((SynetInnerProduct16b*)context)->Info(); #else assert(0); return 0; @@ -5312,7 +5313,7 @@ SIMD_API void SimdSynetInnerProduct16bSetParams(void* context, const float* weig { SIMD_EMPTY(); #if defined(SIMD_SYNET_ENABLE) - //((SynetInnerProduct16b*)context)->SetParams(weight, internal, bias); + ((SynetInnerProduct16b*)context)->SetParams(weight, internal, bias); #else 
assert(0); #endif @@ -5322,9 +5323,9 @@ SIMD_API void SimdSynetInnerProduct16bForward(void* context, const uint8_t* A, c { SIMD_EMPTY(); #if defined(SIMD_SYNET_ENABLE) - //SynetInnerProduct16b* c = (SynetInnerProduct16b*)context; - //SIMD_PERF_EXT(c); - //c->Forward(A, B, buf, C); + SynetInnerProduct16b* c = (SynetInnerProduct16b*)context; + SIMD_PERF_EXT(c); + c->Forward(A, B, buf, C); #else assert(0); #endif diff --git a/src/Simd/SimdLib.h b/src/Simd/SimdLib.h index fbc5507d61..93b91218e2 100644 --- a/src/Simd/SimdLib.h +++ b/src/Simd/SimdLib.h @@ -6613,7 +6613,7 @@ extern "C" /*! @ingroup synet_inner_product_bf16 - \fn void* SimdSynetInnerProduct16bInit(size_t M, size_t N, size_t K, SimdTensorDataType typeA, SimdTensorDataType typeB, SimdTensorDataType typeC, SimdBool transpA, SimdBool constB, SimdBool bias); + \fn void* SimdSynetInnerProduct16bInit(size_t M, size_t N, size_t K, SimdTensorDataType typeA, SimdTensorDataType typeB, SimdTensorDataType typeC, SimdBool transB, SimdBool constB, SimdBool bias); \short Initilizes BF16 inner product (matrix mutiplication) algorithm. @@ -6634,14 +6634,14 @@ extern "C" \param [in] typeA - a type of A matrix. It can be FP32 or BF16. \param [in] typeB - a type of B matrix. It can be FP32 or BF16. \param [in] typeC - a type of C matrix. It can be FP32 or BF16. - \param [in] transpA - a transpose matrix A before multiplication. + \param [in] transB - a transpose matrix B before multiplication. \param [in] constB - a matrix B is constant. \param [in] bias - a flag to add bias to output matrix C. \return a pointer to BF32 inner product context. On error it returns NULL. It must be released with using of function ::SimdRelease. This pointer is used in functions ::SimdSynetInnerProduct16bInternalBufferSize, ::SimdSynetInnerProduct16bExternalBufferSize, ::SimdSynetInnerProduct16bInfo, ::SimdSynetInnerProduct16bSetParams and ::SimdSynetInnerProduct16bForward. 
*/ - SIMD_API void* SimdSynetInnerProduct16bInit(size_t M, size_t N, size_t K, SimdTensorDataType typeA, SimdTensorDataType typeB, SimdTensorDataType typeC, SimdBool transpA, SimdBool constB, SimdBool bias); + SIMD_API void* SimdSynetInnerProduct16bInit(size_t M, size_t N, size_t K, SimdTensorDataType typeA, SimdTensorDataType typeB, SimdTensorDataType typeC, SimdBool transB, SimdBool constB, SimdBool bias); /*! @ingroup synet_inner_product_bf16 diff --git a/src/Simd/SimdSynetInnerProduct16b.h b/src/Simd/SimdSynetInnerProduct16b.h new file mode 100644 index 0000000000..da200cd1b0 --- /dev/null +++ b/src/Simd/SimdSynetInnerProduct16b.h @@ -0,0 +1,220 @@ +/* +* Simd Library (http://ermig1979.github.io/Simd). +* +* Copyright (c) 2011-2024 Yermalayeu Ihar. +* +* Permission is hereby granted, free of charge, to any person obtaining a copy +* of this software and associated documentation files (the "Software"), to deal +* in the Software without restriction, including without limitation the rights +* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the Software is +* furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +* SOFTWARE. 
+*/ +#ifndef __SimdSynetInnerProduct16b_h__ +#define __SimdSynetInnerProduct16b_h__ + +#include "Simd/SimdArray.h" +#include "Simd/SimdPerformance.h" +#include "Simd/SimdSynetConvParam.h" + +namespace Simd +{ + struct InnerProductParam16b + { + size_t M, N, K; + SimdTensorDataType typeA, typeB, typeC; + SimdBool transB, constB, bias; + + InnerProductParam16b(size_t m, size_t n, size_t k, + SimdTensorDataType ta, SimdTensorDataType tb, SimdTensorDataType tc, + SimdBool t, SimdBool c, SimdBool b) + : M(m), N(n), K(k) + , typeA(ta), typeB(tb), typeC(tc) + , transB(t), constB(c), bias(b) + { + } + + bool Valid() + { + return + (typeA == SimdTensorData32f || typeA == SimdTensorData16b) && + (typeB == SimdTensorData32f || typeB == SimdTensorData16b) && + (typeC == SimdTensorData32f || typeC == SimdTensorData16b); + } + + String Info() const + { + std::stringstream ss; + ss << M << "x" << N << "x" << K << "-"; + ss << ToChar(typeA) << ToChar(typeB) << ToChar(typeC) << "-"; + ss << (transB ? "t" : "n") << (constB ? "1" : "2") << (bias ? 
"b" : "o"); + return ss.str(); + } + + int64_t Flop() const + { + return int64_t(M) * N * K * 2; + } + }; + + //------------------------------------------------------------------------------------------------- + + class SynetInnerProduct16b : public Deletable + { + public: + SynetInnerProduct16b(const InnerProductParam16b& p) + : _param(p) +#if defined(SIMD_PERFORMANCE_STATISTIC) && (defined(NDEBUG) || defined(SIMD_PERF_STAT_IN_DEBUG)) + , _perf(NULL) +#endif + { + } + + const InnerProductParam16b& Param() const + { + return _param; + } + + virtual size_t InternalBufferSize() const + { + return _buffer.RawSize() + _weight.RawSize() + _bias.RawSize(); + } + + virtual size_t ExternalBufferSize() const + { + return 0; + } + + virtual String Ext() const = 0; + virtual String Desc() const = 0; + + virtual void SetParams(const float* weight, SimdBool* internal, const float* bias) = 0; + virtual void Forward(const uint8_t* A, const uint8_t* B, const uint8_t* bias, uint8_t* C) = 0; + +#if defined(SIMD_PERFORMANCE_STATISTIC) && (defined(NDEBUG) || defined(SIMD_PERF_STAT_IN_DEBUG)) + Base::PerformanceMeasurer* Perf(const char* func) + { + if (_perf == NULL) + _perf = Simd::Base::PerformanceMeasurerStorage::s_storage.Get(func, Param().Info() + " " + Desc(), Param().Flop()); + return _perf; + } +#endif + + const char* Info() const + { + _info = Desc(); + return _info.c_str(); + } + + protected: + InnerProductParam16b _param; +#if defined(SIMD_PERFORMANCE_STATISTIC) && (defined(NDEBUG) || defined(SIMD_PERF_STAT_IN_DEBUG)) + Base::PerformanceMeasurer * _perf; +#endif + Array8u _buffer; + Array16u _weight; + Array32f _bias; + mutable String _info; + + uint8_t* Buffer(uint8_t* buffer) + { + if (buffer) + return buffer; + else + { + _buffer.Resize(ExternalBufferSize()); + return _buffer.data; + } + } + }; + + //------------------------------------------------------------------------------------------------- + + namespace Base + { + //class SynetInnerProduct16bGemm : public 
SynetInnerProduct16b + //{ + //public: + // SynetInnerProduct16bGemm(const InnerProductParam16b& p); + // virtual String Ext() const { return "Base"; } + // virtual String Desc() const; + // virtual void SetParams(const float* weight, SimdBool* internal, const float* bias); + // virtual void Forward(const float * src, float * dst); + + //protected: + // typedef void(*GemmPtr)(size_t M, size_t N, size_t K, const float* alpha, const float* A, size_t lda, const float* B, size_t ldb, const float* beta, float* C, size_t ldc); + // typedef void(*BiasAndActivationPtr)(const float* bias, size_t count, size_t size, ::SimdConvolutionActivationType activation, const float* params, SimdBool trans, float* dst); + // typedef void(*ProdPtr)(const float* src, const float* weight, const float* bias, size_t count, size_t size, float* dst); + // typedef void(*CbPackPtr)(size_t M, size_t N, size_t K, const float* B, float* pB, GemmKernelType type, bool compatibility); + // typedef void(*CbRunPtr)(size_t M, size_t N, size_t K, const float* A, const float* B, float* C, GemmKernelType type, bool compatibility); + + // float _0, _1; + // GemmPtr _gemm; + // BiasAndActivationPtr _biasAndActivation; + // ProdPtr _prod; + // size_t _M, _N, _K, _ldW, _ldS, _ldD; + // Array32f _cbWeight; + // CbPackPtr _cbPack; + // CbRunPtr _cbRun; + //}; + + //class SynetInnerProduct16bProd : public SynetInnerProduct16b + //{ + //public: + // SynetInnerProduct16bProd(const InnerProductParam16b& p); + // virtual String Ext() const { return "Base"; } + // virtual String Desc() const { return Ext() + "::Prod"; } + // virtual size_t InternalBufferSize() const { return _rWeight.size + _rBias.size; } + // virtual void SetParams(const float* weight, SimdBool* internal, const float* bias, const float* params); + // virtual void Forward(const float* src, float* dst); + + // static bool Preferable(const InnerProductParam16b& p); + + //protected: + // typedef void(*ProdPtr)(const float* src, const float* weight, const 
float* bias, size_t input, size_t output, float* dst); + + // ProdPtr _prod; + // Array32f _rWeight, _rBias; + // size_t _F, _N, _K; + + // void SetSize(size_t F); + // void ReorderWeight(const float* src, float* dst); + //}; + + //------------------------------------------------------------------------------------------------- + + void * SynetInnerProduct16bInit(size_t M, size_t N, size_t K, SimdTensorDataType typeA, SimdTensorDataType typeB, SimdTensorDataType typeC, SimdBool transB, SimdBool constB, SimdBool bias); + } + +#ifdef SIMD_SSE41_ENABLE + namespace Sse41 + { + } +#endif + +#ifdef SIMD_AVX2_ENABLE + namespace Avx2 + { + } +#endif + +#ifdef SIMD_AVX512BW_ENABLE + namespace Avx512bw + { + } +#endif + +} + +#endif diff --git a/src/Test/Test.cpp b/src/Test/Test.cpp index 65dc9935dc..31e9308f17 100644 --- a/src/Test/Test.cpp +++ b/src/Test/Test.cpp @@ -379,6 +379,8 @@ namespace Test TEST_ADD_GROUP_A0(SynetInnerProductLayerForward); TEST_ADD_GROUP_A0(SynetInnerProduct8i); + TEST_ADD_GROUP_A0(SynetInnerProduct16bForward); + TEST_ADD_GROUP_A0(SynetMergedConvolution8iForward); TEST_ADD_GROUP_A0(SynetMergedConvolution16bForward); diff --git a/src/Test/TestSynetInnerProduct16b.cpp b/src/Test/TestSynetInnerProduct16b.cpp new file mode 100644 index 0000000000..1d90e6c1d9 --- /dev/null +++ b/src/Test/TestSynetInnerProduct16b.cpp @@ -0,0 +1,179 @@ +/* +* Tests for Simd Library (http://ermig1979.github.io/Simd). +* +* Copyright (c) 2011-2024 Yermalayeu Ihar. 
+* +* Permission is hereby granted, free of charge, to any person obtaining a copy +* of this software and associated documentation files (the "Software"), to deal +* in the Software without restriction, including without limitation the rights +* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the Software is +* furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +* SOFTWARE. 
+*/ +#include "Test/TestCompare.h" +#include "Test/TestPerformance.h" +#include "Test/TestTensor.h" +#include "Test/TestString.h" +#include "Test/TestRandom.h" + +#include "Simd/SimdSynet.h" +#include "Simd/SimdSynetInnerProduct16b.h" + +namespace Test +{ +#if defined(SIMD_SYNET_ENABLE) + namespace + { + struct FuncIP16b + { + typedef void* (*FuncPtr)(size_t M, size_t N, size_t K, SimdTensorDataType typeA, SimdTensorDataType typeB, SimdTensorDataType typeC, SimdBool transB, SimdBool constB, SimdBool bias); + + FuncPtr func; + String desc; + + FuncIP16b(const FuncPtr& f, const String& d) : func(f), desc(d) {} + + void Update(const Simd::InnerProductParam16b& p) + { + desc = desc + "[" + p.Info() + "]"; + } + + void Call(void* context, const uint8_t* A, const uint8_t* B, uint8_t* buf, uint8_t* C) const + { + TEST_PERFORMANCE_TEST(desc); + ::SimdSynetInnerProduct16bForward(context, A, B, buf, C); + } + }; + } + +#define FUNC_IP16B(function) \ + FuncIP16b(function, std::string(#function)) + + bool SynetInnerProduct16bForwardAutoTest(float eps, Simd::InnerProductParam16b p, FuncIP16b f1, FuncIP16b f2) + { + bool result = true; + + f1.Update(p); + f2.Update(p); + + TEST_LOG_SS(Info, "Test " << f1.desc << " & " << f2.desc << "."); + + Shape sA = Shp(p.M, p.K), sB = p.transB ? Shp(p.N, p.K) : Shp(p.K, p.N), sC = Shp(p.M, p.N); + Tensor32f Af(sA), Bf(sB), C1f(sC), C2f(sC), C3f(sC), bias(Shp(p.N)); + Tensor16u Ab(sA), Bb(sB), C1b(sC), C2b(sC); + + FillRandom(Af.Data(), Af.Size(), -1.0, 1.0f); + FillRandom(Bf.Data(), Bf.Size(), -1.0, 1.0f); + FillRandom(bias.Data(), bias.Size(), -1.0, 1.0f); + + SimdFloat32ToBFloat16(Af.Data(), Af.Size(), Ab.Data()); + SimdFloat32ToBFloat16(Bf.Data(), Bf.Size(), Bb.Data()); + + Fill(C1f, 1.0f); + Fill(C1f, 2.0f); + + const uint8_t* A = p.typeA == SimdTensorData32f ? (uint8_t*)Af.Data() : (uint8_t*)Ab.Data(); + const uint8_t* B = p.typeB == SimdTensorData32f ? 
(uint8_t*)Bf.Data() : (uint8_t*)Bb.Data(); + uint8_t* C1 = p.typeC == SimdTensorData32f ? (uint8_t*)C1f.Data() : (uint8_t*)C1b.Data(); + uint8_t* C2 = p.typeC == SimdTensorData32f ? (uint8_t*)C2f.Data() : (uint8_t*)C2b.Data(); + + void* context1 = f1.func(p.M, p.N, p.K, p.typeA, p.typeB, p.typeC, p.transB, p.constB, p.bias); + void* context2 = f2.func(p.M, p.N, p.K, p.typeA, p.typeB, p.typeC, p.transB, p.constB, p.bias); + + if (context1 == NULL) + return true; + + ::SimdSynetInnerProduct16bSetParams(context1, Bf.Data(), NULL, bias.Data()); + ::SimdSynetInnerProduct16bSetParams(context2, Bf.Data(), NULL, bias.Data()); + + Tensor8u buf; + buf.Extend( Shp(SimdSynetInnerProduct16bExternalBufferSize(context1)) ); + buf.Extend( Shp(SimdSynetInnerProduct16bExternalBufferSize(context2)) ); + + TEST_ALIGN(SIMD_ALIGN); + + TEST_EXECUTE_AT_LEAST_MIN_TIME(f1.Call(context1, A, B, buf.Data(), C1)); + + TEST_EXECUTE_AT_LEAST_MIN_TIME(f2.Call(context2, A, B, buf.Data(), C2)); + + ::SimdRelease(context1); + ::SimdRelease(context2); + + if (p.typeC == SimdTensorData16b) + { + eps = eps * 1.0f; + SimdBFloat16ToFloat32(C1b.Data(), C1b.Size(), C1f.Data()); + SimdBFloat16ToFloat32(C2b.Data(), C2b.Size(), C2f.Data()); + } + result = result && Compare(C1f, C2f, eps, true, 64, DifferenceBoth); + + if(0) + { + void* context3 = SimdSynetInnerProduct32fInit(p.M, p.K, p.N, p.transB, SimdConvolutionActivationIdentity); + ::SimdSynetInnerProduct32fSetParams(context3, Bf.Data(), NULL, bias.Data(), NULL); + ::SimdSynetInnerProduct32fForward(context3, Af.Data(), C3f.Data()); + ::SimdRelease(context3); + + result = result && Compare(C1f, C3f, 0.03, true, 64, DifferenceBoth, " Compare to SynetInnerProduct32f.");//0.129 + } + + return result; + } + + bool SynetInnerProduct16bForwardAutoTest(float eps, const FuncIP16b& f1, const FuncIP16b& f2) + { + bool result = true; + + SimdBool t = SimdTrue, f = SimdFalse; + const SimdTensorDataType f32 = SimdTensorData32f, b16 = SimdTensorData16b; + using Param = 
Simd::InnerProductParam16b; + +#if defined(NDEBUG) +#if 1 + result = result && SynetInnerProduct16bForwardAutoTest(eps, Param(128, 128, 128, f32, f32, f32, f, t, f), f1, f2); +#endif +#else + result = result && SynetInnerProduct16bForwardAutoTest(eps, Param(128, 128, 128, f32, f32, f32, f, t, f), f1, f2); +#endif + + return result; + } + + bool SynetInnerProduct16bForwardAutoTest() + { + const float EPS = 0.001f; + bool result = true; + + if (TestBase()) + result = result && SynetInnerProduct16bForwardAutoTest(EPS, FUNC_IP16B(Simd::Base::SynetInnerProduct16bInit), FUNC_IP16B(SimdSynetInnerProduct16bInit)); + +//#ifdef SIMD_SSE41_ENABLE +// if (Simd::Sse41::Enable && TestSse41()) +// result = result && SynetInnerProduct16borwardAutoTest(EPS, FUNC_IP16B(Simd::Sse41::SynetInnerProduct16bInit), FUNC_IP16B(SimdSynetInnerProduct32fInit)); +//#endif +// +//#ifdef SIMD_AVX2_ENABLE +// if (Simd::Avx2::Enable && TestAvx2()) +// result = result && SynetInnerProduct16borwardAutoTest(EPS, FUNC_IP16B(Simd::Avx2::SynetInnerProduct16bInit), FUNC_IP16B(SimdSynetInnerProduct16bInit)); +//#endif +// +//#ifdef SIMD_AVX512BW_ENABLE +// if (Simd::Avx512bw::Enable && TestAvx512bw()) +// result = result && SynetInnerProduct16bForwardAutoTest(EPS, FUNC_IP16B(Simd::Avx512bw::SynetInnerProduct16bInit), FUNC_IP16B(SimdSynetInnerProduct16bInit)); +//#endif + + return result; + } +#endif +}
    [in]context- a pointer to BF16 inner product context. It must be created by function SimdSynetInnerProduct16bInit and released by function SimdRelease.
    [in]context- a pointer to BF16 inner product context. It must be created by function SimdSynetInnerProduct16bInit and released by function SimdRelease.
    [in]A- a pointer to A matrix.
    [in]B- a pointer to B matrix. Can be NULL if B is constant matrix. In that case you have to set B in function SimdSynetInnerProduct16bSetParams.
    [out]buf- a pointer to external buffer. The size of the external temporary buffer is determined by function SimdSynetInnerProduct16bExternalBufferSize. Can be NULL (it causes usage of internal buffer).