Skip to content

Commit

Permalink
+add AVX-512VNNI optimizations of functions DescrIntCosineDistancesMxNp, DescrIntCosineDistancesMxNa.
Browse files Browse the repository at this point in the history
  • Loading branch information
ermig1979 committed Jun 28, 2023
1 parent 5c67d27 commit 6057373
Show file tree
Hide file tree
Showing 11 changed files with 331 additions and 4 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/cmake.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
run: lscpu

- name: Configure CMake
run: cmake ./prj/cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{matrix.build_type}} -DSIMD_TEST_FLAGS="-mavx2"
run: cmake ./prj/cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{matrix.build_type}} -DSIMD_AVX512VNNI=ON -DSIMD_TEST_FLAGS="-mavx2"

- name: Build
run: cmake --build ${{github.workspace}}/build --config ${{matrix.build_type}} --parallel$(nproc)
Expand Down
4 changes: 2 additions & 2 deletions docs/2023.html
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ <h5>New features</h5>
<li>Support of 4-bit and 5-bit depth in Base implementation, SSE4.1, AVX2, AVX-512BW optimizations of function DescrIntDecode32f.</li>
<li>Support of 4-bit and 5-bit depth in Base implementation, SSE4.1, AVX2, AVX-512BW optimizations of function DescrIntDecode16f.</li>
<li>Support of 4-bit and 5-bit depth in Base implementation, SSE4.1, AVX2, AVX-512BW optimizations of function DescrIntCosineDistance.</li>
<li>Support of 4-bit and 5-bit depth in Base implementation, SSE4.1, AVX2, AVX-512BW optimizations of function DescrIntCosineDistancesMxNp.</li>
<li>Support of 4-bit and 5-bit depth in Base implementation, SSE4.1, AVX2, AVX-512BW optimizations of function DescrIntCosineDistancesMxNa.</li>
<li>Support of 4-bit and 5-bit depth in Base implementation, SSE4.1, AVX2, AVX-512BW, AVX-512VNNI optimizations of function DescrIntCosineDistancesMxNp.</li>
<li>Support of 4-bit and 5-bit depth in Base implementation, SSE4.1, AVX2, AVX-512BW, AVX-512VNNI optimizations of function DescrIntCosineDistancesMxNa.</li>
<li>Base implementation, SSE4.1, AVX2, AVX-512BW optimizations of function SynetNormalizeLayerForwardV3.</li>
</ul>
<h5>Improving</h5>
Expand Down
6 changes: 6 additions & 0 deletions prj/vs2019/Avx512vnni.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@
<ClInclude Include="..\..\src\Simd\SimdConst.h" />
<ClInclude Include="..\..\src\Simd\SimdCpu.h" />
<ClInclude Include="..\..\src\Simd\SimdDefs.h" />
<ClInclude Include="..\..\src\Simd\SimdDescrInt.h" />
<ClInclude Include="..\..\src\Simd\SimdDescrIntCommon.h" />
<ClInclude Include="..\..\src\Simd\SimdEnable.h" />
<ClInclude Include="..\..\src\Simd\SimdErf.h" />
<ClInclude Include="..\..\src\Simd\SimdExp.h" />
<ClInclude Include="..\..\src\Simd\SimdExtract.h" />
<ClInclude Include="..\..\src\Simd\SimdFmadd.h" />
Expand All @@ -32,6 +35,7 @@
<ClInclude Include="..\..\src\Simd\SimdParallel.hpp" />
<ClInclude Include="..\..\src\Simd\SimdPerformance.h" />
<ClInclude Include="..\..\src\Simd\SimdRuntime.h" />
<ClInclude Include="..\..\src\Simd\SimdSet.h" />
<ClInclude Include="..\..\src\Simd\SimdStore.h" />
<ClInclude Include="..\..\src\Simd\SimdSynet.h" />
<ClInclude Include="..\..\src\Simd\SimdSynetConvolution32fCommon.h" />
Expand All @@ -43,6 +47,8 @@
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\..\src\Simd\SimdAvx512vnniCpu.cpp" />
<ClCompile Include="..\..\src\Simd\SimdAvx512vnniDescrInt.cpp" />
<ClCompile Include="..\..\src\Simd\SimdAvx512vnniDescrIntCdu.cpp" />
<ClCompile Include="..\..\src\Simd\SimdAvx512vnniSynetConvolution8iDepthwise.cpp" />
<ClCompile Include="..\..\src\Simd\SimdAvx512vnniSynetConvolution8iDirect.cpp" />
<ClCompile Include="..\..\src\Simd\SimdAvx512vnniSynetConvolution8iDirect1x1.cpp" />
Expand Down
18 changes: 18 additions & 0 deletions prj/vs2019/Avx512vnni.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,18 @@
<ClInclude Include="..\..\src\Simd\SimdUnpack.h">
<Filter>Inc</Filter>
</ClInclude>
<ClInclude Include="..\..\src\Simd\SimdDescrInt.h">
<Filter>Inc</Filter>
</ClInclude>
<ClInclude Include="..\..\src\Simd\SimdDescrIntCommon.h">
<Filter>Inc</Filter>
</ClInclude>
<ClInclude Include="..\..\src\Simd\SimdErf.h">
<Filter>Inc</Filter>
</ClInclude>
<ClInclude Include="..\..\src\Simd\SimdSet.h">
<Filter>Inc</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\..\src\Simd\SimdAvx512vnniSynetConvolution8iDepthwise.cpp">
Expand All @@ -125,5 +137,11 @@
<ClCompile Include="..\..\src\Simd\SimdAvx512vnniSynetMergedConvolution8iOutput.cpp">
<Filter>Avx512vnni</Filter>
</ClCompile>
<ClCompile Include="..\..\src\Simd\SimdAvx512vnniDescrInt.cpp">
<Filter>Avx512vnni</Filter>
</ClCompile>
<ClCompile Include="..\..\src\Simd\SimdAvx512vnniDescrIntCdu.cpp">
<Filter>Avx512vnni</Filter>
</ClCompile>
</ItemGroup>
</Project>
6 changes: 6 additions & 0 deletions prj/vs2022/Avx512vnni.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@
<ClInclude Include="..\..\src\Simd\SimdConst.h" />
<ClInclude Include="..\..\src\Simd\SimdCpu.h" />
<ClInclude Include="..\..\src\Simd\SimdDefs.h" />
<ClInclude Include="..\..\src\Simd\SimdDescrInt.h" />
<ClInclude Include="..\..\src\Simd\SimdDescrIntCommon.h" />
<ClInclude Include="..\..\src\Simd\SimdEnable.h" />
<ClInclude Include="..\..\src\Simd\SimdErf.h" />
<ClInclude Include="..\..\src\Simd\SimdExp.h" />
<ClInclude Include="..\..\src\Simd\SimdExtract.h" />
<ClInclude Include="..\..\src\Simd\SimdFmadd.h" />
Expand All @@ -32,6 +35,7 @@
<ClInclude Include="..\..\src\Simd\SimdParallel.hpp" />
<ClInclude Include="..\..\src\Simd\SimdPerformance.h" />
<ClInclude Include="..\..\src\Simd\SimdRuntime.h" />
<ClInclude Include="..\..\src\Simd\SimdSet.h" />
<ClInclude Include="..\..\src\Simd\SimdStore.h" />
<ClInclude Include="..\..\src\Simd\SimdSynet.h" />
<ClInclude Include="..\..\src\Simd\SimdSynetConvolution32fCommon.h" />
Expand All @@ -43,6 +47,8 @@
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\..\src\Simd\SimdAvx512vnniCpu.cpp" />
<ClCompile Include="..\..\src\Simd\SimdAvx512vnniDescrInt.cpp" />
<ClCompile Include="..\..\src\Simd\SimdAvx512vnniDescrIntCdu.cpp" />
<ClCompile Include="..\..\src\Simd\SimdAvx512vnniSynetConvolution8iDepthwise.cpp" />
<ClCompile Include="..\..\src\Simd\SimdAvx512vnniSynetConvolution8iDirect.cpp" />
<ClCompile Include="..\..\src\Simd\SimdAvx512vnniSynetConvolution8iDirect1x1.cpp" />
Expand Down
18 changes: 18 additions & 0 deletions prj/vs2022/Avx512vnni.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,18 @@
<ClInclude Include="..\..\src\Simd\SimdUnpack.h">
<Filter>Inc</Filter>
</ClInclude>
<ClInclude Include="..\..\src\Simd\SimdDescrInt.h">
<Filter>Inc</Filter>
</ClInclude>
<ClInclude Include="..\..\src\Simd\SimdDescrIntCommon.h">
<Filter>Inc</Filter>
</ClInclude>
<ClInclude Include="..\..\src\Simd\SimdErf.h">
<Filter>Inc</Filter>
</ClInclude>
<ClInclude Include="..\..\src\Simd\SimdSet.h">
<Filter>Inc</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\..\src\Simd\SimdAvx512vnniSynetConvolution8iDepthwise.cpp">
Expand All @@ -125,5 +137,11 @@
<ClCompile Include="..\..\src\Simd\SimdAvx512vnniSynetMergedConvolution8iOutput.cpp">
<Filter>Avx512vnni</Filter>
</ClCompile>
<ClCompile Include="..\..\src\Simd\SimdAvx512vnniDescrInt.cpp">
<Filter>Avx512vnni</Filter>
</ClCompile>
<ClCompile Include="..\..\src\Simd\SimdAvx512vnniDescrIntCdu.cpp">
<Filter>Avx512vnni</Filter>
</ClCompile>
</ItemGroup>
</Project>
59 changes: 59 additions & 0 deletions src/Simd/SimdAvx512vnniDescrInt.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
/*
* Simd Library (http://ermig1979.github.io/Simd).
*
* Copyright (c) 2011-2023 Yermalayeu Ihar.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "Simd/SimdMemory.h"
#include "Simd/SimdStore.h"
#include "Simd/SimdExtract.h"
#include "Simd/SimdArray.h"
#include "Simd/SimdUnpack.h"
#include "Simd/SimdDescrInt.h"
#include "Simd/SimdDescrIntCommon.h"
#include "Simd/SimdCpu.h"

namespace Simd
{
#ifdef SIMD_AVX512VNNI_ENABLE
namespace Avx512vnni
{
// Builds the AVX-512VNNI descriptor on top of the AVX-512BW one.
// size and depth are forwarded unchanged to Avx512bw::DescrInt.
// When _depth equals 8 nothing is overridden, so whatever the base constructor
// configured stays in effect. For any other depth the macro cosine-distance
// unpack routine is replaced with the one selected by
// GetMacroCosineDistancesUnpack(_depth), and the micro-kernel tile is set to 12 x 32.
// NOTE(review): _microMu/_microNu look like micro-kernel tile dimensions - confirm against SimdDescrInt.h.
DescrInt::DescrInt(size_t size, size_t depth)
    : Avx512bw::DescrInt(size, depth)
{
    // 8-bit descriptors get no VNNI-specific override in this constructor.
    if (_depth == 8)
        return;

    _macroCosineDistancesUnpack = GetMacroCosineDistancesUnpack(_depth);
    _microMu = 12;
    _microNu = 32;
}

//-------------------------------------------------------------------------------------------------

// Creates an AVX-512VNNI integer-descriptor engine.
// Returns a pointer to a newly allocated Avx512vnni::DescrInt (as void*) when
// Base::DescrInt::Valid(size, depth) accepts the parameters, and NULL otherwise.
// NOTE(review): the object is allocated with new; presumably the caller frees it
// through the library's release routine - confirm.
void* DescrIntInit(size_t size, size_t depth)
{
    if (Base::DescrInt::Valid(size, depth))
        return new Avx512vnni::DescrInt(size, depth);
    return NULL;
}
}
#endif
}
Loading

0 comments on commit 6057373

Please sign in to comment.