Skip to content

Commit

Permalink
+add AVX2 optimizations of class SynetDeconvolution16bNhwcGemm (part 2).
Browse files: browse the repository at this point in the history
  • Loading branch information
ermig1979 committed Sep 11, 2024
1 parent d71b103 commit 3491a83
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 4 deletions.
8 changes: 4 additions & 4 deletions src/Simd/SimdBaseSynetDeconvolution16bNhwcGemm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,10 @@ namespace Simd
a.microK = microK;
a.bufK = AlignHi(a.K, a.microK);
a.bufN = AlignHi(a.N, a.microN);
a.bufM = p.dstH * AlignHi(p.dstW, a.F);
a.bufM = p.srcH * AlignHi(p.srcW, a.F);
a.macroK = Simd::RestrictRange(AlignLo(L1 / a.microN / 2, a.microK), a.microK, a.bufK);
a.macroH = Simd::RestrictRange(L2 / a.macroK / p.dstW / 2, size_t(1), p.dstH);
a.macroM = a.macroH * p.dstW;
a.macroH = Simd::RestrictRange(L2 / a.macroK / p.srcW / 2, size_t(1), p.srcH);
a.macroM = a.macroH * p.srcW;
a.macroN = Simd::RestrictRange(AlignLoAny(L3 / a.macroK / 2, a.microN), a.microN, a.bufN);
a.elem = _elemD;
_stepS = p.srcH * p.srcW * p.srcC * _elemS;
Expand Down Expand Up @@ -135,7 +135,7 @@ namespace Simd
_convert(src, p, a, 0, p.srcH, bufS);
GemmCommon(src16b, buf32f);
if (!_is1x1)
_toImg(buf32f, p, a, p.dstC, 0, p.dstH, dst32f);
_toImg(buf32f, p, a, p.dstC, 0, p.srcH, dst32f);
_biasAct(dst32f, p, a, p.dstC, 0, p.dstH, _bias.data, _params.data, dst);
}

Expand Down
10 changes: 10 additions & 0 deletions src/Simd/SimdSynetDeconvolution16b.h
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,16 @@ namespace Simd
#ifdef SIMD_AVX512BW_ENABLE
// Declarations of the 16-bit synet deconvolution for the Avx512bw tier.
// The enclosing #ifdef compiles this namespace only when SIMD_AVX512BW_ENABLE is defined.
namespace Avx512bw
{
// Avx512bw-tier variant of SynetDeconvolution16bNhwcGemm.
// It publicly derives from Avx2::SynetDeconvolution16bNhwcGemm, so every member not
// redeclared here is inherited from the Avx2 class.
// NOTE(review): judging by the name this is the NHWC, GEMM-based deconvolution path - confirm
// against the base class definition, which is not visible here.
class SynetDeconvolution16bNhwcGemm : public Avx2::SynetDeconvolution16bNhwcGemm
{
public:
// Constructs the object from the deconvolution parameters p.
// The constructor is only declared here; its definition lives in another translation unit.
// NOTE(review): presumably it installs Avx512bw-specific kernels over the Avx2 ones - confirm.
SynetDeconvolution16bNhwcGemm(const DeconvParam& p);
// Returns the string "Avx512bw", the tag that names this implementation tier.
virtual String Ext() const { return "Avx512bw"; }
};

//-------------------------------------------------------------------------------------------------

// Declaration of the Avx512bw-tier initialization entry point (defined elsewhere).
// Takes the batch size, a pointer to the convolution parameters and a compatibility type,
// and returns an untyped pointer.
// NOTE(review): presumably the returned pointer is an opaque context handle for a created
// deconvolution object (and may be null on failure) - verify against the definition.
void* SynetDeconvolution16bInit(size_t batch, const SimdConvolutionParameters* conv, SimdSynetCompatibilityType compatibility);
}
#endif
}
Expand Down

0 comments on commit 3491a83

Please sign in to comment.