Skip to content

Commit

Permalink
+add Support of YUV444P format to Simd::Frame.
Browse files Browse the repository at this point in the history
  • Loading branch information
ermig1979 committed May 30, 2024
1 parent 0695755 commit 985ae43
Show file tree
Hide file tree
Showing 4 changed files with 121 additions and 2 deletions.
1 change: 1 addition & 0 deletions docs/2024.html
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ <h5>New features</h5>
<li>Base implementation, SSE4.1, AVX2, AVX-512BW, AVX-512BF16 optimizations of class SynetMergedConvolution16bDc.</li>
<li>Base implementation, SSE4.1, AVX2, AVX-512BW, AVX-512BF16 optimizations of class SynetMergedConvolution16bCd.</li>
<li>Base implementation, SSE4.1, AVX2, AVX-512BW, AVX-512BF16 optimizations of class SynetMergedConvolution16bCdc.</li>
<li>Support of YUV444P format to Simd::Frame.</li>
</ul>
<h5>Improving</h5>
<ul>
Expand Down
2 changes: 1 addition & 1 deletion src/Simd/SimdAmxBf16SynetMergedConvolution16bDepthwise.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ namespace Simd

// LoadSrc specialization for uint16_t input: performs an unaligned 256-bit load
// (16 uint16_t values) from src and returns them widened to 16 packed floats.
// NOTE(review): this text is a rendered diff hunk ("1 addition & 1 deletion"), so the two
// return statements below are presumably the removed line followed by its replacement.
// As written here only the first return is reachable - confirm against the real file.
template <> SIMD_INLINE __m512 LoadSrc<uint16_t>(const uint16_t* src)
{
// Converts the loaded words with the AVX-512 BF16 intrinsic (packed BF16 -> packed FP32).
return _mm512_cvtpbh_ps((__m256bh)_mm256_loadu_si256((__m256i*)src));
// Same load, converted through the BFloat16ToFloat32 helper instead of the BF16 intrinsic.
return BFloat16ToFloat32(_mm256_loadu_si256((__m256i*)src));
}

//-------------------------------------------------------------------------------------------------
Expand Down
8 changes: 8 additions & 0 deletions src/Simd/SimdBFloat16.h
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,14 @@ namespace Simd
__m512 s1 = _mm512_maskz_loadu_ps(__mmask16(loadMask >> 1 * 16), src + 1 * F);
_mm512_mask_storeu_epi16(dst, saveMask, (__m512i)_mm512_cvtne2ps_pbh(s1, s0));
}

// Widens 16 packed BF16 values (passed as raw 16-bit words in a 256-bit register)
// to 16 packed 32-bit floats.
// Each 32-bit output lane receives the source word in its high 16 bits and zero in its
// low 16 bits, i.e. the BF16 bit pattern becomes the upper half of the float.
SIMD_INLINE __m512 BFloat16ToFloat32(__m256i value)
{
    // Word permutation, one pair per output lane: the low word takes index 0x10 (the first
    // word of the upper 256 bits of the widened source), the high word takes source word i.
    static const __m512i K16_PERM = SIMD_MM512_SETR_EPI16(
        0x10, 0x00, 0x10, 0x01, 0x10, 0x02, 0x10, 0x03, 0x10, 0x04, 0x10, 0x05, 0x10, 0x06, 0x10, 0x07,
        0x10, 0x08, 0x10, 0x09, 0x10, 0x0A, 0x10, 0x0B, 0x10, 0x0C, 0x10, 0x0D, 0x10, 0x0E, 0x10, 0x0F);
    // The source must be zero-extended, not merely cast: _mm512_castsi256_si512 leaves the
    // upper 256 bits undefined, and word 0x10 is read from there to fill the low half of
    // every float. With the zero extension those low halves are guaranteed to be zero.
    return _mm512_castsi512_ps(_mm512_permutexvar_epi16(K16_PERM, _mm512_zextsi256_si512(value)));
}
}
#endif

Expand Down
112 changes: 111 additions & 1 deletion src/Simd/SimdFrame.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ namespace Simd
Rgb24,
/*! One plane 32-bit (4 8-bit channels) RGBA (Red, Green, Blue, Alpha) pixel format. */
Rgba32,
/*! Three planes (8-bit full size Y, U, V planes) YUV444P pixel format. */
Yuv444p,
};

const size_t width; /*!< \brief A width of the frame. */
Expand Down Expand Up @@ -571,6 +573,13 @@ namespace Simd
if (yuvType != SimdYuvUnknown)
*(SimdYuvType*)&yuvType = SimdYuvUnknown;
break;
case Yuv444p:
planes[0] = View<A>(width, height, stride0, View<A>::Gray8, data0);
planes[1] = View<A>(width, height, stride1, View<A>::Gray8, data1);
planes[2] = View<A>(width, height, stride2, View<A>::Gray8, data2);
if (yuvType == SimdYuvUnknown)
*(SimdYuvType*)&yuvType = SimdYuvBt601;
break;
default:
assert(0);
}
Expand Down Expand Up @@ -697,6 +706,13 @@ namespace Simd
if (yuvType != SimdYuvUnknown)
*(SimdYuvType*)&yuvType = SimdYuvUnknown;
break;
case Yuv444p:
planes[0].Recreate(width, height, View<A>::Gray8);
planes[1].Recreate(width, height, View<A>::Gray8);
planes[2].Recreate(width, height, View<A>::Gray8);
if (yuvType == SimdYuvUnknown)
*(SimdYuvType*)&yuvType = SimdYuvBt601;
break;
default:
assert(0);
}
Expand Down Expand Up @@ -746,6 +762,12 @@ namespace Simd
if (format == Yuv420p)
frame.planes[2] = planes[2].Region(left / 2, top / 2, right / 2, bottom / 2);

if (format == Yuv444p)
{
frame.planes[1] = planes[1].Region(left, top, right, bottom);
frame.planes[2] = planes[2].Region(left, top, right, bottom);
}

return frame;
}
else
Expand Down Expand Up @@ -816,6 +838,7 @@ namespace Simd
case Gray8: return 1;
case Rgb24: return 1;
case Rgba32: return 1;
case Yuv444p: return 3;
default: assert(0); return 0;
}
}
Expand Down Expand Up @@ -947,6 +970,16 @@ namespace Simd
BgrToRgba(bgr, dst.planes[0]);
break;
}
case Frame<A>::Yuv444p:
{
    // Source carries an interleaved UV plane in planes[1] (presumably the Nv12 branch):
    // copy luma as is, split chroma into temporary planes and upsample each one 2x2.
    assert(src.yuvType == dst.yuvType);
    Copy(src.planes[0], dst.planes[0]);
    // The temporary U and V planes must be half the frame size: they are the input of
    // StretchGray2x2, which doubles them to the full-size planes of the destination.
    View<A> u(src.Size() / 2, View<A>::Gray8), v(src.Size() / 2, View<A>::Gray8);
    DeinterleaveUv(src.planes[1], u, v);
    Simd::StretchGray2x2(u, dst.planes[1]);
    Simd::StretchGray2x2(v, dst.planes[2]);
    break;
}
default:
assert(0);
}
Expand Down Expand Up @@ -982,6 +1015,14 @@ namespace Simd
BgrToRgba(bgr, dst.planes[0]);
break;
}
case Frame<A>::Yuv444p:
{
    // Planar source with separate subsampled chroma planes (presumably the Yuv420p branch):
    // luma is copied unchanged, each chroma plane is upsampled 2x2 to full size.
    assert(src.yuvType == dst.yuvType);
    Copy(src.planes[0], dst.planes[0]);
    // planes[1] is U and planes[2] is V; planes[0] (Y) must not feed the chroma outputs.
    Simd::StretchGray2x2(src.planes[1], dst.planes[1]);
    Simd::StretchGray2x2(src.planes[2], dst.planes[2]);
    break;
}
default:
assert(0);
}
Expand Down Expand Up @@ -1012,6 +1053,9 @@ namespace Simd
case Frame<A>::Rgba32:
BgraToRgba(src.planes[0], dst.planes[0]);
break;
case Frame<A>::Yuv444p:
BgraToYuv444p(src.planes[0], dst.planes[0], dst.planes[1], dst.planes[2], dst.yuvType);
break;
default:
assert(0);
}
Expand Down Expand Up @@ -1042,6 +1086,9 @@ namespace Simd
case Frame<A>::Rgba32:
BgrToRgba(src.planes[0], dst.planes[0]);
break;
case Frame<A>::Yuv444p:
BgrToYuv444p(src.planes[0], dst.planes[0], dst.planes[1], dst.planes[2], dst.yuvType);
break;
default:
assert(0);
}
Expand All @@ -1058,6 +1105,7 @@ namespace Simd
Fill(dst.planes[1], 128);
break;
case Frame<A>::Yuv420p:
case Frame<A>::Yuv444p:
if (dst.yuvType == SimdYuvTrect871)
Copy(src.planes[0], dst.planes[0]);
else
Expand Down Expand Up @@ -1113,6 +1161,13 @@ namespace Simd
case Frame<A>::Rgba32:
RgbToRgba(src.planes[0], dst.planes[0]);
break;
case Frame<A>::Yuv444p:
{
View<A> bgr(src.Size(), View<A>::Bgr24);
RgbToBgr(src.planes[0], bgr);
BgrToYuv444p(bgr, dst.planes[0], dst.planes[1], dst.planes[2], dst.yuvType);
break;
}
default:
assert(0);
}
Expand Down Expand Up @@ -1149,6 +1204,61 @@ namespace Simd
case Frame<A>::Rgb24:
RgbaToRgb(src.planes[0], dst.planes[0]);
break;
case Frame<A>::Yuv444p:
{
View<A> bgr(src.Size(), View<A>::Bgr24);
RgbaToBgr(src.planes[0], bgr);
BgrToYuv444p(bgr, dst.planes[0], dst.planes[1], dst.planes[2], dst.yuvType);
break;
}
default:
assert(0);
}
break;

// Conversions from a YUV444P source (three full-size planes: Y, U, V) to every other format.
case Frame<A>::Yuv444p:
    switch (dst.format)
    {
    case Frame<A>::Nv12:
    {
        // Luma is copied; chroma is reduced 2x2 into half-size temporaries and interleaved.
        assert(src.yuvType == dst.yuvType);
        Copy(src.planes[0], dst.planes[0]);
        View<A> u(src.Size() / 2, View<A>::Gray8), v(src.Size() / 2, View<A>::Gray8);
        // Chroma comes from planes[1] (U) and planes[2] (V), not from the luma plane.
        Simd::ReduceGray2x2(src.planes[1], u);
        Simd::ReduceGray2x2(src.planes[2], v);
        InterleaveUv(u, v, dst.planes[1]);
        break;
    }
    case Frame<A>::Yuv420p:
    {
        // Luma is copied; each chroma plane is reduced 2x2 straight into the destination.
        assert(src.yuvType == dst.yuvType);
        Copy(src.planes[0], dst.planes[0]);
        Simd::ReduceGray2x2(src.planes[1], dst.planes[1]);
        Simd::ReduceGray2x2(src.planes[2], dst.planes[2]);
        break;
    }
    case Frame<A>::Bgra32:
        Yuv444pToBgra(src.planes[0], src.planes[1], src.planes[2], dst.planes[0], src.yuvType);
        break;
    case Frame<A>::Bgr24:
        Yuv444pToBgr(src.planes[0], src.planes[1], src.planes[2], dst.planes[0], src.yuvType);
        break;
    case Frame<A>::Gray8:
        // For SimdYuvTrect871 the Y plane is copied as is; other YUV types go through YToGray.
        if (src.yuvType == SimdYuvTrect871)
            Copy(src.planes[0], dst.planes[0]);
        else
            YToGray(src.planes[0], dst.planes[0]);
        break;
    case Frame<A>::Rgb24:
        Yuv444pToRgb(src.planes[0], src.planes[1], src.planes[2], dst.planes[0], src.yuvType);
        break;
    case Frame<A>::Rgba32:
    {
        // Two-step conversion through a temporary BGR image.
        View<A> bgr(src.Size(), View<A>::Bgr24);
        Yuv444pToBgr(src.planes[0], src.planes[1], src.planes[2], bgr, src.yuvType);
        BgrToRgba(bgr, dst.planes[0]);
        break;
    }
    default:
        assert(0);
    }
Expand All @@ -1160,4 +1270,4 @@ namespace Simd
}
}

#endif//__SimdFrame_hpp__
#endif

0 comments on commit 985ae43

Please sign in to comment.