From 76661f2c56e6542fe3023cf22814237a25a4a7bd Mon Sep 17 00:00:00 2001 From: rubidium Date: Mon, 13 Jan 2014 17:57:30 +0000 Subject: (svn r26249) -Codechange: simplify/cleanup ALPHA_BLEND macro (MJP) --- src/blitter/32bpp_anim_sse4.cpp | 8 ++++---- src/blitter/32bpp_sse2.hpp | 28 ++++++++++++++++++---------- src/blitter/32bpp_sse4.cpp | 8 ++++---- src/blitter/32bpp_sse4.hpp | 14 +++++++------- src/blitter/32bpp_ssse3.cpp | 8 ++++---- src/blitter/32bpp_ssse3.hpp | 35 ++++++++++++----------------------- 6 files changed, 49 insertions(+), 52 deletions(-) (limited to 'src') diff --git a/src/blitter/32bpp_anim_sse4.cpp b/src/blitter/32bpp_anim_sse4.cpp index 5bc63bbb0..dc10ae364 100644 --- a/src/blitter/32bpp_anim_sse4.cpp +++ b/src/blitter/32bpp_anim_sse4.cpp @@ -124,7 +124,7 @@ inline void Blitter_32bppSSE4_Anim::Draw(const Blitter::BlitterParams *bp, ZoomL /* Blend colours. */ bmno_alpha_blend: - ALPHA_BLEND_2(pack_low_cm); + ALPHA_BLEND_2(); bmno_full_opacity: _mm_storel_epi64((__m128i *) dst, srcABCD); bmno_full_transparency: @@ -150,7 +150,7 @@ bmno_full_transparency: } else { srcABCD = _mm_cvtsi32_si128(src->data); } - ALPHA_BLEND_2(pack_low_cm); + ALPHA_BLEND_2(); dst->data = _mm_cvtsi128_si32(srcABCD); } } @@ -239,7 +239,7 @@ bmno_full_transparency: /* Blend colours. */ bmcr_alpha_blend: - ALPHA_BLEND_2(pack_low_cm); + ALPHA_BLEND_2(); bmcr_full_opacity: _mm_storel_epi64((__m128i *) dst, srcABCD); bmcr_full_transparency: @@ -272,7 +272,7 @@ bmcr_full_transparency: if (src->a < 255) { bmcr_alpha_blend_single: __m128i dstABCD = _mm_cvtsi32_si128(dst->data); - ALPHA_BLEND_2(pack_low_cm); + ALPHA_BLEND_2(); } dst->data = _mm_cvtsi128_si32(srcABCD); } diff --git a/src/blitter/32bpp_sse2.hpp b/src/blitter/32bpp_sse2.hpp index a0ed74cdb..98acb8cd1 100644 --- a/src/blitter/32bpp_sse2.hpp +++ b/src/blitter/32bpp_sse2.hpp @@ -46,13 +46,23 @@ typedef union ALIGN(16) um128i { #define OVERBRIGHT_CONTROL_MASK _mm_setr_epi8( 0, 1, 0, 1, 0, 1, 7, 7, 2, 3, 2, 3, 2, 3, 7, 7) #define TRANSPARENT_NOM_BASE _mm_setr_epi16(256, 256, 256, 256, 256, 256, 256, 256) -#define EXTR32(from, rank) (*(um128i*) &from).m128i_u32[rank] -#define EXTR64(from, rank) (*(um128i*) &from).m128i_u64[rank] -#define INSR32(val, into, rank) { \ - (*(um128i*) &into).m128i = _mm_insert_epi16((*(um128i*) &into).m128i, val, (rank)*2); \ - (*(um128i*) &into).m128i = _mm_insert_epi16((*(um128i*) &into).m128i, (val) >> 16, (rank)*2 + 1); \ +#define EXTR32(m_from, m_rank) (*(um128i*) &m_from).m128i_u32[m_rank] +#define EXTR64(m_from, m_rank) (*(um128i*) &m_from).m128i_u64[m_rank] +#define INSR32(m_val, m_into, m_rank) { \ + (*(um128i*) &m_into).m128i = _mm_insert_epi16((*(um128i*) &m_into).m128i, m_val, (m_rank)*2); \ + (*(um128i*) &m_into).m128i = _mm_insert_epi16((*(um128i*) &m_into).m128i, (m_val) >> 16, (m_rank)*2 + 1); \ } -#define INSR64(val, into, rank) (*(um128i*) &into).m128i_u64[rank] = (val) +#define INSR64(m_val, m_into, m_rank) (*(um128i*) &m_into).m128i_u64[m_rank] = (m_val) + +/* PUT_ALPHA_IN_FRONT_OF_RGB is redefined in 32bpp_ssse3.hpp. */ +#define PUT_ALPHA_IN_FRONT_OF_RGB(m_from, m_into) \ + m_into = _mm_shufflelo_epi16(m_from, 0x3F); /* PSHUFLW, put alpha1 in front of each rgb1 */ \ + m_into = _mm_shufflehi_epi16(m_into, 0x3F); /* PSHUFHW, put alpha2 in front of each rgb2 */ + +/* PACK_AB_WITHOUT_SATURATION is redefined in 32bpp_ssse3.hpp. */ +#define PACK_AB_WITHOUT_SATURATION(m_from, m_into) \ + m_from = _mm_and_si128(m_from, clear_hi); /* PAND, wipe high bytes to keep low bytes when packing */ \ + m_into = _mm_packus_epi16(m_from, m_from); /* PACKUSWB, pack 2 colours (with saturation) */ /* Alpha blend 2 pixels. */ #define ALPHA_BLEND_2() { \ @@ -62,15 +72,13 @@ typedef union ALIGN(16) um128i { __m128i alphaAB = _mm_cmpgt_epi16(srcAB, _mm_setzero_si128()); /* PCMPGTW, if (alpha > 0) a++; */ \ alphaAB = _mm_srli_epi16(alphaAB, 15); \ alphaAB = _mm_add_epi16(alphaAB, srcAB); \ - alphaAB = _mm_shufflelo_epi16(alphaAB, 0x3F); /* PSHUFLW, put alpha1 in front of each rgb1 */ \ - alphaAB = _mm_shufflehi_epi16(alphaAB, 0x3F); /* PSHUFHW, put alpha2 in front of each rgb2 */ \ + PUT_ALPHA_IN_FRONT_OF_RGB(alphaAB, alphaAB); \ \ srcAB = _mm_sub_epi16(srcAB, dstAB); /* PSUBW, (r - Cr) */ \ srcAB = _mm_mullo_epi16(srcAB, alphaAB); /* PMULLW, a*(r - Cr) */ \ srcAB = _mm_srli_epi16(srcAB, 8); /* PSRLW, a*(r - Cr)/256 */ \ srcAB = _mm_add_epi16(srcAB, dstAB); /* PADDW, a*(r - Cr)/256 + Cr */ \ - srcAB = _mm_and_si128(srcAB, clear_hi); /* PAND, wipe high bytes to keep low bytes when packing */ \ - srcABCD = _mm_packus_epi16(srcAB, srcAB); /* PACKUSWB, pack 2 colours (with saturation) */ \ + PACK_AB_WITHOUT_SATURATION(srcAB, srcABCD); \ } /** Base methods for 32bpp SSE blitters. */ diff --git a/src/blitter/32bpp_sse4.cpp b/src/blitter/32bpp_sse4.cpp index c00dab0d7..4eca12873 100644 --- a/src/blitter/32bpp_sse4.cpp +++ b/src/blitter/32bpp_sse4.cpp @@ -79,7 +79,7 @@ inline void Blitter_32bppSSE4::Draw(const Blitter::BlitterParams *bp, ZoomLevel for (uint x = (uint) effective_width / 2; x > 0; x--) { __m128i srcABCD = _mm_loadl_epi64((const __m128i*) src); __m128i dstABCD = _mm_loadl_epi64((__m128i*) dst); - ALPHA_BLEND_2(pack_low_cm); + ALPHA_BLEND_2(); _mm_storel_epi64((__m128i*) dst, srcABCD); src += 2; dst += 2; @@ -87,7 +87,7 @@ inline void Blitter_32bppSSE4::Draw(const Blitter::BlitterParams *bp, ZoomLevel if (bt_last == BT_ODD) { __m128i srcABCD = _mm_cvtsi32_si128(src->data); __m128i dstABCD = _mm_cvtsi32_si128(dst->data); - ALPHA_BLEND_2(pack_low_cm); + ALPHA_BLEND_2(); dst->data = _mm_cvtsi128_si32(srcABCD); } break; @@ -146,7 +146,7 @@ inline void Blitter_32bppSSE4::Draw(const Blitter::BlitterParams *bp, ZoomLevel } /* Blend colours. */ - ALPHA_BLEND_2(pack_low_cm); + ALPHA_BLEND_2(); _mm_storel_epi64((__m128i *) dst, srcABCD); dst += 2; src += 2; @@ -173,7 +173,7 @@ inline void Blitter_32bppSSE4::Draw(const Blitter::BlitterParams *bp, ZoomLevel if (src->a < 255) { bmcr_alpha_blend_single: __m128i dstABCD = _mm_cvtsi32_si128(dst->data); - ALPHA_BLEND_2(pack_low_cm); + ALPHA_BLEND_2(); } dst->data = _mm_cvtsi128_si32(srcABCD); } diff --git a/src/blitter/32bpp_sse4.hpp b/src/blitter/32bpp_sse4.hpp index f3a24fa1e..d21b5ff33 100644 --- a/src/blitter/32bpp_sse4.hpp +++ b/src/blitter/32bpp_sse4.hpp @@ -18,22 +18,22 @@ #include "smmintrin.h" #undef EXTR32 -#define EXTR32(from, rank) _mm_extract_epi32((*(um128i*) &from).m128i, rank) +#define EXTR32(m_from, m_rank) _mm_extract_epi32((*(um128i*) &m_from).m128i, m_rank) #undef INSR32 -#define INSR32(val, into, rank) (*(um128i*) &into).m128i = _mm_insert_epi32((*(um128i*) &into).m128i, val, rank) +#define INSR32(m_val, m_into, m_rank) (*(um128i*) &m_into).m128i = _mm_insert_epi32((*(um128i*) &m_into).m128i, m_val, m_rank) IGNORE_UNINITIALIZED_WARNING_START #ifdef _SQ64 #undef INSR64 - #define INSR64(val, into, rank) (*(um128i*) &into).m128i = _mm_insert_epi64((*(um128i*) &into).m128i, val, rank) + #define INSR64(m_val, m_into, m_rank) (*(um128i*) &m_into).m128i = _mm_insert_epi64((*(um128i*) &m_into).m128i, m_val, m_rank) #else typedef union { uint64 u64; struct _u32 { uint32 low, high; } u32; } u6432; #undef INSR64 - #define INSR64(val, into, rank) { \ + #define INSR64(m_val, m_into, m_rank) { \ u6432 v; \ - v.u64 = val; \ - (*(um128i*) &into).m128i = _mm_insert_epi32((*(um128i*) &into).m128i, v.u32.low, (rank)*2); \ - (*(um128i*) &into).m128i = _mm_insert_epi32((*(um128i*) &into).m128i, v.u32.high, (rank)*2 + 1); \ + v.u64 = m_val; \ + (*(um128i*) &m_into).m128i = _mm_insert_epi32((*(um128i*) &m_into).m128i, v.u32.low, (m_rank)*2); \ + (*(um128i*) &m_into).m128i = _mm_insert_epi32((*(um128i*) &m_into).m128i, v.u32.high, (m_rank)*2 + 1); \ } #endif IGNORE_UNINITIALIZED_WARNING_STOP diff --git a/src/blitter/32bpp_ssse3.cpp b/src/blitter/32bpp_ssse3.cpp index dcd460b3c..15cab5969 100644 --- a/src/blitter/32bpp_ssse3.cpp +++ b/src/blitter/32bpp_ssse3.cpp @@ -79,7 +79,7 @@ inline void Blitter_32bppSSSE3::Draw(const Blitter::BlitterParams *bp, ZoomLevel for (uint x = (uint) effective_width / 2; x > 0; x--) { __m128i srcABCD = _mm_loadl_epi64((const __m128i*) src); __m128i dstABCD = _mm_loadl_epi64((__m128i*) dst); - ALPHA_BLEND_2(pack_low_cm); + ALPHA_BLEND_2(); _mm_storel_epi64((__m128i*) dst, srcABCD); src += 2; dst += 2; @@ -87,7 +87,7 @@ inline void Blitter_32bppSSSE3::Draw(const Blitter::BlitterParams *bp, ZoomLevel if (bt_last == BT_ODD) { __m128i srcABCD = _mm_cvtsi32_si128(src->data); __m128i dstABCD = _mm_cvtsi32_si128(dst->data); - ALPHA_BLEND_2(pack_low_cm); + ALPHA_BLEND_2(); dst->data = _mm_cvtsi128_si32(srcABCD); } break; @@ -146,7 +146,7 @@ inline void Blitter_32bppSSSE3::Draw(const Blitter::BlitterParams *bp, ZoomLevel } /* Blend colours. */ - ALPHA_BLEND_2(pack_low_cm); + ALPHA_BLEND_2(); _mm_storel_epi64((__m128i *) dst, srcABCD); dst += 2; src += 2; @@ -173,7 +173,7 @@ inline void Blitter_32bppSSSE3::Draw(const Blitter::BlitterParams *bp, ZoomLevel if (src->a < 255) { bmcr_alpha_blend_single: __m128i dstABCD = _mm_cvtsi32_si128(dst->data); - ALPHA_BLEND_2(pack_low_cm); + ALPHA_BLEND_2(); } dst->data = _mm_cvtsi128_si32(srcABCD); } diff --git a/src/blitter/32bpp_ssse3.hpp b/src/blitter/32bpp_ssse3.hpp index 97c67657e..d8d7dd204 100644 --- a/src/blitter/32bpp_ssse3.hpp +++ b/src/blitter/32bpp_ssse3.hpp @@ -17,37 +17,26 @@ #include "32bpp_sse2.hpp" #include "tmmintrin.h" -/* Alpha blend 2 pixels. */ -#undef ALPHA_BLEND_2 -#define ALPHA_BLEND_2(m_pack_mask) { \ - __m128i srcAB = _mm_unpacklo_epi8(srcABCD, _mm_setzero_si128()); /* PUNPCKLBW, expand each uint8 into uint16 */ \ - __m128i dstAB = _mm_unpacklo_epi8(dstABCD, _mm_setzero_si128()); \ - \ - __m128i alphaAB = _mm_cmpgt_epi16(srcAB, _mm_setzero_si128()); /* PCMPGTW, if (alpha > 0) a++; */ \ - alphaAB = _mm_srli_epi16(alphaAB, 15); \ - alphaAB = _mm_add_epi16(alphaAB, srcAB); \ - alphaAB = _mm_shuffle_epi8(alphaAB, a_cm); /* PSHUFB, put alpha in front of each rgb */ \ - \ - srcAB = _mm_sub_epi16(srcAB, dstAB); /* PSUBW, (r - Cr) */ \ - srcAB = _mm_mullo_epi16(srcAB, alphaAB); /* PMULLW, a*(r - Cr) */ \ - srcAB = _mm_srli_epi16(srcAB, 8); /* PSRLW, a*(r - Cr)/256 */ \ - srcAB = _mm_add_epi16(srcAB, dstAB); /* PADDW, a*(r - Cr)/256 + Cr */ \ - srcABCD = _mm_shuffle_epi8(srcAB, m_pack_mask); /* PSHUFB, pack 2 Colour (without saturation) */ \ -} +/* Use PSHUFB instead of PSHUFHW+PSHUFLW. */ +#undef PUT_ALPHA_IN_FRONT_OF_RGB +#define PUT_ALPHA_IN_FRONT_OF_RGB(m_from, m_into) m_into = _mm_shuffle_epi8(m_from, a_cm); + +#undef PACK_AB_WITHOUT_SATURATION +#define PACK_AB_WITHOUT_SATURATION(m_from, m_into) m_into = _mm_shuffle_epi8(m_from, pack_low_cm); /* Adjust brightness of 2 pixels. */ -#define ADJUST_BRIGHTNESS_2(colourX2, brightnessX2) \ +#define ADJUST_BRIGHTNESS_2(m_colourX2, m_brightnessX2) \ /* The following dataflow differs from the one of AdjustBrightness() only for alpha. * In order to keep alpha in colAB, insert a 1 in a unused brightness byte (a*1->a). * OK, not a 1 but DEFAULT_BRIGHTNESS to compensate the div. */ \ - brightnessX2 &= 0xFF00FF00; \ - brightnessX2 += DEFAULT_BRIGHTNESS; \ + m_brightnessX2 &= 0xFF00FF00; \ + m_brightnessX2 += DEFAULT_BRIGHTNESS; \ \ __m128i zero = _mm_setzero_si128(); \ - __m128i colAB = _mm_unpacklo_epi8(colourX2, zero); \ + __m128i colAB = _mm_unpacklo_epi8(m_colourX2, zero); \ \ - __m128i briAB = _mm_cvtsi32_si128(brightnessX2); \ + __m128i briAB = _mm_cvtsi32_si128(m_brightnessX2); \ briAB = _mm_shuffle_epi8(briAB, briAB_cm); /* DEFAULT_BRIGHTNESS in 0, 0x00 in 2. */ \ colAB = _mm_mullo_epi16(colAB, briAB); \ __m128i colAB_ob = _mm_srli_epi16(colAB, 8+7); \ @@ -71,7 +60,7 @@ retAB = _mm_srli_epi16(retAB, 8); /* ob*(255 - rgb)/256 */ \ retAB = _mm_add_epi16(retAB, colAB); /* ob*(255 - rgb)/256 + rgb */ \ \ - colourX2 = _mm_packus_epi16(retAB, retAB); + m_colourX2 = _mm_packus_epi16(retAB, retAB); /** The SSSE3 32 bpp blitter (without palette animation). */ class Blitter_32bppSSSE3 : public Blitter_32bppSSE2 { -- cgit v1.2.3-70-g09d2