summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- src/blitter/32bpp_anim_sse4.cpp 18
-rw-r--r-- src/blitter/32bpp_sse2.cpp 24
-rw-r--r-- src/blitter/32bpp_sse2.hpp 13
-rw-r--r-- src/blitter/32bpp_sse4.cpp 22
-rw-r--r-- src/blitter/32bpp_ssse3.cpp 22
5 files changed, 24 insertions, 75 deletions
diff --git a/src/blitter/32bpp_anim_sse4.cpp b/src/blitter/32bpp_anim_sse4.cpp
index dc10ae364..1775f66e7 100644
--- a/src/blitter/32bpp_anim_sse4.cpp
+++ b/src/blitter/32bpp_anim_sse4.cpp
@@ -290,14 +290,7 @@ bmcr_alpha_blend_single:
for (uint x = (uint) bp->width / 2; x > 0; x--) {
__m128i srcABCD = _mm_loadl_epi64((const __m128i*) src);
__m128i dstABCD = _mm_loadl_epi64((__m128i*) dst);
- __m128i srcAB = _mm_unpacklo_epi8(srcABCD, _mm_setzero_si128());
- __m128i dstAB = _mm_unpacklo_epi8(dstABCD, _mm_setzero_si128());
- __m128i alphaAB = _mm_shuffle_epi8(srcAB, a_cm);
- alphaAB = _mm_srli_epi16(alphaAB, 2); // Reduce to 64 levels of shades so the max value fits in 16 bits.
- __m128i nom = _mm_sub_epi16(tr_nom_base, alphaAB);
- dstAB = _mm_mullo_epi16(dstAB, nom);
- dstAB = _mm_srli_epi16(dstAB, 8);
- dstAB = _mm_packus_epi16(dstAB, dstAB);
+ DARKEN_2();
_mm_storel_epi64((__m128i *) dst, dstAB);
src += 2;
dst += 2;
@@ -308,14 +301,7 @@ bmcr_alpha_blend_single:
if (bp->width & 1) {
__m128i srcABCD = _mm_cvtsi32_si128(src->data);
__m128i dstABCD = _mm_cvtsi32_si128(dst->data);
- __m128i srcAB = _mm_unpacklo_epi8(srcABCD, _mm_setzero_si128());
- __m128i dstAB = _mm_unpacklo_epi8(dstABCD, _mm_setzero_si128());
- __m128i alphaAB = _mm_shuffle_epi8(srcAB, a_cm);
- alphaAB = _mm_srli_epi16(alphaAB, 2);
- __m128i nom = _mm_sub_epi16(tr_nom_base, alphaAB);
- dstAB = _mm_mullo_epi16(dstAB, nom);
- dstAB = _mm_srli_epi16(dstAB, 8);
- dstAB = _mm_packus_epi16(dstAB, dstAB);
+ DARKEN_2();
dst->data = _mm_cvtsi128_si32(dstAB);
if (src[0].a) anim[0] = 0;
}
diff --git a/src/blitter/32bpp_sse2.cpp b/src/blitter/32bpp_sse2.cpp
index c5e7e70d0..07ba2f208 100644
--- a/src/blitter/32bpp_sse2.cpp
+++ b/src/blitter/32bpp_sse2.cpp
@@ -144,21 +144,11 @@ bmcr_alpha_blend_single:
break;
}
case BM_TRANSPARENT: {
- /* Make the current colour a bit more black, so it looks like this image is transparent.
- * rgb = rgb * ((256/4) * 4 - (alpha/4)) / ((256/4) * 4)
- */
+ /* Make the current colour a bit more black, so it looks like this image is transparent. */
for (uint x = (uint) bp->width / 2; x > 0; x--) {
__m128i srcABCD = _mm_loadl_epi64((const __m128i*) src);
__m128i dstABCD = _mm_loadl_epi64((__m128i*) dst);
- __m128i srcAB = _mm_unpacklo_epi8(srcABCD, _mm_setzero_si128());
- __m128i dstAB = _mm_unpacklo_epi8(dstABCD, _mm_setzero_si128());
- __m128i alphaAB = _mm_shufflelo_epi16(srcAB, 0x3F);
- alphaAB = _mm_shufflehi_epi16(alphaAB, 0x3F);
- alphaAB = _mm_srli_epi16(alphaAB, 2); // Reduce to 64 levels of shades so the max value fits in 16 bits.
- __m128i nom = _mm_sub_epi16(tr_nom_base, alphaAB);
- dstAB = _mm_mullo_epi16(dstAB, nom);
- dstAB = _mm_srli_epi16(dstAB, 8);
- dstAB = _mm_packus_epi16(dstAB, dstAB);
+ DARKEN_2();
_mm_storel_epi64((__m128i *) dst, dstAB);
src += 2;
dst += 2;
@@ -166,15 +156,7 @@ bmcr_alpha_blend_single:
if (bp->width & 1) {
__m128i srcABCD = _mm_cvtsi32_si128(src->data);
__m128i dstABCD = _mm_cvtsi32_si128(dst->data);
- __m128i srcAB = _mm_unpacklo_epi8(srcABCD, _mm_setzero_si128());
- __m128i dstAB = _mm_unpacklo_epi8(dstABCD, _mm_setzero_si128());
- __m128i alphaAB = _mm_shufflelo_epi16(srcAB, 0x3F);
- alphaAB = _mm_shufflehi_epi16(alphaAB, 0x3F);
- alphaAB = _mm_srli_epi16(alphaAB, 2);
- __m128i nom = _mm_sub_epi16(tr_nom_base, alphaAB);
- dstAB = _mm_mullo_epi16(dstAB, nom);
- dstAB = _mm_srli_epi16(dstAB, 8);
- dstAB = _mm_packus_epi16(dstAB, dstAB);
+ DARKEN_2();
dst->data = _mm_cvtsi128_si32(dstAB);
}
break;
diff --git a/src/blitter/32bpp_sse2.hpp b/src/blitter/32bpp_sse2.hpp
index 98acb8cd1..386b5d3e3 100644
--- a/src/blitter/32bpp_sse2.hpp
+++ b/src/blitter/32bpp_sse2.hpp
@@ -81,6 +81,19 @@ typedef union ALIGN(16) um128i {
PACK_AB_WITHOUT_SATURATION(srcAB, srcABCD); \
}
+/* Darken 2 pixels: reads srcABCD, dstABCD and tr_nom_base from the enclosing scope and leaves the packed result in the newly declared dstAB.
+ * rgb = rgb * ((256/4) * 4 - (alpha/4)) / ((256/4) * 4)
+ * The expansion declares locals (srcAB, dstAB, nom, ...), so it can be used at most once per scope. */
+#define DARKEN_2() \
+ __m128i srcAB = _mm_unpacklo_epi8(srcABCD, _mm_setzero_si128()); \
+ __m128i dstAB = _mm_unpacklo_epi8(dstABCD, _mm_setzero_si128()); \
+ __m128i PUT_ALPHA_IN_FRONT_OF_RGB(srcAB, alphaAB); \
+ alphaAB = _mm_srli_epi16(alphaAB, 2); /* Reduce to 64 levels of shades so the max value fits in 16 bits. */ \
+ __m128i nom = _mm_sub_epi16(tr_nom_base, alphaAB); \
+ dstAB = _mm_mullo_epi16(dstAB, nom); \
+ dstAB = _mm_srli_epi16(dstAB, 8); \
+ dstAB = _mm_packus_epi16(dstAB, dstAB);
+
/** Base methods for 32bpp SSE blitters. */
class Blitter_32bppSSE_Base {
public:
diff --git a/src/blitter/32bpp_sse4.cpp b/src/blitter/32bpp_sse4.cpp
index 4eca12873..298082e32 100644
--- a/src/blitter/32bpp_sse4.cpp
+++ b/src/blitter/32bpp_sse4.cpp
@@ -188,20 +188,11 @@ bmcr_alpha_blend_single:
}
case BM_TRANSPARENT: {
- /* Make the current colour a bit more black, so it looks like this image is transparent.
- * rgb = rgb * ((256/4) * 4 - (alpha/4)) / ((256/4) * 4)
- */
+ /* Make the current colour a bit more black, so it looks like this image is transparent. */
for (uint x = (uint) bp->width / 2; x > 0; x--) {
__m128i srcABCD = _mm_loadl_epi64((const __m128i*) src);
__m128i dstABCD = _mm_loadl_epi64((__m128i*) dst);
- __m128i srcAB = _mm_unpacklo_epi8(srcABCD, _mm_setzero_si128());
- __m128i dstAB = _mm_unpacklo_epi8(dstABCD, _mm_setzero_si128());
- __m128i alphaAB = _mm_shuffle_epi8(srcAB, a_cm);
- alphaAB = _mm_srli_epi16(alphaAB, 2); // Reduce to 64 levels of shades so the max value fits in 16 bits.
- __m128i nom = _mm_sub_epi16(tr_nom_base, alphaAB);
- dstAB = _mm_mullo_epi16(dstAB, nom);
- dstAB = _mm_srli_epi16(dstAB, 8);
- dstAB = _mm_packus_epi16(dstAB, dstAB);
+ DARKEN_2();
_mm_storel_epi64((__m128i *) dst, dstAB);
src += 2;
dst += 2;
@@ -209,14 +200,7 @@ bmcr_alpha_blend_single:
if (bp->width & 1) {
__m128i srcABCD = _mm_cvtsi32_si128(src->data);
__m128i dstABCD = _mm_cvtsi32_si128(dst->data);
- __m128i srcAB = _mm_unpacklo_epi8(srcABCD, _mm_setzero_si128());
- __m128i dstAB = _mm_unpacklo_epi8(dstABCD, _mm_setzero_si128());
- __m128i alphaAB = _mm_shuffle_epi8(srcAB, a_cm);
- alphaAB = _mm_srli_epi16(alphaAB, 2);
- __m128i nom = _mm_sub_epi16(tr_nom_base, alphaAB);
- dstAB = _mm_mullo_epi16(dstAB, nom);
- dstAB = _mm_srli_epi16(dstAB, 8);
- dstAB = _mm_packus_epi16(dstAB, dstAB);
+ DARKEN_2();
dst->data = _mm_cvtsi128_si32(dstAB);
}
diff --git a/src/blitter/32bpp_ssse3.cpp b/src/blitter/32bpp_ssse3.cpp
index 15cab5969..261c01c16 100644
--- a/src/blitter/32bpp_ssse3.cpp
+++ b/src/blitter/32bpp_ssse3.cpp
@@ -188,20 +188,11 @@ bmcr_alpha_blend_single:
}
case BM_TRANSPARENT: {
- /* Make the current colour a bit more black, so it looks like this image is transparent.
- * rgb = rgb * ((256/4) * 4 - (alpha/4)) / ((256/4) * 4)
- */
+ /* Make the current colour a bit more black, so it looks like this image is transparent. */
for (uint x = (uint) bp->width / 2; x > 0; x--) {
__m128i srcABCD = _mm_loadl_epi64((const __m128i*) src);
__m128i dstABCD = _mm_loadl_epi64((__m128i*) dst);
- __m128i srcAB = _mm_unpacklo_epi8(srcABCD, _mm_setzero_si128());
- __m128i dstAB = _mm_unpacklo_epi8(dstABCD, _mm_setzero_si128());
- __m128i alphaAB = _mm_shuffle_epi8(srcAB, a_cm);
- alphaAB = _mm_srli_epi16(alphaAB, 2); // Reduce to 64 levels of shades so the max value fits in 16 bits.
- __m128i nom = _mm_sub_epi16(tr_nom_base, alphaAB);
- dstAB = _mm_mullo_epi16(dstAB, nom);
- dstAB = _mm_srli_epi16(dstAB, 8);
- dstAB = _mm_packus_epi16(dstAB, dstAB);
+ DARKEN_2();
_mm_storel_epi64((__m128i *) dst, dstAB);
src += 2;
dst += 2;
@@ -209,14 +200,7 @@ bmcr_alpha_blend_single:
if (bp->width & 1) {
__m128i srcABCD = _mm_cvtsi32_si128(src->data);
__m128i dstABCD = _mm_cvtsi32_si128(dst->data);
- __m128i srcAB = _mm_unpacklo_epi8(srcABCD, _mm_setzero_si128());
- __m128i dstAB = _mm_unpacklo_epi8(dstABCD, _mm_setzero_si128());
- __m128i alphaAB = _mm_shuffle_epi8(srcAB, a_cm);
- alphaAB = _mm_srli_epi16(alphaAB, 2);
- __m128i nom = _mm_sub_epi16(tr_nom_base, alphaAB);
- dstAB = _mm_mullo_epi16(dstAB, nom);
- dstAB = _mm_srli_epi16(dstAB, 8);
- dstAB = _mm_packus_epi16(dstAB, dstAB);
+ DARKEN_2();
dst->data = _mm_cvtsi128_si32(dstAB);
}
break;