From ef9108bd5848fa8ddc7dfe7a322ba947528771ad Mon Sep 17 00:00:00 2001 From: rubidium Date: Mon, 13 Jan 2014 18:20:23 +0000 Subject: (svn r26260) -Codechange: add template parameters for (non) translucent and (non) animated sprites, so the least expensive variant can be chosen (MJP) --- src/blitter/32bpp_anim_sse4.cpp | 182 +++++++++++++++++++++++++--------------- src/blitter/32bpp_anim_sse4.hpp | 2 +- src/blitter/32bpp_sse2.hpp | 2 +- src/blitter/32bpp_sse4.hpp | 2 +- src/blitter/32bpp_sse_func.hpp | 49 +++++++---- src/blitter/32bpp_ssse3.hpp | 2 +- 6 files changed, 151 insertions(+), 88 deletions(-) diff --git a/src/blitter/32bpp_anim_sse4.cpp b/src/blitter/32bpp_anim_sse4.cpp index eb74b959f..004e32076 100644 --- a/src/blitter/32bpp_anim_sse4.cpp +++ b/src/blitter/32bpp_anim_sse4.cpp @@ -28,7 +28,7 @@ static FBlitter_32bppSSE4_Anim iFBlitter_32bppSSE4_Anim; * @param zoom zoom level at which we are drawing */ IGNORE_UNINITIALIZED_WARNING_START -template +template inline void Blitter_32bppSSE4_Anim::Draw(const Blitter::BlitterParams *bp, ZoomLevel zoom) { const byte * const remap = bp->remap; @@ -75,46 +75,70 @@ inline void Blitter_32bppSSE4_Anim::Draw(const Blitter::BlitterParams *bp, ZoomL switch (mode) { default: + if (!translucent) { + for (uint x = (uint) effective_width; x > 0; x--) { + if (src->a) { + if (animated) { + *anim = *(const uint16*) src_mv; + *dst = (src_mv->m >= PALETTE_ANIM_START) ? AdjustBrightneSSE(this->LookupColourInPalette(src_mv->m), src_mv->v) : src->data; + } else { + *anim = 0; + *dst = *src; + } + } + if (animated) src_mv++; + anim++; + src++; + dst++; + } + break; + } + for (uint x = (uint) effective_width/2; x != 0; x--) { uint32 mvX2 = *((uint32 *) const_cast(src_mv)); __m128i srcABCD = _mm_loadl_epi64((const __m128i*) src); __m128i dstABCD = _mm_loadl_epi64((__m128i*) dst); - /* Remap colours. */ - const byte m0 = mvX2; - if (m0 >= PALETTE_ANIM_START) { - const Colour c0 = (this->LookupColourInPalette(m0).data & 0x00FFFFFF) | (src[0].data & 0xFF000000); - InsertFirstUint32(AdjustBrightneSSE(c0, (byte) (mvX2 >> 8)).data, srcABCD); - } - const byte m1 = mvX2 >> 16; - if (m1 >= PALETTE_ANIM_START) { - const Colour c1 = (this->LookupColourInPalette(m1).data & 0x00FFFFFF) | (src[1].data & 0xFF000000); - InsertSecondUint32(AdjustBrightneSSE(c1, (byte) (mvX2 >> 24)).data, srcABCD); - } + if (animated) { + /* Remap colours. */ + const byte m0 = mvX2; + if (m0 >= PALETTE_ANIM_START) { + const Colour c0 = (this->LookupColourInPalette(m0).data & 0x00FFFFFF) | (src[0].data & 0xFF000000); + InsertFirstUint32(AdjustBrightneSSE(c0, (byte) (mvX2 >> 8)).data, srcABCD); + } + const byte m1 = mvX2 >> 16; + if (m1 >= PALETTE_ANIM_START) { + const Colour c1 = (this->LookupColourInPalette(m1).data & 0x00FFFFFF) | (src[1].data & 0xFF000000); + InsertSecondUint32(AdjustBrightneSSE(c1, (byte) (mvX2 >> 24)).data, srcABCD); + } - /* Update anim buffer. */ - const byte a0 = src[0].a; - const byte a1 = src[1].a; - uint32 anim01 = 0; - if (a0 == 255) { - if (a1 == 255) { - *(uint32*) anim = mvX2; - goto bmno_full_opacity; + /* Update anim buffer. */ + const byte a0 = src[0].a; + const byte a1 = src[1].a; + uint32 anim01 = 0; + if (a0 == 255) { + if (a1 == 255) { + *(uint32*) anim = mvX2; + goto bmno_full_opacity; + } + anim01 = (uint16) mvX2; + } else if (a0 == 0) { + if (a1 == 0) { + goto bmno_full_transparency; + } else { + if (a1 == 255) anim[1] = (uint16) (mvX2 >> 16); + goto bmno_alpha_blend; + } } - anim01 = (uint16) mvX2; - } else if (a0 == 0) { - if (a1 == 0) { - goto bmno_full_transparency; + if (a1 > 0) { + if (a1 == 255) anim01 |= mvX2 & 0xFFFF0000; + *(uint32*) anim = anim01; } else { - if (a1 == 255) anim[1] = (uint16) (mvX2 >> 16); - goto bmno_alpha_blend; + anim[0] = (uint16) anim01; } - } - if (a1 > 0) { - if (a1 == 255) anim01 |= mvX2 & 0xFFFF0000; - *(uint32*) anim = anim01; } else { - anim[0] = (uint16) anim01; + if (src[0].a) anim[0] = 0; + if (src[1].a) anim[1] = 0; } /* Blend colours. */ @@ -175,18 +199,19 @@ bmno_full_transparency: } #ifdef _SQ64 uint64 srcs = _mm_cvtsi128_si64(srcABCD); - uint64 dsts = _mm_cvtsi128_si64(dstABCD); + uint64 dsts; + if (animated) dsts = _mm_cvtsi128_si64(dstABCD); uint64 remapped_src = 0; - CMOV_REMAP(c0, dsts, srcs, mvX2); + CMOV_REMAP(c0, animated ? dsts : 0, srcs, mvX2); remapped_src = c0.data; - CMOV_REMAP(c1, dsts >> 32, srcs >> 32, mvX2 >> 16); + CMOV_REMAP(c1, animated ? dsts >> 32 : 0, srcs >> 32, mvX2 >> 16); remapped_src |= (uint64) c1.data << 32; srcABCD = _mm_cvtsi64_si128(remapped_src); #else Colour remapped_src[2]; - CMOV_REMAP(c0, _mm_cvtsi128_si32(dstABCD), _mm_cvtsi128_si32(srcABCD), mvX2); + CMOV_REMAP(c0, animated ? _mm_cvtsi128_si32(dstABCD) : 0, _mm_cvtsi128_si32(srcABCD), mvX2); remapped_src[0] = c0.data; - CMOV_REMAP(c1, dst[1], src[1], mvX2 >> 16); + CMOV_REMAP(c1, animated ? dst[1] : 0, src[1], mvX2 >> 16); remapped_src[1] = c1.data; srcABCD = _mm_loadl_epi64((__m128i*) &remapped_src); #endif @@ -195,30 +220,35 @@ bmno_full_transparency: } /* Update anim buffer. */ - const byte a0 = src[0].a; - const byte a1 = src[1].a; - uint32 anim01 = mvX2 & 0xFF00FF00; - if (a0 == 255) { - anim01 |= r0; - if (a1 == 255) { - *(uint32*) anim = anim01 | (r1 << 16); - goto bmcr_full_opacity; - } - } else if (a0 == 0) { - if (a1 == 0) { - goto bmcr_full_transparency; - } else { + if (animated) { + const byte a0 = src[0].a; + const byte a1 = src[1].a; + uint32 anim01 = mvX2 & 0xFF00FF00; + if (a0 == 255) { + anim01 |= r0; if (a1 == 255) { - anim[1] = r1 | (anim01 >> 16); + *(uint32*) anim = anim01 | (r1 << 16); + goto bmcr_full_opacity; + } + } else if (a0 == 0) { + if (a1 == 0) { + goto bmcr_full_transparency; + } else { + if (a1 == 255) { + anim[1] = r1 | (anim01 >> 16); + } + goto bmcr_alpha_blend; } - goto bmcr_alpha_blend; } - } - if (a1 > 0) { - if (a1 == 255) anim01 |= r1 << 16; - *(uint32*) anim = anim01; + if (a1 > 0) { + if (a1 == 255) anim01 |= r1 << 16; + *(uint32*) anim = anim01; + } else { + anim[0] = (uint16) anim01; + } } else { - anim[0] = (uint16) anim01; + if (src[0].a) anim[0] = 0; + if (src[1].a) anim[1] = 0; } /* Blend colours. */ @@ -239,7 +269,7 @@ bmcr_full_transparency: if (src->a == 0) break; if (src_mv->m) { const uint r = remap[src_mv->m]; - *anim = (src->a == 255) ? r | ((uint16) src_mv->v << 8 ) : 0; + *anim = (animated && src->a == 255) ? r | ((uint16) src_mv->v << 8 ) : 0; if (r != 0) { Colour remapped_colour = AdjustBrightneSSE(this->LookupColourInPalette(r), src_mv->v); if (src->a == 255) { @@ -303,28 +333,46 @@ IGNORE_UNINITIALIZED_WARNING_STOP */ void Blitter_32bppSSE4_Anim::Draw(Blitter::BlitterParams *bp, BlitterMode mode, ZoomLevel zoom) { + const Blitter_32bppSSE_Base::SpriteFlags sprite_flags = ((const Blitter_32bppSSE_Base::SpriteData *) bp->sprite)->flags; switch (mode) { - case BM_NORMAL: { + default: { +bm_normal: if (bp->skip_left != 0 || bp->width <= MARGIN_NORMAL_THRESHOLD) { const BlockType bt_last = (BlockType) (bp->width & 1); - switch (bt_last) { - case BT_EVEN: Draw(bp, zoom); return; - case BT_ODD: Draw(bp, zoom); return; - default: NOT_REACHED(); + if (bt_last == BT_EVEN) { + if (sprite_flags & SF_NO_ANIM) Draw(bp, zoom); + else Draw(bp, zoom); + } else { + if (sprite_flags & SF_NO_ANIM) Draw(bp, zoom); + else Draw(bp, zoom); } } else { - Draw(bp, zoom); return; +#ifdef _SQ64 + if (sprite_flags & SF_TRANSLUCENT) { + if (sprite_flags & SF_NO_ANIM) Draw(bp, zoom); + else Draw(bp, zoom); + } else { + if (sprite_flags & SF_NO_ANIM) Draw(bp, zoom); + else Draw(bp, zoom); + } +#else + if (sprite_flags & SF_NO_ANIM) Draw(bp, zoom); + else Draw(bp, zoom); +#endif } break; } case BM_COLOUR_REMAP: + if (sprite_flags & SF_NO_REMAP) goto bm_normal; if (bp->skip_left != 0 || bp->width <= MARGIN_REMAP_THRESHOLD) { - Draw(bp, zoom); return; + if (sprite_flags & SF_NO_ANIM) Draw(bp, zoom); + else Draw(bp, zoom); } else { - Draw(bp, zoom); return; + if (sprite_flags & SF_NO_ANIM) Draw(bp, zoom); + else Draw(bp, zoom); } - case BM_TRANSPARENT: Draw(bp, zoom); return; - default: NOT_REACHED(); + break; + case BM_TRANSPARENT: Draw(bp, zoom); return; } } diff --git a/src/blitter/32bpp_anim_sse4.hpp b/src/blitter/32bpp_anim_sse4.hpp index 096ee62e0..e2d4cfc23 100644 --- a/src/blitter/32bpp_anim_sse4.hpp +++ b/src/blitter/32bpp_anim_sse4.hpp @@ -33,7 +33,7 @@ class Blitter_32bppSSE4_Anim FINAL : public Blitter_32bppAnim, public Blitter_32 private: public: - template + template /* virtual */ void Draw(const Blitter::BlitterParams *bp, ZoomLevel zoom); /* virtual */ void Draw(Blitter::BlitterParams *bp, BlitterMode mode, ZoomLevel zoom); /* virtual */ Sprite *Encode(const SpriteLoader::Sprite *sprite, AllocatorProc *allocator) { diff --git a/src/blitter/32bpp_sse2.hpp b/src/blitter/32bpp_sse2.hpp index 43a5fc400..d6b17f679 100644 --- a/src/blitter/32bpp_sse2.hpp +++ b/src/blitter/32bpp_sse2.hpp @@ -83,7 +83,7 @@ DECLARE_ENUM_AS_BIT_SET(Blitter_32bppSSE_Base::SpriteFlags); class Blitter_32bppSSE2 : public Blitter_32bppSimple, public Blitter_32bppSSE_Base { public: /* virtual */ void Draw(Blitter::BlitterParams *bp, BlitterMode mode, ZoomLevel zoom); - template + template void Draw(const Blitter::BlitterParams *bp, ZoomLevel zoom); /* virtual */ Sprite *Encode(const SpriteLoader::Sprite *sprite, AllocatorProc *allocator) { diff --git a/src/blitter/32bpp_sse4.hpp b/src/blitter/32bpp_sse4.hpp index 38f00920c..9c59d253f 100644 --- a/src/blitter/32bpp_sse4.hpp +++ b/src/blitter/32bpp_sse4.hpp @@ -28,7 +28,7 @@ class Blitter_32bppSSE4 : public Blitter_32bppSSSE3 { public: /* virtual */ void Draw(Blitter::BlitterParams *bp, BlitterMode mode, ZoomLevel zoom); - template + template void Draw(const Blitter::BlitterParams *bp, ZoomLevel zoom); /* virtual */ const char *GetName() { return "32bpp-sse4"; } }; diff --git a/src/blitter/32bpp_sse_func.hpp b/src/blitter/32bpp_sse_func.hpp index b6a61f9c7..7cec94c77 100644 --- a/src/blitter/32bpp_sse_func.hpp +++ b/src/blitter/32bpp_sse_func.hpp @@ -19,7 +19,8 @@ static inline void InsertFirstUint32(const uint32 value, __m128i &into) #if (SSE_VERSION >= 4) into = _mm_insert_epi32(into, value, 0); #else - NOT_REACHED(); + into = _mm_insert_epi16(into, value, 0); + into = _mm_insert_epi16(into, value >> 16, 1); #endif } @@ -192,7 +193,7 @@ static inline __m128i AdjustBrightnessOfTwoPixels(__m128i from, uint32 brightnes * @param zoom zoom level at which we are drawing */ IGNORE_UNINITIALIZED_WARNING_START -template +template #if (SSE_VERSION == 2) inline void Blitter_32bppSSE2::Draw(const Blitter::BlitterParams *bp, ZoomLevel zoom) #elif (SSE_VERSION == 3) @@ -254,6 +255,15 @@ inline void Blitter_32bppSSE4::Draw(const Blitter::BlitterParams *bp, ZoomLevel switch (mode) { default: + if (!translucent) { + for (uint x = (uint) effective_width; x > 0; x--) { + if (src->a) *dst = *src; + src++; + dst++; + } + break; + } + for (uint x = (uint) effective_width / 2; x > 0; x--) { __m128i srcABCD = _mm_loadl_epi64((const __m128i*) src); __m128i dstABCD = _mm_loadl_epi64((__m128i*) dst); @@ -278,9 +288,9 @@ inline void Blitter_32bppSSE4::Draw(const Blitter::BlitterParams *bp, ZoomLevel /* Remap colours. */ if (mvX2 & 0x00FF00FF) { - #define CMOV_REMAP(m_colour, m_src, m_m) \ + #define CMOV_REMAP(m_colour, m_colour_init, m_src, m_m) \ /* Written so the compiler uses CMOV. */ \ - Colour m_colour = 0; \ + Colour m_colour = m_colour_init; \ { \ const Colour srcm = (Colour) (m_src); \ const uint m = (byte) (m_m); \ @@ -292,16 +302,16 @@ inline void Blitter_32bppSSE4::Draw(const Blitter::BlitterParams *bp, ZoomLevel #ifdef _SQ64 uint64 srcs = _mm_cvtsi128_si64(srcABCD); uint64 remapped_src = 0; - CMOV_REMAP(c0, srcs, mvX2); + CMOV_REMAP(c0, 0, srcs, mvX2); remapped_src = c0.data; - CMOV_REMAP(c1, srcs >> 32, mvX2 >> 16); + CMOV_REMAP(c1, 0, srcs >> 32, mvX2 >> 16); remapped_src |= (uint64) c1.data << 32; srcABCD = _mm_cvtsi64_si128(remapped_src); #else Colour remapped_src[2]; - CMOV_REMAP(c0, _mm_cvtsi128_si32(srcABCD), mvX2); + CMOV_REMAP(c0, 0, _mm_cvtsi128_si32(srcABCD), mvX2); remapped_src[0] = c0.data; - CMOV_REMAP(c1, src[1], mvX2 >> 16); + CMOV_REMAP(c1, 0, src[1], mvX2 >> 16); remapped_src[1] = c1.data; srcABCD = _mm_loadl_epi64((__m128i*) &remapped_src); #endif @@ -393,27 +403,32 @@ void Blitter_32bppSSE4::Draw(Blitter::BlitterParams *bp, BlitterMode mode, ZoomL #endif { switch (mode) { - case BM_NORMAL: { + default: { if (bp->skip_left != 0 || bp->width <= MARGIN_NORMAL_THRESHOLD) { +bm_normal: const BlockType bt_last = (BlockType) (bp->width & 1); switch (bt_last) { - case BT_EVEN: Draw(bp, zoom); return; - case BT_ODD: Draw(bp, zoom); return; - default: NOT_REACHED(); + default: Draw(bp, zoom); return; + case BT_ODD: Draw(bp, zoom); return; } } else { - Draw(bp, zoom); return; + if (((const Blitter_32bppSSE_Base::SpriteData *) bp->sprite)->flags & SF_TRANSLUCENT) { + Draw(bp, zoom); + } else { + Draw(bp, zoom); + } + return; } break; } case BM_COLOUR_REMAP: + if (((const Blitter_32bppSSE_Base::SpriteData *) bp->sprite)->flags & SF_NO_REMAP) goto bm_normal; if (bp->skip_left != 0 || bp->width <= MARGIN_REMAP_THRESHOLD) { - Draw(bp, zoom); return; + Draw(bp, zoom); return; } else { - Draw(bp, zoom); return; + Draw(bp, zoom); return; } - case BM_TRANSPARENT: Draw(bp, zoom); return; - default: NOT_REACHED(); + case BM_TRANSPARENT: Draw(bp, zoom); return; } } #endif /* FULL_ANIMATION */ diff --git a/src/blitter/32bpp_ssse3.hpp b/src/blitter/32bpp_ssse3.hpp index c666408e7..e9cac8ff0 100644 --- a/src/blitter/32bpp_ssse3.hpp +++ b/src/blitter/32bpp_ssse3.hpp @@ -28,7 +28,7 @@ class Blitter_32bppSSSE3 : public Blitter_32bppSSE2 { public: /* virtual */ void Draw(Blitter::BlitterParams *bp, BlitterMode mode, ZoomLevel zoom); - template + template void Draw(const Blitter::BlitterParams *bp, ZoomLevel zoom); /* virtual */ const char *GetName() { return "32bpp-ssse3"; } }; -- cgit v1.2.3-54-g00ecf