diff options
author | rubidium <rubidium@openttd.org> | 2014-01-13 18:05:47 +0000 |
---|---|---|
committer | rubidium <rubidium@openttd.org> | 2014-01-13 18:05:47 +0000 |
commit | 70901e04c55490d7c661f7fa5c31193860e648af (patch) | |
tree | 6f6d5e357aa3b359c5979919f45746e432fd4d76 /src | |
parent | 2f7c4f6d12845e2f5be01285db86d5ba070c45ad (diff) | |
download | openttd-70901e04c55490d7c661f7fa5c31193860e648af.tar.xz |
(svn r26255) -Codechange: improve performance of brightness adjustment (MJP)
Diffstat (limited to 'src')
-rw-r--r-- | src/blitter/32bpp_sse2.cpp | 20 | ||||
-rw-r--r-- | src/blitter/32bpp_sse2.hpp | 6 | ||||
-rw-r--r-- | src/blitter/32bpp_sse4.cpp | 12 | ||||
-rw-r--r-- | src/blitter/32bpp_sse4.hpp | 5 |
4 files changed, 21 insertions, 22 deletions
diff --git a/src/blitter/32bpp_sse2.cpp b/src/blitter/32bpp_sse2.cpp
index fa43eb4c5..0b3eb1899 100644
--- a/src/blitter/32bpp_sse2.cpp
+++ b/src/blitter/32bpp_sse2.cpp
@@ -294,7 +294,7 @@ inline Colour Blitter_32bppSSE2::AdjustBrightness(Colour colour, uint8 brightnes
 }
 
 IGNORE_UNINITIALIZED_WARNING_START
-/* static */ Colour Blitter_32bppSSE2::ReallyAdjustBrightness(Colour colour, uint8 brightness)
+Colour Blitter_32bppSSE2::ReallyAdjustBrightness(Colour colour, uint8 brightness)
 {
 	uint64 c16 = colour.b | (uint64) colour.g << 16 | (uint64) colour.r << 32;
 	c16 *= brightness;
@@ -304,24 +304,14 @@ IGNORE_UNINITIALIZED_WARNING_START
 
 	/* Sum overbright (maximum for each rgb is 508, 9 bits, -255 is changed in -256 so we just have to take the 8 lower bits into account). */
 	c16_ob = (((c16_ob >> (8 + 7)) & 0x0100010001) * 0xFF) & c16;
-	uint64 ob = (uint16) c16_ob + (uint16) (c16_ob >> 16) + (uint16) (c16_ob >> 32);
+	const uint ob = ((uint16) c16_ob + (uint16) (c16_ob >> 16) + (uint16) (c16_ob >> 32)) / 2;
 
 	const uint32 alpha32 = colour.data & 0xFF000000;
 	__m128i ret;
-#ifdef _SQ64
-	ret = _mm_cvtsi64_si128(c16);
-#else
-	INSR64(c16, ret, 0);
-#endif
+	LOAD64(c16, ret);
 	if (ob != 0) {
-		/* Reduce overbright strength. */
-		ob /= 2;
-		__m128i ob128;
-#ifdef _SQ64
-		ob128 = _mm_cvtsi64_si128(ob | ob << 16 | ob << 32);
-#else
-		INSR64(ob | ob << 16 | ob << 32, ob128, 0);
-#endif
+		__m128i ob128 = _mm_cvtsi32_si128(ob);
+		ob128 = _mm_shufflelo_epi16(ob128, 0xC0);
 		__m128i white = OVERBRIGHT_VALUE_MASK;
 		__m128i c128 = ret;
 		ret = _mm_subs_epu16(white, c128); /* PSUBUSW, (255 - rgb) */
diff --git a/src/blitter/32bpp_sse2.hpp b/src/blitter/32bpp_sse2.hpp
index 386b5d3e3..1c3307c70 100644
--- a/src/blitter/32bpp_sse2.hpp
+++ b/src/blitter/32bpp_sse2.hpp
@@ -54,6 +54,12 @@ typedef union ALIGN(16) um128i {
 }
 #define INSR64(m_val, m_into, m_rank) (*(um128i*) &m_into).m128i_u64[m_rank] = (m_val)
 
+#ifdef _SQ64
+	#define LOAD64(m_val, m_into) m_into = _mm_cvtsi64_si128(m_val);
+#else
+	#define LOAD64(m_val, m_into) INSR64(m_val, m_into, 0)
+#endif
+
 /* PUT_ALPHA_IN_FRONT_OF_RGB is redefined in 32bpp_ssse3.hpp. */
 #define PUT_ALPHA_IN_FRONT_OF_RGB(m_from, m_into) \
 	m_into = _mm_shufflelo_epi16(m_from, 0x3F); /* PSHUFLW, put alpha1 in front of each rgb1 */ \
diff --git a/src/blitter/32bpp_sse4.cpp b/src/blitter/32bpp_sse4.cpp
index a0ddf2b63..4feebc221 100644
--- a/src/blitter/32bpp_sse4.cpp
+++ b/src/blitter/32bpp_sse4.cpp
@@ -232,7 +232,7 @@ inline Colour Blitter_32bppSSE4::AdjustBrightness(Colour colour, uint8 brightnes
 }
 
 IGNORE_UNINITIALIZED_WARNING_START
-/* static */ Colour Blitter_32bppSSE4::ReallyAdjustBrightness(Colour colour, uint8 brightness)
+Colour Blitter_32bppSSE4::ReallyAdjustBrightness(Colour colour, uint8 brightness)
 {
 	uint64 c16 = colour.b | (uint64) colour.g << 16 | (uint64) colour.r << 32;
 	c16 *= brightness;
@@ -242,16 +242,14 @@ IGNORE_UNINITIALIZED_WARNING_START
 
 	/* Sum overbright (maximum for each rgb is 508, 9 bits, -255 is changed in -256 so we just have to take the 8 lower bits into account). */
 	c16_ob = (((c16_ob >> (8 + 7)) & 0x0100010001) * 0xFF) & c16;
-	uint64 ob = (uint16) c16_ob + (uint16) (c16_ob >> 16) + (uint16) (c16_ob >> 32);
+	const uint ob = ((uint16) c16_ob + (uint16) (c16_ob >> 16) + (uint16) (c16_ob >> 32)) / 2;
 
 	const uint32 alpha32 = colour.data & 0xFF000000;
 	__m128i ret;
-	INSR64(c16, ret, 0);
+	LOAD64(c16, ret);
 	if (ob != 0) {
-		/* Reduce overbright strength. */
-		ob /= 2;
-		__m128i ob128;
-		INSR64(ob | ob << 16 | ob << 32, ob128, 0);
+		__m128i ob128 = _mm_cvtsi32_si128(ob);
+		ob128 = _mm_shufflelo_epi16(ob128, 0xC0);
 		__m128i white = OVERBRIGHT_VALUE_MASK;
 		__m128i c128 = ret;
 		ret = _mm_subs_epu16(white, c128); /* PSUBUSW, (255 - rgb) */
diff --git a/src/blitter/32bpp_sse4.hpp b/src/blitter/32bpp_sse4.hpp
index d21b5ff33..f8a563b85 100644
--- a/src/blitter/32bpp_sse4.hpp
+++ b/src/blitter/32bpp_sse4.hpp
@@ -35,6 +35,11 @@ IGNORE_UNINITIALIZED_WARNING_START
 		(*(um128i*) &m_into).m128i = _mm_insert_epi32((*(um128i*) &m_into).m128i, v.u32.low, (m_rank)*2); \
 		(*(um128i*) &m_into).m128i = _mm_insert_epi32((*(um128i*) &m_into).m128i, v.u32.high, (m_rank)*2 + 1); \
 	}
+
+	#undef LOAD64
+	#define LOAD64(m_val, m_into) \
+		m_into = _mm_cvtsi32_si128(m_val); \
+		INSR32((m_val) >> 32, m_into, 1);
 #endif
 
 IGNORE_UNINITIALIZED_WARNING_STOP