diff options
Diffstat (limited to 'src/viewport_sprite_sorter_sse4.cpp')
-rw-r--r-- | src/viewport_sprite_sorter_sse4.cpp | 25 |
1 files changed, 22 insertions, 3 deletions
diff --git a/src/viewport_sprite_sorter_sse4.cpp b/src/viewport_sprite_sorter_sse4.cpp
index 05a7f8aa1..e685fff57 100644
--- a/src/viewport_sprite_sorter_sse4.cpp
+++ b/src/viewport_sprite_sorter_sse4.cpp
@@ -15,6 +15,7 @@
 #include "cpu.h"
 #include "smmintrin.h"
 #include "viewport_sprite_sorter.h"
+#include "core/sort_func.hpp"
 
 #include "safeguards.h"
 
@@ -25,12 +26,24 @@
 #define LOAD_128 _mm_loadu_si128
 #endif
 
+static int CDECL CompareParentSprites(ParentSpriteToDraw * const *psd, ParentSpriteToDraw * const *psd2)
+{
+	const ParentSpriteToDraw *ps = *psd;
+	const ParentSpriteToDraw *ps2 = *psd2;
+	return ps->xmin - ps2->xmin;
+}
+
 /** Sort parent sprites pointer array using SSE4.1 optimizations. */
 void ViewportSortParentSpritesSSE41(ParentSpriteToSortVector *psdv)
 {
-	const __m128i mask_ptest = _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0);
+	const __m128i mask_ptest  = _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0);
+	const __m128i mask_ptest2 = _mm_setr_epi8(-1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
 	ParentSpriteToDraw ** const psdvend = psdv->End();
 	ParentSpriteToDraw **psd = psdv->Begin();
+
+	/* pre-sort by xmin in ascending order */
+	QSortT(psd, psdvend - psd, CompareParentSprites);
+
 	while (psd != psdvend) {
 		ParentSpriteToDraw * const ps = *psd;
 
@@ -64,8 +77,14 @@ void ViewportSortParentSpritesSSE41(ParentSpriteToSortVector *psdv)
 				__m128i ps1_max = LOAD_128((__m128i*) &ps->xmax);
 				__m128i ps2_min = LOAD_128((__m128i*) &ps2->xmin);
 				__m128i rslt1 = _mm_cmplt_epi32(ps1_max, ps2_min);
-				if (!_mm_testz_si128(mask_ptest, rslt1))
-					continue;
+				if (!_mm_testz_si128(mask_ptest, rslt1)) {
+					if (!_mm_testz_si128(mask_ptest2, rslt1) /* ps->xmax < ps2->xmin */) {
+						/* all following sprites have xmin >= ps2->xmin */
+						break;
+					} else {
+						continue;
+					}
+				}
 
 				__m128i ps1_min = LOAD_128((__m128i*) &ps->xmin);
 				__m128i ps2_max = LOAD_128((__m128i*) &ps2->xmax);