summary | refs | log | tree | commit | diff
path: root/src/viewport_sprite_sorter_sse4.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/viewport_sprite_sorter_sse4.cpp')
-rw-r--r-- src/viewport_sprite_sorter_sse4.cpp 25
1 files changed, 22 insertions, 3 deletions
diff --git a/src/viewport_sprite_sorter_sse4.cpp b/src/viewport_sprite_sorter_sse4.cpp
index 05a7f8aa1..e685fff57 100644
--- a/src/viewport_sprite_sorter_sse4.cpp
+++ b/src/viewport_sprite_sorter_sse4.cpp
@@ -15,6 +15,7 @@
#include "cpu.h"
#include "smmintrin.h"
#include "viewport_sprite_sorter.h"
+#include "core/sort_func.hpp"
#include "safeguards.h"
@@ -25,12 +26,24 @@
#define LOAD_128 _mm_loadu_si128
#endif
+/**
+ * Comparator ordering parent sprites by ascending xmin; used to pre-sort the sprite array via QSortT.
+ * @param psd Pointer to the first sprite pointer to compare.
+ * @param psd2 Pointer to the second sprite pointer to compare.
+ * @return Negative, zero or positive when the first sprite's xmin is smaller than, equal to or larger than the second's.
+ */
+static int CDECL CompareParentSprites(ParentSpriteToDraw * const *psd, ParentSpriteToDraw * const *psd2)
+{
+ const ParentSpriteToDraw *ps = *psd;
+ const ParentSpriteToDraw *ps2 = *psd2;
+ /* Three-way compare instead of subtracting: a signed difference can overflow when the operands are far apart, yielding a wrong sign. */
+ return (ps->xmin > ps2->xmin) - (ps->xmin < ps2->xmin);
+}
+
/** Sort parent sprites pointer array using SSE4.1 optimizations. */
void ViewportSortParentSpritesSSE41(ParentSpriteToSortVector *psdv)
{
- const __m128i mask_ptest = _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0);
+ const __m128i mask_ptest = _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0);
+ const __m128i mask_ptest2 = _mm_setr_epi8(-1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
ParentSpriteToDraw ** const psdvend = psdv->End();
ParentSpriteToDraw **psd = psdv->Begin();
+
+ /* pre-sort by xmin in ascending order */
+ QSortT(psd, psdvend - psd, CompareParentSprites);
+
while (psd != psdvend) {
ParentSpriteToDraw * const ps = *psd;
@@ -64,8 +77,14 @@ void ViewportSortParentSpritesSSE41(ParentSpriteToSortVector *psdv)
__m128i ps1_max = LOAD_128((__m128i*) &ps->xmax);
__m128i ps2_min = LOAD_128((__m128i*) &ps2->xmin);
__m128i rslt1 = _mm_cmplt_epi32(ps1_max, ps2_min);
- if (!_mm_testz_si128(mask_ptest, rslt1))
- continue;
+ if (!_mm_testz_si128(mask_ptest, rslt1)) {
+ if (!_mm_testz_si128(mask_ptest2, rslt1) /* ps->xmax < ps2->xmin */) {
+ /* all following sprites have xmin >= ps2->xmin */
+ break;
+ } else {
+ continue;
+ }
+ }
__m128i ps1_min = LOAD_128((__m128i*) &ps->xmin);
__m128i ps2_max = LOAD_128((__m128i*) &ps2->xmax);