summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJindrich Makovicka <makovick@gmail.com>2018-09-16 18:26:04 +0200
committerMichael Lutz <michi@icosahedron.de>2018-10-26 20:22:38 +0200
commit25ab9c1997f770f4a8a66bb3ad4b82ba87e3a977 (patch)
treeba3950e5d5d9994ff3763ec65d121254bb5b37cc
parent47ff673664a14e250bf01af6f29c65abb1e0975e (diff)
downloadopenttd-25ab9c1997f770f4a8a66bb3ad4b82ba87e3a977.tar.xz
Codechange: Improve (un)zoom performance
When zooming out with a high res display, there can be about 150k sprites to be sorted before displaying. With the O(n^2) complexity of the sprite sorter, this can take several seconds. This patch works around this by sorting the sprites by the xmin coordinate first using QSort, which later allows an early bailout out of the inner loop. This is enough to cut down the full unzoom time on a 4k display to a fraction of second.
-rw-r--r--src/viewport.cpp20
-rw-r--r--src/viewport_sprite_sorter_sse4.cpp25
2 files changed, 39 insertions, 6 deletions
diff --git a/src/viewport.cpp b/src/viewport.cpp
index cb0b36f40..6e7dfc495 100644
--- a/src/viewport.cpp
+++ b/src/viewport.cpp
@@ -88,6 +88,7 @@
#include "command_func.h"
#include "network/network_func.h"
#include "framerate_type.h"
+#include "core/sort_func.hpp"
#include <map>
@@ -1379,11 +1380,22 @@ static bool ViewportSortParentSpritesChecker()
return true;
}
+static int CDECL CompareParentSprites(ParentSpriteToDraw * const *psd, ParentSpriteToDraw * const *psd2)
+{
+ const ParentSpriteToDraw *ps = *psd;
+ const ParentSpriteToDraw *ps2 = *psd2;
+ return ps->xmin - ps2->xmin;
+}
+
/** Sort parent sprites pointer array */
static void ViewportSortParentSprites(ParentSpriteToSortVector *psdv)
{
ParentSpriteToDraw **psdvend = psdv->End();
ParentSpriteToDraw **psd = psdv->Begin();
+
+ /* pre-sort by xmin in ascending order */
+ QSortT(psd, psdvend - psd, CompareParentSprites);
+
while (psd != psdvend) {
ParentSpriteToDraw *ps = *psd;
@@ -1420,9 +1432,11 @@ static void ViewportSortParentSprites(ParentSpriteToSortVector *psdv)
* I.e. every single order of X, Y, Z says ps2 is behind ps or they overlap.
* That is: If one partial order says ps behind ps2, do not change the order.
*/
- if (ps->xmax < ps2->xmin ||
- ps->ymax < ps2->ymin ||
- ps->zmax < ps2->zmin) {
+ if (ps->xmax < ps2->xmin) {
+ /* all following sprites have xmin >= ps2->xmin */
+ break;
+ }
+ if (ps->ymax < ps2->ymin || ps->zmax < ps2->zmin) {
continue;
}
}
diff --git a/src/viewport_sprite_sorter_sse4.cpp b/src/viewport_sprite_sorter_sse4.cpp
index 05a7f8aa1..e685fff57 100644
--- a/src/viewport_sprite_sorter_sse4.cpp
+++ b/src/viewport_sprite_sorter_sse4.cpp
@@ -15,6 +15,7 @@
#include "cpu.h"
#include "smmintrin.h"
#include "viewport_sprite_sorter.h"
+#include "core/sort_func.hpp"
#include "safeguards.h"
@@ -25,12 +26,24 @@
#define LOAD_128 _mm_loadu_si128
#endif
+static int CDECL CompareParentSprites(ParentSpriteToDraw * const *psd, ParentSpriteToDraw * const *psd2)
+{
+ const ParentSpriteToDraw *ps = *psd;
+ const ParentSpriteToDraw *ps2 = *psd2;
+ return ps->xmin - ps2->xmin;
+}
+
/** Sort parent sprites pointer array using SSE4.1 optimizations. */
void ViewportSortParentSpritesSSE41(ParentSpriteToSortVector *psdv)
{
- const __m128i mask_ptest = _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0);
+ const __m128i mask_ptest = _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0);
+ const __m128i mask_ptest2 = _mm_setr_epi8(-1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
ParentSpriteToDraw ** const psdvend = psdv->End();
ParentSpriteToDraw **psd = psdv->Begin();
+
+ /* pre-sort by xmin in ascending order */
+ QSortT(psd, psdvend - psd, CompareParentSprites);
+
while (psd != psdvend) {
ParentSpriteToDraw * const ps = *psd;
@@ -64,8 +77,14 @@ void ViewportSortParentSpritesSSE41(ParentSpriteToSortVector *psdv)
__m128i ps1_max = LOAD_128((__m128i*) &ps->xmax);
__m128i ps2_min = LOAD_128((__m128i*) &ps2->xmin);
__m128i rslt1 = _mm_cmplt_epi32(ps1_max, ps2_min);
- if (!_mm_testz_si128(mask_ptest, rslt1))
- continue;
+ if (!_mm_testz_si128(mask_ptest, rslt1)) {
+ if (!_mm_testz_si128(mask_ptest2, rslt1) /* ps->xmax < ps2->xmin */) {
+ /* all following sprites have xmin >= ps2->xmin */
+ break;
+ } else {
+ continue;
+ }
+ }
__m128i ps1_min = LOAD_128((__m128i*) &ps->xmin);
__m128i ps2_max = LOAD_128((__m128i*) &ps2->xmax);