summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorrubidium <rubidium@openttd.org>2014-01-02 16:48:16 +0000
committerrubidium <rubidium@openttd.org>2014-01-02 16:48:16 +0000
commit3c94485ba0dcf8bb26f94f3a8e74369cd5619c01 (patch)
tree1c45ff84d185eac6392ea561cf7eb7ceb53b709d
parentc98a94da447a34f33894f3d5a7ec7cbe869a726a (diff)
downloadopenttd-3c94485ba0dcf8bb26f94f3a8e74369cd5619c01.tar.xz
(svn r26205) -Feature: SSE 4.1 sprite sorter, improving the sorting performance significantly (MJP)
For example with GCC 4.8, x86_64 Linux, Intel i5-3337U this patch improves the performance of Pile, Treham and Hamac test save games by about 10% in over-all run time at fast forward at 1920x1080 when zoomed out and when trees are not disabled.
-rw-r--r--projects/openttd_vs100.vcxproj2
-rw-r--r--projects/openttd_vs100.vcxproj.filters6
-rw-r--r--projects/openttd_vs80.vcproj8
-rw-r--r--projects/openttd_vs90.vcproj8
-rw-r--r--source.list4
-rw-r--r--src/openttd.cpp5
-rw-r--r--src/viewport.cpp60
-rw-r--r--src/viewport_sprite_sorter.h58
-rw-r--r--src/viewport_sprite_sorter_sse4.cpp109
9 files changed, 233 insertions, 27 deletions
diff --git a/projects/openttd_vs100.vcxproj b/projects/openttd_vs100.vcxproj
index e5c81d539..e55e7dd34 100644
--- a/projects/openttd_vs100.vcxproj
+++ b/projects/openttd_vs100.vcxproj
@@ -385,6 +385,7 @@
<ClCompile Include="..\src\vehicle.cpp" />
<ClCompile Include="..\src\vehiclelist.cpp" />
<ClCompile Include="..\src\viewport.cpp" />
+ <ClCompile Include="..\src\viewport_sprite_sorter_sse4.cpp" />
<ClCompile Include="..\src\waypoint.cpp" />
<ClCompile Include="..\src\widget.cpp" />
<ClCompile Include="..\src\window.cpp" />
@@ -641,6 +642,7 @@
<ClInclude Include="..\src\vehicle_type.h" />
<ClInclude Include="..\src\vehiclelist.h" />
<ClInclude Include="..\src\viewport_func.h" />
+ <ClInclude Include="..\src\viewport_sprite_sorter.h" />
<ClInclude Include="..\src\viewport_type.h" />
<ClInclude Include="..\src\water.h" />
<ClInclude Include="..\src\waypoint_base.h" />
diff --git a/projects/openttd_vs100.vcxproj.filters b/projects/openttd_vs100.vcxproj.filters
index f0b5e1898..60633caf0 100644
--- a/projects/openttd_vs100.vcxproj.filters
+++ b/projects/openttd_vs100.vcxproj.filters
@@ -384,6 +384,9 @@
<ClCompile Include="..\src\viewport.cpp">
<Filter>Source Files</Filter>
</ClCompile>
+ <ClCompile Include="..\src\viewport_sprite_sorter_sse4.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
<ClCompile Include="..\src\waypoint.cpp">
<Filter>Source Files</Filter>
</ClCompile>
@@ -1152,6 +1155,9 @@
<ClInclude Include="..\src\viewport_func.h">
<Filter>Header Files</Filter>
</ClInclude>
+ <ClInclude Include="..\src\viewport_sprite_sorter.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
<ClInclude Include="..\src\viewport_type.h">
<Filter>Header Files</Filter>
</ClInclude>
diff --git a/projects/openttd_vs80.vcproj b/projects/openttd_vs80.vcproj
index a37b5d2e8..fe0847165 100644
--- a/projects/openttd_vs80.vcproj
+++ b/projects/openttd_vs80.vcproj
@@ -811,6 +811,10 @@
>
</File>
<File
+ RelativePath=".\..\src\viewport_sprite_sorter_sse4.cpp"
+ >
+ </File>
+ <File
RelativePath=".\..\src\waypoint.cpp"
>
</File>
@@ -1839,6 +1843,10 @@
>
</File>
<File
+ RelativePath=".\..\src\viewport_sprite_sorter.h"
+ >
+ </File>
+ <File
RelativePath=".\..\src\viewport_type.h"
>
</File>
diff --git a/projects/openttd_vs90.vcproj b/projects/openttd_vs90.vcproj
index 37c413db0..65ae2e1a8 100644
--- a/projects/openttd_vs90.vcproj
+++ b/projects/openttd_vs90.vcproj
@@ -808,6 +808,10 @@
>
</File>
<File
+ RelativePath=".\..\src\viewport_sprite_sorter_sse4.cpp"
+ >
+ </File>
+ <File
RelativePath=".\..\src\waypoint.cpp"
>
</File>
@@ -1836,6 +1840,10 @@
>
</File>
<File
+ RelativePath=".\..\src\viewport_sprite_sorter.h"
+ >
+ </File>
+ <File
RelativePath=".\..\src\viewport_type.h"
>
</File>
diff --git a/source.list b/source.list
index d327f0dd9..4fde7fcae 100644
--- a/source.list
+++ b/source.list
@@ -111,6 +111,9 @@ townname.cpp
vehicle.cpp
vehiclelist.cpp
viewport.cpp
+#if SSE
+viewport_sprite_sorter_sse4.cpp
+#end
waypoint.cpp
widget.cpp
window.cpp
@@ -374,6 +377,7 @@ vehicle_gui_base.h
vehicle_type.h
vehiclelist.h
viewport_func.h
+viewport_sprite_sorter.h
viewport_type.h
water.h
waypoint_base.h
diff --git a/src/openttd.cpp b/src/openttd.cpp
index 293462651..2c76d9ba6 100644
--- a/src/openttd.cpp
+++ b/src/openttd.cpp
@@ -62,13 +62,12 @@
#include "town.h"
#include "subsidy_func.h"
#include "gfx_layout.h"
-
+#include "viewport_sprite_sorter.h"
#include "linkgraph/linkgraphschedule.h"
#include <stdarg.h>
-
void CallLandscapeTick();
void IncreaseDate();
void DoPaletteAnimations();
@@ -775,6 +774,8 @@ int openttd_main(int argc, char *argv[])
}
free(videodriver);
+ InitializeSpriteSorter();
+
/* Initialize the zoom level of the screen to normal */
_screen.zoom = ZOOM_LVL_NORMAL;
diff --git a/src/viewport.cpp b/src/viewport.cpp
index d2556bd8c..e80282d02 100644
--- a/src/viewport.cpp
+++ b/src/viewport.cpp
@@ -46,6 +46,7 @@
#include "tilehighlight_func.h"
#include "window_gui.h"
#include "linkgraph/linkgraph_gui.h"
+#include "viewport_sprite_sorter.h"
#include "table/strings.h"
#include "table/palettes.h"
@@ -78,29 +79,6 @@ struct ChildScreenSpriteToDraw {
int next; ///< next child to draw (-1 at the end)
};
-/** Parent sprite that should be drawn */
-struct ParentSpriteToDraw {
- SpriteID image; ///< sprite to draw
- PaletteID pal; ///< palette to use
- const SubSprite *sub; ///< only draw a rectangular part of the sprite
-
- int32 x; ///< screen X coordinate of sprite
- int32 y; ///< screen Y coordinate of sprite
-
- int32 left; ///< minimal screen X coordinate of sprite (= x + sprite->x_offs), reference point for child sprites
- int32 top; ///< minimal screen Y coordinate of sprite (= y + sprite->y_offs), reference point for child sprites
-
- int32 xmin; ///< minimal world X coordinate of bounding box
- int32 xmax; ///< maximal world X coordinate of bounding box
- int32 ymin; ///< minimal world Y coordinate of bounding box
- int32 ymax; ///< maximal world Y coordinate of bounding box
- int zmin; ///< minimal world Z coordinate of bounding box
- int zmax; ///< maximal world Z coordinate of bounding box
-
- int first_child; ///< the first child to draw.
- bool comparison_done; ///< Used during sprite sorting: true if sprite has been compared with all other sprites
-};
-
/** Enumeration of multi-part foundations */
enum FoundationPart {
FOUNDATION_PART_NONE = 0xFF, ///< Neither foundation nor groundsprite drawn yet.
@@ -122,7 +100,6 @@ enum SpriteCombineMode {
typedef SmallVector<TileSpriteToDraw, 64> TileSpriteToDrawVector;
typedef SmallVector<StringSpriteToDraw, 4> StringSpriteToDrawVector;
typedef SmallVector<ParentSpriteToDraw, 64> ParentSpriteToDrawVector;
-typedef SmallVector<ParentSpriteToDraw*, 64> ParentSpriteToSortVector;
typedef SmallVector<ChildScreenSpriteToDraw, 16> ChildScreenSpriteToDrawVector;
/** Data structure storing rendering information */
@@ -154,6 +131,7 @@ static TileInfo *_cur_ti;
bool _draw_bounding_boxes = false;
bool _draw_dirty_blocks = false;
uint _dirty_block_colour = 0;
+static VpSpriteSorter _vp_sprite_sorter = NULL;
static Point MapXYZToViewport(const ViewPort *vp, int x, int y, int z)
{
@@ -1288,6 +1266,12 @@ static void ViewportDrawTileSprites(const TileSpriteToDrawVector *tstdv)
}
}
+/** This fallback sprite checker always exists. */
+static bool ViewportSortParentSpritesChecker()
+{
+ return true;
+}
+
/** Sort parent sprites pointer array */
static void ViewportSortParentSprites(ParentSpriteToSortVector *psdv)
{
@@ -1480,7 +1464,7 @@ void ViewportDoDraw(const ViewPort *vp, int left, int top, int right, int bottom
*_vd.parent_sprites_to_sort.Append() = it;
}
- ViewportSortParentSprites(&_vd.parent_sprites_to_sort);
+ _vp_sprite_sorter(&_vd.parent_sprites_to_sort);
ViewportDrawParentSprites(&_vd.parent_sprites_to_sort, &_vd.child_screen_sprites_to_draw);
if (_draw_bounding_boxes) ViewportDrawBoundingBoxes(&_vd.parent_sprites_to_sort);
@@ -2998,3 +2982,29 @@ Point GetViewportStationMiddle(const ViewPort *vp, const Station *st)
p.y = UnScaleByZoom(p.y - vp->virtual_top, vp->zoom) + vp->top;
return p;
}
+
+/** Helper class for getting the best sprite sorter. */
+struct ViewportSSCSS {
+ VpSorterChecker fct_checker; ///< The check function.
+ VpSpriteSorter fct_sorter; ///< The sorting function.
+};
+
+/** List of sorters ordered from best to worst. */
+static ViewportSSCSS _vp_sprite_sorters[] = {
+#ifdef WITH_SSE
+ { &ViewportSortParentSpritesSSE41Checker, &ViewportSortParentSpritesSSE41 },
+#endif
+ { &ViewportSortParentSpritesChecker, &ViewportSortParentSprites }
+};
+
+/** Choose the "best" sprite sorter and set _vp_sprite_sorter. */
+void InitializeSpriteSorter()
+{
+ for (uint i = 0; i < lengthof(_vp_sprite_sorters); i++) {
+ if (_vp_sprite_sorters[i].fct_checker()) {
+ _vp_sprite_sorter = _vp_sprite_sorters[i].fct_sorter;
+ break;
+ }
+ }
+ assert(_vp_sprite_sorter != NULL);
+}
diff --git a/src/viewport_sprite_sorter.h b/src/viewport_sprite_sorter.h
new file mode 100644
index 000000000..19b903e15
--- /dev/null
+++ b/src/viewport_sprite_sorter.h
@@ -0,0 +1,58 @@
+/* $Id$ */
+
+/*
+ * This file is part of OpenTTD.
+ * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
+ * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/** @file viewport_sprite_sorter.h Types related to sprite sorting. */
+
+#include "stdafx.h"
+#include "core/smallvec_type.hpp"
+#include "gfx_type.h"
+
+#ifndef VIEWPORT_SPRITE_SORTER_H
+#define VIEWPORT_SPRITE_SORTER_H
+
+/** Parent sprite that should be drawn */
+struct ParentSpriteToDraw {
+ /* Block of 16B loadable in xmm register */
+ int32 xmin; ///< minimal world X coordinate of bounding box
+ int32 ymin; ///< minimal world Y coordinate of bounding box
+ int32 zmin; ///< minimal world Z coordinate of bounding box
+ int32 x; ///< screen X coordinate of sprite
+
+ /* Second block of 16B loadable in xmm register */
+ int32 xmax; ///< maximal world X coordinate of bounding box
+ int32 ymax; ///< maximal world Y coordinate of bounding box
+ int32 zmax; ///< maximal world Z coordinate of bounding box
+ int32 y; ///< screen Y coordinate of sprite
+
+ SpriteID image; ///< sprite to draw
+ PaletteID pal; ///< palette to use
+ const SubSprite *sub; ///< only draw a rectangular part of the sprite
+
+ int32 left; ///< minimal screen X coordinate of sprite (= x + sprite->x_offs), reference point for child sprites
+ int32 top; ///< minimal screen Y coordinate of sprite (= y + sprite->y_offs), reference point for child sprites
+
+ int first_child; ///< the first child to draw.
+ bool comparison_done; ///< Used during sprite sorting: true if sprite has been compared with all other sprites
+};
+
+typedef SmallVector<ParentSpriteToDraw*, 64> ParentSpriteToSortVector;
+
+/** Type for method for checking whether a viewport sprite sorter exists. */
+typedef bool (*VpSorterChecker)();
+/** Type for the actual viewport sprite sorter. */
+typedef void (*VpSpriteSorter)(ParentSpriteToSortVector *psd);
+
+#ifdef WITH_SSE
+bool ViewportSortParentSpritesSSE41Checker();
+void ViewportSortParentSpritesSSE41(ParentSpriteToSortVector *psdv);
+#endif
+
+void InitializeSpriteSorter();
+
+#endif /* VIEWPORT_SPRITE_SORTER_H */
diff --git a/src/viewport_sprite_sorter_sse4.cpp b/src/viewport_sprite_sorter_sse4.cpp
new file mode 100644
index 000000000..40bbca87d
--- /dev/null
+++ b/src/viewport_sprite_sorter_sse4.cpp
@@ -0,0 +1,109 @@
+/* $Id$ */
+
+/*
+ * This file is part of OpenTTD.
+ * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
+ * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/** @file viewport_sprite_sorter_sse.cpp Sprite sorter that uses SSE4.1. */
+
+#ifdef WITH_SSE
+
+#include "stdafx.h"
+#include "cpu.h"
+#include "smmintrin.h"
+#include "viewport_sprite_sorter.h"
+
+#ifdef _SQ64
+ assert_compile((sizeof(ParentSpriteToDraw) % 16) == 0);
+ #define LOAD_128 _mm_load_si128
+#else
+ #define LOAD_128 _mm_loadu_si128
+#endif
+
+/** Sort parent sprites pointer array using SSE4.1 optimizations. */
+void ViewportSortParentSpritesSSE41(ParentSpriteToSortVector *psdv)
+{
+ const __m128i mask_ptest = _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0);
+ ParentSpriteToDraw ** const psdvend = psdv->End();
+ ParentSpriteToDraw **psd = psdv->Begin();
+ while (psd != psdvend) {
+ ParentSpriteToDraw * const ps = *psd;
+
+ if (ps->comparison_done) {
+ psd++;
+ continue;
+ }
+
+ ps->comparison_done = true;
+
+ for (ParentSpriteToDraw **psd2 = psd + 1; psd2 != psdvend; psd2++) {
+ ParentSpriteToDraw * const ps2 = *psd2;
+
+ if (ps2->comparison_done) continue;
+
+ /*
+ * Decide which comparator to use, based on whether the bounding boxes overlap
+ *
+ * Original code:
+ * if (ps->xmax >= ps2->xmin && ps->xmin <= ps2->xmax && // overlap in X?
+ * ps->ymax >= ps2->ymin && ps->ymin <= ps2->ymax && // overlap in Y?
+ * ps->zmax >= ps2->zmin && ps->zmin <= ps2->zmax) { // overlap in Z?
+ *
+ * Above conditions are equivalent to:
+ * 1/ !( (ps->xmax >= ps2->xmin) && (ps->ymax >= ps2->ymin) && (ps->zmax >= ps2->zmin) && (ps->xmin <= ps2->xmax) && (ps->ymin <= ps2->ymax) && (ps->zmin <= ps2->zmax) )
+ * 2/ !( (ps->xmax >= ps2->xmin) && (ps->ymax >= ps2->ymin) && (ps->zmax >= ps2->zmin) && (ps2->xmax >= ps->xmin) && (ps2->ymax >= ps->ymin) && (ps2->zmax >= ps->zmin) )
+ * 3/ !( ( (ps->xmax >= ps2->xmin) && (ps->ymax >= ps2->ymin) && (ps->zmax >= ps2->zmin) ) && ( (ps2->xmax >= ps->xmin) && (ps2->ymax >= ps->ymin) && (ps2->zmax >= ps->zmin) ) )
+ * 4/ !( !( (ps->xmax < ps2->xmin) || (ps->ymax < ps2->ymin) || (ps->zmax < ps2->zmin) ) && !( (ps2->xmax < ps->xmin) || (ps2->ymax < ps->ymin) || (ps2->zmax < ps->zmin) ) )
+ * 5/ PTEST <---------------------------------- rslt1 ----------------------------------> <------------------------------ rslt2 -------------------------------------->
+ */
+ __m128i ps1_max = LOAD_128((__m128i*) &ps->xmax);
+ __m128i ps2_min = LOAD_128((__m128i*) &ps2->xmin);
+ __m128i rslt1 = _mm_cmplt_epi32(ps1_max, ps2_min);
+ if (!_mm_testz_si128(mask_ptest, rslt1))
+ continue;
+
+ __m128i ps1_min = LOAD_128((__m128i*) &ps->xmin);
+ __m128i ps2_max = LOAD_128((__m128i*) &ps2->xmax);
+ __m128i rslt2 = _mm_cmplt_epi32(ps2_max, ps1_min);
+ if (_mm_testz_si128(mask_ptest, rslt2)) {
+ /* Use X+Y+Z as the sorting order, so sprites closer to the bottom of
+ * the screen and with higher Z elevation, are drawn in front.
+ * Here X,Y,Z are the coordinates of the "center of mass" of the sprite,
+ * i.e. X=(left+right)/2, etc.
+ * However, since we only care about order, don't actually divide / 2
+ */
+ if (ps->xmin + ps->xmax + ps->ymin + ps->ymax + ps->zmin + ps->zmax <=
+ ps2->xmin + ps2->xmax + ps2->ymin + ps2->ymax + ps2->zmin + ps2->zmax) {
+ continue;
+ }
+ }
+
+ /* Move ps2 in front of ps */
+ ParentSpriteToDraw * const temp = ps2;
+ for (ParentSpriteToDraw **psd3 = psd2; psd3 > psd; psd3--) {
+ *psd3 = *(psd3 - 1);
+ }
+ *psd = temp;
+ }
+ }
+}
+
+/**
+ * Check whether the current CPU supports SSE 4.1.
+ * @return True iff the CPU supports SSE 4.1.
+ */
+bool ViewportSortParentSpritesSSE41Checker()
+{
+ int cpu_info[4] = {-1};
+ ottd_cpuid(cpu_info, 0);
+ unsigned int max_info_type = cpu_info[0];
+ if (max_info_type < 1) return false;
+
+ ottd_cpuid(cpu_info, 1);
+ return (cpu_info[2] & (1 << 19)) != 0;
+}
+
+#endif /* WITH_SSE */