diff options
author | rubidium <rubidium@openttd.org> | 2014-01-02 16:48:16 +0000 |
---|---|---|
committer | rubidium <rubidium@openttd.org> | 2014-01-02 16:48:16 +0000 |
commit | 3c94485ba0dcf8bb26f94f3a8e74369cd5619c01 (patch) | |
tree | 1c45ff84d185eac6392ea561cf7eb7ceb53b709d /src | |
parent | c98a94da447a34f33894f3d5a7ec7cbe869a726a (diff) | |
download | openttd-3c94485ba0dcf8bb26f94f3a8e74369cd5619c01.tar.xz |
(svn r26205) -Feature: SSE 4.1 sprite sorter, improving the sorting performance significantly (MJP)
For example with GCC 4.8, x86_64 Linux, Intel i5-3337U this patch improves the performance of Pile, Treham and Hamac test save games by about 10% in over-all run time at fast forward at 1920x1080 when zoomed out and when trees are not disabled.
Diffstat (limited to 'src')
-rw-r--r-- | src/openttd.cpp | 5 | ||||
-rw-r--r-- | src/viewport.cpp | 60 | ||||
-rw-r--r-- | src/viewport_sprite_sorter.h | 58 | ||||
-rw-r--r-- | src/viewport_sprite_sorter_sse4.cpp | 109 |
4 files changed, 205 insertions, 27 deletions
diff --git a/src/openttd.cpp b/src/openttd.cpp index 293462651..2c76d9ba6 100644 --- a/src/openttd.cpp +++ b/src/openttd.cpp @@ -62,13 +62,12 @@ #include "town.h" #include "subsidy_func.h" #include "gfx_layout.h" - +#include "viewport_sprite_sorter.h" #include "linkgraph/linkgraphschedule.h" #include <stdarg.h> - void CallLandscapeTick(); void IncreaseDate(); void DoPaletteAnimations(); @@ -775,6 +774,8 @@ int openttd_main(int argc, char *argv[]) } free(videodriver); + InitializeSpriteSorter(); + /* Initialize the zoom level of the screen to normal */ _screen.zoom = ZOOM_LVL_NORMAL; diff --git a/src/viewport.cpp b/src/viewport.cpp index d2556bd8c..e80282d02 100644 --- a/src/viewport.cpp +++ b/src/viewport.cpp @@ -46,6 +46,7 @@ #include "tilehighlight_func.h" #include "window_gui.h" #include "linkgraph/linkgraph_gui.h" +#include "viewport_sprite_sorter.h" #include "table/strings.h" #include "table/palettes.h" @@ -78,29 +79,6 @@ struct ChildScreenSpriteToDraw { int next; ///< next child to draw (-1 at the end) }; -/** Parent sprite that should be drawn */ -struct ParentSpriteToDraw { - SpriteID image; ///< sprite to draw - PaletteID pal; ///< palette to use - const SubSprite *sub; ///< only draw a rectangular part of the sprite - - int32 x; ///< screen X coordinate of sprite - int32 y; ///< screen Y coordinate of sprite - - int32 left; ///< minimal screen X coordinate of sprite (= x + sprite->x_offs), reference point for child sprites - int32 top; ///< minimal screen Y coordinate of sprite (= y + sprite->y_offs), reference point for child sprites - - int32 xmin; ///< minimal world X coordinate of bounding box - int32 xmax; ///< maximal world X coordinate of bounding box - int32 ymin; ///< minimal world Y coordinate of bounding box - int32 ymax; ///< maximal world Y coordinate of bounding box - int zmin; ///< minimal world Z coordinate of bounding box - int zmax; ///< maximal world Z coordinate of bounding box - - int first_child; ///< the first child to draw. - bool comparison_done; ///< Used during sprite sorting: true if sprite has been compared with all other sprites -}; - /** Enumeration of multi-part foundations */ enum FoundationPart { FOUNDATION_PART_NONE = 0xFF, ///< Neither foundation nor groundsprite drawn yet. @@ -122,7 +100,6 @@ enum SpriteCombineMode { typedef SmallVector<TileSpriteToDraw, 64> TileSpriteToDrawVector; typedef SmallVector<StringSpriteToDraw, 4> StringSpriteToDrawVector; typedef SmallVector<ParentSpriteToDraw, 64> ParentSpriteToDrawVector; -typedef SmallVector<ParentSpriteToDraw*, 64> ParentSpriteToSortVector; typedef SmallVector<ChildScreenSpriteToDraw, 16> ChildScreenSpriteToDrawVector; /** Data structure storing rendering information */ @@ -154,6 +131,7 @@ static TileInfo *_cur_ti; bool _draw_bounding_boxes = false; bool _draw_dirty_blocks = false; uint _dirty_block_colour = 0; +static VpSpriteSorter _vp_sprite_sorter = NULL; static Point MapXYZToViewport(const ViewPort *vp, int x, int y, int z) { @@ -1288,6 +1266,12 @@ static void ViewportDrawTileSprites(const TileSpriteToDrawVector *tstdv) } } +/** This fallback sprite checker always exists. */ +static bool ViewportSortParentSpritesChecker() +{ + return true; +} + /** Sort parent sprites pointer array */ static void ViewportSortParentSprites(ParentSpriteToSortVector *psdv) { @@ -1480,7 +1464,7 @@ void ViewportDoDraw(const ViewPort *vp, int left, int top, int right, int bottom *_vd.parent_sprites_to_sort.Append() = it; } - ViewportSortParentSprites(&_vd.parent_sprites_to_sort); + _vp_sprite_sorter(&_vd.parent_sprites_to_sort); ViewportDrawParentSprites(&_vd.parent_sprites_to_sort, &_vd.child_screen_sprites_to_draw); if (_draw_bounding_boxes) ViewportDrawBoundingBoxes(&_vd.parent_sprites_to_sort); @@ -2998,3 +2982,29 @@ Point GetViewportStationMiddle(const ViewPort *vp, const Station *st) p.y = UnScaleByZoom(p.y - vp->virtual_top, vp->zoom) + vp->top; return p; } + +/** Helper class for getting the best sprite sorter. */ +struct ViewportSSCSS { + VpSorterChecker fct_checker; ///< The check function. + VpSpriteSorter fct_sorter; ///< The sorting function. +}; + +/** List of sorters ordered from best to worst. */ +static ViewportSSCSS _vp_sprite_sorters[] = { +#ifdef WITH_SSE + { &ViewportSortParentSpritesSSE41Checker, &ViewportSortParentSpritesSSE41 }, +#endif + { &ViewportSortParentSpritesChecker, &ViewportSortParentSprites } +}; + +/** Choose the "best" sprite sorter and set _vp_sprite_sorter. */ +void InitializeSpriteSorter() +{ + for (uint i = 0; i < lengthof(_vp_sprite_sorters); i++) { + if (_vp_sprite_sorters[i].fct_checker()) { + _vp_sprite_sorter = _vp_sprite_sorters[i].fct_sorter; + break; + } + } + assert(_vp_sprite_sorter != NULL); +} diff --git a/src/viewport_sprite_sorter.h b/src/viewport_sprite_sorter.h new file mode 100644 index 000000000..19b903e15 --- /dev/null +++ b/src/viewport_sprite_sorter.h @@ -0,0 +1,58 @@ +/* $Id$ */ + +/* + * This file is part of OpenTTD. + * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2. + * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>. + */ + +/** @file viewport_sprite_sorter.h Types related to sprite sorting. */ + +#include "stdafx.h" +#include "core/smallvec_type.hpp" +#include "gfx_type.h" + +#ifndef VIEWPORT_SPRITE_SORTER_H +#define VIEWPORT_SPRITE_SORTER_H + +/** Parent sprite that should be drawn */ +struct ParentSpriteToDraw { + /* Block of 16B loadable in xmm register */ + int32 xmin; ///< minimal world X coordinate of bounding box + int32 ymin; ///< minimal world Y coordinate of bounding box + int32 zmin; ///< minimal world Z coordinate of bounding box + int32 x; ///< screen X coordinate of sprite + + /* Second block of 16B loadable in xmm register */ + int32 xmax; ///< maximal world X coordinate of bounding box + int32 ymax; ///< maximal world Y coordinate of bounding box + int32 zmax; ///< maximal world Z coordinate of bounding box + int32 y; ///< screen Y coordinate of sprite + + SpriteID image; ///< sprite to draw + PaletteID pal; ///< palette to use + const SubSprite *sub; ///< only draw a rectangular part of the sprite + + int32 left; ///< minimal screen X coordinate of sprite (= x + sprite->x_offs), reference point for child sprites + int32 top; ///< minimal screen Y coordinate of sprite (= y + sprite->y_offs), reference point for child sprites + + int first_child; ///< the first child to draw. + bool comparison_done; ///< Used during sprite sorting: true if sprite has been compared with all other sprites +}; + +typedef SmallVector<ParentSpriteToDraw*, 64> ParentSpriteToSortVector; + +/** Type for method for checking whether a viewport sprite sorter exists. */ +typedef bool (*VpSorterChecker)(); +/** Type for the actual viewport sprite sorter. */ +typedef void (*VpSpriteSorter)(ParentSpriteToSortVector *psd); + +#ifdef WITH_SSE +bool ViewportSortParentSpritesSSE41Checker(); +void ViewportSortParentSpritesSSE41(ParentSpriteToSortVector *psdv); +#endif + +void InitializeSpriteSorter(); + +#endif /* VIEWPORT_SPRITE_SORTER_H */ diff --git a/src/viewport_sprite_sorter_sse4.cpp b/src/viewport_sprite_sorter_sse4.cpp new file mode 100644 index 000000000..40bbca87d --- /dev/null +++ b/src/viewport_sprite_sorter_sse4.cpp @@ -0,0 +1,109 @@ +/* $Id$ */ + +/* + * This file is part of OpenTTD. + * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2. + * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>. + */ + +/** @file viewport_sprite_sorter_sse.cpp Sprite sorter that uses SSE4.1. */ + +#ifdef WITH_SSE + +#include "stdafx.h" +#include "cpu.h" +#include "smmintrin.h" +#include "viewport_sprite_sorter.h" + +#ifdef _SQ64 + assert_compile((sizeof(ParentSpriteToDraw) % 16) == 0); + #define LOAD_128 _mm_load_si128 +#else + #define LOAD_128 _mm_loadu_si128 +#endif + +/** Sort parent sprites pointer array using SSE4.1 optimizations. */ +void ViewportSortParentSpritesSSE41(ParentSpriteToSortVector *psdv) +{ + const __m128i mask_ptest = _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0); + ParentSpriteToDraw ** const psdvend = psdv->End(); + ParentSpriteToDraw **psd = psdv->Begin(); + while (psd != psdvend) { + ParentSpriteToDraw * const ps = *psd; + + if (ps->comparison_done) { + psd++; + continue; + } + + ps->comparison_done = true; + + for (ParentSpriteToDraw **psd2 = psd + 1; psd2 != psdvend; psd2++) { + ParentSpriteToDraw * const ps2 = *psd2; + + if (ps2->comparison_done) continue; + + /* + * Decide which comparator to use, based on whether the bounding boxes overlap + * + * Original code: + * if (ps->xmax >= ps2->xmin && ps->xmin <= ps2->xmax && // overlap in X? + * ps->ymax >= ps2->ymin && ps->ymin <= ps2->ymax && // overlap in Y? + * ps->zmax >= ps2->zmin && ps->zmin <= ps2->zmax) { // overlap in Z? + * + * Above conditions are equivalent to: + * 1/ !( (ps->xmax >= ps2->xmin) && (ps->ymax >= ps2->ymin) && (ps->zmax >= ps2->zmin) && (ps->xmin <= ps2->xmax) && (ps->ymin <= ps2->ymax) && (ps->zmin <= ps2->zmax) ) + * 2/ !( (ps->xmax >= ps2->xmin) && (ps->ymax >= ps2->ymin) && (ps->zmax >= ps2->zmin) && (ps2->xmax >= ps->xmin) && (ps2->ymax >= ps->ymin) && (ps2->zmax >= ps->zmin) ) + * 3/ !( ( (ps->xmax >= ps2->xmin) && (ps->ymax >= ps2->ymin) && (ps->zmax >= ps2->zmin) ) && ( (ps2->xmax >= ps->xmin) && (ps2->ymax >= ps->ymin) && (ps2->zmax >= ps->zmin) ) ) + * 4/ !( !( (ps->xmax < ps2->xmin) || (ps->ymax < ps2->ymin) || (ps->zmax < ps2->zmin) ) && !( (ps2->xmax < ps->xmin) || (ps2->ymax < ps->ymin) || (ps2->zmax < ps->zmin) ) ) + * 5/ PTEST <---------------------------------- rslt1 ----------------------------------> <------------------------------ rslt2 --------------------------------------> + */ + __m128i ps1_max = LOAD_128((__m128i*) &ps->xmax); + __m128i ps2_min = LOAD_128((__m128i*) &ps2->xmin); + __m128i rslt1 = _mm_cmplt_epi32(ps1_max, ps2_min); + if (!_mm_testz_si128(mask_ptest, rslt1)) + continue; + + __m128i ps1_min = LOAD_128((__m128i*) &ps->xmin); + __m128i ps2_max = LOAD_128((__m128i*) &ps2->xmax); + __m128i rslt2 = _mm_cmplt_epi32(ps2_max, ps1_min); + if (_mm_testz_si128(mask_ptest, rslt2)) { + /* Use X+Y+Z as the sorting order, so sprites closer to the bottom of + * the screen and with higher Z elevation, are drawn in front. + * Here X,Y,Z are the coordinates of the "center of mass" of the sprite, + * i.e. X=(left+right)/2, etc. + * However, since we only care about order, don't actually divide / 2 + */ + if (ps->xmin + ps->xmax + ps->ymin + ps->ymax + ps->zmin + ps->zmax <= + ps2->xmin + ps2->xmax + ps2->ymin + ps2->ymax + ps2->zmin + ps2->zmax) { + continue; + } + } + + /* Move ps2 in front of ps */ + ParentSpriteToDraw * const temp = ps2; + for (ParentSpriteToDraw **psd3 = psd2; psd3 > psd; psd3--) { + *psd3 = *(psd3 - 1); + } + *psd = temp; + } + } +} + +/** + * Check whether the current CPU supports SSE 4.1. + * @return True iff the CPU supports SSE 4.1. + */ +bool ViewportSortParentSpritesSSE41Checker() +{ + int cpu_info[4] = {-1}; + ottd_cpuid(cpu_info, 0); + unsigned int max_info_type = cpu_info[0]; + if (max_info_type < 1) return false; + + ottd_cpuid(cpu_info, 1); + return (cpu_info[2] & (1 << 19)) != 0; +} + +#endif /* WITH_SSE */ |