1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
|
/*
* This file is part of OpenTTD.
* OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
* OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
*/
/** @file viewport_sprite_sorter_sse4.cpp Sprite sorter that uses SSE4.1. */
#ifdef WITH_SSE
#include "stdafx.h"
#include "cpu.h"
#include "smmintrin.h"
#include "viewport_sprite_sorter.h"
#include <forward_list>
#include <map>
#include <stack>
#include "safeguards.h"
#ifdef _SQ64
static_assert((sizeof(ParentSpriteToDraw) % 16) == 0);
# define LOAD_128 _mm_load_si128
#else
# define LOAD_128 _mm_loadu_si128
#endif
void ViewportSortParentSpritesSSE41(ParentSpriteToSortVector *psdv)
{
if (psdv->size() < 2) return;
const __m128i mask_ptest = _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0);
/* We rely on sprites being, for the most part, already ordered.
* So we don't need to move many of them and can keep track of their
* order efficiently by using stack. We always move sprites to the front
* of the current position, i.e. to the top of the stack.
* Also use special constants to indicate sorting state without
* adding extra fields to ParentSpriteToDraw structure.
*/
const uint32 ORDER_COMPARED = UINT32_MAX; // Sprite was compared but we still need to compare the ones preceding it
const uint32 ORDER_RETURNED = UINT32_MAX - 1; // Mark sorted sprite in case there are other occurrences of it in the stack
std::stack<ParentSpriteToDraw *> sprite_order;
uint32 next_order = 0;
std::forward_list<std::pair<int64, ParentSpriteToDraw *>> sprite_list; // We store sprites in a list sorted by xmin+ymin
/* Initialize sprite list and order. */
for (auto p = psdv->rbegin(); p != psdv->rend(); p++) {
sprite_list.push_front(std::make_pair((*p)->xmin + (*p)->ymin, *p));
sprite_order.push(*p);
(*p)->order = next_order++;
}
sprite_list.sort();
std::vector<ParentSpriteToDraw *> preceding; // Temporarily stores sprites that precede current and their position in the list
auto preceding_prev = sprite_list.begin(); // Store iterator in case we need to delete a single preciding sprite
auto out = psdv->begin(); // Iterator to output sorted sprites
while (!sprite_order.empty()) {
auto s = sprite_order.top();
sprite_order.pop();
/* Sprite is already sorted, ignore it. */
if (s->order == ORDER_RETURNED) continue;
/* Sprite was already compared, just need to output it. */
if (s->order == ORDER_COMPARED) {
*(out++) = s;
s->order = ORDER_RETURNED;
continue;
}
preceding.clear();
/* We only need sprites with xmin <= s->xmax && ymin <= s->ymax && zmin <= s->zmax
* So by iterating sprites with xmin + ymin <= s->xmax + s->ymax
* we get all we need and some more that we filter out later.
* We don't include zmin into the sum as there are usually more neighbors on x and y than z
* so including it will actually increase the amount of false positives.
* Also min coordinates can be > max so using max(xmin, xmax) + max(ymin, ymax)
* to ensure that we iterate the current sprite as we need to remove it from the list.
*/
auto ssum = std::max(s->xmax, s->xmin) + std::max(s->ymax, s->ymin);
auto prev = sprite_list.before_begin();
auto x = sprite_list.begin();
while (x != sprite_list.end() && ((*x).first <= ssum)) {
auto p = (*x).second;
if (p == s) {
/* We found the current sprite, remove it and move on. */
x = sprite_list.erase_after(prev);
continue;
}
auto p_prev = prev;
prev = x++;
/* Check that p->xmin <= s->xmax && p->ymin <= s->ymax && p->zmin <= s->zmax */
__m128i s_max = LOAD_128((__m128i*) &s->xmax);
__m128i p_min = LOAD_128((__m128i*) &p->xmin);
__m128i r1 = _mm_cmplt_epi32(s_max, p_min);
if (!_mm_testz_si128(mask_ptest, r1))
continue;
/* Check if sprites overlap, i.e.
* s->xmin <= p->xmax && s->ymin <= p->ymax && s->zmin <= p->zmax
*/
__m128i s_min = LOAD_128((__m128i*) &s->xmin);
__m128i p_max = LOAD_128((__m128i*) &p->xmax);
__m128i r2 = _mm_cmplt_epi32(p_max, s_min);
if (_mm_testz_si128(mask_ptest, r2)) {
/* Use X+Y+Z as the sorting order, so sprites closer to the bottom of
* the screen and with higher Z elevation, are drawn in front.
* Here X,Y,Z are the coordinates of the "center of mass" of the sprite,
* i.e. X=(left+right)/2, etc.
* However, since we only care about order, don't actually divide / 2
*/
if (s->xmin + s->xmax + s->ymin + s->ymax + s->zmin + s->zmax <=
p->xmin + p->xmax + p->ymin + p->ymax + p->zmin + p->zmax) {
continue;
}
}
preceding.push_back(p);
preceding_prev = p_prev;
}
if (preceding.empty()) {
/* No preceding sprites, add current one to the output */
*(out++) = s;
s->order = ORDER_RETURNED;
continue;
}
/* Optimization for the case when we only have 1 sprite to move. */
if (preceding.size() == 1) {
auto p = preceding[0];
/* We can only output the preceding sprite if there can't be any other sprites preceding it. */
if (p->xmax <= s->xmax && p->ymax <= s->ymax && p->zmax <= s->zmax) {
p->order = ORDER_RETURNED;
s->order = ORDER_RETURNED;
sprite_list.erase_after(preceding_prev);
*(out++) = p;
*(out++) = s;
continue;
}
}
/* Sort all preceding sprites by order and assign new orders in reverse (as original sorter did). */
std::sort(preceding.begin(), preceding.end(), [](const ParentSpriteToDraw *a, const ParentSpriteToDraw *b) {
return a->order > b->order;
});
s->order = ORDER_COMPARED;
sprite_order.push(s); // Still need to output so push it back for now
for (auto p: preceding) {
p->order = next_order++;
sprite_order.push(p);
}
}
}
/**
* Check whether the current CPU supports SSE 4.1.
* @return True iff the CPU supports SSE 4.1.
*/
bool ViewportSortParentSpritesSSE41Checker()
{
return HasCPUIDFlag(1, 2, 19);
}
#endif /* WITH_SSE */
|