diff options
author | rubidium <rubidium@openttd.org> | 2009-10-15 17:41:06 +0000 |
---|---|---|
committer | rubidium <rubidium@openttd.org> | 2009-10-15 17:41:06 +0000 |
commit | f4f4044859b9ae89f70dcffc459b81d769c0a93f (patch) | |
tree | 88f1b2f84ce673f3489d6fec18479fb2714da5c1 | |
parent | 7f52cfe72bd6471395aa841c7814d2083c179d9f (diff) | |
download | openttd-f4f4044859b9ae89f70dcffc459b81d769c0a93f.tar.xz |
(svn r17776) -Codechange: [SDL] make "update the video card"-process asynchronous. Profiling with gprof etc. hasn't shown us that DrawSurfaceToScreen takes a significant amount of CPU; only when using TIC/TOC did it become apparent that it was a heavy CPU-cycle user or that it was waiting for something.
The benefit of making this function asynchronous ranges from 2%-25% (real time) during fast forward on dual core/hyperthreading-enabled CPUs; 8bpp improvements are, in my test cases, significantly smaller than 32bpp improvements.
On single core non-hyperthreading-enabled CPUs the extra locking/scheduling costs up to 1% extra realtime in fast forward. You can use -v sdl:no_threads to disable threading and undo this loss.
During normal non-fast-forwarded games the benefit/costs are negligible except when the gameloop takes more than about 90% of the time of a tick.
Note that Allegro's performance does not improve with this system, likely due to their way of getting data to the video card. It is not implemented for the OS X/Windows video backends, unless (of course) SDL is used there.
Funnily enough, the 32bpp(-anim) blitter is now, at least in some test cases, significantly faster (more than 10%) than the 8bpp(-optimized) blitter when looking at real time in fast forward on a dual core CPU; before this change it was slower.
The idea comes from a paper/report by Idar Borlaug and Knut Imar Hagen.
-rw-r--r-- | src/thread/thread.h | 13 | ||||
-rw-r--r-- | src/thread/thread_none.cpp | 2 | ||||
-rw-r--r-- | src/thread/thread_pthread.cpp | 18 | ||||
-rw-r--r-- | src/thread/thread_win32.cpp | 15 | ||||
-rw-r--r-- | src/video/allegro_v.cpp | 7 | ||||
-rw-r--r-- | src/video/sdl_v.cpp | 85 |
6 files changed, 132 insertions, 8 deletions
diff --git a/src/thread/thread.h b/src/thread/thread.h index 22873bee5..98c48d15d 100644 --- a/src/thread/thread.h +++ b/src/thread/thread.h @@ -68,6 +68,19 @@ public: * End of the critical section */ virtual void EndCritical() = 0; + + /** + * Wait for a signal to be send. + * @pre You must be in the critical section. + * @note While waiting the critical section is left. + * @post You will be in the critical section. + */ + virtual void WaitForSignal() = 0; + + /** + * Send a signal and wake the 'thread' that was waiting for it. + */ + virtual void SendSignal() = 0; }; #endif /* THREAD_H */ diff --git a/src/thread/thread_none.cpp b/src/thread/thread_none.cpp index 861f2cfd6..67679bc81 100644 --- a/src/thread/thread_none.cpp +++ b/src/thread/thread_none.cpp @@ -23,6 +23,8 @@ class ThreadMutex_None : public ThreadMutex { public: virtual void BeginCritical() {} virtual void EndCritical() {} + virtual void WaitForSignal() {} + virtual void SendSignal() {} }; /* static */ ThreadMutex *ThreadMutex::New() diff --git a/src/thread/thread_pthread.cpp b/src/thread/thread_pthread.cpp index 483b71c0f..ca1fb8f5a 100644 --- a/src/thread/thread_pthread.cpp +++ b/src/thread/thread_pthread.cpp @@ -12,6 +12,7 @@ #include "../stdafx.h" #include "thread.h" #include <pthread.h> +#include <errno.h> /** * POSIX pthread version for ThreadObject. 
@@ -95,16 +96,21 @@ private: class ThreadMutex_pthread : public ThreadMutex { private: pthread_mutex_t mutex; + pthread_cond_t condition; public: ThreadMutex_pthread() { pthread_mutex_init(&this->mutex, NULL); + pthread_cond_init(&this->condition, NULL); } /* virtual */ ~ThreadMutex_pthread() { - pthread_mutex_destroy(&this->mutex); + int err = pthread_cond_destroy(&this->condition); + assert(err != EBUSY); + err = pthread_mutex_destroy(&this->mutex); + assert(err != EBUSY); } /* virtual */ void BeginCritical() @@ -116,6 +122,16 @@ public: { pthread_mutex_unlock(&this->mutex); } + + /* virtual */ void WaitForSignal() + { + pthread_cond_wait(&this->condition, &this->mutex); + } + + /* virtual */ void SendSignal() + { + pthread_cond_signal(&this->condition); + } }; /* static */ ThreadMutex *ThreadMutex::New() diff --git a/src/thread/thread_win32.cpp b/src/thread/thread_win32.cpp index e752de1ae..a3b2d3734 100644 --- a/src/thread/thread_win32.cpp +++ b/src/thread/thread_win32.cpp @@ -107,16 +107,19 @@ private: class ThreadMutex_Win32 : public ThreadMutex { private: CRITICAL_SECTION critical_section; + HANDLE event; public: ThreadMutex_Win32() { InitializeCriticalSection(&this->critical_section); + this->event = CreateEvent(NULL, FALSE, FALSE, NULL); } /* virtual */ ~ThreadMutex_Win32() { DeleteCriticalSection(&this->critical_section); + CloseHandle(this->event); } /* virtual */ void BeginCritical() @@ -128,6 +131,18 @@ public: { LeaveCriticalSection(&this->critical_section); } + + /* virtual */ void WaitForSignal() + { + this->EndCritical(); + WaitForSingleObject(this->event, INFINITE); + this->BeginCritical(); + } + + /* virtual */ void SendSignal() + { + SetEvent(this->event); + } }; /* static */ ThreadMutex *ThreadMutex::New() diff --git a/src/video/allegro_v.cpp b/src/video/allegro_v.cpp index e566a8dd5..a92a115e2 100644 --- a/src/video/allegro_v.cpp +++ b/src/video/allegro_v.cpp @@ -7,7 +7,12 @@ * See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>. */ -/** @file allegro_v.cpp Implementation of the Allegro video driver. */ +/** + * @file allegro_v.cpp Implementation of the Allegro video driver. + * @note Implementing threaded pushing of data to the display is + * not faster (it's a few percent slower) in contrast to the + * results gained with threading it for SDL. + */ #ifdef WITH_ALLEGRO diff --git a/src/video/sdl_v.cpp b/src/video/sdl_v.cpp index a17c95ebd..1cb6d828b 100644 --- a/src/video/sdl_v.cpp +++ b/src/video/sdl_v.cpp @@ -20,6 +20,7 @@ #include "../blitter/factory.hpp" #include "../network/network.h" #include "../functions.h" +#include "../thread/thread.h" #include "sdl_v.h" #include <SDL.h> @@ -28,6 +29,15 @@ static FVideoDriver_SDL iFVideoDriver_SDL; static SDL_Surface *_sdl_screen; static bool _all_modes; +/** Whether the drawing is/may be done in a separate thread. */ +static bool _draw_threaded; +/** Thread used to 'draw' to the screen, i.e. push data to the screen. */ +static ThreadObject *_draw_thread = NULL; +/** Mutex to keep the access to the shared memory controlled. */ +static ThreadMutex *_draw_mutex = NULL; +/** Should we keep continue drawing? 
*/ +static volatile bool _draw_continue; + #define MAX_DIRTY_RECTS 100 static SDL_Rect _dirty_rects[MAX_DIRTY_RECTS]; static int _num_dirty_rects; @@ -99,6 +109,22 @@ static void DrawSurfaceToScreen() } } +static void DrawSurfaceToScreenThread(void *) +{ + /* First wait till we 'may' start */ + _draw_mutex->BeginCritical(); + _draw_mutex->WaitForSignal(); + + while (_draw_continue) { + /* Then just draw and wait till we stop */ + DrawSurfaceToScreen(); + _draw_mutex->WaitForSignal(); + } + + _draw_mutex->EndCritical(); + _draw_thread->Exit(); +} + static const Dimension _default_resolutions[] = { { 640, 480}, { 800, 600}, @@ -214,6 +240,9 @@ static bool CreateMainSurface(uint w, uint h) return false; } + /* Delay drawing for this cycle; the next cycle will redraw the whole screen */ + _num_dirty_rects = 0; + _screen.width = newscreen->w; _screen.height = newscreen->h; _screen.pitch = newscreen->pitch / (bpp / 8); @@ -445,6 +474,9 @@ const char *VideoDriver_SDL::Start(const char * const *parm) SDL_CALL SDL_EnableKeyRepeat(SDL_DEFAULT_REPEAT_DELAY, SDL_DEFAULT_REPEAT_INTERVAL); SDL_CALL SDL_EnableUNICODE(1); + + _draw_threaded = GetDriverParam(parm, "no_threads") == NULL && GetDriverParam(parm, "no_thread") == NULL; + return NULL; } @@ -463,6 +495,25 @@ void VideoDriver_SDL::MainLoop() int numkeys; Uint8 *keys; + if (_draw_threaded) { + /* Initialise the mutex first, because that's the thing we *need* + * directly in the newly created thread. */ + _draw_mutex = ThreadMutex::New(); + if (_draw_mutex == NULL) { + _draw_threaded = false; + } else { + _draw_mutex->BeginCritical(); + _draw_continue = true; + + _draw_threaded = ThreadObject::New(&DrawSurfaceToScreenThread, NULL, &_draw_thread); + } + + /* Free the mutex if we won't be able to use it. */ + if (!_draw_threaded) delete _draw_mutex; + } + + DEBUG(driver, 1, "SDL: using %sthreads", _draw_threaded ? 
"" : "no "); + for (;;) { uint32 prev_cur_ticks = cur_ticks; // to check for wrapping InteractiveRandom(); // randomness @@ -505,23 +556,45 @@ void VideoDriver_SDL::MainLoop() if (old_ctrl_pressed != _ctrl_pressed) HandleCtrlChanged(); + /* The gameloop is the part that can run asynchroniously. The rest + * except sleeping can't. */ + if (_draw_threaded) _draw_mutex->EndCritical(); + GameLoop(); + if (_draw_threaded) _draw_mutex->BeginCritical(); + _screen.dst_ptr = _sdl_screen->pixels; UpdateWindows(); if (++pal_tick > 4) { CheckPaletteAnim(); pal_tick = 1; } - DrawSurfaceToScreen(); + + /* End of the critical part. */ + if (_draw_threaded) { + _draw_mutex->SendSignal(); + } else { + /* Oh, we didn't have threads, then just draw unthreaded */ + DrawSurfaceToScreen(); + } } else { - SDL_CALL SDL_Delay(1); - _screen.dst_ptr = _sdl_screen->pixels; - NetworkDrawChatMessage(); - DrawMouseCursor(); - DrawSurfaceToScreen(); + /* Release the thread while sleeping */ + if (_draw_threaded) _draw_mutex->EndCritical(); + CSleep(1); + if (_draw_threaded) _draw_mutex->BeginCritical(); } } + + if (_draw_threaded) { + _draw_continue = false; + _draw_mutex->SendSignal(); + _draw_mutex->EndCritical(); + _draw_thread->Join(); + + delete _draw_mutex; + delete _draw_thread; + } } bool VideoDriver_SDL::ChangeResolution(int w, int h) |