Why Use Threads at All?
What is a Multi-threaded Renderer?
How can a multi-threaded renderer increase performance?
How is a multi-threaded renderer implemented?
一般一個支持多個圖形API(DirectX11/Vulkan/OpenGL/etc)的跨平臺的遊戲引擎,都會有一個抽象的上層圖形API,這些上層的圖形API看起來跟DirectX device context的APIs很像,這些抽象圖形API的調用最終會轉化爲底層的圖形API調用。這裏有一點需要注意,底層API一旦調用就會直接執行,而使用多線程渲染器以後,所有底層API的調用都會被延遲執行。
不管仿真線程跑在哪一個CPU核上,該核都被認爲是主CPU核,我們可以用其他的子CPU核去執行圖形API的代碼。仿真線程會把圖形渲染相關的工作放入隊列中,讓子CPU核去執行。而且,子CPU核直到執行完前一個任務以後才會去執行新的任務。這個將圖形渲染相關工作放入和取出隊列的操作,一般是由一個稱之爲Ring Buffer或者Circular Buffer的數據結構來管理的。Ring Buffer是用一個常規的循環數組實現的隊列,當數組沒有空間再存放信息時,只需循環回數組的第一個元素即可。因此你永遠不需要分配更多的內存。在寫多線程代碼時,Ring Buffer是一個非常有用的數據結構。它允許你以一種安全的方式從不同的線程插入和彈出隊列。這是因爲仿真線程操作的是數組的一個獨有的索引,而渲染線程操作的是數組的另一個索引。而且你也可以寫出一個線程安全的無鎖Ring Buffer,無鎖的Ring Buffer可以進一步提高程序的性能。當一個上層圖形API在仿真線程被調用時,一個圖形命令數據包就會被插入Ring Buffer。當渲染線程完成它前一個渲染指令後,它會從Ring Buffer中取出一個新的指令並執行它。
#include <atomic>
#include <chrono>
#include <cstdio>
#include <iostream>
#include <thread>
#include <vector>
// Kept so that any code relying on unqualified standard names keeps compiling;
// everything below spells out std:: explicitly and does not depend on it.
using namespace std;

/// Fixed-capacity circular FIFO queue intended for exactly one producer thread
/// (calling Enqueue) and one consumer thread (calling Dequeue).
///
/// The producer is the only writer of `writeIndex` and the consumer is the only
/// writer of `readIndex`; both indices are atomics, so the two threads never
/// write the same variable. One slot is always left unused so that
/// "full" (write + 1 == read) can be told apart from "empty" (write == read).
///
/// The buffer stores values of T by copy. When T is a raw pointer the buffer
/// does NOT own the pointees: whoever dequeues a pointer is responsible for it.
template <typename T>
class RingBuffer
{
private:
    using SizeType = typename std::vector<T>::size_type;

    int maxCount;                 // Number of slots: usable capacity + 1 sentinel slot.
    std::vector<T> buffer;        // Value-initialized storage (replaces new[] + memset).
    std::atomic<int> readIndex;   // Next slot to read; written only by the consumer.
    std::atomic<int> writeIndex;  // Next slot to write; written only by the producer.

public:
    /// Creates a buffer that can hold 50 elements.
    RingBuffer() : RingBuffer(50) {}

    /// Creates a buffer that can hold `count` elements.
    /// A non-positive `count` is clamped to 1; otherwise the buffer would be
    /// permanently full and Enqueue would block forever.
    RingBuffer(int count)
        : maxCount(count > 0 ? count + 1 : 2),
          buffer(static_cast<SizeType>(maxCount)),
          readIndex(0),
          writeIndex(0)
    {
    }

    // The atomics already make the type non-copyable; say so explicitly.
    RingBuffer(const RingBuffer&) = delete;
    RingBuffer& operator=(const RingBuffer&) = delete;

    /// Appends `value`. Producer thread only.
    /// If the buffer is full this blocks (sleeping in 500ns steps) until the
    /// consumer has removed an element, so unread data is never overwritten.
    void Enqueue(T value)
    {
        const int write = writeIndex.load();
        const int next = (write + 1) % maxCount;

        while (next == readIndex.load())
        {
            std::this_thread::sleep_for(std::chrono::nanoseconds(500));
        }

        buffer[static_cast<SizeType>(write)] = value;
        // Publish the slot only after it has been written.
        writeIndex.store(next);
    }

    /// Removes the oldest element into `*outValue`. Consumer thread only.
    /// @return true if an element was removed; false if the buffer was empty
    ///         or `outValue` is null.
    bool Dequeue(T* outValue)
    {
        if (outValue == nullptr)
        {
            return false;
        }

        const int read = readIndex.load();
        if (read == writeIndex.load())
        {
            return false;
        }

        *outValue = buffer[static_cast<SizeType>(read)];
        // Release the slot only after its value has been copied out.
        readIndex.store((read + 1) % maxCount);
        return true;
    }

    /// @return true when there is nothing to dequeue.
    bool IsEmpty() const
    {
        return readIndex.load() == writeIndex.load();
    }

    /// @return true when Enqueue would have to wait.
    bool IsFull() const
    {
        return readIndex.load() == ((writeIndex.load() + 1) % maxCount);
    }

    /// Resets the buffer to empty and value-initializes every slot.
    /// Must not be called while another thread is using the buffer. Stored
    /// pointers are dropped, not deleted.
    void Clear()
    {
        readIndex.store(0);
        writeIndex.store(0);
        buffer.assign(buffer.size(), T());
    }

    /// @return the number of elements currently stored.
    int GetSize() const
    {
        const int write = writeIndex.load();
        const int read = readIndex.load();
        // Modular distance stays correct after the write index wraps around.
        return (write - read + maxCount) % maxCount;
    }

    /// @return the number of internal slots, which is one more than the number
    ///         of elements that can be stored at once.
    int GetMaxSize() const
    {
        return maxCount;
    }
};

/// Base type for a deferred graphics command executed on the render thread.
struct GfxCmd
{
public:
    // Commands are deleted through GfxCmd*, so the destructor must be virtual.
    virtual ~GfxCmd() = default;

    /// Executes the command. The base implementation does nothing.
    virtual void Invoke() {}
};

/// Deferred "set render target" command.
struct GfxCmdSetRenderTarget : public GfxCmd
{
public:
    void* resourcePtr;

    GfxCmdSetRenderTarget(void* resource) : resourcePtr(resource) {}

    void Invoke() override
    {
        // A real renderer would call ID3D11DeviceContext::OMSetRenderTargets here.
        std::printf("%s(%p);\n", name, resourcePtr);
    }

private:
    const char* name = "GfxCmdSetRenderTarget";
};

/// Deferred "clear render target" command carrying the clear colour.
struct GfxCmdClearRenderTargetView : public GfxCmd
{
public:
    int r, g, b;

    GfxCmdClearRenderTargetView(int _r, int _g, int _b) : r(_r), g(_g), b(_b) {}

    void Invoke() override
    {
        // A real renderer would call ID3D11DeviceContext::ClearRenderTargetView here.
        std::printf("%s(%d, %d, %d);\n", name, r, g, b);

        // Pretend this command requires the render thread to do a lot of work.
        std::this_thread::sleep_for(std::chrono::milliseconds(250));
    }

private:
    const char* name = "GfxCmdClearRenderTargetView";
};

/// Deferred draw command.
struct GfxCmdDraw : public GfxCmd
{
public:
    int topology;
    int vertCount;

    GfxCmdDraw(int _topology, int _vertCount) : topology(_topology), vertCount(_vertCount) {}

    void Invoke() override
    {
        // A real renderer would call ID3D11DeviceContext::DrawIndexed here.
        std::printf("%s(%d, %d);\n", name, topology, vertCount);
    }

private:
    const char* name = "GfxCmdDraw";
};

/// Simulation-thread work for one frame: queues the graphics commands to run.
/// Gameplay would be updated here first; the commands are then executed by the
/// render thread, which is where the graphics API (OpenGL/DirectX/Vulkan/etc.)
/// calls happen. Ownership of each allocated command passes to whoever
/// dequeues it.
void UpdateSimulationThread(RingBuffer<GfxCmd*>& gfxCmdList)
{
    gfxCmdList.Enqueue(new GfxCmdSetRenderTarget{ reinterpret_cast<void*>(0x1) });
    gfxCmdList.Enqueue(new GfxCmdClearRenderTargetView{ 255, 0, 245 });
    gfxCmdList.Enqueue(new GfxCmdDraw{ 1, 10 });
}

/// Render-thread work: executes and frees at most one queued command.
/// When the queue is empty the thread yields instead of spinning at full speed.
void UpdateRenderThread(RingBuffer<GfxCmd*>& gfxCmdList)
{
    GfxCmd* gfxCmd = nullptr;
    if (gfxCmdList.Dequeue(&gfxCmd))
    {
        gfxCmd->Invoke();
        delete gfxCmd;
    }
    else
    {
        std::this_thread::yield();
    }
}

/// Runs the simulation/render frame loop forever. Never returns.
void GameLoop()
{
    RingBuffer<GfxCmd*> gfxCmdList(3);
    std::atomic<bool> quit{false};

    // Run this indefinitely...
    while (true)
    {
        quit = false;
        // Safe here: no worker thread is running between frames.
        gfxCmdList.Clear();

        std::thread simulationThread([&gfxCmdList, &quit] {
            UpdateSimulationThread(gfxCmdList);
            quit = true;
        });

        std::thread renderThread([&gfxCmdList, &quit] {
            // Keep draining until the simulation thread is done submitting AND
            // the buffer is empty. `quit` must be read FIRST: once it is seen
            // as true every Enqueue has already happened, so the emptiness
            // check that follows cannot miss a late command.
            while (!(quit.load() && gfxCmdList.IsEmpty()))
            {
                UpdateRenderThread(gfxCmdList);
            }
        });

        // Ensure that both the simulation and render threads have completed their work.
        simulationThread.join();
        renderThread.join();

        std::cout << "---\n";
    }
}

int main()
{
    GameLoop();
    return 0;
}