From aab0e3b0d41b18413de3a7934172d49df4f700bc Mon Sep 17 00:00:00 2001 From: kaetemi Date: Mon, 30 Mar 2015 06:47:10 +0200 Subject: [PATCH] Use unsynchronized triple buffering for volatile vertex buffers --HG-- branch : opengl3 --- .../src/3d/driver/opengl3/driver_opengl.cpp | 10 +- .../nel/src/3d/driver/opengl3/driver_opengl.h | 7 +- .../driver/opengl3/driver_opengl_vertex.cpp | 21 ++- .../opengl3/driver_opengl_vertex_buffer.cpp | 159 ++++++++++++------ .../opengl3/driver_opengl_vertex_buffer.h | 18 +- 5 files changed, 147 insertions(+), 68 deletions(-) diff --git a/code/nel/src/3d/driver/opengl3/driver_opengl.cpp b/code/nel/src/3d/driver/opengl3/driver_opengl.cpp index d9fce8508..55bd2422b 100644 --- a/code/nel/src/3d/driver/opengl3/driver_opengl.cpp +++ b/code/nel/src/3d/driver/opengl3/driver_opengl.cpp @@ -314,7 +314,7 @@ CDriverGL3::CDriverGL3() _CurrentOcclusionQuery = NULL; _SwapBufferCounter = 0; _SwapBufferInFlight = 0; - for (size_t i = 0; i < NL3D_GL3_BUFFER_QUEUE_MAX; ++i) + for (size_t i = 0; i < NL3D_GL3_FRAME_QUEUE_MAX; ++i) _SwapBufferSync[i] = 0; _LightMapDynamicLightEnabled = false; @@ -596,7 +596,7 @@ bool CDriverGL3::swapBuffers() H_AUTO_OGL(CDriverGL3_swapBuffers); // Set fence - size_t syncI = _SwapBufferCounter % NL3D_GL3_BUFFER_QUEUE_MAX; + size_t syncI = _SwapBufferCounter % NL3D_GL3_FRAME_QUEUE_MAX; if (_SwapBufferSync[syncI]) // Wait for oldest fence, if this is still in flight { #if NL3D_GL3_FRAME_IN_FLIGHT_DEBUG @@ -692,9 +692,9 @@ bool CDriverGL3::swapBuffers() updateLostBuffers(); // Check in flight buffers, also checks the current one - for (size_t i = 0; i < NL3D_GL3_BUFFER_QUEUE_MAX; ++i) + for (size_t i = 0; i < NL3D_GL3_FRAME_QUEUE_MAX; ++i) { - size_t syncJ = (syncI + 1 + i) % NL3D_GL3_BUFFER_QUEUE_MAX; + size_t syncJ = (syncI + 1 + i) % NL3D_GL3_FRAME_QUEUE_MAX; if (_SwapBufferSync[syncJ]) // If there's a frame in flight { GLint status = 0; @@ -760,7 +760,7 @@ bool CDriverGL3::release() _SwapBufferCounter = 0; 
_SwapBufferInFlight = 0; - for (size_t i = 0; i < NL3D_GL3_BUFFER_QUEUE_MAX; ++i) + for (size_t i = 0; i < NL3D_GL3_FRAME_QUEUE_MAX; ++i) { if (_SwapBufferSync[i]) { diff --git a/code/nel/src/3d/driver/opengl3/driver_opengl.h b/code/nel/src/3d/driver/opengl3/driver_opengl.h index d400918ac..5a46fc0ae 100644 --- a/code/nel/src/3d/driver/opengl3/driver_opengl.h +++ b/code/nel/src/3d/driver/opengl3/driver_opengl.h @@ -82,7 +82,8 @@ using NLMISC::CMatrix; using NLMISC::CVector; #define NL3D_GL3_BUFFER_NOT_IN_FLIGHT (std::numeric_limits::max()) -#define NL3D_GL3_BUFFER_QUEUE_MAX (3) // Maximum is three frames behind +#define NL3D_GL3_FRAME_QUEUE_MAX (2) // Maximum is three frames processing (2 frames backlog + current frame) +#define NL3D_GL3_BUFFER_QUEUE_MAX (NL3D_GL3_FRAME_QUEUE_MAX + 1) // Additional buffer for current working namespace NL3D { namespace NLDRIVERGL3 { @@ -266,7 +267,7 @@ public: } void setupVertexBuffer(CVertexBuffer &vb); - void setupVertexBufferHard(IVertexBufferGL3 &vb); + // void setupVertexBufferHard(IVertexBufferGL3 &vb); }; @@ -1348,7 +1349,7 @@ protected: uint64 _SwapBufferCounter; private: uint64 _SwapBufferInFlight; - GLsync _SwapBufferSync[NL3D_GL3_BUFFER_QUEUE_MAX]; + GLsync _SwapBufferSync[NL3D_GL3_FRAME_QUEUE_MAX]; public: void incrementResetCounter() { ++_ResetCounter; } bool isWndActive() const { return _WndActive; } diff --git a/code/nel/src/3d/driver/opengl3/driver_opengl_vertex.cpp b/code/nel/src/3d/driver/opengl3/driver_opengl_vertex.cpp index 5f57a01c3..d6509fa05 100644 --- a/code/nel/src/3d/driver/opengl3/driver_opengl_vertex.cpp +++ b/code/nel/src/3d/driver/opengl3/driver_opengl_vertex.cpp @@ -299,11 +299,17 @@ void CDriverGL3::updateLostBuffers() { for (std::list::iterator it = _LostVBList.begin(); it != _LostVBList.end(); ++it) { - nlassert((*it)->m_VertexObjectId); - GLuint id = (GLuint) (*it)->m_VertexObjectId; - nlassert(nglIsBuffer(id)); - nglDeleteBuffers(1, &id); - (*it)->m_VertexObjectId = 0; + 
nlassert((*it)->m_VertexObjectId[0]); + for (GLsizei i = 0; i < NL3D_GL3_BUFFER_QUEUE_MAX; ++i) + { + GLuint id = (*it)->m_VertexObjectId[i]; + if (id) + { + nlassert(nglIsBuffer(id)); + nglDeleteBuffers(1, &id); + (*it)->m_VertexObjectId[i] = 0; + } + } (*it)->VB->setLocation(CVertexBuffer::NotResident); } _LostVBList.clear(); @@ -481,10 +487,11 @@ void CVertexBufferInfo::setupVertexBuffer(CVertexBuffer &vb) CVertexBufferReadWrite access; uint8 *ptr; CVBDrvInfosGL3 *info= safe_cast((IVBDrvInfos*)vb.DrvInfos); - nlassert (info); + nlassert(info); + nlassert(info->_VBHard); ptr = (uint8*)info->_VBHard->getPointer(); - info->_VBHard->setupVBInfos(*this); + VertexObjectId = info->_VBHard->getGLuint(); // Get value pointer for (i=0; i_DriverGLStates.forceBindARBVertexBuffer(vertexBufferID); + for (GLsizei i = 0; i < NL3D_GL3_BUFFER_QUEUE_MAX; ++i) + { + m_VertexObjectId[i] = 0; + m_FrameInFlight[i] = NL3D_GL3_BUFFER_NOT_IN_FLIGHT; + } - // Initialize - nglBufferData(GL_ARRAY_BUFFER, size, NULL, drv->vertexBufferUsageGL3(preferred)); - m_VertexObjectId = vertexBufferID; - m_MemType = preferred; + // Create ids + GLsizei nbBuff = vbgl3BufferForType(preferred); + nglGenBuffers(nbBuff, m_VertexObjectId); - // Unbind - drv->_DriverGLStates.forceBindARBVertexBuffer(0); + // Initialize + for (GLsizei i = 0; i < nbBuff; ++i) + { + drv->_DriverGLStates.forceBindARBVertexBuffer(m_VertexObjectId[i]); + nglBufferData(GL_ARRAY_BUFFER, size, NULL, drv->vertexBufferUsageGL3(preferred)); + drv->_DriverGLStates.forceBindARBVertexBuffer(0); + } } // *************************************************************************** @@ -77,16 +99,23 @@ CVertexBufferGL3::~CVertexBufferGL3() H_AUTO_OGL(CVertexBufferGLARB_CVertexBufferGLARBDtor) if (m_Driver && m_VertexObjectId) { - if (m_Driver->_DriverGLStates.getCurrBoundARBVertexBuffer() == m_VertexObjectId) + GLsizei nbBuff = vbgl3BufferForType(m_MemType); + for (GLsizei i = 0; i < nbBuff; ++i) { - 
m_Driver->_DriverGLStates.forceBindARBVertexBuffer(0); + if (m_Driver->_DriverGLStates.getCurrBoundARBVertexBuffer() == m_VertexObjectId[i]) + { + m_Driver->_DriverGLStates.forceBindARBVertexBuffer(0); + } } } - if (m_VertexObjectId) + for (GLsizei i = 0; i < NL3D_GL3_BUFFER_QUEUE_MAX; ++i) { - GLuint id = (GLuint)m_VertexObjectId; - nlassert(nglIsBuffer(id)); - nglDeleteBuffers(1, &id); + if (m_VertexObjectId[i]) + { + GLuint id = m_VertexObjectId[i]; + nlassert(nglIsBuffer(id)); + nglDeleteBuffers(1, &id); + } } if (m_Driver) { @@ -118,28 +147,33 @@ void *CVertexBufferGL3::lock() nlassert(!m_DummyVB.empty()); return &m_DummyVB[0]; } - // recreate a vb - GLuint vertexBufferID; - + + // Create ids glGetError(); - nglGenBuffers(1, &vertexBufferID); + GLsizei nbBuff = vbgl3BufferForType(m_MemType); + nglGenBuffers(nbBuff, m_VertexObjectId); if (glGetError() != GL_NO_ERROR) { m_Driver->incrementResetCounter(); return &m_DummyVB[0]; } - m_Driver->_DriverGLStates.forceBindARBVertexBuffer(vertexBufferID); - nglBufferData(GL_ARRAY_BUFFER, size, NULL, m_Driver->vertexBufferUsageGL3(m_MemType)); - if (glGetError() != GL_NO_ERROR) + + for (GLsizei i = 0; i < nbBuff; ++i) { - m_Driver->incrementResetCounter(); - nglDeleteBuffers(1, &vertexBufferID); - return &m_DummyVB[0];; + m_Driver->_DriverGLStates.forceBindARBVertexBuffer(m_VertexObjectId[i]); + nglBufferData(GL_ARRAY_BUFFER, size, NULL, m_Driver->vertexBufferUsageGL3(m_MemType)); + m_Driver->_DriverGLStates.forceBindARBVertexBuffer(0); + if (glGetError() != GL_NO_ERROR) + { + m_Driver->incrementResetCounter(); + nglDeleteBuffers(1, &m_VertexObjectId[i]); + return &m_DummyVB[0]; + } } - m_VertexObjectId = vertexBufferID; NLMISC::contReset(m_DummyVB); // free vector memory for real - nlassert(m_VertexObjectId); + nlassert(m_VertexObjectId[m_CurrentIndex]); + m_Invalid = false; m_Driver->_LostVBList.erase(m_IteratorInLostVBList); // continue to standard mapping code below .. 
@@ -149,7 +183,6 @@ void *CVertexBufferGL3::lock() { beforeLock= CTime::getPerformanceTime(); } - m_Driver->_DriverGLStates.bindARBVertexBuffer(m_VertexObjectId); // m_VertexPtr = nglMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY); @@ -165,16 +198,43 @@ void *CVertexBufferGL3::lock() { case CVertexBuffer::AGPVolatile: case CVertexBuffer::RAMVolatile: - // NOTE: GL_MAP_INVALIDATE_BUFFER_BIT removes the cost of waiting for synchronization (major performance impact), - // but adds the cost of allocating a new buffer (which hast a much lower performance impact) - m_VertexPtr = nglMapBufferRange(GL_ARRAY_BUFFER, 0, size, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT); + { + if (m_CurrentInFlight) + { + ++m_CurrentIndex; + m_CurrentIndex %= NL3D_GL3_BUFFER_QUEUE_MAX; + m_CurrentInFlight = false; + } + m_Driver->_DriverGLStates.bindARBVertexBuffer(m_VertexObjectId[m_CurrentIndex]); + if (m_FrameInFlight[m_CurrentIndex] != NL3D_GL3_BUFFER_NOT_IN_FLIGHT + && m_FrameInFlight[m_CurrentIndex] >= m_Driver->getSwapBufferInFlight()) + { +#if NL3D_GL3_VERTEX_BUFFER_INFLIGHT_DEBUG + ++m_InvalidateCount; + nldebug("GL: Vertex buffer already in flight (reused: %u, invalidated: %u)", m_ReuseCount, m_InvalidateCount); +#endif + // NOTE: GL_MAP_INVALIDATE_BUFFER_BIT removes the cost of waiting for synchronization (major performance impact), + // but adds the cost of allocating a new buffer (which has a much lower performance impact) + m_VertexPtr = nglMapBufferRange(GL_ARRAY_BUFFER, 0, size, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT); + } + else + { +#if NL3D_GL3_VERTEX_BUFFER_INFLIGHT_DEBUG + ++m_ReuseCount; + nldebug("GL: Vertex buffer can be reused (reused: %u, invalidated: %u)", m_ReuseCount, m_InvalidateCount); +#endif + m_VertexPtr = nglMapBufferRange(GL_ARRAY_BUFFER, 0, size, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT); + } break; + } case CVertexBuffer::RAMPreferred: + m_Driver->_DriverGLStates.bindARBVertexBuffer(m_VertexObjectId[m_CurrentIndex]); // m_VertexPtr = 
nglMapBufferRange(GL_ARRAY_BUFFER, 0, size, GL_MAP_WRITE_BIT | GL_MAP_READ_BIT | GL_MAP_PERSISTENT | GL_MAP_COHERENT); // NOTE: Persistent / Coherent is only available in OpenGL 4.4 (2013/2014 hardware with recent drivers) m_VertexPtr = nglMapBuffer(GL_ARRAY_BUFFER, GL_READ_WRITE); break; default: + m_Driver->_DriverGLStates.bindARBVertexBuffer(m_VertexObjectId[m_CurrentIndex]); // m_VertexPtr = nglMapBufferRange(GL_ARRAY_BUFFER, 0, size, GL_MAP_WRITE_BIT); m_VertexPtr = nglMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY); break; @@ -183,7 +243,7 @@ void *CVertexBufferGL3::lock() if (!m_VertexPtr) { nglUnmapBuffer(GL_ARRAY_BUFFER); - nlassert(nglIsBuffer((GLuint)m_VertexObjectId)); + nlassert(nglIsBuffer(m_VertexObjectId[m_CurrentIndex])); invalidate(); return &m_DummyVB[0]; } @@ -211,13 +271,13 @@ void CVertexBufferGL3::unlock() m_VertexPtr = NULL; if (m_Invalid) return; - if (!m_VertexObjectId) return; + if (!m_VertexObjectId[m_CurrentIndex]) return; TTicks beforeLock = 0; if (m_Driver->_VBHardProfiling) { beforeLock= CTime::getPerformanceTime(); } - m_Driver->_DriverGLStates.bindARBVertexBuffer(m_VertexObjectId); + m_Driver->_DriverGLStates.bindARBVertexBuffer(m_VertexObjectId[m_CurrentIndex]); // double start = CTime::ticksToSecond(CTime::getPerformanceTime()); #ifdef NL_DEBUG _Unmapping = true; @@ -271,7 +331,7 @@ void CVertexBufferGL3::enable() H_AUTO_OGL(CVertexBufferGLARB_enable) if (m_Driver->_CurrentVertexBufferGL != this) { - m_Driver->_CurrentVertexBufferGL= this; + m_Driver->_CurrentVertexBufferGL = this; } } @@ -282,16 +342,17 @@ void CVertexBufferGL3::disable() H_AUTO_OGL(CVertexBufferGLARB_disable) if (m_Driver->_CurrentVertexBufferGL != NULL) { - m_Driver->_CurrentVertexBufferGL= NULL; + m_Driver->_CurrentVertexBufferGL = NULL; } } // *************************************************************************** -void CVertexBufferGL3::setupVBInfos(CVertexBufferInfo &vb) +GLuint CVertexBufferGL3::getGLuint() { - H_AUTO_OGL(CVertexBufferGLARB_setupVBInfos) - 
vb.VertexObjectId = m_VertexObjectId; + H_AUTO_OGL(CVertexBufferGLARB_getGLuint); + + return m_VertexObjectId[m_CurrentIndex]; } // *************************************************************************** @@ -300,7 +361,9 @@ void CVertexBufferGL3::setFrameInFlight(uint64 swapBufferCounter) { H_AUTO_OGL(CVertexBufferGL3_setFrameInFlight); - m_FrameInFlight = swapBufferCounter; + // Set buffer frame in flight + m_FrameInFlight[m_CurrentIndex] = swapBufferCounter; + m_CurrentInFlight = true; } // *************************************************************************** @@ -507,11 +570,11 @@ void CVertexBufferAMDPinned::disable() // *************************************************************************** -void CVertexBufferAMDPinned::setupVBInfos(CVertexBufferInfo &vb) +GLuint CVertexBufferAMDPinned::getGLuint() { - H_AUTO_OGL(CVertexBufferAMDPinned_setupVBInfos); + H_AUTO_OGL(CVertexBufferAMDPinned_getGLuint); - vb.VertexObjectId = m_VertexObjectId; + return m_VertexObjectId; } // *************************************************************************** diff --git a/code/nel/src/3d/driver/opengl3/driver_opengl_vertex_buffer.h b/code/nel/src/3d/driver/opengl3/driver_opengl_vertex_buffer.h index b17059ae1..8b7f126e3 100644 --- a/code/nel/src/3d/driver/opengl3/driver_opengl_vertex_buffer.h +++ b/code/nel/src/3d/driver/opengl3/driver_opengl_vertex_buffer.h @@ -22,6 +22,8 @@ namespace NL3D { namespace NLDRIVERGL3 { +#define NL3D_GL3_VERTEX_BUFFER_INFLIGHT_DEBUG 0 + class CDriverGL3; class IVertexBufferGL3; class CVertexBufferInfo; @@ -41,7 +43,7 @@ public: virtual void *getPointer() = 0; virtual void enable() = 0; virtual void disable() = 0; - virtual void setupVBInfos(CVertexBufferInfo &vb) = 0; + virtual GLuint getGLuint() = 0; virtual void setFrameInFlight(uint64 swapBufferCounter) = 0; // test if buffer content is invalid. 
If so, no rendering should occurs (rendering should silently fail) @@ -71,7 +73,7 @@ public: virtual void *getPointer(); virtual void enable(); virtual void disable(); - virtual void setupVBInfos(CVertexBufferInfo &vb); + virtual GLuint getGLuint(); virtual void setFrameInFlight(uint64 swapBufferCounter); // @} @@ -89,9 +91,15 @@ private: // for use by CVertexArrayRange std::list::iterator m_IteratorInLostVBList; - uint m_VertexObjectId; + GLuint m_VertexObjectId[NL3D_GL3_BUFFER_QUEUE_MAX]; + uint64 m_FrameInFlight[NL3D_GL3_BUFFER_QUEUE_MAX]; + GLsizei m_CurrentIndex; + bool m_CurrentInFlight; - uint64 m_FrameInFlight; // TODO: Array of sz NL3D_GL3_BUFFER_QUEUE_MAX +#if NL3D_GL3_VERTEX_BUFFER_INFLIGHT_DEBUG + uint32 m_ReuseCount; + uint32 m_InvalidateCount; +#endif }; class CVertexBufferAMDPinned : public IVertexBufferGL3 @@ -108,7 +116,7 @@ public: virtual void *getPointer(); virtual void enable(); virtual void disable(); - virtual void setupVBInfos(CVertexBufferInfo &vb); + virtual GLuint getGLuint(); virtual void setFrameInFlight(uint64 swapBufferCounter); // @}