Use unsynchronized triple buffering for volatile vertex buffers

--HG--
branch : opengl3
hg/feature/opengl3
kaetemi 10 years ago
parent 25994fd86b
commit aab0e3b0d4

@ -314,7 +314,7 @@ CDriverGL3::CDriverGL3()
_CurrentOcclusionQuery = NULL; _CurrentOcclusionQuery = NULL;
_SwapBufferCounter = 0; _SwapBufferCounter = 0;
_SwapBufferInFlight = 0; _SwapBufferInFlight = 0;
for (size_t i = 0; i < NL3D_GL3_BUFFER_QUEUE_MAX; ++i) for (size_t i = 0; i < NL3D_GL3_FRAME_QUEUE_MAX; ++i)
_SwapBufferSync[i] = 0; _SwapBufferSync[i] = 0;
_LightMapDynamicLightEnabled = false; _LightMapDynamicLightEnabled = false;
@ -596,7 +596,7 @@ bool CDriverGL3::swapBuffers()
H_AUTO_OGL(CDriverGL3_swapBuffers); H_AUTO_OGL(CDriverGL3_swapBuffers);
// Set fence // Set fence
size_t syncI = _SwapBufferCounter % NL3D_GL3_BUFFER_QUEUE_MAX; size_t syncI = _SwapBufferCounter % NL3D_GL3_FRAME_QUEUE_MAX;
if (_SwapBufferSync[syncI]) // Wait for oldest fence, if this is still in flight if (_SwapBufferSync[syncI]) // Wait for oldest fence, if this is still in flight
{ {
#if NL3D_GL3_FRAME_IN_FLIGHT_DEBUG #if NL3D_GL3_FRAME_IN_FLIGHT_DEBUG
@ -692,9 +692,9 @@ bool CDriverGL3::swapBuffers()
updateLostBuffers(); updateLostBuffers();
// Check in flight buffers, also checks the current one // Check in flight buffers, also checks the current one
for (size_t i = 0; i < NL3D_GL3_BUFFER_QUEUE_MAX; ++i) for (size_t i = 0; i < NL3D_GL3_FRAME_QUEUE_MAX; ++i)
{ {
size_t syncJ = (syncI + 1 + i) % NL3D_GL3_BUFFER_QUEUE_MAX; size_t syncJ = (syncI + 1 + i) % NL3D_GL3_FRAME_QUEUE_MAX;
if (_SwapBufferSync[syncJ]) // If there's a frame in flight if (_SwapBufferSync[syncJ]) // If there's a frame in flight
{ {
GLint status = 0; GLint status = 0;
@ -760,7 +760,7 @@ bool CDriverGL3::release()
_SwapBufferCounter = 0; _SwapBufferCounter = 0;
_SwapBufferInFlight = 0; _SwapBufferInFlight = 0;
for (size_t i = 0; i < NL3D_GL3_BUFFER_QUEUE_MAX; ++i) for (size_t i = 0; i < NL3D_GL3_FRAME_QUEUE_MAX; ++i)
{ {
if (_SwapBufferSync[i]) if (_SwapBufferSync[i])
{ {

@ -82,7 +82,8 @@ using NLMISC::CMatrix;
using NLMISC::CVector; using NLMISC::CVector;
#define NL3D_GL3_BUFFER_NOT_IN_FLIGHT (std::numeric_limits<uint64>::max()) #define NL3D_GL3_BUFFER_NOT_IN_FLIGHT (std::numeric_limits<uint64>::max())
#define NL3D_GL3_BUFFER_QUEUE_MAX (3) // Maximum is three frames behind #define NL3D_GL3_FRAME_QUEUE_MAX (2) // Maximum is three frames processing (2 frames backlog + current frame)
#define NL3D_GL3_BUFFER_QUEUE_MAX (NL3D_GL3_FRAME_QUEUE_MAX + 1) // Additional buffer for current working
namespace NL3D { namespace NL3D {
namespace NLDRIVERGL3 { namespace NLDRIVERGL3 {
@ -266,7 +267,7 @@ public:
} }
void setupVertexBuffer(CVertexBuffer &vb); void setupVertexBuffer(CVertexBuffer &vb);
void setupVertexBufferHard(IVertexBufferGL3 &vb); // void setupVertexBufferHard(IVertexBufferGL3 &vb);
}; };
@ -1348,7 +1349,7 @@ protected:
uint64 _SwapBufferCounter; uint64 _SwapBufferCounter;
private: private:
uint64 _SwapBufferInFlight; uint64 _SwapBufferInFlight;
GLsync _SwapBufferSync[NL3D_GL3_BUFFER_QUEUE_MAX]; GLsync _SwapBufferSync[NL3D_GL3_FRAME_QUEUE_MAX];
public: public:
void incrementResetCounter() { ++_ResetCounter; } void incrementResetCounter() { ++_ResetCounter; }
bool isWndActive() const { return _WndActive; } bool isWndActive() const { return _WndActive; }

@ -299,11 +299,17 @@ void CDriverGL3::updateLostBuffers()
{ {
for (std::list<CVertexBufferGL3 *>::iterator it = _LostVBList.begin(); it != _LostVBList.end(); ++it) for (std::list<CVertexBufferGL3 *>::iterator it = _LostVBList.begin(); it != _LostVBList.end(); ++it)
{ {
nlassert((*it)->m_VertexObjectId); nlassert((*it)->m_VertexObjectId[0]);
GLuint id = (GLuint) (*it)->m_VertexObjectId; for (GLsizei i = 0; i < NL3D_GL3_BUFFER_QUEUE_MAX; ++i)
{
GLuint id = (*it)->m_VertexObjectId[i];
if (id)
{
nlassert(nglIsBuffer(id)); nlassert(nglIsBuffer(id));
nglDeleteBuffers(1, &id); nglDeleteBuffers(1, &id);
(*it)->m_VertexObjectId = 0; (*it)->m_VertexObjectId[i] = 0;
}
}
(*it)->VB->setLocation(CVertexBuffer::NotResident); (*it)->VB->setLocation(CVertexBuffer::NotResident);
} }
_LostVBList.clear(); _LostVBList.clear();
@ -482,9 +488,10 @@ void CVertexBufferInfo::setupVertexBuffer(CVertexBuffer &vb)
uint8 *ptr; uint8 *ptr;
CVBDrvInfosGL3 *info= safe_cast<CVBDrvInfosGL3*>((IVBDrvInfos*)vb.DrvInfos); CVBDrvInfosGL3 *info= safe_cast<CVBDrvInfosGL3*>((IVBDrvInfos*)vb.DrvInfos);
nlassert(info); nlassert(info);
nlassert(info->_VBHard);
ptr = (uint8*)info->_VBHard->getPointer(); ptr = (uint8*)info->_VBHard->getPointer();
info->_VBHard->setupVBInfos(*this); VertexObjectId = info->_VBHard->getGLuint();
// Get value pointer // Get value pointer
for (i=0; i<CVertexBuffer::NumValue; i++) for (i=0; i<CVertexBuffer::NumValue; i++)

@ -48,27 +48,49 @@ IVertexBufferGL3::~IVertexBufferGL3()
// *************************************************************************** // ***************************************************************************
// *************************************************************************** // ***************************************************************************
// Number of GL buffer objects backing a vertex buffer of the given preferred
// memory type: the volatile types (AGPVolatile, RAMVolatile) get
// NL3D_GL3_BUFFER_QUEUE_MAX buffers, every other type gets a single buffer.
static inline GLsizei vbgl3BufferForType(CVertexBuffer::TPreferredMemory mem)
{
	const bool isVolatile = (mem == CVertexBuffer::AGPVolatile)
		|| (mem == CVertexBuffer::RAMVolatile);
	if (isVolatile)
	{
		return NL3D_GL3_BUFFER_QUEUE_MAX;
	}
	return 1;
}
CVertexBufferGL3::CVertexBufferGL3(CDriverGL3 *drv, uint size, uint numVertices, CVertexBuffer::TPreferredMemory preferred, CVertexBuffer *vb) CVertexBufferGL3::CVertexBufferGL3(CDriverGL3 *drv, uint size, uint numVertices, CVertexBuffer::TPreferredMemory preferred, CVertexBuffer *vb)
: IVertexBufferGL3(drv, vb, IVertexBufferGL3::GL3), : IVertexBufferGL3(drv, vb, IVertexBufferGL3::GL3),
m_VertexPtr(NULL), m_VertexPtr(NULL),
m_VertexObjectId(0), m_CurrentIndex(0),
m_FrameInFlight(NL3D_GL3_BUFFER_NOT_IN_FLIGHT) m_CurrentInFlight(false),
#if NL3D_GL3_VERTEX_BUFFER_INFLIGHT_DEBUG
m_ReuseCount(0),
m_InvalidateCount(0),
#endif
m_MemType(preferred)
{ {
H_AUTO_OGL(CVertexBufferGLARB_CVertexBufferGLARB) H_AUTO_OGL(CVertexBufferGLARB_CVertexBufferGLARB);
// Create id and bind for (GLsizei i = 0; i < NL3D_GL3_BUFFER_QUEUE_MAX; ++i)
GLuint vertexBufferID; {
nglGenBuffers(1, &vertexBufferID); m_VertexObjectId[i] = 0;
drv->_DriverGLStates.forceBindARBVertexBuffer(vertexBufferID); m_FrameInFlight[i] = NL3D_GL3_BUFFER_NOT_IN_FLIGHT;
}
// Create ids
GLsizei nbBuff = vbgl3BufferForType(preferred);
nglGenBuffers(nbBuff, m_VertexObjectId);
// Initialize // Initialize
for (GLsizei i = 0; i < nbBuff; ++i)
{
drv->_DriverGLStates.forceBindARBVertexBuffer(m_VertexObjectId[i]);
nglBufferData(GL_ARRAY_BUFFER, size, NULL, drv->vertexBufferUsageGL3(preferred)); nglBufferData(GL_ARRAY_BUFFER, size, NULL, drv->vertexBufferUsageGL3(preferred));
m_VertexObjectId = vertexBufferID;
m_MemType = preferred;
// Unbind
drv->_DriverGLStates.forceBindARBVertexBuffer(0); drv->_DriverGLStates.forceBindARBVertexBuffer(0);
} }
}
// *************************************************************************** // ***************************************************************************
@ -77,17 +99,24 @@ CVertexBufferGL3::~CVertexBufferGL3()
H_AUTO_OGL(CVertexBufferGLARB_CVertexBufferGLARBDtor) H_AUTO_OGL(CVertexBufferGLARB_CVertexBufferGLARBDtor)
if (m_Driver && m_VertexObjectId) if (m_Driver && m_VertexObjectId)
{ {
if (m_Driver->_DriverGLStates.getCurrBoundARBVertexBuffer() == m_VertexObjectId) GLsizei nbBuff = vbgl3BufferForType(m_MemType);
for (GLsizei i = 0; i < nbBuff; ++i)
{
if (m_Driver->_DriverGLStates.getCurrBoundARBVertexBuffer() == m_VertexObjectId[i])
{ {
m_Driver->_DriverGLStates.forceBindARBVertexBuffer(0); m_Driver->_DriverGLStates.forceBindARBVertexBuffer(0);
} }
} }
if (m_VertexObjectId) }
for (GLsizei i = 0; i < NL3D_GL3_BUFFER_QUEUE_MAX; ++i)
{ {
GLuint id = (GLuint)m_VertexObjectId; if (m_VertexObjectId[i])
{
GLuint id = m_VertexObjectId[i];
nlassert(nglIsBuffer(id)); nlassert(nglIsBuffer(id));
nglDeleteBuffers(1, &id); nglDeleteBuffers(1, &id);
} }
}
if (m_Driver) if (m_Driver)
{ {
if (m_Invalid) if (m_Invalid)
@ -118,28 +147,33 @@ void *CVertexBufferGL3::lock()
nlassert(!m_DummyVB.empty()); nlassert(!m_DummyVB.empty());
return &m_DummyVB[0]; return &m_DummyVB[0];
} }
// recreate a vb
GLuint vertexBufferID;
// Create ids
glGetError(); glGetError();
nglGenBuffers(1, &vertexBufferID); GLsizei nbBuff = vbgl3BufferForType(m_MemType);
nglGenBuffers(nbBuff, m_VertexObjectId);
if (glGetError() != GL_NO_ERROR) if (glGetError() != GL_NO_ERROR)
{ {
m_Driver->incrementResetCounter(); m_Driver->incrementResetCounter();
return &m_DummyVB[0]; return &m_DummyVB[0];
} }
m_Driver->_DriverGLStates.forceBindARBVertexBuffer(vertexBufferID);
for (GLsizei i = 0; i < nbBuff; ++i)
{
m_Driver->_DriverGLStates.forceBindARBVertexBuffer(m_VertexObjectId[i]);
nglBufferData(GL_ARRAY_BUFFER, size, NULL, m_Driver->vertexBufferUsageGL3(m_MemType)); nglBufferData(GL_ARRAY_BUFFER, size, NULL, m_Driver->vertexBufferUsageGL3(m_MemType));
m_Driver->_DriverGLStates.forceBindARBVertexBuffer(0);
if (glGetError() != GL_NO_ERROR) if (glGetError() != GL_NO_ERROR)
{ {
m_Driver->incrementResetCounter(); m_Driver->incrementResetCounter();
nglDeleteBuffers(1, &vertexBufferID); nglDeleteBuffers(1, &m_VertexObjectId[i]);
return &m_DummyVB[0];; return &m_DummyVB[0];
}
} }
m_VertexObjectId = vertexBufferID;
NLMISC::contReset(m_DummyVB); // free vector memory for real NLMISC::contReset(m_DummyVB); // free vector memory for real
nlassert(m_VertexObjectId); nlassert(m_VertexObjectId[m_CurrentIndex]);
m_Invalid = false; m_Invalid = false;
m_Driver->_LostVBList.erase(m_IteratorInLostVBList); m_Driver->_LostVBList.erase(m_IteratorInLostVBList);
// continue to standard mapping code below .. // continue to standard mapping code below ..
@ -149,7 +183,6 @@ void *CVertexBufferGL3::lock()
{ {
beforeLock= CTime::getPerformanceTime(); beforeLock= CTime::getPerformanceTime();
} }
m_Driver->_DriverGLStates.bindARBVertexBuffer(m_VertexObjectId);
// m_VertexPtr = nglMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY); // m_VertexPtr = nglMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY);
@ -165,16 +198,43 @@ void *CVertexBufferGL3::lock()
{ {
case CVertexBuffer::AGPVolatile: case CVertexBuffer::AGPVolatile:
case CVertexBuffer::RAMVolatile: case CVertexBuffer::RAMVolatile:
{
if (m_CurrentInFlight)
{
++m_CurrentIndex;
m_CurrentIndex %= NL3D_GL3_BUFFER_QUEUE_MAX;
m_CurrentInFlight = false;
}
m_Driver->_DriverGLStates.bindARBVertexBuffer(m_VertexObjectId[m_CurrentIndex]);
if (m_FrameInFlight[m_CurrentIndex] != NL3D_GL3_BUFFER_NOT_IN_FLIGHT
&& m_FrameInFlight[m_CurrentIndex] >= m_Driver->getSwapBufferInFlight())
{
#if NL3D_GL3_VERTEX_BUFFER_INFLIGHT_DEBUG
++m_InvalidateCount;
nldebug("GL: Vertex buffer already in flight (reused: %u, invalidated: %u)", m_ReuseCount, m_InvalidateCount);
#endif
// NOTE: GL_MAP_INVALIDATE_BUFFER_BIT removes the cost of waiting for synchronization (major performance impact), // NOTE: GL_MAP_INVALIDATE_BUFFER_BIT removes the cost of waiting for synchronization (major performance impact),
// but adds the cost of allocating a new buffer (which has a much lower performance impact) // but adds the cost of allocating a new buffer (which has a much lower performance impact)
m_VertexPtr = nglMapBufferRange(GL_ARRAY_BUFFER, 0, size, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT); m_VertexPtr = nglMapBufferRange(GL_ARRAY_BUFFER, 0, size, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT);
}
else
{
#if NL3D_GL3_VERTEX_BUFFER_INFLIGHT_DEBUG
++m_ReuseCount;
nldebug("GL: Vertex buffer can be reused (reused: %u, invalidated: %u)", m_ReuseCount, m_InvalidateCount);
#endif
m_VertexPtr = nglMapBufferRange(GL_ARRAY_BUFFER, 0, size, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
}
break; break;
}
case CVertexBuffer::RAMPreferred: case CVertexBuffer::RAMPreferred:
m_Driver->_DriverGLStates.bindARBVertexBuffer(m_VertexObjectId[m_CurrentIndex]);
// m_VertexPtr = nglMapBufferRange(GL_ARRAY_BUFFER, 0, size, GL_MAP_WRITE_BIT | GL_MAP_READ_BIT | GL_MAP_PERSISTENT | GL_MAP_COHERENT); // m_VertexPtr = nglMapBufferRange(GL_ARRAY_BUFFER, 0, size, GL_MAP_WRITE_BIT | GL_MAP_READ_BIT | GL_MAP_PERSISTENT | GL_MAP_COHERENT);
// NOTE: Persistent / Coherent is only available in OpenGL 4.4 (2013/2014 hardware with recent drivers) // NOTE: Persistent / Coherent is only available in OpenGL 4.4 (2013/2014 hardware with recent drivers)
m_VertexPtr = nglMapBuffer(GL_ARRAY_BUFFER, GL_READ_WRITE); m_VertexPtr = nglMapBuffer(GL_ARRAY_BUFFER, GL_READ_WRITE);
break; break;
default: default:
m_Driver->_DriverGLStates.bindARBVertexBuffer(m_VertexObjectId[m_CurrentIndex]);
// m_VertexPtr = nglMapBufferRange(GL_ARRAY_BUFFER, 0, size, GL_MAP_WRITE_BIT); // m_VertexPtr = nglMapBufferRange(GL_ARRAY_BUFFER, 0, size, GL_MAP_WRITE_BIT);
m_VertexPtr = nglMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY); m_VertexPtr = nglMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY);
break; break;
@ -183,7 +243,7 @@ void *CVertexBufferGL3::lock()
if (!m_VertexPtr) if (!m_VertexPtr)
{ {
nglUnmapBuffer(GL_ARRAY_BUFFER); nglUnmapBuffer(GL_ARRAY_BUFFER);
nlassert(nglIsBuffer((GLuint)m_VertexObjectId)); nlassert(nglIsBuffer(m_VertexObjectId[m_CurrentIndex]));
invalidate(); invalidate();
return &m_DummyVB[0]; return &m_DummyVB[0];
} }
@ -211,13 +271,13 @@ void CVertexBufferGL3::unlock()
m_VertexPtr = NULL; m_VertexPtr = NULL;
if (m_Invalid) return; if (m_Invalid) return;
if (!m_VertexObjectId) return; if (!m_VertexObjectId[m_CurrentIndex]) return;
TTicks beforeLock = 0; TTicks beforeLock = 0;
if (m_Driver->_VBHardProfiling) if (m_Driver->_VBHardProfiling)
{ {
beforeLock= CTime::getPerformanceTime(); beforeLock= CTime::getPerformanceTime();
} }
m_Driver->_DriverGLStates.bindARBVertexBuffer(m_VertexObjectId); m_Driver->_DriverGLStates.bindARBVertexBuffer(m_VertexObjectId[m_CurrentIndex]);
// double start = CTime::ticksToSecond(CTime::getPerformanceTime()); // double start = CTime::ticksToSecond(CTime::getPerformanceTime());
#ifdef NL_DEBUG #ifdef NL_DEBUG
_Unmapping = true; _Unmapping = true;
@ -288,10 +348,11 @@ void CVertexBufferGL3::disable()
// *************************************************************************** // ***************************************************************************
void CVertexBufferGL3::setupVBInfos(CVertexBufferInfo &vb) GLuint CVertexBufferGL3::getGLuint()
{ {
H_AUTO_OGL(CVertexBufferGLARB_setupVBInfos) H_AUTO_OGL(CVertexBufferGLARB_getGLuint);
vb.VertexObjectId = m_VertexObjectId;
return m_VertexObjectId[m_CurrentIndex];
} }
// *************************************************************************** // ***************************************************************************
@ -300,7 +361,9 @@ void CVertexBufferGL3::setFrameInFlight(uint64 swapBufferCounter)
{ {
H_AUTO_OGL(CVertexBufferGL3_setFrameInFlight); H_AUTO_OGL(CVertexBufferGL3_setFrameInFlight);
m_FrameInFlight = swapBufferCounter; // Set buffer frame in flight
m_FrameInFlight[m_CurrentIndex] = swapBufferCounter;
m_CurrentInFlight = true;
} }
// *************************************************************************** // ***************************************************************************
@ -507,11 +570,11 @@ void CVertexBufferAMDPinned::disable()
// *************************************************************************** // ***************************************************************************
void CVertexBufferAMDPinned::setupVBInfos(CVertexBufferInfo &vb) GLuint CVertexBufferAMDPinned::getGLuint()
{ {
H_AUTO_OGL(CVertexBufferAMDPinned_setupVBInfos); H_AUTO_OGL(CVertexBufferAMDPinned_getGLuint);
vb.VertexObjectId = m_VertexObjectId; return m_VertexObjectId;
} }
// *************************************************************************** // ***************************************************************************

@ -22,6 +22,8 @@
namespace NL3D { namespace NL3D {
namespace NLDRIVERGL3 { namespace NLDRIVERGL3 {
#define NL3D_GL3_VERTEX_BUFFER_INFLIGHT_DEBUG 0
class CDriverGL3; class CDriverGL3;
class IVertexBufferGL3; class IVertexBufferGL3;
class CVertexBufferInfo; class CVertexBufferInfo;
@ -41,7 +43,7 @@ public:
virtual void *getPointer() = 0; virtual void *getPointer() = 0;
virtual void enable() = 0; virtual void enable() = 0;
virtual void disable() = 0; virtual void disable() = 0;
virtual void setupVBInfos(CVertexBufferInfo &vb) = 0; virtual GLuint getGLuint() = 0;
virtual void setFrameInFlight(uint64 swapBufferCounter) = 0; virtual void setFrameInFlight(uint64 swapBufferCounter) = 0;
// test if buffer content is invalid. If so, no rendering should occurs (rendering should silently fail) // test if buffer content is invalid. If so, no rendering should occurs (rendering should silently fail)
@ -71,7 +73,7 @@ public:
virtual void *getPointer(); virtual void *getPointer();
virtual void enable(); virtual void enable();
virtual void disable(); virtual void disable();
virtual void setupVBInfos(CVertexBufferInfo &vb); virtual GLuint getGLuint();
virtual void setFrameInFlight(uint64 swapBufferCounter); virtual void setFrameInFlight(uint64 swapBufferCounter);
// @} // @}
@ -89,9 +91,15 @@ private:
// for use by CVertexArrayRange // for use by CVertexArrayRange
std::list<CVertexBufferGL3*>::iterator m_IteratorInLostVBList; std::list<CVertexBufferGL3*>::iterator m_IteratorInLostVBList;
uint m_VertexObjectId; GLuint m_VertexObjectId[NL3D_GL3_BUFFER_QUEUE_MAX];
uint64 m_FrameInFlight[NL3D_GL3_BUFFER_QUEUE_MAX];
GLsizei m_CurrentIndex;
bool m_CurrentInFlight;
uint64 m_FrameInFlight; // TODO: Array of sz NL3D_GL3_BUFFER_QUEUE_MAX #if NL3D_GL3_VERTEX_BUFFER_INFLIGHT_DEBUG
uint32 m_ReuseCount;
uint32 m_InvalidateCount;
#endif
}; };
class CVertexBufferAMDPinned : public IVertexBufferGL3 class CVertexBufferAMDPinned : public IVertexBufferGL3
@ -108,7 +116,7 @@ public:
virtual void *getPointer(); virtual void *getPointer();
virtual void enable(); virtual void enable();
virtual void disable(); virtual void disable();
virtual void setupVBInfos(CVertexBufferInfo &vb); virtual GLuint getGLuint();
virtual void setFrameInFlight(uint64 swapBufferCounter); virtual void setFrameInFlight(uint64 swapBufferCounter);
// @} // @}

Loading…
Cancel
Save