Use unsynchronized triple buffering for volatile vertex buffers

--HG--
branch : opengl3
hg/feature/opengl3
kaetemi 10 years ago
parent 25994fd86b
commit aab0e3b0d4

@ -314,7 +314,7 @@ CDriverGL3::CDriverGL3()
_CurrentOcclusionQuery = NULL;
_SwapBufferCounter = 0;
_SwapBufferInFlight = 0;
for (size_t i = 0; i < NL3D_GL3_BUFFER_QUEUE_MAX; ++i)
for (size_t i = 0; i < NL3D_GL3_FRAME_QUEUE_MAX; ++i)
_SwapBufferSync[i] = 0;
_LightMapDynamicLightEnabled = false;
@ -596,7 +596,7 @@ bool CDriverGL3::swapBuffers()
H_AUTO_OGL(CDriverGL3_swapBuffers);
// Set fence
size_t syncI = _SwapBufferCounter % NL3D_GL3_BUFFER_QUEUE_MAX;
size_t syncI = _SwapBufferCounter % NL3D_GL3_FRAME_QUEUE_MAX;
if (_SwapBufferSync[syncI]) // Wait for oldest fence, if this is still in flight
{
#if NL3D_GL3_FRAME_IN_FLIGHT_DEBUG
@ -692,9 +692,9 @@ bool CDriverGL3::swapBuffers()
updateLostBuffers();
// Check in flight buffers, also checks the current one
for (size_t i = 0; i < NL3D_GL3_BUFFER_QUEUE_MAX; ++i)
for (size_t i = 0; i < NL3D_GL3_FRAME_QUEUE_MAX; ++i)
{
size_t syncJ = (syncI + 1 + i) % NL3D_GL3_BUFFER_QUEUE_MAX;
size_t syncJ = (syncI + 1 + i) % NL3D_GL3_FRAME_QUEUE_MAX;
if (_SwapBufferSync[syncJ]) // If there's a frame in flight
{
GLint status = 0;
@ -760,7 +760,7 @@ bool CDriverGL3::release()
_SwapBufferCounter = 0;
_SwapBufferInFlight = 0;
for (size_t i = 0; i < NL3D_GL3_BUFFER_QUEUE_MAX; ++i)
for (size_t i = 0; i < NL3D_GL3_FRAME_QUEUE_MAX; ++i)
{
if (_SwapBufferSync[i])
{

@ -82,7 +82,8 @@ using NLMISC::CMatrix;
using NLMISC::CVector;
#define NL3D_GL3_BUFFER_NOT_IN_FLIGHT (std::numeric_limits<uint64>::max())
#define NL3D_GL3_BUFFER_QUEUE_MAX (3) // Maximum is three frames behind
#define NL3D_GL3_FRAME_QUEUE_MAX (2) // Maximum is three frames processing (2 frames backlog + current frame)
#define NL3D_GL3_BUFFER_QUEUE_MAX (NL3D_GL3_FRAME_QUEUE_MAX + 1) // Additional buffer for current working
namespace NL3D {
namespace NLDRIVERGL3 {
@ -266,7 +267,7 @@ public:
}
void setupVertexBuffer(CVertexBuffer &vb);
void setupVertexBufferHard(IVertexBufferGL3 &vb);
// void setupVertexBufferHard(IVertexBufferGL3 &vb);
};
@ -1348,7 +1349,7 @@ protected:
uint64 _SwapBufferCounter;
private:
uint64 _SwapBufferInFlight;
GLsync _SwapBufferSync[NL3D_GL3_BUFFER_QUEUE_MAX];
GLsync _SwapBufferSync[NL3D_GL3_FRAME_QUEUE_MAX];
public:
void incrementResetCounter() { ++_ResetCounter; }
bool isWndActive() const { return _WndActive; }

@ -299,11 +299,17 @@ void CDriverGL3::updateLostBuffers()
{
for (std::list<CVertexBufferGL3 *>::iterator it = _LostVBList.begin(); it != _LostVBList.end(); ++it)
{
nlassert((*it)->m_VertexObjectId);
GLuint id = (GLuint) (*it)->m_VertexObjectId;
nlassert(nglIsBuffer(id));
nglDeleteBuffers(1, &id);
(*it)->m_VertexObjectId = 0;
nlassert((*it)->m_VertexObjectId[0]);
for (GLsizei i = 0; i < NL3D_GL3_BUFFER_QUEUE_MAX; ++i)
{
GLuint id = (*it)->m_VertexObjectId[i];
if (id)
{
nlassert(nglIsBuffer(id));
nglDeleteBuffers(1, &id);
(*it)->m_VertexObjectId[i] = 0;
}
}
(*it)->VB->setLocation(CVertexBuffer::NotResident);
}
_LostVBList.clear();
@ -481,10 +487,11 @@ void CVertexBufferInfo::setupVertexBuffer(CVertexBuffer &vb)
CVertexBufferReadWrite access;
uint8 *ptr;
CVBDrvInfosGL3 *info= safe_cast<CVBDrvInfosGL3*>((IVBDrvInfos*)vb.DrvInfos);
nlassert (info);
nlassert(info);
nlassert(info->_VBHard);
ptr = (uint8*)info->_VBHard->getPointer();
info->_VBHard->setupVBInfos(*this);
VertexObjectId = info->_VBHard->getGLuint();
// Get value pointer
for (i=0; i<CVertexBuffer::NumValue; i++)

@ -48,26 +48,48 @@ IVertexBufferGL3::~IVertexBufferGL3()
// ***************************************************************************
// ***************************************************************************
// Returns how many GL buffer objects back a vertex buffer of the given
// preferred memory type: the volatile types (AGPVolatile, RAMVolatile) use
// NL3D_GL3_BUFFER_QUEUE_MAX buffers, every other type uses a single one.
static inline GLsizei vbgl3BufferForType(CVertexBuffer::TPreferredMemory mem)
{
	const bool isVolatile = (mem == CVertexBuffer::AGPVolatile) || (mem == CVertexBuffer::RAMVolatile);
	return isVolatile ? NL3D_GL3_BUFFER_QUEUE_MAX : 1;
}
CVertexBufferGL3::CVertexBufferGL3(CDriverGL3 *drv, uint size, uint numVertices, CVertexBuffer::TPreferredMemory preferred, CVertexBuffer *vb)
: IVertexBufferGL3(drv, vb, IVertexBufferGL3::GL3),
m_VertexPtr(NULL),
m_VertexObjectId(0),
m_FrameInFlight(NL3D_GL3_BUFFER_NOT_IN_FLIGHT)
m_CurrentIndex(0),
m_CurrentInFlight(false),
#if NL3D_GL3_VERTEX_BUFFER_INFLIGHT_DEBUG
m_ReuseCount(0),
m_InvalidateCount(0),
#endif
m_MemType(preferred)
{
H_AUTO_OGL(CVertexBufferGLARB_CVertexBufferGLARB)
H_AUTO_OGL(CVertexBufferGLARB_CVertexBufferGLARB);
// Create id and bind
GLuint vertexBufferID;
nglGenBuffers(1, &vertexBufferID);
drv->_DriverGLStates.forceBindARBVertexBuffer(vertexBufferID);
for (GLsizei i = 0; i < NL3D_GL3_BUFFER_QUEUE_MAX; ++i)
{
m_VertexObjectId[i] = 0;
m_FrameInFlight[i] = NL3D_GL3_BUFFER_NOT_IN_FLIGHT;
}
// Initialize
nglBufferData(GL_ARRAY_BUFFER, size, NULL, drv->vertexBufferUsageGL3(preferred));
m_VertexObjectId = vertexBufferID;
m_MemType = preferred;
// Create ids
GLsizei nbBuff = vbgl3BufferForType(preferred);
nglGenBuffers(nbBuff, m_VertexObjectId);
// Unbind
drv->_DriverGLStates.forceBindARBVertexBuffer(0);
// Initialize
for (GLsizei i = 0; i < nbBuff; ++i)
{
drv->_DriverGLStates.forceBindARBVertexBuffer(m_VertexObjectId[i]);
nglBufferData(GL_ARRAY_BUFFER, size, NULL, drv->vertexBufferUsageGL3(preferred));
drv->_DriverGLStates.forceBindARBVertexBuffer(0);
}
}
// ***************************************************************************
@ -77,16 +99,23 @@ CVertexBufferGL3::~CVertexBufferGL3()
H_AUTO_OGL(CVertexBufferGLARB_CVertexBufferGLARBDtor)
if (m_Driver && m_VertexObjectId)
{
if (m_Driver->_DriverGLStates.getCurrBoundARBVertexBuffer() == m_VertexObjectId)
GLsizei nbBuff = vbgl3BufferForType(m_MemType);
for (GLsizei i = 0; i < nbBuff; ++i)
{
m_Driver->_DriverGLStates.forceBindARBVertexBuffer(0);
if (m_Driver->_DriverGLStates.getCurrBoundARBVertexBuffer() == m_VertexObjectId[i])
{
m_Driver->_DriverGLStates.forceBindARBVertexBuffer(0);
}
}
}
if (m_VertexObjectId)
for (GLsizei i = 0; i < NL3D_GL3_BUFFER_QUEUE_MAX; ++i)
{
GLuint id = (GLuint)m_VertexObjectId;
nlassert(nglIsBuffer(id));
nglDeleteBuffers(1, &id);
if (m_VertexObjectId[i])
{
GLuint id = m_VertexObjectId[i];
nlassert(nglIsBuffer(id));
nglDeleteBuffers(1, &id);
}
}
if (m_Driver)
{
@ -118,28 +147,33 @@ void *CVertexBufferGL3::lock()
nlassert(!m_DummyVB.empty());
return &m_DummyVB[0];
}
// recreate a vb
GLuint vertexBufferID;
// Create ids
glGetError();
nglGenBuffers(1, &vertexBufferID);
GLsizei nbBuff = vbgl3BufferForType(m_MemType);
nglGenBuffers(nbBuff, m_VertexObjectId);
if (glGetError() != GL_NO_ERROR)
{
m_Driver->incrementResetCounter();
return &m_DummyVB[0];
}
m_Driver->_DriverGLStates.forceBindARBVertexBuffer(vertexBufferID);
nglBufferData(GL_ARRAY_BUFFER, size, NULL, m_Driver->vertexBufferUsageGL3(m_MemType));
if (glGetError() != GL_NO_ERROR)
for (GLsizei i = 0; i < nbBuff; ++i)
{
m_Driver->incrementResetCounter();
nglDeleteBuffers(1, &vertexBufferID);
return &m_DummyVB[0];;
m_Driver->_DriverGLStates.forceBindARBVertexBuffer(m_VertexObjectId[i]);
nglBufferData(GL_ARRAY_BUFFER, size, NULL, m_Driver->vertexBufferUsageGL3(m_MemType));
m_Driver->_DriverGLStates.forceBindARBVertexBuffer(0);
if (glGetError() != GL_NO_ERROR)
{
m_Driver->incrementResetCounter();
nglDeleteBuffers(1, &m_VertexObjectId[i]);
return &m_DummyVB[0];
}
}
m_VertexObjectId = vertexBufferID;
NLMISC::contReset(m_DummyVB); // free vector memory for real
nlassert(m_VertexObjectId);
nlassert(m_VertexObjectId[m_CurrentIndex]);
m_Invalid = false;
m_Driver->_LostVBList.erase(m_IteratorInLostVBList);
// continue to standard mapping code below ..
@ -149,7 +183,6 @@ void *CVertexBufferGL3::lock()
{
beforeLock= CTime::getPerformanceTime();
}
m_Driver->_DriverGLStates.bindARBVertexBuffer(m_VertexObjectId);
// m_VertexPtr = nglMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY);
@ -165,16 +198,43 @@ void *CVertexBufferGL3::lock()
{
case CVertexBuffer::AGPVolatile:
case CVertexBuffer::RAMVolatile:
// NOTE: GL_MAP_INVALIDATE_BUFFER_BIT removes the cost of waiting for synchronization (major performance impact),
// but adds the cost of allocating a new buffer (which has a much lower performance impact)
m_VertexPtr = nglMapBufferRange(GL_ARRAY_BUFFER, 0, size, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT);
{
if (m_CurrentInFlight)
{
++m_CurrentIndex;
m_CurrentIndex %= NL3D_GL3_BUFFER_QUEUE_MAX;
m_CurrentInFlight = false;
}
m_Driver->_DriverGLStates.bindARBVertexBuffer(m_VertexObjectId[m_CurrentIndex]);
if (m_FrameInFlight[m_CurrentIndex] != NL3D_GL3_BUFFER_NOT_IN_FLIGHT
&& m_FrameInFlight[m_CurrentIndex] >= m_Driver->getSwapBufferInFlight())
{
#if NL3D_GL3_VERTEX_BUFFER_INFLIGHT_DEBUG
++m_InvalidateCount;
nldebug("GL: Vertex buffer already in flight (reused: %u, invalidated: %u)", m_ReuseCount, m_InvalidateCount);
#endif
// NOTE: GL_MAP_INVALIDATE_BUFFER_BIT removes the cost of waiting for synchronization (major performance impact),
// but adds the cost of allocating a new buffer (which has a much lower performance impact)
m_VertexPtr = nglMapBufferRange(GL_ARRAY_BUFFER, 0, size, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT);
}
else
{
#if NL3D_GL3_VERTEX_BUFFER_INFLIGHT_DEBUG
++m_ReuseCount;
nldebug("GL: Vertex buffer can be reused (reused: %u, invalidated: %u)", m_ReuseCount, m_InvalidateCount);
#endif
m_VertexPtr = nglMapBufferRange(GL_ARRAY_BUFFER, 0, size, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
}
break;
}
case CVertexBuffer::RAMPreferred:
m_Driver->_DriverGLStates.bindARBVertexBuffer(m_VertexObjectId[m_CurrentIndex]);
// m_VertexPtr = nglMapBufferRange(GL_ARRAY_BUFFER, 0, size, GL_MAP_WRITE_BIT | GL_MAP_READ_BIT | GL_MAP_PERSISTENT | GL_MAP_COHERENT);
// NOTE: Persistent / Coherent is only available in OpenGL 4.4 (2013/2014 hardware with recent drivers)
m_VertexPtr = nglMapBuffer(GL_ARRAY_BUFFER, GL_READ_WRITE);
break;
default:
m_Driver->_DriverGLStates.bindARBVertexBuffer(m_VertexObjectId[m_CurrentIndex]);
// m_VertexPtr = nglMapBufferRange(GL_ARRAY_BUFFER, 0, size, GL_MAP_WRITE_BIT);
m_VertexPtr = nglMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY);
break;
@ -183,7 +243,7 @@ void *CVertexBufferGL3::lock()
if (!m_VertexPtr)
{
nglUnmapBuffer(GL_ARRAY_BUFFER);
nlassert(nglIsBuffer((GLuint)m_VertexObjectId));
nlassert(nglIsBuffer(m_VertexObjectId[m_CurrentIndex]));
invalidate();
return &m_DummyVB[0];
}
@ -211,13 +271,13 @@ void CVertexBufferGL3::unlock()
m_VertexPtr = NULL;
if (m_Invalid) return;
if (!m_VertexObjectId) return;
if (!m_VertexObjectId[m_CurrentIndex]) return;
TTicks beforeLock = 0;
if (m_Driver->_VBHardProfiling)
{
beforeLock= CTime::getPerformanceTime();
}
m_Driver->_DriverGLStates.bindARBVertexBuffer(m_VertexObjectId);
m_Driver->_DriverGLStates.bindARBVertexBuffer(m_VertexObjectId[m_CurrentIndex]);
// double start = CTime::ticksToSecond(CTime::getPerformanceTime());
#ifdef NL_DEBUG
_Unmapping = true;
@ -271,7 +331,7 @@ void CVertexBufferGL3::enable()
H_AUTO_OGL(CVertexBufferGLARB_enable)
if (m_Driver->_CurrentVertexBufferGL != this)
{
m_Driver->_CurrentVertexBufferGL= this;
m_Driver->_CurrentVertexBufferGL = this;
}
}
@ -282,16 +342,17 @@ void CVertexBufferGL3::disable()
H_AUTO_OGL(CVertexBufferGLARB_disable)
if (m_Driver->_CurrentVertexBufferGL != NULL)
{
m_Driver->_CurrentVertexBufferGL= NULL;
m_Driver->_CurrentVertexBufferGL = NULL;
}
}
// ***************************************************************************
void CVertexBufferGL3::setupVBInfos(CVertexBufferInfo &vb)
GLuint CVertexBufferGL3::getGLuint()
{
H_AUTO_OGL(CVertexBufferGLARB_setupVBInfos)
vb.VertexObjectId = m_VertexObjectId;
H_AUTO_OGL(CVertexBufferGLARB_getGLuint);
return m_VertexObjectId[m_CurrentIndex];
}
// ***************************************************************************
@ -300,7 +361,9 @@ void CVertexBufferGL3::setFrameInFlight(uint64 swapBufferCounter)
{
H_AUTO_OGL(CVertexBufferGL3_setFrameInFlight);
m_FrameInFlight = swapBufferCounter;
// Set buffer frame in flight
m_FrameInFlight[m_CurrentIndex] = swapBufferCounter;
m_CurrentInFlight = true;
}
// ***************************************************************************
@ -507,11 +570,11 @@ void CVertexBufferAMDPinned::disable()
// ***************************************************************************
void CVertexBufferAMDPinned::setupVBInfos(CVertexBufferInfo &vb)
GLuint CVertexBufferAMDPinned::getGLuint()
{
H_AUTO_OGL(CVertexBufferAMDPinned_setupVBInfos);
H_AUTO_OGL(CVertexBufferAMDPinned_getGLuint);
vb.VertexObjectId = m_VertexObjectId;
return m_VertexObjectId;
}
// ***************************************************************************

@ -22,6 +22,8 @@
namespace NL3D {
namespace NLDRIVERGL3 {
#define NL3D_GL3_VERTEX_BUFFER_INFLIGHT_DEBUG 0
class CDriverGL3;
class IVertexBufferGL3;
class CVertexBufferInfo;
@ -41,7 +43,7 @@ public:
virtual void *getPointer() = 0;
virtual void enable() = 0;
virtual void disable() = 0;
virtual void setupVBInfos(CVertexBufferInfo &vb) = 0;
virtual GLuint getGLuint() = 0;
virtual void setFrameInFlight(uint64 swapBufferCounter) = 0;
// test if buffer content is invalid. If so, no rendering should occur (rendering should silently fail)
@ -71,7 +73,7 @@ public:
virtual void *getPointer();
virtual void enable();
virtual void disable();
virtual void setupVBInfos(CVertexBufferInfo &vb);
virtual GLuint getGLuint();
virtual void setFrameInFlight(uint64 swapBufferCounter);
// @}
@ -89,9 +91,15 @@ private:
// for use by CVertexArrayRange
std::list<CVertexBufferGL3*>::iterator m_IteratorInLostVBList;
uint m_VertexObjectId;
GLuint m_VertexObjectId[NL3D_GL3_BUFFER_QUEUE_MAX];
uint64 m_FrameInFlight[NL3D_GL3_BUFFER_QUEUE_MAX];
GLsizei m_CurrentIndex;
bool m_CurrentInFlight;
uint64 m_FrameInFlight; // TODO: Array of sz NL3D_GL3_BUFFER_QUEUE_MAX
#if NL3D_GL3_VERTEX_BUFFER_INFLIGHT_DEBUG
uint32 m_ReuseCount;
uint32 m_InvalidateCount;
#endif
};
class CVertexBufferAMDPinned : public IVertexBufferGL3
@ -108,7 +116,7 @@ public:
virtual void *getPointer();
virtual void enable();
virtual void disable();
virtual void setupVBInfos(CVertexBufferInfo &vb);
virtual GLuint getGLuint();
virtual void setFrameInFlight(uint64 swapBufferCounter);
// @}

Loading…
Cancel
Save