GL3: Add experimental implementation for volatile buffers using pinned memory

--HG--
branch : opengl3
hg/feature/opengl3
kaetemi 11 years ago
parent aab353672c
commit 42936097ce

@ -262,6 +262,8 @@ CDriverGL3::CDriverGL3()
_UserTexMatEnabled = 0;
_AMDPinnedAllocator = NULL;
// reserve enough space to never reallocate, nor test for reallocation.
_LightMapLUT.resize(NL3D_DRV_MAX_LIGHTMAP);
// must set replace for alpha part.
@ -687,6 +689,8 @@ bool CDriverGL3::release()
// Make sure vertex buffers are really all gone
// FIXME VERTEXBUFFER
delete _AMDPinnedAllocator;
_AMDPinnedAllocator = NULL;
// destroy window and associated resources
destroyWindow();

@ -1127,15 +1127,19 @@ private:
CPtrSet<IVertexBufferGL3> _VertexBufferGLSet;
friend class CVertexBufferGL3;
friend class CVertexBufferAMDPinned;
friend class CVertexBufferAMDPinnedAllocator;
friend class CVertexBufferAMDPinnedVolatile;
friend class CVBDrvInfosGL3;
// The VertexBufferHardGL activated.
IVertexBufferGL3 *_CurrentVertexBufferGL;
IVertexBufferGL3 *_CurrentVertexBufferGL;
GLenum vertexBufferUsageGL3(CVertexBuffer::TPreferredMemory usage);
// Handle lost buffers
void updateLostBuffers();
std::list<CVertexBufferGL3 *> _LostVBList;
class CVertexBufferAMDPinnedAllocator *_AMDPinnedAllocator;
// @}

@ -170,6 +170,14 @@ PFNGLGETCOMPRESSEDTEXIMAGEPROC nglGetCompressedTexImage;
PFNGLBLENDCOLORPROC nglBlendColor;
PFNGLFENCESYNCPROC nglFenceSync;
PFNGLISSYNCPROC nglIsSync;
PFNGLDELETESYNCPROC nglDeleteSync;
PFNGLCLIENTWAITSYNCPROC nglClientWaitSync;
PFNGLWAITSYNCPROC nglWaitSync;
PFNGLGETINTEGER64VPROC nglGetInteger64v;
PFNGLGETSYNCIVPROC nglGetSynciv;
// GL_ARB_separate_shader_objects
PFNGLUSEPROGRAMSTAGESPROC nglUseProgramStages;
PFNGLACTIVESHADERPROGRAMPROC nglActiveShaderProgram;
@ -468,6 +476,14 @@ static bool setupGLCore(std::vector<const char *> &glext)
CHECK_ADDRESS(PFNGLBLENDCOLORPROC, glBlendColor);
CHECK_ADDRESS(PFNGLFENCESYNCPROC, glFenceSync);
CHECK_ADDRESS(PFNGLISSYNCPROC, glIsSync);
CHECK_ADDRESS(PFNGLDELETESYNCPROC, glDeleteSync);
CHECK_ADDRESS(PFNGLCLIENTWAITSYNCPROC, glClientWaitSync);
CHECK_ADDRESS(PFNGLWAITSYNCPROC, glWaitSync);
CHECK_ADDRESS(PFNGLGETINTEGER64VPROC, glGetInteger64v);
CHECK_ADDRESS(PFNGLGETSYNCIVPROC, glGetSynciv);
return true;
}

@ -262,6 +262,14 @@ extern PFNGLGETCOMPRESSEDTEXIMAGEPROC nglGetCompressedTexImage;
extern PFNGLBLENDCOLORPROC nglBlendColor;
extern PFNGLFENCESYNCPROC nglFenceSync;
extern PFNGLISSYNCPROC nglIsSync;
extern PFNGLDELETESYNCPROC nglDeleteSync;
extern PFNGLCLIENTWAITSYNCPROC nglClientWaitSync;
extern PFNGLWAITSYNCPROC nglWaitSync;
extern PFNGLGETINTEGER64VPROC nglGetInteger64v;
extern PFNGLGETSYNCIVPROC nglGetSynciv;
// GL_ARB_separate_shader_objects
extern PFNGLUSEPROGRAMSTAGESPROC nglUseProgramStages;
extern PFNGLACTIVESHADERPROGRAMPROC nglActiveShaderProgram;

@ -70,6 +70,9 @@ bool CDriverGL3::renderLines(CMaterial& mat, uint32 firstIndex, uint32 nlines)
_PrimitiveProfileIn.NLines+= nlines;
_PrimitiveProfileOut.NLines+= nlines;
if (_CurrentVertexBufferGL)
_CurrentVertexBufferGL->setFence();
return true;
}
@ -121,6 +124,9 @@ bool CDriverGL3::renderTriangles(CMaterial& mat, uint32 firstIndex, uint32 ntris
_PrimitiveProfileIn.NTriangles+= ntris;
_PrimitiveProfileOut.NTriangles+= ntris * nPass;
if (_CurrentVertexBufferGL)
_CurrentVertexBufferGL->setFence();
return true;
}
@ -154,6 +160,9 @@ bool CDriverGL3::renderSimpleTriangles(uint32 firstTri, uint32 ntris)
_PrimitiveProfileIn.NTriangles+= ntris;
_PrimitiveProfileOut.NTriangles+= ntris;
if (_CurrentVertexBufferGL)
_CurrentVertexBufferGL->setFence();
return true;
}
@ -192,6 +201,9 @@ bool CDriverGL3::renderRawPoints(CMaterial& mat, uint32 startIndex, uint32 numPo
_PrimitiveProfileIn.NPoints+= numPoints;
_PrimitiveProfileOut.NPoints+= numPoints * nPass;
if (_CurrentVertexBufferGL)
_CurrentVertexBufferGL->setFence();
return true;
}
@ -230,6 +242,9 @@ bool CDriverGL3::renderRawLines(CMaterial& mat, uint32 startIndex, uint32 numLin
_PrimitiveProfileIn.NLines += numLines ;
_PrimitiveProfileOut.NLines += numLines * nPass;
if (_CurrentVertexBufferGL)
_CurrentVertexBufferGL->setFence();
return true;
}
@ -270,6 +285,9 @@ bool CDriverGL3::renderRawTriangles(CMaterial& mat, uint32 startIndex, uint32 nu
_PrimitiveProfileIn.NTriangles += numTris ;
_PrimitiveProfileOut.NTriangles += numTris * nPass;
if (_CurrentVertexBufferGL)
_CurrentVertexBufferGL->setFence();
return true;
}
@ -390,6 +408,9 @@ bool CDriverGL3::renderRawQuads(CMaterial& mat, uint32 startIndex, uint32 numQua
_PrimitiveProfileIn.NQuads += numQuads ;
_PrimitiveProfileOut.NQuads += numQuads * nPass;
if (_CurrentVertexBufferGL)
_CurrentVertexBufferGL->setFence();
return true;
}

@ -137,6 +137,10 @@ bool CDriverGL3::setupVertexBuffer(CVertexBuffer& VB)
// TODO: Verify how the vegetation vb allocator handles RAMResident/AGPResident
location = CVertexBuffer::RAMResident;
}
else if (info->_VBHard->VBType == IVertexBufferGL3::AMDPinnedVolatile)
{
location = CVertexBuffer::RAMResident;
}
else
{
switch (preferred)
@ -283,6 +287,14 @@ IVertexBufferGL3 *CDriverGL3::createVertexBufferGL(uint size, uint numVertices,
{
result = new CVertexBufferAMDPinned(this, size, numVertices, preferred, vb);
}
/*else if (_Extensions.AMDPinnedMemory && (
preferred == CVertexBuffer::RAMVolatile
|| preferred == CVertexBuffer::AGPVolatile
))
{
// NOTE: Performance of this is lower...
result = new CVertexBufferAMDPinnedVolatile(this, size, numVertices, preferred, vb);
}*/
else
{
result = new CVertexBufferGL3(this, size, numVertices, preferred, vb);
@ -516,6 +528,11 @@ bool CDriverGL3::initVertexBufferHard(uint agpMem, uint vramMem)
{
H_AUTO_OGL(CDriverGL3_initVertexBufferHard)
if (_Extensions.AMDPinnedMemory)
{
_AMDPinnedAllocator = new CVertexBufferAMDPinnedAllocator(this);
}
return true;
}

@ -298,6 +298,15 @@ void CVertexBufferGL3::setupVBInfos(CVertexBufferInfo &vb)
// ***************************************************************************
void CVertexBufferGL3::setFence()
{
	H_AUTO_OGL(CVertexBufferGLARB_setFence)

	// Intentionally empty: this buffer type does not place a fence
	// after rendering.
}
// ***************************************************************************
void CVertexBufferGL3::invalidate()
{
H_AUTO_OGL(CVertexBufferGLARB_invalidate)
@ -339,7 +348,7 @@ CVertexBufferAMDPinned::CVertexBufferAMDPinned(CDriverGL3 *drv, uint size, uint
nglBufferData(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, size, m_VertexPtrAligned, drv->vertexBufferUsageGL3(preferred));
if (glGetError() == GL_INVALID_OPERATION)
{
nlerror("Failed to pin memory");
nlerror("GL3: Failed to pin memory");
nglDeleteBuffers(1, &vertexBufferID);
vertexBufferID = 0;
}
@ -506,6 +515,360 @@ void CVertexBufferAMDPinned::setupVBInfos(CVertexBufferInfo &vb)
vb.VertexObjectId = m_VertexObjectId;
}
// ***************************************************************************
void CVertexBufferAMDPinned::setFence()
{
	H_AUTO_OGL(CVertexBufferAMDPinned_setFence)

	// Intentionally empty: this buffer type does not place a fence
	// after rendering.
}
// ***************************************************************************
// ***************************************************************************
// ***************************************************************************
CVertexBufferAMDPinnedVolatile::CVertexBufferAMDPinnedVolatile(CDriverGL3 *drv, uint size, uint numVertices, CVertexBuffer::TPreferredMemory preferred, CVertexBuffer *vb)
	: IVertexBufferGL3(drv, vb, IVertexBufferGL3::AMDPinnedVolatile)
	, m_Block(NULL)
	, m_VertexPtr(NULL)
{
	H_AUTO_OGL(CVertexBufferAMDPinnedVolatile_CVertexBufferAMDPinned)

	// Nothing is allocated here: the buffer starts without a block and
	// unlocked. A block is only requested from the allocator in lock().
	// The size, numVertices and preferred arguments are not used.
}
// ***************************************************************************
CVertexBufferAMDPinnedVolatile::~CVertexBufferAMDPinnedVolatile()
{
	H_AUTO_OGL(CVertexBufferAMDPinnedVolatile_CVertexBufferAMDPinnedDtor)

	// Nothing to give back if no block was ever acquired.
	if (!m_Block)
		return;

	// Hand the block back to the driver's allocator pool.
	CVertexBufferAMDPinnedBlock *const released = m_Block;
	m_Driver->_AMDPinnedAllocator->free(released);
	m_Block = NULL;
}
// ***************************************************************************
void *CVertexBufferAMDPinnedVolatile::lock()
{
H_AUTO_OGL(CVertexBufferAMDPinnedVolatile_lock);
if (m_VertexPtr) // Already locked...
return m_VertexPtr;
// Profiling
TTicks beforeLock = 0;
if (m_Driver->_VBHardProfiling)
{
beforeLock = CTime::getPerformanceTime();
}
// Allocate if necessary
const uint size = VB->getNumVertices() * VB->getVertexSize();
if (m_Block)
{
// Verify size
uint index = 0;
uint sizeShift = size >> (NLDRV_GL3_AMD_PINNED_VOLATILE_SHIFT);
while (sizeShift)
{
sizeShift >>= 1;
++index;
}
if (m_Block->Bin != index)
{
// nldebug("GL3: Size inconsistency"); // NOTE: May have to handle this for other buffers...
// Wrong size, want a different block size
m_Driver->_AMDPinnedAllocator->free(m_Block);
m_Block = NULL;
}
}
if (m_Block)
{
// Check fence
if (m_Block->FenceId)
{
GLint status = 0;
nglGetSynciv(m_Block->FenceId, GL_SYNC_STATUS, 1, NULL, &status);
if (status == GL_SIGNALED)
{
// Can use this
nglDeleteSync(m_Block->FenceId);
m_Block->FenceId = 0;
}
else
{
// Not ready, get a different block
m_Driver->_AMDPinnedAllocator->free(m_Block);
m_Block = NULL;
}
}
}
if (!m_Block)
{
m_Block = m_Driver->_AMDPinnedAllocator->allocate(size);
}
// Lock
nlassert(m_Block);
m_VertexPtr = m_Block->Buffer;
nlassert(m_VertexPtr);
// Profiling
if (m_Driver->_VBHardProfiling)
{
TTicks afterLock;
afterLock= CTime::getPerformanceTime();
m_Driver->appendVBHardLockProfile(afterLock-beforeLock, VB);
}
return m_VertexPtr;
}
// ***************************************************************************
void CVertexBufferAMDPinnedVolatile::unlock()
{
	H_AUTO_OGL(CVertexBufferAMDPinnedVolatile_unlock);

	// Unlocking only forgets the locked pointer; the block stays attached.
	m_VertexPtr = NULL;

	if (m_Block)
	{
		// Nothing further is done for a held block. The explicit
		// unmap below is disabled and kept for reference only.

		// Profiling
		/*TTicks beforeLock = 0;
		if (m_Driver->_VBHardProfiling)
		{
			beforeLock = CTime::getPerformanceTime();
		}
		// Unlock
		m_Driver->_DriverGLStates.bindARBVertexBuffer(m_Block->VertexObjectId);
		nglUnmapBuffer(GL_ARRAY_BUFFER);
		m_Driver->_DriverGLStates.forceBindARBVertexBuffer(0);
		// Profiling
		if (m_Driver->_VBHardProfiling)
		{
			TTicks afterLock;
			afterLock= CTime::getPerformanceTime();
			m_Driver->appendVBHardLockProfile(afterLock-beforeLock, VB);
		}*/
	}
}
// ***************************************************************************
void *CVertexBufferAMDPinnedVolatile::getPointer()
{
H_AUTO_OGL(CVertexBufferAMDPinnedVolatile_getPointer)
return m_VertexPtr;
}
// ***************************************************************************
void CVertexBufferAMDPinnedVolatile::unlock(uint /* startVert */,uint /* endVert */)
{
	H_AUTO_OGL(CVertexBufferAMDPinnedVolatile_unlock)

	// The vertex range is ignored; this forwards to the full unlock.
	this->unlock();
}
// ***************************************************************************
void CVertexBufferAMDPinnedVolatile::enable()
{
H_AUTO_OGL(CVertexBufferAMDPinnedVolatile_enable)
if (m_Driver->_CurrentVertexBufferGL != this)
{
m_Driver->_CurrentVertexBufferGL = this;
}
}
// ***************************************************************************
void CVertexBufferAMDPinnedVolatile::disable()
{
H_AUTO_OGL(CVertexBufferAMDPinned_disable)
if (m_Driver->_CurrentVertexBufferGL != NULL)
{
m_Driver->_CurrentVertexBufferGL = NULL;
}
}
// ***************************************************************************
void CVertexBufferAMDPinnedVolatile::setupVBInfos(CVertexBufferInfo &vb)
{
	// Profiling label now names this class. It was copy-pasted from
	// CVertexBufferAMDPinned, which reported these timings under the wrong name.
	H_AUTO_OGL(CVertexBufferAMDPinnedVolatile_setupVBInfos)

	// m_Block is NULL until the first lock() acquires a block. Assert here,
	// as setFence() does, instead of dereferencing a null pointer.
	nlassert(m_Block);

	// Publish the GL buffer object of the block currently held.
	vb.VertexObjectId = m_Block->VertexObjectId;
}
// ***************************************************************************
void CVertexBufferAMDPinnedVolatile::setFence()
{
H_AUTO_OGL(CVertexBufferAMDPinnedVolatile_setFence)
// Set the fence
nlassert(m_Block);
if (m_Block->FenceId)
{
nglDeleteSync(m_Block->FenceId);
m_Block->FenceId = 0;
}
m_Block->FenceId = nglFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
nlassert(m_Block->FenceId);
}
// ***************************************************************************
// ***************************************************************************
// ***************************************************************************
CVertexBufferAMDPinnedAllocator::CVertexBufferAMDPinnedAllocator(CDriverGL3 *driver)
	: m_Driver(driver)
{
	H_AUTO_OGL(CVertexBufferAMDPinnedAllocator_ctor)

	// The per-bin pools start out empty; blocks are created on demand
	// by allocate().
}
// ***************************************************************************
CVertexBufferAMDPinnedAllocator::~CVertexBufferAMDPinnedAllocator()
{
	H_AUTO_OGL(CVertexBufferAMDPinnedAllocator_dtor)

	// Destroy every block still sitting in the pools, together with the
	// GL objects and the host memory each one owns.
	for (uint bin = 0; bin < NLDRV_GL3_AMD_PINNED_VOLATILE_BINS; ++bin)
	{
		while (!m_Pool[bin].empty())
		{
			CVertexBufferAMDPinnedBlock *block = m_Pool[bin].front();
			m_Pool[bin].pop();

			// A pooled block may still carry the fence set after its last
			// draw; delete it so the sync object is not leaked.
			if (block->FenceId)
			{
				nglDeleteSync(block->FenceId);
				block->FenceId = 0;
			}

			if (block->VertexObjectId)
			{
				// Unbind first if the driver state cache has this buffer bound.
				if (m_Driver && m_Driver->_DriverGLStates.getCurrBoundARBVertexBuffer() == block->VertexObjectId)
				{
					m_Driver->_DriverGLStates.forceBindARBVertexBuffer(0);
				}
				nlassert(nglIsBuffer(block->VertexObjectId));
				nglDeleteBuffers(1, &block->VertexObjectId);
			}

			// The backing store was allocated with new char[] in allocate(),
			// so release it with delete[] through a char pointer. Deleting
			// it through the void pointer is undefined behaviour.
			delete[] static_cast<char *>(block->Allocated);
			delete block;
		}
	}

	m_Driver = NULL;
}
// ***************************************************************************
CVertexBufferAMDPinnedBlock *CVertexBufferAMDPinnedAllocator::allocate(uint size)
{
	H_AUTO_OGL(CVertexBufferAMDPinnedAllocator_allocate)

	// Pick the bin: one step per significant bit of size above the base shift.
	uint bin = 0;
	for (uint remaining = size >> (NLDRV_GL3_AMD_PINNED_VOLATILE_SHIFT); remaining; remaining >>= 1)
		++bin;
	nlassert(bin < NLDRV_GL3_AMD_PINNED_VOLATILE_BINS); // Not implemented over 8MB

	// Try to recycle the oldest pooled block of this bin.
	std::queue<CVertexBufferAMDPinnedBlock *> &pool = m_Pool[bin];
	if (!pool.empty())
	{
		CVertexBufferAMDPinnedBlock *candidate = pool.front();
		bool reusable = (candidate->FenceId == 0); // no fence, nothing to check
		if (!reusable)
		{
			GLint status = 0;
			nlassert(nglIsSync(candidate->FenceId));
			nglGetSynciv(candidate->FenceId, GL_SYNC_STATUS, 1, NULL, &status);
			if (status == GL_SIGNALED)
			{
				// Fence has signaled, the block can be reused
				// nldebug("GL3: APV Reuse (idx %i, glid %i)", bin, candidate->VertexObjectId);
				nglDeleteSync(candidate->FenceId);
				candidate->FenceId = 0;
				reusable = true;
			}
			else
			{
				nlassert(status == GL_UNSIGNALED);
			}
		}
		if (reusable)
		{
			pool.pop();
			return candidate;
		}
	}
	else
	{
		// nldebug("GL3: Empty container");
	}

	// Nothing reusable: build a fresh block for this bin.
	CVertexBufferAMDPinnedBlock *fresh = new CVertexBufferAMDPinnedBlock();
	fresh->Bin = bin;

	// Host memory: block size plus 4096 bytes of slack for alignment.
	uint allocSize = 1 << (NLDRV_GL3_AMD_PINNED_VOLATILE_SHIFT + bin);
	static int totalAlloc = 0;
	totalAlloc += allocSize + 4096;
	nldebug("GL3: APV Alloc (%i -> %i, total: %i)", size, allocSize, totalAlloc);
	fresh->Allocated = new char[allocSize + 4096];
	nlassert(fresh->Allocated);

	// Round the start address up to the next 4096-byte boundary.
	const uintptr_t alignedAddr = ((uintptr_t)fresh->Allocated + 4095) & (~0xfff);
	void *addrAligned = (void *)alignedAddr;
	fresh->Buffer = addrAligned;

	// Create the GL buffer object and bind it to the pinned memory target
	nglGenBuffers(1, &fresh->VertexObjectId);
	nglBindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, fresh->VertexObjectId);

	// Give the aligned host memory to the buffer object
	nglBufferData(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, allocSize, addrAligned, GL_DYNAMIC_DRAW);
	if (glGetError() == GL_INVALID_OPERATION)
	{
		nlerror("GL3: Failed to pin memory (volatile)");
		nglDeleteBuffers(1, &fresh->VertexObjectId);
		fresh->VertexObjectId = 0;
	}

	// Unbind
	nglBindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, 0);

	// TEST LOCK (disabled)
	/*m_Driver->_DriverGLStates.bindARBVertexBuffer(newblock->VertexObjectId);
	// m_VertexPtr = nglMapBufferRange(GL_ARRAY_BUFFER, 0, size, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
	void *testptr = nglMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY);
	nlassert(testptr);
	m_Driver->_DriverGLStates.bindARBVertexBuffer(0);*/

	return fresh;
}
// ***************************************************************************
void CVertexBufferAMDPinnedAllocator::free(CVertexBufferAMDPinnedBlock *block)
{
	H_AUTO_OGL(CVertexBufferAMDPinnedAllocator_free)

	// Queue the block at the back of the pool for its own bin. Nothing is
	// destroyed here; any fence on the block is checked in allocate().
	// nldebug("GL3: Free block (glid: %i)", block->VertexObjectId);
	std::queue<CVertexBufferAMDPinnedBlock *> &pool = m_Pool[block->Bin];
	pool.push(block);
}
// ***************************************************************************
// ***************************************************************************
// ***************************************************************************

@ -19,6 +19,8 @@
#include "nel/misc/types_nl.h"
#include <queue>
namespace NL3D {
#ifdef NL_STATIC
@ -30,10 +32,14 @@ class IVertexBufferGL3;
class CVertexBufferInfo;
class CVertexBufferGL3;
// ***************************************************************************
// ***************************************************************************
// ***************************************************************************
class IVertexBufferGL3
{
public:
enum TVBType { GL3, AMDPinned };
enum TVBType { GL3, AMDPinned, AMDPinnedVolatile };
IVertexBufferGL3(CDriverGL3 *drv, CVertexBuffer *vb, TVBType vbType);
virtual ~IVertexBufferGL3();
@ -45,6 +51,7 @@ public:
virtual void enable() = 0;
virtual void disable() = 0;
virtual void setupVBInfos(CVertexBufferInfo &vb) = 0;
virtual void setFence() = 0; // Called after rendering with this buffer
// Test if the buffer content is invalid. If so, no rendering should occur (rendering should silently fail).
inline bool isInvalid() { return m_Invalid; }
@ -58,6 +65,10 @@ protected:
bool m_Invalid;
};
// ***************************************************************************
// ***************************************************************************
// ***************************************************************************
/* GL Core vertex buffer. */
class CVertexBufferGL3 : public IVertexBufferGL3
{
@ -74,6 +85,7 @@ public:
virtual void enable();
virtual void disable();
virtual void setupVBInfos(CVertexBufferInfo &vb);
virtual void setFence();
// @}
/// Invalidate the buffer (when it is lost, or when a lock fails)
@ -93,6 +105,10 @@ private:
uint m_VertexObjectId;
};
// ***************************************************************************
// ***************************************************************************
// ***************************************************************************
class CVertexBufferAMDPinned : public IVertexBufferGL3
{
public:
@ -108,6 +124,7 @@ public:
virtual void enable();
virtual void disable();
virtual void setupVBInfos(CVertexBufferInfo &vb);
virtual void setFence();
// @}
private:
@ -118,6 +135,69 @@ private:
uint m_VertexObjectId;
};
// ***************************************************************************
// ***************************************************************************
// ***************************************************************************
// Parameters of the size-binned pool used for volatile pinned vertex buffers.
// The allocator sizes the blocks of bin k as (1 << (SHIFT + k)) bytes, so the
// 12 bins run from 4096 bytes up to 8MB.
// NOTE(review): MAX and START are not referenced by the code visible here - confirm their users.
#define NLDRV_GL3_AMD_PINNED_VOLATILE_MAX (8388608 - 4096) // Works up to 8MB
#define NLDRV_GL3_AMD_PINNED_VOLATILE_BINS (12) // Number of size bins in the allocator pool
#define NLDRV_GL3_AMD_PINNED_VOLATILE_SHIFT (12) // Shift to start at 4096
#define NLDRV_GL3_AMD_PINNED_VOLATILE_START (4096) // Start at 4096
struct CVertexBufferAMDPinnedBlock
{
CVertexBufferAMDPinnedBlock()
: FenceId(0), VertexObjectId(0), Allocated(NULL), Buffer(NULL) { }
GLsync FenceId;
GLuint VertexObjectId;
void *Allocated;
void *Buffer;
uint Bin;
};
/// Pool of CVertexBufferAMDPinnedBlock objects, grouped in bins by block size.
/// The allocator owns every block currently held in its pools and destroys
/// them (GL buffer object and host memory) in its destructor.
class CVertexBufferAMDPinnedAllocator
{
public:
	CVertexBufferAMDPinnedAllocator(CDriverGL3 *driver);
	~CVertexBufferAMDPinnedAllocator();

	/// Return a block whose bin fits \c size, reusing a pooled block when its
	/// fence allows it and creating a new one otherwise.
	CVertexBufferAMDPinnedBlock *allocate(uint size);

	/// Put \c block back into the pool of its bin.
	void free(CVertexBufferAMDPinnedBlock *block);

private:
	// Not copyable: a copy would hold the same raw block pointers and delete
	// them a second time. Declared but intentionally left undefined.
	CVertexBufferAMDPinnedAllocator(const CVertexBufferAMDPinnedAllocator &);
	CVertexBufferAMDPinnedAllocator &operator=(const CVertexBufferAMDPinnedAllocator &);

	CDriverGL3 *m_Driver;
	std::queue<CVertexBufferAMDPinnedBlock *> m_Pool[NLDRV_GL3_AMD_PINNED_VOLATILE_BINS];
};
// ***************************************************************************
// ***************************************************************************
// ***************************************************************************
/// Volatile vertex buffer that borrows its storage from the driver's
/// CVertexBufferAMDPinnedAllocator. A block is acquired in lock() and may be
/// swapped for another one on a later lock() when the size bin changed or the
/// block's fence has not signaled yet.
class CVertexBufferAMDPinnedVolatile : public IVertexBufferGL3
{
public:
// The implementation does not use size, numVertices or preferred; no block is acquired at construction.
CVertexBufferAMDPinnedVolatile(CDriverGL3 *drv, uint size, uint numVertices, CVertexBuffer::TPreferredMemory preferred, CVertexBuffer *vb);
// Returns the held block, if any, to the allocator.
virtual ~CVertexBufferAMDPinnedVolatile();
/// \name Implementation
// @{
// Returns the buffer pointer of the held block, acquiring or swapping the block first if needed.
virtual void *lock();
// Clears the locked pointer; the block stays attached.
virtual void unlock();
// Same as unlock(); the vertex range is ignored.
virtual void unlock(uint startVert, uint endVert);
// Pointer set by lock(), NULL while unlocked.
virtual void *getPointer();
// Makes this buffer the driver's current vertex buffer.
virtual void enable();
// Clears the driver's current vertex buffer.
virtual void disable();
// Writes the GL buffer object id of the held block into vb.
virtual void setupVBInfos(CVertexBufferInfo &vb);
// Replaces the held block's fence with a new one; requires a held block.
virtual void setFence();
// @}
private:
// Block currently borrowed from the allocator, NULL before the first lock().
CVertexBufferAMDPinnedBlock *m_Block;
// Pointer handed out by lock(), NULL while unlocked.
void *m_VertexPtr;
};
#ifdef NL_STATIC
} // NLDRIVERGL3
#endif

Loading…
Cancel
Save