From 55aae0d138081e9c02568af29a20fa6f59532f31 Mon Sep 17 00:00:00 2001 From: kaetemi Date: Sun, 8 Jun 2014 15:48:57 +0200 Subject: [PATCH] Without sync but memory intensive, performance gain vs regular stream buffer only minimal --HG-- branch : opengl3 --- .../src/3d/driver/opengl3/driver_opengl.cpp | 4 + .../driver/opengl3/driver_opengl_vertex.cpp | 4 +- .../opengl3/driver_opengl_vertex_buffer.cpp | 168 +++++------------- .../opengl3/driver_opengl_vertex_buffer.h | 6 +- 4 files changed, 50 insertions(+), 132 deletions(-) diff --git a/code/nel/src/3d/driver/opengl3/driver_opengl.cpp b/code/nel/src/3d/driver/opengl3/driver_opengl.cpp index 8e29cba9e..66d1100f6 100644 --- a/code/nel/src/3d/driver/opengl3/driver_opengl.cpp +++ b/code/nel/src/3d/driver/opengl3/driver_opengl.cpp @@ -643,6 +643,10 @@ bool CDriverGL3::swapBuffers() // Check all vertex buffer to see which one are lost updateLostBuffers(); + // Swap volatile pinned memory + if (_AMDPinnedAllocator) + _AMDPinnedAllocator->swap(); + return true; } diff --git a/code/nel/src/3d/driver/opengl3/driver_opengl_vertex.cpp b/code/nel/src/3d/driver/opengl3/driver_opengl_vertex.cpp index 0a097b025..fbf36147b 100644 --- a/code/nel/src/3d/driver/opengl3/driver_opengl_vertex.cpp +++ b/code/nel/src/3d/driver/opengl3/driver_opengl_vertex.cpp @@ -287,14 +287,14 @@ IVertexBufferGL3 *CDriverGL3::createVertexBufferGL(uint size, uint numVertices, { result = new CVertexBufferAMDPinned(this, size, numVertices, preferred, vb); } - /*else if (_Extensions.AMDPinnedMemory && ( + else if (_Extensions.AMDPinnedMemory && ( preferred == CVertexBuffer::RAMVolatile || preferred == CVertexBuffer::AGPVolatile )) { // NOTE: Performance of this is lower... result = new CVertexBufferAMDPinnedVolatile(this, size, numVertices, preferred, vb); - }*/ + } else { result = new CVertexBufferGL3(this, size, numVertices, preferred, vb); diff --git a/code/nel/src/3d/driver/opengl3/driver_opengl_vertex_buffer.cpp b/code/nel/src/3d/driver/opengl3/driver_opengl_vertex_buffer.cpp index 25943edc9..b089aba84 100644 --- a/code/nel/src/3d/driver/opengl3/driver_opengl_vertex_buffer.cpp +++ b/code/nel/src/3d/driver/opengl3/driver_opengl_vertex_buffer.cpp @@ -544,11 +544,7 @@ CVertexBufferAMDPinnedVolatile::~CVertexBufferAMDPinnedVolatile() { H_AUTO_OGL(CVertexBufferAMDPinnedVolatile_CVertexBufferAMDPinnedDtor) - if (m_Block) - { - m_Driver->_AMDPinnedAllocator->free(m_Block); - m_Block = NULL; - } + // no-op } // *************************************************************************** @@ -567,51 +563,9 @@ void *CVertexBufferAMDPinnedVolatile::lock() beforeLock = CTime::getPerformanceTime(); } - // Allocate if necessary + // Allocate const uint size = VB->getNumVertices() * VB->getVertexSize(); - if (m_Block) - { - // Verify size - uint index = 0; - uint sizeShift = size >> (NLDRV_GL3_AMD_PINNED_VOLATILE_SHIFT); - while (sizeShift) - { - sizeShift >>= 1; - ++index; - } - if (m_Block->Bin != index) - { - // nldebug("GL3: Size inconsistency"); // NOTE: May have to handle this for other buffers... - // Wrong size, want a different block size - m_Driver->_AMDPinnedAllocator->free(m_Block); - m_Block = NULL; - } - } - if (m_Block) - { - // Check fence - if (m_Block->FenceId) - { - GLint status = 0; - nglGetSynciv(m_Block->FenceId, GL_SYNC_STATUS, 1, NULL, &status); - if (status == GL_SIGNALED) - { - // Can use this - nglDeleteSync(m_Block->FenceId); - m_Block->FenceId = 0; - } - else - { - // Not ready, get a different block - m_Driver->_AMDPinnedAllocator->free(m_Block); - m_Block = NULL; - } - } - } - if (!m_Block) - { - m_Block = m_Driver->_AMDPinnedAllocator->allocate(size); - } + m_Block = m_Driver->_AMDPinnedAllocator->allocate(size); // Lock nlassert(m_Block); @@ -636,29 +590,7 @@ void CVertexBufferAMDPinnedVolatile::unlock() H_AUTO_OGL(CVertexBufferAMDPinnedVolatile_unlock); m_VertexPtr = NULL; - - if (!m_Block) - return; - - // Profiling - /*TTicks beforeLock = 0; - if (m_Driver->_VBHardProfiling) - { - beforeLock = CTime::getPerformanceTime(); - } - - // Unlock - m_Driver->_DriverGLStates.bindARBVertexBuffer(m_Block->VertexObjectId); - nglUnmapBuffer(GL_ARRAY_BUFFER); - m_Driver->_DriverGLStates.forceBindARBVertexBuffer(0); - - // Profiling - if (m_Driver->_VBHardProfiling) - { - TTicks afterLock; - afterLock= CTime::getPerformanceTime(); - m_Driver->appendVBHardLockProfile(afterLock-beforeLock, VB); - }*/ + // m_Block = NULL; } // *************************************************************************** @@ -707,6 +639,7 @@ void CVertexBufferAMDPinnedVolatile::setupVBInfos(CVertexBufferInfo &vb) { H_AUTO_OGL(CVertexBufferAMDPinned_setupVBInfos) + nlassert(m_Block); vb.VertexObjectId = m_Block->VertexObjectId; } @@ -716,15 +649,7 @@ void CVertexBufferAMDPinnedVolatile::setFence() { H_AUTO_OGL(CVertexBufferAMDPinnedVolatile_setFence) - // Set the fence - nlassert(m_Block); - if (m_Block->FenceId) - { - nglDeleteSync(m_Block->FenceId); - m_Block->FenceId = 0; - } - m_Block->FenceId = nglFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); - nlassert(m_Block->FenceId); + // no-op } // *************************************************************************** @@ -736,6 +661,7 @@ CVertexBufferAMDPinnedAllocator::CVertexBufferAMDPinnedAllocator(CDriverGL3 *dri H_AUTO_OGL(CVertexBufferAMDPinnedAllocator_ctor) m_Driver = driver; + swap(); } // *************************************************************************** @@ -747,10 +673,9 @@ CVertexBufferAMDPinnedAllocator::~CVertexBufferAMDPinnedAllocator() // Release all pools for (uint bin = 0; bin < NLDRV_GL3_AMD_PINNED_VOLATILE_BINS; ++bin) { - while (!m_Pool[bin].empty()) + for (uint idx = 0; idx < m_Pool[bin].size(); ++idx) { - CVertexBufferAMDPinnedBlock *block = m_Pool[bin].front(); - m_Pool[bin].pop(); + CVertexBufferAMDPinnedBlock *block = m_Pool[bin][idx]; if (m_Driver && block->VertexObjectId) { @@ -769,6 +694,8 @@ CVertexBufferAMDPinnedAllocator::~CVertexBufferAMDPinnedAllocator() delete block->Allocated; delete block; } + + m_Pool[bin].clear(); } m_Driver = NULL; @@ -780,75 +707,58 @@ CVertexBufferAMDPinnedBlock *CVertexBufferAMDPinnedAllocator::allocate(uint size { H_AUTO_OGL(CVertexBufferAMDPinnedAllocator_allocate) - uint index = 0; + uint bin = 0; uint sizeShift = size >> (NLDRV_GL3_AMD_PINNED_VOLATILE_SHIFT); while (sizeShift) { sizeShift >>= 1; - ++index; + ++bin; } - nlassert(index < NLDRV_GL3_AMD_PINNED_VOLATILE_BINS); // Not implemented over 8MB + nlassert(bin < NLDRV_GL3_AMD_PINNED_VOLATILE_BINS); // Not implemented over 8MB - if (!m_Pool[index].empty()) - { - CVertexBufferAMDPinnedBlock *block = m_Pool[index].front(); - if (block->FenceId == 0) - { - // No fence, don't check - m_Pool[index].pop(); - return block; - } - GLint status = 0; - nlassert(nglIsSync(block->FenceId)); - nglGetSynciv(block->FenceId, GL_SYNC_STATUS, 1, NULL, &status); - if (status == GL_SIGNALED) - { - // Ready, can use this block - // nldebug("GL3: APV Reuse (idx %i, glid %i)", index, block->VertexObjectId); - nglDeleteSync(block->FenceId); - block->FenceId = 0; - m_Pool[index].pop(); - return block; - } - nlassert(status == GL_UNSIGNALED); - } - else + uint idx = m_PoolIndex[bin]; + ++m_PoolIndex[bin]; + + if (idx < m_Pool[bin].size()) { - // nldebug("GL3: Empty container"); + return m_Pool[bin][idx]; } // Create new block - CVertexBufferAMDPinnedBlock *newblock = new CVertexBufferAMDPinnedBlock(); - newblock->Bin = index; + CVertexBufferAMDPinnedBlock *block = new CVertexBufferAMDPinnedBlock(); + block->Bin = bin; // Allocate memory - uint allocSize = 1 << (NLDRV_GL3_AMD_PINNED_VOLATILE_SHIFT + index); + uint allocSize = 1 << (NLDRV_GL3_AMD_PINNED_VOLATILE_SHIFT + bin); static int totalAlloc = 0; totalAlloc += allocSize + 4096; nldebug("GL3: APV Alloc (%i -> %i, total: %i)", size, allocSize, totalAlloc); - newblock->Allocated = new char[allocSize + 4096]; - nlassert(newblock->Allocated); - uintptr_t addr = (uintptr_t)newblock->Allocated; + block->Allocated = new char[allocSize + 4096]; + nlassert(block->Allocated); + uintptr_t addr = (uintptr_t)block->Allocated; addr = (addr + 4095) & (~0xfff); void *addrAligned = (void *)addr; - newblock->Buffer = addrAligned; + block->Buffer = addrAligned; // Create id and bind - nglGenBuffers(1, &newblock->VertexObjectId); - nglBindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, newblock->VertexObjectId); + nglGenBuffers(1, &block->VertexObjectId); + nglBindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, block->VertexObjectId); // Set buffer nglBufferData(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, allocSize, addrAligned, GL_DYNAMIC_DRAW); if (glGetError() == GL_INVALID_OPERATION) { nlerror("GL3: Failed to pin memory (volatile)"); - nglDeleteBuffers(1, &newblock->VertexObjectId); - newblock->VertexObjectId = 0; + nglDeleteBuffers(1, &block->VertexObjectId); + block->VertexObjectId = 0; } // Unbind nglBindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, 0); + // Add to pool + m_Pool[bin].push_back(block); + // TEST LOCK /*m_Driver->_DriverGLStates.bindARBVertexBuffer(newblock->VertexObjectId); // m_VertexPtr = nglMapBufferRange(GL_ARRAY_BUFFER, 0, size, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT); @@ -856,17 +766,19 @@ CVertexBufferAMDPinnedBlock *CVertexBufferAMDPinnedAllocator::allocate(uint size nlassert(testptr); m_Driver->_DriverGLStates.bindARBVertexBuffer(0);*/ - return newblock; + return block; } // *************************************************************************** -void CVertexBufferAMDPinnedAllocator::free(CVertexBufferAMDPinnedBlock *block) +void CVertexBufferAMDPinnedAllocator::swap() { - H_AUTO_OGL(CVertexBufferAMDPinnedAllocator_free) + H_AUTO_OGL(CVertexBufferAMDPinnedAllocator_swap) - // nldebug("GL3: Free block (glid: %i)", block->VertexObjectId); - m_Pool[block->Bin].push(block); + for (uint bin = 0; bin < NLDRV_GL3_AMD_PINNED_VOLATILE_BINS; ++bin) + { + m_PoolIndex[bin] = 0; + } } // *************************************************************************** diff --git a/code/nel/src/3d/driver/opengl3/driver_opengl_vertex_buffer.h b/code/nel/src/3d/driver/opengl3/driver_opengl_vertex_buffer.h index 3950444f0..589d91630 100644 --- a/code/nel/src/3d/driver/opengl3/driver_opengl_vertex_buffer.h +++ b/code/nel/src/3d/driver/opengl3/driver_opengl_vertex_buffer.h @@ -162,11 +162,13 @@ public: ~CVertexBufferAMDPinnedAllocator(); CVertexBufferAMDPinnedBlock *allocate(uint size); - void free(CVertexBufferAMDPinnedBlock *block); + // void free(CVertexBufferAMDPinnedBlock *block); + void swap(); private: CDriverGL3 *m_Driver; - std::queue m_Pool[NLDRV_GL3_AMD_PINNED_VOLATILE_BINS]; + std::vector m_Pool[NLDRV_GL3_AMD_PINNED_VOLATILE_BINS]; + uint m_PoolIndex[NLDRV_GL3_AMD_PINNED_VOLATILE_BINS]; };