diff --git a/code/nel/src/3d/driver/opengl3/driver_opengl.cpp b/code/nel/src/3d/driver/opengl3/driver_opengl.cpp index 66d1100f6..8e29cba9e 100644 --- a/code/nel/src/3d/driver/opengl3/driver_opengl.cpp +++ b/code/nel/src/3d/driver/opengl3/driver_opengl.cpp @@ -643,10 +643,6 @@ bool CDriverGL3::swapBuffers() // Check all vertex buffer to see which one are lost updateLostBuffers(); - // Swap volatile pinned memory - if (_AMDPinnedAllocator) - _AMDPinnedAllocator->swap(); - return true; } diff --git a/code/nel/src/3d/driver/opengl3/driver_opengl_vertex.cpp b/code/nel/src/3d/driver/opengl3/driver_opengl_vertex.cpp index fbf36147b..0a097b025 100644 --- a/code/nel/src/3d/driver/opengl3/driver_opengl_vertex.cpp +++ b/code/nel/src/3d/driver/opengl3/driver_opengl_vertex.cpp @@ -287,14 +287,14 @@ IVertexBufferGL3 *CDriverGL3::createVertexBufferGL(uint size, uint numVertices, { result = new CVertexBufferAMDPinned(this, size, numVertices, preferred, vb); } - else if (_Extensions.AMDPinnedMemory && ( + /*else if (_Extensions.AMDPinnedMemory && ( preferred == CVertexBuffer::RAMVolatile || preferred == CVertexBuffer::AGPVolatile )) { // NOTE: Performance of this is lower... result = new CVertexBufferAMDPinnedVolatile(this, size, numVertices, preferred, vb); - } + }*/ else { result = new CVertexBufferGL3(this, size, numVertices, preferred, vb); diff --git a/code/nel/src/3d/driver/opengl3/driver_opengl_vertex_buffer.cpp b/code/nel/src/3d/driver/opengl3/driver_opengl_vertex_buffer.cpp index b089aba84..25943edc9 100644 --- a/code/nel/src/3d/driver/opengl3/driver_opengl_vertex_buffer.cpp +++ b/code/nel/src/3d/driver/opengl3/driver_opengl_vertex_buffer.cpp @@ -544,7 +544,11 @@ CVertexBufferAMDPinnedVolatile::~CVertexBufferAMDPinnedVolatile() { H_AUTO_OGL(CVertexBufferAMDPinnedVolatile_CVertexBufferAMDPinnedDtor) - // no-op + if (m_Block) + { + m_Driver->_AMDPinnedAllocator->free(m_Block); + m_Block = NULL; + } } // *************************************************************************** @@ -563,9 +567,51 @@ void *CVertexBufferAMDPinnedVolatile::lock() beforeLock = CTime::getPerformanceTime(); } - // Allocate + // Allocate if necessary const uint size = VB->getNumVertices() * VB->getVertexSize(); - m_Block = m_Driver->_AMDPinnedAllocator->allocate(size); + if (m_Block) + { + // Verify size + uint index = 0; + uint sizeShift = size >> (NLDRV_GL3_AMD_PINNED_VOLATILE_SHIFT); + while (sizeShift) + { + sizeShift >>= 1; + ++index; + } + if (m_Block->Bin != index) + { + // nldebug("GL3: Size inconsistency"); // NOTE: May have to handle this for other buffers... + // Wrong size, want a different block size + m_Driver->_AMDPinnedAllocator->free(m_Block); + m_Block = NULL; + } + } + if (m_Block) + { + // Check fence + if (m_Block->FenceId) + { + GLint status = 0; + nglGetSynciv(m_Block->FenceId, GL_SYNC_STATUS, 1, NULL, &status); + if (status == GL_SIGNALED) + { + // Can use this + nglDeleteSync(m_Block->FenceId); + m_Block->FenceId = 0; + } + else + { + // Not ready, get a different block + m_Driver->_AMDPinnedAllocator->free(m_Block); + m_Block = NULL; + } + } + } + if (!m_Block) + { + m_Block = m_Driver->_AMDPinnedAllocator->allocate(size); + } // Lock nlassert(m_Block); @@ -590,7 +636,29 @@ void CVertexBufferAMDPinnedVolatile::unlock() H_AUTO_OGL(CVertexBufferAMDPinnedVolatile_unlock); m_VertexPtr = NULL; - // m_Block = NULL; + + if (!m_Block) + return; + + // Profiling + /*TTicks beforeLock = 0; + if (m_Driver->_VBHardProfiling) + { + beforeLock = CTime::getPerformanceTime(); + } + + // Unlock + m_Driver->_DriverGLStates.bindARBVertexBuffer(m_Block->VertexObjectId); + nglUnmapBuffer(GL_ARRAY_BUFFER); + m_Driver->_DriverGLStates.forceBindARBVertexBuffer(0); + + // Profiling + if (m_Driver->_VBHardProfiling) + { + TTicks afterLock; + afterLock= CTime::getPerformanceTime(); + m_Driver->appendVBHardLockProfile(afterLock-beforeLock, VB); + }*/ } // *************************************************************************** @@ -639,7 +707,6 @@ void CVertexBufferAMDPinnedVolatile::setupVBInfos(CVertexBufferInfo &vb) { H_AUTO_OGL(CVertexBufferAMDPinned_setupVBInfos) - nlassert(m_Block); vb.VertexObjectId = m_Block->VertexObjectId; } @@ -649,7 +716,15 @@ void CVertexBufferAMDPinnedVolatile::setFence() { H_AUTO_OGL(CVertexBufferAMDPinnedVolatile_setFence) - // no-op + // Set the fence + nlassert(m_Block); + if (m_Block->FenceId) + { + nglDeleteSync(m_Block->FenceId); + m_Block->FenceId = 0; + } + m_Block->FenceId = nglFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + nlassert(m_Block->FenceId); } // *************************************************************************** @@ -661,7 +736,6 @@ CVertexBufferAMDPinnedAllocator::CVertexBufferAMDPinnedAllocator(CDriverGL3 *dri H_AUTO_OGL(CVertexBufferAMDPinnedAllocator_ctor) m_Driver = driver; - swap(); } // *************************************************************************** @@ -673,9 +747,10 @@ CVertexBufferAMDPinnedAllocator::~CVertexBufferAMDPinnedAllocator() // Release all pools for (uint bin = 0; bin < NLDRV_GL3_AMD_PINNED_VOLATILE_BINS; ++bin) { - for (uint idx = 0; idx < m_Pool[bin].size(); ++idx) + while (!m_Pool[bin].empty()) { - CVertexBufferAMDPinnedBlock *block = m_Pool[bin][idx]; + CVertexBufferAMDPinnedBlock *block = m_Pool[bin].front(); + m_Pool[bin].pop(); if (m_Driver && block->VertexObjectId) { @@ -694,8 +769,6 @@ CVertexBufferAMDPinnedAllocator::~CVertexBufferAMDPinnedAllocator() delete block->Allocated; delete block; } - - m_Pool[bin].clear(); } m_Driver = NULL; @@ -707,58 +780,75 @@ CVertexBufferAMDPinnedBlock *CVertexBufferAMDPinnedAllocator::allocate(uint size { H_AUTO_OGL(CVertexBufferAMDPinnedAllocator_allocate) - uint bin = 0; + uint index = 0; uint sizeShift = size >> (NLDRV_GL3_AMD_PINNED_VOLATILE_SHIFT); while (sizeShift) { sizeShift >>= 1; - ++bin; + ++index; } - nlassert(bin < NLDRV_GL3_AMD_PINNED_VOLATILE_BINS); // Not implemented over 8MB + nlassert(index < NLDRV_GL3_AMD_PINNED_VOLATILE_BINS); // Not implemented over 8MB - uint idx = m_PoolIndex[bin]; - ++m_PoolIndex[bin]; - - if (idx < m_Pool[bin].size()) + if (!m_Pool[index].empty()) { - return m_Pool[bin][idx]; + CVertexBufferAMDPinnedBlock *block = m_Pool[index].front(); + if (block->FenceId == 0) + { + // No fence, don't check + m_Pool[index].pop(); + return block; + } + GLint status = 0; + nlassert(nglIsSync(block->FenceId)); + nglGetSynciv(block->FenceId, GL_SYNC_STATUS, 1, NULL, &status); + if (status == GL_SIGNALED) + { + // Ready, can use this block + // nldebug("GL3: APV Reuse (idx %i, glid %i)", index, block->VertexObjectId); + nglDeleteSync(block->FenceId); + block->FenceId = 0; + m_Pool[index].pop(); + return block; + } + nlassert(status == GL_UNSIGNALED); + } + else + { + // nldebug("GL3: Empty container"); } // Create new block - CVertexBufferAMDPinnedBlock *block = new CVertexBufferAMDPinnedBlock(); - block->Bin = bin; + CVertexBufferAMDPinnedBlock *newblock = new CVertexBufferAMDPinnedBlock(); + newblock->Bin = index; // Allocate memory - uint allocSize = 1 << (NLDRV_GL3_AMD_PINNED_VOLATILE_SHIFT + bin); + uint allocSize = 1 << (NLDRV_GL3_AMD_PINNED_VOLATILE_SHIFT + index); static int totalAlloc = 0; totalAlloc += allocSize + 4096; nldebug("GL3: APV Alloc (%i -> %i, total: %i)", size, allocSize, totalAlloc); - block->Allocated = new char[allocSize + 4096]; - nlassert(block->Allocated); - uintptr_t addr = (uintptr_t)block->Allocated; + newblock->Allocated = new char[allocSize + 4096]; + nlassert(newblock->Allocated); + uintptr_t addr = (uintptr_t)newblock->Allocated; addr = (addr + 4095) & (~0xfff); void *addrAligned = (void *)addr; - block->Buffer = addrAligned; + newblock->Buffer = addrAligned; // Create id and bind - nglGenBuffers(1, &block->VertexObjectId); - nglBindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, block->VertexObjectId); + nglGenBuffers(1, &newblock->VertexObjectId); + nglBindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, newblock->VertexObjectId); // Set buffer nglBufferData(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, allocSize, addrAligned, GL_DYNAMIC_DRAW); if (glGetError() == GL_INVALID_OPERATION) { nlerror("GL3: Failed to pin memory (volatile)"); - nglDeleteBuffers(1, &block->VertexObjectId); - block->VertexObjectId = 0; + nglDeleteBuffers(1, &newblock->VertexObjectId); + newblock->VertexObjectId = 0; } // Unbind nglBindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, 0); - // Add to pool - m_Pool[bin].push_back(block); - // TEST LOCK /*m_Driver->_DriverGLStates.bindARBVertexBuffer(newblock->VertexObjectId); // m_VertexPtr = nglMapBufferRange(GL_ARRAY_BUFFER, 0, size, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT); @@ -766,19 +856,17 @@ CVertexBufferAMDPinnedBlock *CVertexBufferAMDPinnedAllocator::allocate(uint size nlassert(testptr); m_Driver->_DriverGLStates.bindARBVertexBuffer(0);*/ - return block; + return newblock; } // *************************************************************************** -void CVertexBufferAMDPinnedAllocator::swap() +void CVertexBufferAMDPinnedAllocator::free(CVertexBufferAMDPinnedBlock *block) { - H_AUTO_OGL(CVertexBufferAMDPinnedAllocator_swap) + H_AUTO_OGL(CVertexBufferAMDPinnedAllocator_free) - for (uint bin = 0; bin < NLDRV_GL3_AMD_PINNED_VOLATILE_BINS; ++bin) - { - m_PoolIndex[bin] = 0; - } + // nldebug("GL3: Free block (glid: %i)", block->VertexObjectId); + m_Pool[block->Bin].push(block); } // *************************************************************************** diff --git a/code/nel/src/3d/driver/opengl3/driver_opengl_vertex_buffer.h b/code/nel/src/3d/driver/opengl3/driver_opengl_vertex_buffer.h index 589d91630..3950444f0 100644 --- a/code/nel/src/3d/driver/opengl3/driver_opengl_vertex_buffer.h +++ b/code/nel/src/3d/driver/opengl3/driver_opengl_vertex_buffer.h @@ -162,13 +162,11 @@ public: ~CVertexBufferAMDPinnedAllocator(); CVertexBufferAMDPinnedBlock *allocate(uint size); - // void free(CVertexBufferAMDPinnedBlock *block); - void swap(); + void free(CVertexBufferAMDPinnedBlock *block); private: CDriverGL3 *m_Driver; - std::vector m_Pool[NLDRV_GL3_AMD_PINNED_VOLATILE_BINS]; - uint m_PoolIndex[NLDRV_GL3_AMD_PINNED_VOLATILE_BINS]; + std::queue m_Pool[NLDRV_GL3_AMD_PINNED_VOLATILE_BINS]; };