Works without sync but is memory intensive; the performance gain over the regular stream buffer is only minimal

--HG-- branch : opengl3
11 years ago · 55aae0d138
parent 5bc87eeeed
commit 55aae0d138
4 changed files with 50 additions and 132 deletions
--- a/code/nel/src/3d/driver/opengl3/driver_opengl.cpp
+++ b/code/nel/src/3d/driver/opengl3/driver_opengl.cpp
@ -643,6 +643,10 @@ bool CDriverGL3::swapBuffers()
 	// Check all vertex buffer to see which one are lost
 	updateLostBuffers();
 	// Swap volatile pinned memory
 	if (_AMDPinnedAllocator)
 		_AMDPinnedAllocator->swap();
 	return true;
 }
--- a/code/nel/src/3d/driver/opengl3/driver_opengl_vertex.cpp
+++ b/code/nel/src/3d/driver/opengl3/driver_opengl_vertex.cpp
@ -287,14 +287,14 @@ IVertexBufferGL3	*CDriverGL3::createVertexBufferGL(uint size, uint numVertices,
 	{
 		result = new CVertexBufferAMDPinned(this, size, numVertices, preferred, vb);
 	}
-	/*else if (_Extensions.AMDPinnedMemory && (
+	else if (_Extensions.AMDPinnedMemory && (
 		preferred == CVertexBuffer::RAMVolatile
 		|| preferred == CVertexBuffer::AGPVolatile
 		))
 	{
 		// NOTE: Performance of this is lower...
 		result = new CVertexBufferAMDPinnedVolatile(this, size, numVertices, preferred, vb);
-	}*/
+	}
 	else
 	{
 		result = new CVertexBufferGL3(this, size, numVertices, preferred, vb);
--- a/code/nel/src/3d/driver/opengl3/driver_opengl_vertex_buffer.cpp
+++ b/code/nel/src/3d/driver/opengl3/driver_opengl_vertex_buffer.cpp
@ -544,11 +544,7 @@ CVertexBufferAMDPinnedVolatile::~CVertexBufferAMDPinnedVolatile()
 {
 	H_AUTO_OGL(CVertexBufferAMDPinnedVolatile_CVertexBufferAMDPinnedDtor)
-	if (m_Block)
+	// no-op
 	{
 		m_Driver->_AMDPinnedAllocator->free(m_Block);
 		m_Block = NULL;
 	}
 }
 // ***************************************************************************
@ -567,51 +563,9 @@ void *CVertexBufferAMDPinnedVolatile::lock()
 		beforeLock = CTime::getPerformanceTime();
 	}
-	// Allocate if necessary
+	// Allocate
 	const uint size = VB->getNumVertices() * VB->getVertexSize();
-	if (m_Block)
+	m_Block = m_Driver->_AMDPinnedAllocator->allocate(size);
 	{
 		// Verify size
 		uint index = 0;
 		uint sizeShift = size >> (NLDRV_GL3_AMD_PINNED_VOLATILE_SHIFT);
 		while (sizeShift)
 		{
 			sizeShift >>= 1;
 			++index;
 		}
 		if (m_Block->Bin != index)
 		{
 			// nldebug("GL3: Size inconsistency"); // NOTE: May have to handle this for other buffers...
 			// Wrong size, want a different block size
 			m_Driver->_AMDPinnedAllocator->free(m_Block);
 			m_Block = NULL;
 		}
 	}
 	if (m_Block)
 	{
 		// Check fence
 		if (m_Block->FenceId)
 		{
 			GLint status = 0;
 			nglGetSynciv(m_Block->FenceId, GL_SYNC_STATUS, 1, NULL, &status);
 			if (status == GL_SIGNALED)
 			{
 				// Can use this
 				nglDeleteSync(m_Block->FenceId);
 				m_Block->FenceId = 0;
 			}
 			else
 			{
 				// Not ready, get a different block
 				m_Driver->_AMDPinnedAllocator->free(m_Block);
 				m_Block = NULL;
 			}
 		}
 	}
 	if (!m_Block)
 	{
 		m_Block = m_Driver->_AMDPinnedAllocator->allocate(size);
 	}
 	// Lock
 	nlassert(m_Block);
@ -636,29 +590,7 @@ void CVertexBufferAMDPinnedVolatile::unlock()
 	H_AUTO_OGL(CVertexBufferAMDPinnedVolatile_unlock);
 	m_VertexPtr = NULL;
-
+	// m_Block = NULL;
 	if (!m_Block)
 		return;
 	// Profiling
 	/*TTicks beforeLock = 0;
 	if (m_Driver->_VBHardProfiling)
 	{
 		beforeLock = CTime::getPerformanceTime();
 	}
 	// Unlock
 	m_Driver->_DriverGLStates.bindARBVertexBuffer(m_Block->VertexObjectId);
 	nglUnmapBuffer(GL_ARRAY_BUFFER);
 	m_Driver->_DriverGLStates.forceBindARBVertexBuffer(0);
 	// Profiling
 	if (m_Driver->_VBHardProfiling)
 	{
 		TTicks	afterLock;
 		afterLock= CTime::getPerformanceTime();
 		m_Driver->appendVBHardLockProfile(afterLock-beforeLock, VB);
 	}*/
 }
 // ***************************************************************************
@ -707,6 +639,7 @@ void CVertexBufferAMDPinnedVolatile::setupVBInfos(CVertexBufferInfo &vb)
 {
 	H_AUTO_OGL(CVertexBufferAMDPinned_setupVBInfos)
 	nlassert(m_Block);
 	vb.VertexObjectId = m_Block->VertexObjectId;
 }
@ -716,15 +649,7 @@ void CVertexBufferAMDPinnedVolatile::setFence()
 {
 	H_AUTO_OGL(CVertexBufferAMDPinnedVolatile_setFence)
-	// Set the fence
+	// no-op
 	nlassert(m_Block);
 	if (m_Block->FenceId)
 	{
 		nglDeleteSync(m_Block->FenceId);
 		m_Block->FenceId = 0;
 	}
 	m_Block->FenceId = nglFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
 	nlassert(m_Block->FenceId);
 }
 // ***************************************************************************
@ -736,6 +661,7 @@ CVertexBufferAMDPinnedAllocator::CVertexBufferAMDPinnedAllocator(CDriverGL3 *dri
 	H_AUTO_OGL(CVertexBufferAMDPinnedAllocator_ctor)
 	m_Driver = driver;
 	swap();
 }
 // ***************************************************************************
@ -747,10 +673,9 @@ CVertexBufferAMDPinnedAllocator::~CVertexBufferAMDPinnedAllocator()
 	// Release all pools
 	for (uint bin = 0; bin < NLDRV_GL3_AMD_PINNED_VOLATILE_BINS; ++bin)
 	{
-		while (!m_Pool[bin].empty())
+		for (uint idx = 0; idx < m_Pool[bin].size(); ++idx)
 		{
-			CVertexBufferAMDPinnedBlock *block = m_Pool[bin].front();
+			CVertexBufferAMDPinnedBlock *block = m_Pool[bin][idx];
 			m_Pool[bin].pop();
 			if (m_Driver && block->VertexObjectId)
 			{
@ -769,6 +694,8 @@ CVertexBufferAMDPinnedAllocator::~CVertexBufferAMDPinnedAllocator()
 			delete block->Allocated;
 			delete block;
 		}
 		m_Pool[bin].clear();
 	}
 	m_Driver = NULL;
@ -780,75 +707,58 @@ CVertexBufferAMDPinnedBlock *CVertexBufferAMDPinnedAllocator::allocate(uint size
 {
 	H_AUTO_OGL(CVertexBufferAMDPinnedAllocator_allocate)
-	uint index = 0;
+	uint bin = 0;
 	uint sizeShift = size >> (NLDRV_GL3_AMD_PINNED_VOLATILE_SHIFT);
 	while (sizeShift)
 	{
 		sizeShift >>= 1;
-		++index;
+		++bin;
 	}
-	nlassert(index < NLDRV_GL3_AMD_PINNED_VOLATILE_BINS); // Not implemented over 8MB
+	nlassert(bin < NLDRV_GL3_AMD_PINNED_VOLATILE_BINS); // Not implemented over 8MB
-	if (!m_Pool[index].empty())
+	uint idx = m_PoolIndex[bin];
-	{
+	++m_PoolIndex[bin];
-		CVertexBufferAMDPinnedBlock *block = m_Pool[index].front();
+
-		if (block->FenceId == 0)
+	if (idx < m_Pool[bin].size())
 		{
 			// No fence, don't check
 			m_Pool[index].pop();
 			return block;
 		}
 		GLint status = 0;
 		nlassert(nglIsSync(block->FenceId));
 		nglGetSynciv(block->FenceId, GL_SYNC_STATUS, 1, NULL, &status);
 		if (status == GL_SIGNALED)
 		{
 			// Ready, can use this block
 			// nldebug("GL3: APV Reuse (idx %i, glid %i)", index, block->VertexObjectId);
 			nglDeleteSync(block->FenceId);
 			block->FenceId = 0;
 			m_Pool[index].pop();
 			return block;
 		}
 		nlassert(status == GL_UNSIGNALED);
 	}
 	else
 	{
-		// nldebug("GL3: Empty container");
+		return m_Pool[bin][idx];
 	}
 	// Create new block
-	CVertexBufferAMDPinnedBlock *newblock = new CVertexBufferAMDPinnedBlock();
+	CVertexBufferAMDPinnedBlock *block = new CVertexBufferAMDPinnedBlock();
-	newblock->Bin = index;
+	block->Bin = bin;
 	// Allocate memory
-	uint allocSize = 1 << (NLDRV_GL3_AMD_PINNED_VOLATILE_SHIFT + index);
+	uint allocSize = 1 << (NLDRV_GL3_AMD_PINNED_VOLATILE_SHIFT + bin);
 	static int totalAlloc = 0;
 	totalAlloc += allocSize + 4096;
 	nldebug("GL3: APV Alloc (%i -> %i, total: %i)", size, allocSize, totalAlloc);
-	newblock->Allocated = new char[allocSize + 4096];
+	block->Allocated = new char[allocSize + 4096];
-	nlassert(newblock->Allocated);
+	nlassert(block->Allocated);
-	uintptr_t addr = (uintptr_t)newblock->Allocated;
+	uintptr_t addr = (uintptr_t)block->Allocated;
 	addr = (addr + 4095) & (~0xfff);
 	void *addrAligned = (void *)addr;
-	newblock->Buffer = addrAligned;
+	block->Buffer = addrAligned;
 	// Create id and bind
-	nglGenBuffers(1, &newblock->VertexObjectId);
+	nglGenBuffers(1, &block->VertexObjectId);
-	nglBindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, newblock->VertexObjectId);
+	nglBindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, block->VertexObjectId);
 	// Set buffer
 	nglBufferData(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, allocSize, addrAligned, GL_DYNAMIC_DRAW);
 	if (glGetError() == GL_INVALID_OPERATION)
 	{
 		nlerror("GL3: Failed to pin memory (volatile)");
-		nglDeleteBuffers(1, &newblock->VertexObjectId);
+		nglDeleteBuffers(1, &block->VertexObjectId);
-		newblock->VertexObjectId = 0;
+		block->VertexObjectId = 0;
 	}
 	// Unbind
 	nglBindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, 0);
 	// Add to pool
 	m_Pool[bin].push_back(block);
 	// TEST LOCK
 	/*m_Driver->_DriverGLStates.bindARBVertexBuffer(newblock->VertexObjectId);
 	// m_VertexPtr = nglMapBufferRange(GL_ARRAY_BUFFER, 0, size, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
@ -856,17 +766,19 @@ CVertexBufferAMDPinnedBlock *CVertexBufferAMDPinnedAllocator::allocate(uint size
 	nlassert(testptr);
 	m_Driver->_DriverGLStates.bindARBVertexBuffer(0);*/
-	return newblock;
+	return block;
 }
 // ***************************************************************************
-void CVertexBufferAMDPinnedAllocator::free(CVertexBufferAMDPinnedBlock *block)
+void CVertexBufferAMDPinnedAllocator::swap()
 {
-	H_AUTO_OGL(CVertexBufferAMDPinnedAllocator_free)
+	H_AUTO_OGL(CVertexBufferAMDPinnedAllocator_swap)
-	// nldebug("GL3: Free block (glid: %i)", block->VertexObjectId);
+	for (uint bin = 0; bin < NLDRV_GL3_AMD_PINNED_VOLATILE_BINS; ++bin)
-	m_Pool[block->Bin].push(block);
+	{
 		m_PoolIndex[bin] = 0;
 	}
 }
 // ***************************************************************************
--- a/code/nel/src/3d/driver/opengl3/driver_opengl_vertex_buffer.h
+++ b/code/nel/src/3d/driver/opengl3/driver_opengl_vertex_buffer.h
@ -162,11 +162,13 @@ public:
 	~CVertexBufferAMDPinnedAllocator();
 	CVertexBufferAMDPinnedBlock *allocate(uint size);
-	void free(CVertexBufferAMDPinnedBlock *block);
+	// void free(CVertexBufferAMDPinnedBlock *block);
 	void swap();
 private:
 	CDriverGL3 *m_Driver;
-	std::queue<CVertexBufferAMDPinnedBlock *> m_Pool[NLDRV_GL3_AMD_PINNED_VOLATILE_BINS];	
+	std::vector<CVertexBufferAMDPinnedBlock *> m_Pool[NLDRV_GL3_AMD_PINNED_VOLATILE_BINS];
 	uint m_PoolIndex[NLDRV_GL3_AMD_PINNED_VOLATILE_BINS];
 };