From d94a49b3d847aeb235af86ad2b4e07abaf08c767 Mon Sep 17 00:00:00 2001 From: kaetemi Date: Fri, 13 Jun 2014 00:53:13 +0200 Subject: [PATCH] SSE2: More CVector alignment fixes --HG-- branch : sse2 --- code/nel/include/nel/3d/shadow_skin.h | 1 + code/nel/src/3d/lod_character_manager.cpp | 2 +- code/nel/src/3d/mesh.cpp | 8 ++--- code/nel/src/3d/mesh_mrm.cpp | 12 +++---- code/nel/src/3d/mesh_mrm_skin.cpp | 36 +++++++++++++-------- code/nel/src/3d/mesh_mrm_skinned.cpp | 2 +- code/nel/src/3d/mesh_multi_lod_instance.cpp | 2 +- code/nel/src/3d/patch_render.cpp | 6 ++-- code/nel/src/3d/shadow_skin.cpp | 28 ++++++++++++++++ code/nel/src/3d/vegetable_manager.cpp | 16 ++++----- code/nel/src/3d/vertex_buffer.cpp | 4 +-- 11 files changed, 77 insertions(+), 40 deletions(-) diff --git a/code/nel/include/nel/3d/shadow_skin.h b/code/nel/include/nel/3d/shadow_skin.h index 2b63a635d..3ecc56631 100644 --- a/code/nel/include/nel/3d/shadow_skin.h +++ b/code/nel/include/nel/3d/shadow_skin.h @@ -74,6 +74,7 @@ public: public: // skinning + void applySkin(NLMISC::CVectorPacked *dst, std::vector &boneMat3x4); void applySkin(NLMISC::CVector *dst, std::vector &boneMat3x4); /** return ray intersection. diff --git a/code/nel/src/3d/lod_character_manager.cpp b/code/nel/src/3d/lod_character_manager.cpp index 48c2a500f..46a6bacf8 100644 --- a/code/nel/src/3d/lod_character_manager.cpp +++ b/code/nel/src/3d/lod_character_manager.cpp @@ -676,7 +676,7 @@ bool CLodCharacterManager::addRenderCharacterKey(CLodCharacterInstance &instan { // NB: order is important for AGP filling optimisation // transform vertex, and store. - CVector *dstVector= (CVector*)dstPtr; + CVectorPacked *dstVector= (CVectorPacked*)dstPtr; fVect.x= vertPtr->x; fVect.y= vertPtr->y; fVect.z= vertPtr->z; ++vertPtr; dstVector->x= a00 * fVect.x + a01 * fVect.y + a02 * fVect.z + matPos.x; diff --git a/code/nel/src/3d/mesh.cpp b/code/nel/src/3d/mesh.cpp index dfaed0ce4..4bd444fb0 100644 --- a/code/nel/src/3d/mesh.cpp +++ b/code/nel/src/3d/mesh.cpp @@ -1049,7 +1049,7 @@ bool CMeshGeom::retrieveVertices(std::vector &vertices) const uint vSize= vb.getVertexSize(); for(i=0;iMatrixId[0]; diff --git a/code/nel/src/3d/mesh_mrm.cpp b/code/nel/src/3d/mesh_mrm.cpp index 999b3b62d..d0d733283 100644 --- a/code/nel/src/3d/mesh_mrm.cpp +++ b/code/nel/src/3d/mesh_mrm.cpp @@ -2066,7 +2066,7 @@ void CMeshMRMGeom::bkupOriginalSkinVerticesSubset(uint wedgeStart, uint wedgeEnd _OriginalTGSpace.resize(_VBufferFinal.getNumVertices()); for(uint i=wedgeStart; i &vertice // Final remaping of vertex to final index vertexRemap[i]= dstIndex; // copy to dest - *pDstVert= *(CVector*)pSrcVert; + *pDstVert= *(CVectorPacked*)pSrcVert; // next dest pDstVert++; @@ -3467,7 +3467,7 @@ sint CMeshMRMGeom::renderShadowSkinGeom(CMeshMRMInstance *mi, uint remainingVe CLod &lod= _Lods[_Lods.size()-1]; computeBoneMatrixes3x4(boneMat3x4, lod.MatrixInfluences, skeleton); - _ShadowSkin.applySkin((CVector*)vbDest, boneMat3x4); + _ShadowSkin.applySkin((CVectorPacked*)vbDest, boneMat3x4); // How many vertices are added to the VBuffer ??? diff --git a/code/nel/src/3d/mesh_mrm_skin.cpp b/code/nel/src/3d/mesh_mrm_skin.cpp index 13e8bdd21..d8460a1a5 100644 --- a/code/nel/src/3d/mesh_mrm_skin.cpp +++ b/code/nel/src/3d/mesh_mrm_skin.cpp @@ -222,11 +222,13 @@ void CMeshMRMGeom::applySkin(CLod &lod, const CSkeletonModel *skeleton) CMesh::CSkinWeight *srcSkin= srcSkinPtr + index; CVector *srcVertex= srcVertexPtr + index; uint8 *dstVertexVB= destVertexPtr + index * vertexSize; - CVector *dstVertex= (CVector*)(dstVertexVB); + CVectorPacked *dstVertex= (CVectorPacked*)(dstVertexVB); // Vertex. - boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, *dstVertex); + CVector temp; + boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, temp); + *dstVertex = temp; } break; @@ -239,12 +241,14 @@ void CMeshMRMGeom::applySkin(CLod &lod, const CSkeletonModel *skeleton) CMesh::CSkinWeight *srcSkin= srcSkinPtr + index; CVector *srcVertex= srcVertexPtr + index; uint8 *dstVertexVB= destVertexPtr + index * vertexSize; - CVector *dstVertex= (CVector*)(dstVertexVB); + CVectorPacked *dstVertex= (CVectorPacked*)(dstVertexVB); // Vertex. - boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], *dstVertex); - boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], *dstVertex); + CVector temp; + boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], temp); + boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], temp); + *dstVertex = temp; } break; @@ -257,13 +261,15 @@ void CMeshMRMGeom::applySkin(CLod &lod, const CSkeletonModel *skeleton) CMesh::CSkinWeight *srcSkin= srcSkinPtr + index; CVector *srcVertex= srcVertexPtr + index; uint8 *dstVertexVB= destVertexPtr + index * vertexSize; - CVector *dstVertex= (CVector*)(dstVertexVB); + CVectorPacked *dstVertex= (CVectorPacked*)(dstVertexVB); // Vertex. - boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], *dstVertex); - boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], *dstVertex); - boneMat3x4[ srcSkin->MatrixId[2] ].mulAddPoint( *srcVertex, srcSkin->Weights[2], *dstVertex); + CVector temp; + boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], temp); + boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], temp); + boneMat3x4[ srcSkin->MatrixId[2] ].mulAddPoint( *srcVertex, srcSkin->Weights[2], temp); + *dstVertex = temp; } break; @@ -276,14 +282,16 @@ void CMeshMRMGeom::applySkin(CLod &lod, const CSkeletonModel *skeleton) CMesh::CSkinWeight *srcSkin= srcSkinPtr + index; CVector *srcVertex= srcVertexPtr + index; uint8 *dstVertexVB= destVertexPtr + index * vertexSize; - CVector *dstVertex= (CVector*)(dstVertexVB); + CVectorPacked *dstVertex= (CVectorPacked*)(dstVertexVB); // Vertex. - boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], *dstVertex); - boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], *dstVertex); - boneMat3x4[ srcSkin->MatrixId[2] ].mulAddPoint( *srcVertex, srcSkin->Weights[2], *dstVertex); - boneMat3x4[ srcSkin->MatrixId[3] ].mulAddPoint( *srcVertex, srcSkin->Weights[3], *dstVertex); + CVector temp; + boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], temp); + boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], temp); + boneMat3x4[ srcSkin->MatrixId[2] ].mulAddPoint( *srcVertex, srcSkin->Weights[2], temp); + boneMat3x4[ srcSkin->MatrixId[3] ].mulAddPoint( *srcVertex, srcSkin->Weights[3], temp); + *dstVertex = temp; } break; diff --git a/code/nel/src/3d/mesh_mrm_skinned.cpp b/code/nel/src/3d/mesh_mrm_skinned.cpp index 2b1c3beb6..c4f795c87 100644 --- a/code/nel/src/3d/mesh_mrm_skinned.cpp +++ b/code/nel/src/3d/mesh_mrm_skinned.cpp @@ -1962,7 +1962,7 @@ sint CMeshMRMSkinnedGeom::renderShadowSkinGeom(CMeshMRMSkinnedInstance *mi, ui CLod &lod= _Lods[_Lods.size()-1]; computeBoneMatrixes3x4(boneMat3x4, lod.MatrixInfluences, skeleton); - _ShadowSkin.applySkin((CVector*)vbDest, boneMat3x4); + _ShadowSkin.applySkin((CVectorPacked*)vbDest, boneMat3x4); // How many vertices are added to the VBuffer ??? diff --git a/code/nel/src/3d/mesh_multi_lod_instance.cpp b/code/nel/src/3d/mesh_multi_lod_instance.cpp index c6c8fa237..f3dbbab93 100644 --- a/code/nel/src/3d/mesh_multi_lod_instance.cpp +++ b/code/nel/src/3d/mesh_multi_lod_instance.cpp @@ -302,7 +302,7 @@ void CMeshMultiLodInstance::setPosCoarseMesh( CMeshGeom &geom, const CMatrix &m for (uint i=0; i<_LastCoarseMeshNumVertices; i++) { // Transform position - *(CVector*)vDest = matrix.mulPoint (*(const CVector*)vSrc); + *(CVectorPacked*)vDest = matrix.mulPoint (CVector(*(const CVectorPacked*)vSrc)); // Next point vSrc+=vtSrcSize; diff --git a/code/nel/src/3d/patch_render.cpp b/code/nel/src/3d/patch_render.cpp index 135b9fdb3..76687cb38 100644 --- a/code/nel/src/3d/patch_render.cpp +++ b/code/nel/src/3d/patch_render.cpp @@ -1056,7 +1056,7 @@ inline void CPatch::fillFar0VertexVB(CTessFarVertex *pVert) // v[11]== EndPos - StartPos CHECK_VBA_RANGE(CLandscapeGlobals::CurrentFar0VBInfo.Accessor, CurVBPtr + CLandscapeGlobals::CurrentFar0VBInfo.DeltaPosOff, sizeof(CVector)) - *(CVector*)(CurVBPtr + CLandscapeGlobals::CurrentFar0VBInfo.DeltaPosOff)= + *(CVectorPacked*)(CurVBPtr + CLandscapeGlobals::CurrentFar0VBInfo.DeltaPosOff)= pVert->Src->EndPos - pVert->Src->StartPos; } } @@ -1144,7 +1144,7 @@ inline void CPatch::fillFar1VertexVB(CTessFarVertex *pVert) // v[11]== EndPos - StartPos CHECK_VBA_RANGE(CLandscapeGlobals::CurrentFar1VBInfo.Accessor, CurVBPtr + CLandscapeGlobals::CurrentFar1VBInfo.DeltaPosOff, sizeof(CVector)) - *(CVector*)(CurVBPtr + CLandscapeGlobals::CurrentFar1VBInfo.DeltaPosOff)= + *(CVectorPacked*)(CurVBPtr + CLandscapeGlobals::CurrentFar1VBInfo.DeltaPosOff)= pVert->Src->EndPos - pVert->Src->StartPos; // v[12]== Alpha information @@ -1214,7 +1214,7 @@ inline void CPatch::fillTileVertexVB(CTessNearVertex *pVert) // v[11]== EndPos - StartPos CHECK_VBA_RANGE(CLandscapeGlobals::CurrentTileVBInfo.Accessor, CurVBPtr + CLandscapeGlobals::CurrentTileVBInfo.DeltaPosOff, sizeof(CVector)) - *(CVector*)(CurVBPtr + CLandscapeGlobals::CurrentTileVBInfo.DeltaPosOff)= + *(CVectorPacked*)(CurVBPtr + CLandscapeGlobals::CurrentTileVBInfo.DeltaPosOff)= pVert->Src->EndPos - pVert->Src->StartPos; } } diff --git a/code/nel/src/3d/shadow_skin.cpp b/code/nel/src/3d/shadow_skin.cpp index 717d81f74..7aef7821e 100644 --- a/code/nel/src/3d/shadow_skin.cpp +++ b/code/nel/src/3d/shadow_skin.cpp @@ -38,6 +38,33 @@ uint CShadowSkin::NumCacheVertexShadow= NL_BlockByteL1 / sizeof(CShadowVertex); // *************************************************************************** +void CShadowSkin::applySkin(CVectorPacked *dst, std::vector &boneMat3x4) +{ + if(Vertices.empty()) + return; + uint numVerts= (uint)Vertices.size(); + CShadowVertex *src= &Vertices[0]; + + // Then do the skin + for(;numVerts>0;) + { + // number of vertices to process for this block. + uint nBlockInf= min(NumCacheVertexShadow, numVerts); + // next block. + numVerts-= nBlockInf; + + // cache the data in L1 cache. + CFastMem::precache(src, nBlockInf * sizeof(CShadowVertex)); + + CVector temp; + // for all InfluencedVertices only. + for(;nBlockInf>0;nBlockInf--, src++, dst++) + { + boneMat3x4[ src->MatrixId ].mulSetPoint( src->Vertex, temp ); + *dst = temp; + } + } +} void CShadowSkin::applySkin(CVector *dst, std::vector &boneMat3x4) { if(Vertices.empty()) @@ -56,6 +83,7 @@ void CShadowSkin::applySkin(CVector *dst, std::vector &boneMat3x4) // cache the data in L1 cache. CFastMem::precache(src, nBlockInf * sizeof(CShadowVertex)); + CVector temp; // for all InfluencedVertices only. for(;nBlockInf>0;nBlockInf--, src++, dst++) { diff --git a/code/nel/src/3d/vegetable_manager.cpp b/code/nel/src/3d/vegetable_manager.cpp index ba44a766f..f860b5b59 100644 --- a/code/nel/src/3d/vegetable_manager.cpp +++ b/code/nel/src/3d/vegetable_manager.cpp @@ -1379,21 +1379,21 @@ void CVegetableManager::addInstance(CVegetableInstanceGroup *ig, // Pos. //------- // Separate Center and relative pos. - CVector relPos= mat.mulVector(*(CVector*)srcPtr); // mulVector, because translation in v[center] + CVector relPos= mat.mulVector(*(CVectorPacked*)srcPtr); // mulVector, because translation in v[center] // compute bendCenterPos CVector bendCenterPos; if(shape->BendCenterMode == CVegetableShapeBuild::BendCenterNull) bendCenterPos= CVector::Null; else { - CVector v= *(CVector*)srcPtr; + CVector v= *(CVectorPacked*)srcPtr; v.z= 0; bendCenterPos= mat.mulVector(v); // mulVector, because translation in v[center] } // copy deltaPos= relPos-bendCenterPos; - *(CVector*)dstPtr= deltaPos; - *(CVector*)(dstPtr + dstCenterOff)= instancePos + bendCenterPos; + *(CVectorPacked*)dstPtr= deltaPos; + *(CVectorPacked*)(dstPtr + dstCenterOff)= instancePos + bendCenterPos; // if !destLighted, then VP is different if(!destLighted) { @@ -1426,7 +1426,7 @@ void CVegetableManager::addInstance(CVegetableInstanceGroup *ig, if(destLighted) { // normal - *(CVector*)(dstPtr + dstNormalOff)= normalMat.mulVector( *(CVector*)(srcPtr + srcNormalOff) ); + *(CVectorPacked*)(dstPtr + dstNormalOff)= normalMat.mulVector( *(CVectorPacked*)(srcPtr + srcNormalOff) ); } // If destLighted, secondaryRGBA is the ambient // else secondaryRGBA is used only for Alpha (DLM uv.v). @@ -1437,7 +1437,7 @@ void CVegetableManager::addInstance(CVegetableInstanceGroup *ig, nlassert(!destLighted); // compute normal. - CVector rotNormal= normalMat.mulVector( *(CVector*)(srcPtr + srcNormalOff) ); + CVector rotNormal= normalMat.mulVector( *(CVectorPacked*)(srcPtr + srcNormalOff) ); // must normalize() because scale is possible. rotNormal.normalize(); @@ -1466,7 +1466,7 @@ void CVegetableManager::addInstance(CVegetableInstanceGroup *ig, // Bend. //------- - CVector *dstBendPtr= (CVector*)(dstPtr + dstBendOff); + CVectorPacked *dstBendPtr= (CVectorPacked*)(dstPtr + dstBendOff); // setup bend Phase. dstBendPtr->y= bendPhase; // setup bend Weight. @@ -2704,7 +2704,7 @@ uint CVegetableManager::updateInstanceLighting(CVegetableInstanceGroup *ig, uin nlassert(!destLighted); // compute normal. - CVector rotNormal= normalMat.mulVector( *(CVector*)(srcPtr + srcNormalOff) ); + CVector rotNormal= normalMat.mulVector( *(CVectorPacked*)(srcPtr + srcNormalOff) ); // must normalize() because scale is possible. rotNormal.normalize(); diff --git a/code/nel/src/3d/vertex_buffer.cpp b/code/nel/src/3d/vertex_buffer.cpp index 94f269a2a..e8c5ac0c9 100644 --- a/code/nel/src/3d/vertex_buffer.cpp +++ b/code/nel/src/3d/vertex_buffer.cpp @@ -674,13 +674,13 @@ void CVertexBuffer::serialOldV1Minus(NLMISC::IStream &f, sint ver) // XYZ. if(_Flags & PositionFlag) { - CVector &vert= *(CVector*)(pointer + stridedId + _Offset[Position]); + CVectorPacked &vert= *(CVectorPacked*)(pointer + stridedId + _Offset[Position]); f.serial(vert); } // Normal if(_Flags & NormalFlag) { - CVector &norm= *(CVector*)(pointer + stridedId + _Offset[Normal]); + CVectorPacked &norm= *(CVectorPacked*)(pointer + stridedId + _Offset[Normal]); f.serial(norm); } // Uvs.