SSE2: More CVector alignment fixes

--HG--
branch : sse2
hg/feature/sse2
kaetemi 11 years ago
parent 31b2141b12
commit d94a49b3d8

@ -74,6 +74,7 @@ public:
public: public:
// skinning // skinning
void applySkin(NLMISC::CVectorPacked *dst, std::vector<CMatrix3x4> &boneMat3x4);
void applySkin(NLMISC::CVector *dst, std::vector<CMatrix3x4> &boneMat3x4); void applySkin(NLMISC::CVector *dst, std::vector<CMatrix3x4> &boneMat3x4);
/** return ray intersection. /** return ray intersection.

@ -676,7 +676,7 @@ bool CLodCharacterManager::addRenderCharacterKey(CLodCharacterInstance &instan
{ {
// NB: order is important for AGP filling optimisation // NB: order is important for AGP filling optimisation
// transform vertex, and store. // transform vertex, and store.
CVector *dstVector= (CVector*)dstPtr; CVectorPacked *dstVector= (CVectorPacked*)dstPtr;
fVect.x= vertPtr->x; fVect.y= vertPtr->y; fVect.z= vertPtr->z; fVect.x= vertPtr->x; fVect.y= vertPtr->y; fVect.z= vertPtr->z;
++vertPtr; ++vertPtr;
dstVector->x= a00 * fVect.x + a01 * fVect.y + a02 * fVect.z + matPos.x; dstVector->x= a00 * fVect.x + a01 * fVect.y + a02 * fVect.z + matPos.x;

@ -1049,7 +1049,7 @@ bool CMeshGeom::retrieveVertices(std::vector<NLMISC::CVector> &vertices) const
uint vSize= vb.getVertexSize(); uint vSize= vb.getVertexSize();
for(i=0;i<vertices.size();i++) for(i=0;i<vertices.size();i++)
{ {
vertices[i]= *(const CVector*)pVert; vertices[i]= *(const CVectorPacked*)pVert;
pVert+= vSize; pVert+= vSize;
} }
} }
@ -1718,7 +1718,7 @@ void CMeshGeom::bkupOriginalSkinVertices()
_OriginalTGSpace.resize(numVertices); _OriginalTGSpace.resize(numVertices);
for(uint i=0; i<numVertices;i++) for(uint i=0; i<numVertices;i++)
{ {
_OriginalTGSpace[i]= *(CVector*)vba.getTexCoordPointer(i, tgSpaceStage); _OriginalTGSpace[i]= *(CVectorPacked*)vba.getTexCoordPointer(i, tgSpaceStage);
} }
} }
} }
@ -1760,7 +1760,7 @@ void CMeshGeom::restoreOriginalSkinVertices()
// copy tangent space vectors // copy tangent space vectors
for(uint i = 0; i < numVertices; ++i) for(uint i = 0; i < numVertices; ++i)
{ {
*(CVector*)vba.getTexCoordPointer(i, numTexCoords - 1)= _OriginalTGSpace[i]; *(CVectorPacked*)vba.getTexCoordPointer(i, numTexCoords - 1)= _OriginalTGSpace[i];
} }
} }
@ -2117,7 +2117,7 @@ void CMeshGeom::buildShadowSkin()
for(uint i=0; i<numVertices;i++) for(uint i=0; i<numVertices;i++)
{ {
// Copy Vertex // Copy Vertex
_ShadowSkin.Vertices[i].Vertex= *((CVector*)srcVert); _ShadowSkin.Vertices[i].Vertex= *((CVectorPacked*)srcVert);
// Suppose the 0 matrix inf is the highest (we are at least sure it is not 0) // Suppose the 0 matrix inf is the highest (we are at least sure it is not 0)
// And SkinWeight Export show the 0th is the highest one... // And SkinWeight Export show the 0th is the highest one...
_ShadowSkin.Vertices[i].MatrixId= ((CPaletteSkin*)srcPal)->MatrixId[0]; _ShadowSkin.Vertices[i].MatrixId= ((CPaletteSkin*)srcPal)->MatrixId[0];

@ -2066,7 +2066,7 @@ void CMeshMRMGeom::bkupOriginalSkinVerticesSubset(uint wedgeStart, uint wedgeEnd
_OriginalTGSpace.resize(_VBufferFinal.getNumVertices()); _OriginalTGSpace.resize(_VBufferFinal.getNumVertices());
for(uint i=wedgeStart; i<wedgeEnd;i++) for(uint i=wedgeStart; i<wedgeEnd;i++)
{ {
_OriginalTGSpace[i]= *(CVector*)vba.getTexCoordPointer(i, tgSpaceStage); _OriginalTGSpace[i]= *(CVectorPacked*)vba.getTexCoordPointer(i, tgSpaceStage);
} }
} }
} }
@ -2105,7 +2105,7 @@ void CMeshMRMGeom::restoreOriginalSkinVertices()
// copy tangent space vectors // copy tangent space vectors
for(uint i = 0; i < _VBufferFinal.getNumVertices(); ++i) for(uint i = 0; i < _VBufferFinal.getNumVertices(); ++i)
{ {
*(CVector*)vba.getTexCoordPointer(i, numTexCoords - 1)= _OriginalTGSpace[i]; *(CVectorPacked*)vba.getTexCoordPointer(i, numTexCoords - 1)= _OriginalTGSpace[i];
} }
} }
} }
@ -2164,8 +2164,8 @@ void CMeshMRMGeom::restoreOriginalSkinPart(CLod &lod)
CVector *srcVertex= srcVertexPtr + index; CVector *srcVertex= srcVertexPtr + index;
CVector *srcNormal= srcNormalPtr + index; CVector *srcNormal= srcNormalPtr + index;
uint8 *dstVertexVB= destVertexPtr + index * vertexSize; uint8 *dstVertexVB= destVertexPtr + index * vertexSize;
CVector *dstVertex= (CVector*)(dstVertexVB); CVectorPacked *dstVertex= (CVectorPacked*)(dstVertexVB);
CVector *dstNormal= (CVector*)(dstVertexVB + normalOff); CVectorPacked *dstNormal= (CVectorPacked*)(dstVertexVB + normalOff);
// Vertex. // Vertex.
@ -2621,7 +2621,7 @@ bool CMeshMRMGeom::buildGeometryForLod(uint lodId, std::vector<CVector> &vertice
// Final remaping of vertex to final index // Final remaping of vertex to final index
vertexRemap[i]= dstIndex; vertexRemap[i]= dstIndex;
// copy to dest // copy to dest
*pDstVert= *(CVector*)pSrcVert; *pDstVert= *(CVectorPacked*)pSrcVert;
// next dest // next dest
pDstVert++; pDstVert++;
@ -3467,7 +3467,7 @@ sint CMeshMRMGeom::renderShadowSkinGeom(CMeshMRMInstance *mi, uint remainingVe
CLod &lod= _Lods[_Lods.size()-1]; CLod &lod= _Lods[_Lods.size()-1];
computeBoneMatrixes3x4(boneMat3x4, lod.MatrixInfluences, skeleton); computeBoneMatrixes3x4(boneMat3x4, lod.MatrixInfluences, skeleton);
_ShadowSkin.applySkin((CVector*)vbDest, boneMat3x4); _ShadowSkin.applySkin((CVectorPacked*)vbDest, boneMat3x4);
// How many vertices are added to the VBuffer ??? // How many vertices are added to the VBuffer ???

@ -222,11 +222,13 @@ void CMeshMRMGeom::applySkin(CLod &lod, const CSkeletonModel *skeleton)
CMesh::CSkinWeight *srcSkin= srcSkinPtr + index; CMesh::CSkinWeight *srcSkin= srcSkinPtr + index;
CVector *srcVertex= srcVertexPtr + index; CVector *srcVertex= srcVertexPtr + index;
uint8 *dstVertexVB= destVertexPtr + index * vertexSize; uint8 *dstVertexVB= destVertexPtr + index * vertexSize;
CVector *dstVertex= (CVector*)(dstVertexVB); CVectorPacked *dstVertex= (CVectorPacked*)(dstVertexVB);
// Vertex. // Vertex.
boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, *dstVertex); CVector temp;
boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, temp);
*dstVertex = temp;
} }
break; break;
@ -239,12 +241,14 @@ void CMeshMRMGeom::applySkin(CLod &lod, const CSkeletonModel *skeleton)
CMesh::CSkinWeight *srcSkin= srcSkinPtr + index; CMesh::CSkinWeight *srcSkin= srcSkinPtr + index;
CVector *srcVertex= srcVertexPtr + index; CVector *srcVertex= srcVertexPtr + index;
uint8 *dstVertexVB= destVertexPtr + index * vertexSize; uint8 *dstVertexVB= destVertexPtr + index * vertexSize;
CVector *dstVertex= (CVector*)(dstVertexVB); CVectorPacked *dstVertex= (CVectorPacked*)(dstVertexVB);
// Vertex. // Vertex.
boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], *dstVertex); CVector temp;
boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], *dstVertex); boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], temp);
boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], temp);
*dstVertex = temp;
} }
break; break;
@ -257,13 +261,15 @@ void CMeshMRMGeom::applySkin(CLod &lod, const CSkeletonModel *skeleton)
CMesh::CSkinWeight *srcSkin= srcSkinPtr + index; CMesh::CSkinWeight *srcSkin= srcSkinPtr + index;
CVector *srcVertex= srcVertexPtr + index; CVector *srcVertex= srcVertexPtr + index;
uint8 *dstVertexVB= destVertexPtr + index * vertexSize; uint8 *dstVertexVB= destVertexPtr + index * vertexSize;
CVector *dstVertex= (CVector*)(dstVertexVB); CVectorPacked *dstVertex= (CVectorPacked*)(dstVertexVB);
// Vertex. // Vertex.
boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], *dstVertex); CVector temp;
boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], *dstVertex); boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], temp);
boneMat3x4[ srcSkin->MatrixId[2] ].mulAddPoint( *srcVertex, srcSkin->Weights[2], *dstVertex); boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], temp);
boneMat3x4[ srcSkin->MatrixId[2] ].mulAddPoint( *srcVertex, srcSkin->Weights[2], temp);
*dstVertex = temp;
} }
break; break;
@ -276,14 +282,16 @@ void CMeshMRMGeom::applySkin(CLod &lod, const CSkeletonModel *skeleton)
CMesh::CSkinWeight *srcSkin= srcSkinPtr + index; CMesh::CSkinWeight *srcSkin= srcSkinPtr + index;
CVector *srcVertex= srcVertexPtr + index; CVector *srcVertex= srcVertexPtr + index;
uint8 *dstVertexVB= destVertexPtr + index * vertexSize; uint8 *dstVertexVB= destVertexPtr + index * vertexSize;
CVector *dstVertex= (CVector*)(dstVertexVB); CVectorPacked *dstVertex= (CVectorPacked*)(dstVertexVB);
// Vertex. // Vertex.
boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], *dstVertex); CVector temp;
boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], *dstVertex); boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], temp);
boneMat3x4[ srcSkin->MatrixId[2] ].mulAddPoint( *srcVertex, srcSkin->Weights[2], *dstVertex); boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], temp);
boneMat3x4[ srcSkin->MatrixId[3] ].mulAddPoint( *srcVertex, srcSkin->Weights[3], *dstVertex); boneMat3x4[ srcSkin->MatrixId[2] ].mulAddPoint( *srcVertex, srcSkin->Weights[2], temp);
boneMat3x4[ srcSkin->MatrixId[3] ].mulAddPoint( *srcVertex, srcSkin->Weights[3], temp);
*dstVertex = temp;
} }
break; break;

@ -1962,7 +1962,7 @@ sint CMeshMRMSkinnedGeom::renderShadowSkinGeom(CMeshMRMSkinnedInstance *mi, ui
CLod &lod= _Lods[_Lods.size()-1]; CLod &lod= _Lods[_Lods.size()-1];
computeBoneMatrixes3x4(boneMat3x4, lod.MatrixInfluences, skeleton); computeBoneMatrixes3x4(boneMat3x4, lod.MatrixInfluences, skeleton);
_ShadowSkin.applySkin((CVector*)vbDest, boneMat3x4); _ShadowSkin.applySkin((CVectorPacked*)vbDest, boneMat3x4);
// How many vertices are added to the VBuffer ??? // How many vertices are added to the VBuffer ???

@ -302,7 +302,7 @@ void CMeshMultiLodInstance::setPosCoarseMesh( CMeshGeom &geom, const CMatrix &m
for (uint i=0; i<_LastCoarseMeshNumVertices; i++) for (uint i=0; i<_LastCoarseMeshNumVertices; i++)
{ {
// Transform position // Transform position
*(CVector*)vDest = matrix.mulPoint (*(const CVector*)vSrc); *(CVectorPacked*)vDest = matrix.mulPoint (CVector(*(const CVectorPacked*)vSrc));
// Next point // Next point
vSrc+=vtSrcSize; vSrc+=vtSrcSize;

@ -1056,7 +1056,7 @@ inline void CPatch::fillFar0VertexVB(CTessFarVertex *pVert)
// v[11]== EndPos - StartPos // v[11]== EndPos - StartPos
CHECK_VBA_RANGE(CLandscapeGlobals::CurrentFar0VBInfo.Accessor, CurVBPtr + CLandscapeGlobals::CurrentFar0VBInfo.DeltaPosOff, sizeof(CVector)) CHECK_VBA_RANGE(CLandscapeGlobals::CurrentFar0VBInfo.Accessor, CurVBPtr + CLandscapeGlobals::CurrentFar0VBInfo.DeltaPosOff, sizeof(CVector))
*(CVector*)(CurVBPtr + CLandscapeGlobals::CurrentFar0VBInfo.DeltaPosOff)= *(CVectorPacked*)(CurVBPtr + CLandscapeGlobals::CurrentFar0VBInfo.DeltaPosOff)=
pVert->Src->EndPos - pVert->Src->StartPos; pVert->Src->EndPos - pVert->Src->StartPos;
} }
} }
@ -1144,7 +1144,7 @@ inline void CPatch::fillFar1VertexVB(CTessFarVertex *pVert)
// v[11]== EndPos - StartPos // v[11]== EndPos - StartPos
CHECK_VBA_RANGE(CLandscapeGlobals::CurrentFar1VBInfo.Accessor, CurVBPtr + CLandscapeGlobals::CurrentFar1VBInfo.DeltaPosOff, sizeof(CVector)) CHECK_VBA_RANGE(CLandscapeGlobals::CurrentFar1VBInfo.Accessor, CurVBPtr + CLandscapeGlobals::CurrentFar1VBInfo.DeltaPosOff, sizeof(CVector))
*(CVector*)(CurVBPtr + CLandscapeGlobals::CurrentFar1VBInfo.DeltaPosOff)= *(CVectorPacked*)(CurVBPtr + CLandscapeGlobals::CurrentFar1VBInfo.DeltaPosOff)=
pVert->Src->EndPos - pVert->Src->StartPos; pVert->Src->EndPos - pVert->Src->StartPos;
// v[12]== Alpha information // v[12]== Alpha information
@ -1214,7 +1214,7 @@ inline void CPatch::fillTileVertexVB(CTessNearVertex *pVert)
// v[11]== EndPos - StartPos // v[11]== EndPos - StartPos
CHECK_VBA_RANGE(CLandscapeGlobals::CurrentTileVBInfo.Accessor, CurVBPtr + CLandscapeGlobals::CurrentTileVBInfo.DeltaPosOff, sizeof(CVector)) CHECK_VBA_RANGE(CLandscapeGlobals::CurrentTileVBInfo.Accessor, CurVBPtr + CLandscapeGlobals::CurrentTileVBInfo.DeltaPosOff, sizeof(CVector))
*(CVector*)(CurVBPtr + CLandscapeGlobals::CurrentTileVBInfo.DeltaPosOff)= *(CVectorPacked*)(CurVBPtr + CLandscapeGlobals::CurrentTileVBInfo.DeltaPosOff)=
pVert->Src->EndPos - pVert->Src->StartPos; pVert->Src->EndPos - pVert->Src->StartPos;
} }
} }

@ -38,6 +38,33 @@ uint CShadowSkin::NumCacheVertexShadow= NL_BlockByteL1 / sizeof(CShadowVertex);
// *************************************************************************** // ***************************************************************************
/** Skin every shadow vertex into a packed destination array.
 *  Each entry of Vertices is transformed by the bone matrix selected by its MatrixId,
 *  and the result is written to consecutive elements starting at \c dst.
 *  \param dst destination array; one element is written per entry of Vertices.
 *  \param boneMat3x4 bone matrices, indexed by each vertex's MatrixId.
 */
void CShadowSkin::applySkin(CVectorPacked *dst, std::vector<CMatrix3x4> &boneMat3x4)
{
	// Nothing to skin.
	if(Vertices.empty())
		return;

	CShadowVertex	*vertexIt= &Vertices[0];
	uint	remaining= (uint)Vertices.size();

	// Walk the source vertices block by block, at most NumCacheVertexShadow per block.
	while(remaining>0)
	{
		// size of this block, then account for it in the remaining count.
		const uint	blockCount= min(NumCacheVertexShadow, remaining);
		remaining-= blockCount;

		// precache the source vertices of this block before reading them.
		CFastMem::precache(vertexIt, blockCount * sizeof(CShadowVertex));

		// The matrix writes into a CVector temporary, which is then assigned to the packed destination.
		// NOTE(review): presumably because CVector has alignment requirements that dst cannot guarantee -- confirm.
		CVector	skinned;
		CShadowVertex	*const blockEnd= vertexIt + blockCount;
		while(vertexIt!=blockEnd)
		{
			boneMat3x4[ vertexIt->MatrixId ].mulSetPoint( vertexIt->Vertex, skinned );
			*dst= skinned;
			++vertexIt;
			++dst;
		}
	}
}
void CShadowSkin::applySkin(CVector *dst, std::vector<CMatrix3x4> &boneMat3x4) void CShadowSkin::applySkin(CVector *dst, std::vector<CMatrix3x4> &boneMat3x4)
{ {
if(Vertices.empty()) if(Vertices.empty())
@ -56,6 +83,7 @@ void CShadowSkin::applySkin(CVector *dst, std::vector<CMatrix3x4> &boneMat3x4)
// cache the data in L1 cache. // cache the data in L1 cache.
CFastMem::precache(src, nBlockInf * sizeof(CShadowVertex)); CFastMem::precache(src, nBlockInf * sizeof(CShadowVertex));
CVector temp;
// for all InfluencedVertices only. // for all InfluencedVertices only.
for(;nBlockInf>0;nBlockInf--, src++, dst++) for(;nBlockInf>0;nBlockInf--, src++, dst++)
{ {

@ -1379,21 +1379,21 @@ void CVegetableManager::addInstance(CVegetableInstanceGroup *ig,
// Pos. // Pos.
//------- //-------
// Separate Center and relative pos. // Separate Center and relative pos.
CVector relPos= mat.mulVector(*(CVector*)srcPtr); // mulVector, because translation in v[center] CVector relPos= mat.mulVector(*(CVectorPacked*)srcPtr); // mulVector, because translation in v[center]
// compute bendCenterPos // compute bendCenterPos
CVector bendCenterPos; CVector bendCenterPos;
if(shape->BendCenterMode == CVegetableShapeBuild::BendCenterNull) if(shape->BendCenterMode == CVegetableShapeBuild::BendCenterNull)
bendCenterPos= CVector::Null; bendCenterPos= CVector::Null;
else else
{ {
CVector v= *(CVector*)srcPtr; CVector v= *(CVectorPacked*)srcPtr;
v.z= 0; v.z= 0;
bendCenterPos= mat.mulVector(v); // mulVector, because translation in v[center] bendCenterPos= mat.mulVector(v); // mulVector, because translation in v[center]
} }
// copy // copy
deltaPos= relPos-bendCenterPos; deltaPos= relPos-bendCenterPos;
*(CVector*)dstPtr= deltaPos; *(CVectorPacked*)dstPtr= deltaPos;
*(CVector*)(dstPtr + dstCenterOff)= instancePos + bendCenterPos; *(CVectorPacked*)(dstPtr + dstCenterOff)= instancePos + bendCenterPos;
// if !destLighted, then VP is different // if !destLighted, then VP is different
if(!destLighted) if(!destLighted)
{ {
@ -1426,7 +1426,7 @@ void CVegetableManager::addInstance(CVegetableInstanceGroup *ig,
if(destLighted) if(destLighted)
{ {
// normal // normal
*(CVector*)(dstPtr + dstNormalOff)= normalMat.mulVector( *(CVector*)(srcPtr + srcNormalOff) ); *(CVectorPacked*)(dstPtr + dstNormalOff)= normalMat.mulVector( *(CVectorPacked*)(srcPtr + srcNormalOff) );
} }
// If destLighted, secondaryRGBA is the ambient // If destLighted, secondaryRGBA is the ambient
// else secondaryRGBA is used only for Alpha (DLM uv.v). // else secondaryRGBA is used only for Alpha (DLM uv.v).
@ -1437,7 +1437,7 @@ void CVegetableManager::addInstance(CVegetableInstanceGroup *ig,
nlassert(!destLighted); nlassert(!destLighted);
// compute normal. // compute normal.
CVector rotNormal= normalMat.mulVector( *(CVector*)(srcPtr + srcNormalOff) ); CVector rotNormal= normalMat.mulVector( *(CVectorPacked*)(srcPtr + srcNormalOff) );
// must normalize() because scale is possible. // must normalize() because scale is possible.
rotNormal.normalize(); rotNormal.normalize();
@ -1466,7 +1466,7 @@ void CVegetableManager::addInstance(CVegetableInstanceGroup *ig,
// Bend. // Bend.
//------- //-------
CVector *dstBendPtr= (CVector*)(dstPtr + dstBendOff); CVectorPacked *dstBendPtr= (CVectorPacked*)(dstPtr + dstBendOff);
// setup bend Phase. // setup bend Phase.
dstBendPtr->y= bendPhase; dstBendPtr->y= bendPhase;
// setup bend Weight. // setup bend Weight.
@ -2704,7 +2704,7 @@ uint CVegetableManager::updateInstanceLighting(CVegetableInstanceGroup *ig, uin
nlassert(!destLighted); nlassert(!destLighted);
// compute normal. // compute normal.
CVector rotNormal= normalMat.mulVector( *(CVector*)(srcPtr + srcNormalOff) ); CVector rotNormal= normalMat.mulVector( *(CVectorPacked*)(srcPtr + srcNormalOff) );
// must normalize() because scale is possible. // must normalize() because scale is possible.
rotNormal.normalize(); rotNormal.normalize();

@ -674,13 +674,13 @@ void CVertexBuffer::serialOldV1Minus(NLMISC::IStream &f, sint ver)
// XYZ. // XYZ.
if(_Flags & PositionFlag) if(_Flags & PositionFlag)
{ {
CVector &vert= *(CVector*)(pointer + stridedId + _Offset[Position]); CVectorPacked &vert= *(CVectorPacked*)(pointer + stridedId + _Offset[Position]);
f.serial(vert); f.serial(vert);
} }
// Normal // Normal
if(_Flags & NormalFlag) if(_Flags & NormalFlag)
{ {
CVector &norm= *(CVector*)(pointer + stridedId + _Offset[Normal]); CVectorPacked &norm= *(CVectorPacked*)(pointer + stridedId + _Offset[Normal]);
f.serial(norm); f.serial(norm);
} }
// Uvs. // Uvs.

Loading…
Cancel
Save