SSE2: More CVector alignment fixes

--HG--
branch : sse2
hg/feature/sse2
kaetemi 11 years ago
parent 31b2141b12
commit d94a49b3d8

@ -74,6 +74,7 @@ public:
public:
// skinning
/// Skin every shadow vertex with its bone matrix and write the results to a CVectorPacked array (one entry per vertex).
void applySkin(NLMISC::CVectorPacked *dst, std::vector<CMatrix3x4> &boneMat3x4);
/// Overload taking a CVector destination array. NOTE(review): presumably the same skinning as the packed overload; confirm against the implementation.
void applySkin(NLMISC::CVector *dst, std::vector<CMatrix3x4> &boneMat3x4);
/** return ray intersection.

@ -676,7 +676,7 @@ bool CLodCharacterManager::addRenderCharacterKey(CLodCharacterInstance &instan
{
// NB: order is important for AGP filling optimisation
// transform vertex, and store.
CVector *dstVector= (CVector*)dstPtr;
CVectorPacked *dstVector= (CVectorPacked*)dstPtr;
fVect.x= vertPtr->x; fVect.y= vertPtr->y; fVect.z= vertPtr->z;
++vertPtr;
dstVector->x= a00 * fVect.x + a01 * fVect.y + a02 * fVect.z + matPos.x;

@ -1049,7 +1049,7 @@ bool CMeshGeom::retrieveVertices(std::vector<NLMISC::CVector> &vertices) const
uint vSize= vb.getVertexSize();
for(i=0;i<vertices.size();i++)
{
vertices[i]= *(const CVector*)pVert;
vertices[i]= *(const CVectorPacked*)pVert;
pVert+= vSize;
}
}
@ -1718,7 +1718,7 @@ void CMeshGeom::bkupOriginalSkinVertices()
_OriginalTGSpace.resize(numVertices);
for(uint i=0; i<numVertices;i++)
{
_OriginalTGSpace[i]= *(CVector*)vba.getTexCoordPointer(i, tgSpaceStage);
_OriginalTGSpace[i]= *(CVectorPacked*)vba.getTexCoordPointer(i, tgSpaceStage);
}
}
}
@ -1760,7 +1760,7 @@ void CMeshGeom::restoreOriginalSkinVertices()
// copy tangent space vectors
for(uint i = 0; i < numVertices; ++i)
{
*(CVector*)vba.getTexCoordPointer(i, numTexCoords - 1)= _OriginalTGSpace[i];
*(CVectorPacked*)vba.getTexCoordPointer(i, numTexCoords - 1)= _OriginalTGSpace[i];
}
}
@ -2117,7 +2117,7 @@ void CMeshGeom::buildShadowSkin()
for(uint i=0; i<numVertices;i++)
{
// Copy Vertex
_ShadowSkin.Vertices[i].Vertex= *((CVector*)srcVert);
_ShadowSkin.Vertices[i].Vertex= *((CVectorPacked*)srcVert);
// Suppose the 0 matrix inf is the highest (we are at least sure it is not 0)
// And SkinWeight Export show the 0th is the highest one...
_ShadowSkin.Vertices[i].MatrixId= ((CPaletteSkin*)srcPal)->MatrixId[0];

@ -2066,7 +2066,7 @@ void CMeshMRMGeom::bkupOriginalSkinVerticesSubset(uint wedgeStart, uint wedgeEnd
_OriginalTGSpace.resize(_VBufferFinal.getNumVertices());
for(uint i=wedgeStart; i<wedgeEnd;i++)
{
_OriginalTGSpace[i]= *(CVector*)vba.getTexCoordPointer(i, tgSpaceStage);
_OriginalTGSpace[i]= *(CVectorPacked*)vba.getTexCoordPointer(i, tgSpaceStage);
}
}
}
@ -2105,7 +2105,7 @@ void CMeshMRMGeom::restoreOriginalSkinVertices()
// copy tangent space vectors
for(uint i = 0; i < _VBufferFinal.getNumVertices(); ++i)
{
*(CVector*)vba.getTexCoordPointer(i, numTexCoords - 1)= _OriginalTGSpace[i];
*(CVectorPacked*)vba.getTexCoordPointer(i, numTexCoords - 1)= _OriginalTGSpace[i];
}
}
}
@ -2164,8 +2164,8 @@ void CMeshMRMGeom::restoreOriginalSkinPart(CLod &lod)
CVector *srcVertex= srcVertexPtr + index;
CVector *srcNormal= srcNormalPtr + index;
uint8 *dstVertexVB= destVertexPtr + index * vertexSize;
CVector *dstVertex= (CVector*)(dstVertexVB);
CVector *dstNormal= (CVector*)(dstVertexVB + normalOff);
CVectorPacked *dstVertex= (CVectorPacked*)(dstVertexVB);
CVectorPacked *dstNormal= (CVectorPacked*)(dstVertexVB + normalOff);
// Vertex.
@ -2621,7 +2621,7 @@ bool CMeshMRMGeom::buildGeometryForLod(uint lodId, std::vector<CVector> &vertice
// Final remaping of vertex to final index
vertexRemap[i]= dstIndex;
// copy to dest
*pDstVert= *(CVector*)pSrcVert;
*pDstVert= *(CVectorPacked*)pSrcVert;
// next dest
pDstVert++;
@ -3467,7 +3467,7 @@ sint CMeshMRMGeom::renderShadowSkinGeom(CMeshMRMInstance *mi, uint remainingVe
CLod &lod= _Lods[_Lods.size()-1];
computeBoneMatrixes3x4(boneMat3x4, lod.MatrixInfluences, skeleton);
_ShadowSkin.applySkin((CVector*)vbDest, boneMat3x4);
_ShadowSkin.applySkin((CVectorPacked*)vbDest, boneMat3x4);
// How many vertices are added to the VBuffer ???

@ -222,11 +222,13 @@ void CMeshMRMGeom::applySkin(CLod &lod, const CSkeletonModel *skeleton)
CMesh::CSkinWeight *srcSkin= srcSkinPtr + index;
CVector *srcVertex= srcVertexPtr + index;
uint8 *dstVertexVB= destVertexPtr + index * vertexSize;
CVector *dstVertex= (CVector*)(dstVertexVB);
CVectorPacked *dstVertex= (CVectorPacked*)(dstVertexVB);
// Vertex.
boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, *dstVertex);
CVector temp;
boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, temp);
*dstVertex = temp;
}
break;
@ -239,12 +241,14 @@ void CMeshMRMGeom::applySkin(CLod &lod, const CSkeletonModel *skeleton)
CMesh::CSkinWeight *srcSkin= srcSkinPtr + index;
CVector *srcVertex= srcVertexPtr + index;
uint8 *dstVertexVB= destVertexPtr + index * vertexSize;
CVector *dstVertex= (CVector*)(dstVertexVB);
CVectorPacked *dstVertex= (CVectorPacked*)(dstVertexVB);
// Vertex.
boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], *dstVertex);
boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], *dstVertex);
CVector temp;
boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], temp);
boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], temp);
*dstVertex = temp;
}
break;
@ -257,13 +261,15 @@ void CMeshMRMGeom::applySkin(CLod &lod, const CSkeletonModel *skeleton)
CMesh::CSkinWeight *srcSkin= srcSkinPtr + index;
CVector *srcVertex= srcVertexPtr + index;
uint8 *dstVertexVB= destVertexPtr + index * vertexSize;
CVector *dstVertex= (CVector*)(dstVertexVB);
CVectorPacked *dstVertex= (CVectorPacked*)(dstVertexVB);
// Vertex.
boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], *dstVertex);
boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], *dstVertex);
boneMat3x4[ srcSkin->MatrixId[2] ].mulAddPoint( *srcVertex, srcSkin->Weights[2], *dstVertex);
CVector temp;
boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], temp);
boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], temp);
boneMat3x4[ srcSkin->MatrixId[2] ].mulAddPoint( *srcVertex, srcSkin->Weights[2], temp);
*dstVertex = temp;
}
break;
@ -276,14 +282,16 @@ void CMeshMRMGeom::applySkin(CLod &lod, const CSkeletonModel *skeleton)
CMesh::CSkinWeight *srcSkin= srcSkinPtr + index;
CVector *srcVertex= srcVertexPtr + index;
uint8 *dstVertexVB= destVertexPtr + index * vertexSize;
CVector *dstVertex= (CVector*)(dstVertexVB);
CVectorPacked *dstVertex= (CVectorPacked*)(dstVertexVB);
// Vertex.
boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], *dstVertex);
boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], *dstVertex);
boneMat3x4[ srcSkin->MatrixId[2] ].mulAddPoint( *srcVertex, srcSkin->Weights[2], *dstVertex);
boneMat3x4[ srcSkin->MatrixId[3] ].mulAddPoint( *srcVertex, srcSkin->Weights[3], *dstVertex);
CVector temp;
boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], temp);
boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], temp);
boneMat3x4[ srcSkin->MatrixId[2] ].mulAddPoint( *srcVertex, srcSkin->Weights[2], temp);
boneMat3x4[ srcSkin->MatrixId[3] ].mulAddPoint( *srcVertex, srcSkin->Weights[3], temp);
*dstVertex = temp;
}
break;

@ -1962,7 +1962,7 @@ sint CMeshMRMSkinnedGeom::renderShadowSkinGeom(CMeshMRMSkinnedInstance *mi, ui
CLod &lod= _Lods[_Lods.size()-1];
computeBoneMatrixes3x4(boneMat3x4, lod.MatrixInfluences, skeleton);
_ShadowSkin.applySkin((CVector*)vbDest, boneMat3x4);
_ShadowSkin.applySkin((CVectorPacked*)vbDest, boneMat3x4);
// How many vertices are added to the VBuffer ???

@ -302,7 +302,7 @@ void CMeshMultiLodInstance::setPosCoarseMesh( CMeshGeom &geom, const CMatrix &m
for (uint i=0; i<_LastCoarseMeshNumVertices; i++)
{
// Transform position
*(CVector*)vDest = matrix.mulPoint (*(const CVector*)vSrc);
*(CVectorPacked*)vDest = matrix.mulPoint (CVector(*(const CVectorPacked*)vSrc));
// Next point
vSrc+=vtSrcSize;

@ -1056,7 +1056,7 @@ inline void CPatch::fillFar0VertexVB(CTessFarVertex *pVert)
// v[11]== EndPos - StartPos
CHECK_VBA_RANGE(CLandscapeGlobals::CurrentFar0VBInfo.Accessor, CurVBPtr + CLandscapeGlobals::CurrentFar0VBInfo.DeltaPosOff, sizeof(CVector))
*(CVector*)(CurVBPtr + CLandscapeGlobals::CurrentFar0VBInfo.DeltaPosOff)=
*(CVectorPacked*)(CurVBPtr + CLandscapeGlobals::CurrentFar0VBInfo.DeltaPosOff)=
pVert->Src->EndPos - pVert->Src->StartPos;
}
}
@ -1144,7 +1144,7 @@ inline void CPatch::fillFar1VertexVB(CTessFarVertex *pVert)
// v[11]== EndPos - StartPos
CHECK_VBA_RANGE(CLandscapeGlobals::CurrentFar1VBInfo.Accessor, CurVBPtr + CLandscapeGlobals::CurrentFar1VBInfo.DeltaPosOff, sizeof(CVector))
*(CVector*)(CurVBPtr + CLandscapeGlobals::CurrentFar1VBInfo.DeltaPosOff)=
*(CVectorPacked*)(CurVBPtr + CLandscapeGlobals::CurrentFar1VBInfo.DeltaPosOff)=
pVert->Src->EndPos - pVert->Src->StartPos;
// v[12]== Alpha information
@ -1214,7 +1214,7 @@ inline void CPatch::fillTileVertexVB(CTessNearVertex *pVert)
// v[11]== EndPos - StartPos
CHECK_VBA_RANGE(CLandscapeGlobals::CurrentTileVBInfo.Accessor, CurVBPtr + CLandscapeGlobals::CurrentTileVBInfo.DeltaPosOff, sizeof(CVector))
*(CVector*)(CurVBPtr + CLandscapeGlobals::CurrentTileVBInfo.DeltaPosOff)=
*(CVectorPacked*)(CurVBPtr + CLandscapeGlobals::CurrentTileVBInfo.DeltaPosOff)=
pVert->Src->EndPos - pVert->Src->StartPos;
}
}

@ -38,6 +38,33 @@ uint CShadowSkin::NumCacheVertexShadow= NL_BlockByteL1 / sizeof(CShadowVertex);
// ***************************************************************************
void CShadowSkin::applySkin(CVectorPacked *dst, std::vector<CMatrix3x4> &boneMat3x4)
{
	// No shadow vertices: nothing to write.
	if(Vertices.empty())
		return;

	CShadowVertex	*srcVertex= &Vertices[0];
	uint			remaining= (uint)Vertices.size();

	// Walk the vertices in blocks of at most NumCacheVertexShadow entries.
	while(remaining != 0)
	{
		// Size of the current block; the rest is left for the next iterations.
		const uint	blockCount= min(NumCacheVertexShadow, remaining);
		remaining-= blockCount;

		// Precache the source vertices of this block before reading them.
		CFastMem::precache(srcVertex, blockCount * sizeof(CShadowVertex));

		// mulSetPoint() outputs into a CVector; the result is then assigned
		// to the packed destination entry.
		CVector			skinned;
		CShadowVertex	*const blockEnd= srcVertex + blockCount;
		for(; srcVertex != blockEnd; ++srcVertex, ++dst)
		{
			boneMat3x4[ srcVertex->MatrixId ].mulSetPoint( srcVertex->Vertex, skinned );
			*dst = skinned;
		}
	}
}
void CShadowSkin::applySkin(CVector *dst, std::vector<CMatrix3x4> &boneMat3x4)
{
if(Vertices.empty())
@ -56,6 +83,7 @@ void CShadowSkin::applySkin(CVector *dst, std::vector<CMatrix3x4> &boneMat3x4)
// cache the data in L1 cache.
CFastMem::precache(src, nBlockInf * sizeof(CShadowVertex));
CVector temp;
// for all InfluencedVertices only.
for(;nBlockInf>0;nBlockInf--, src++, dst++)
{

@ -1379,21 +1379,21 @@ void CVegetableManager::addInstance(CVegetableInstanceGroup *ig,
// Pos.
//-------
// Separate Center and relative pos.
CVector relPos= mat.mulVector(*(CVector*)srcPtr); // mulVector, because translation in v[center]
CVector relPos= mat.mulVector(*(CVectorPacked*)srcPtr); // mulVector, because translation in v[center]
// compute bendCenterPos
CVector bendCenterPos;
if(shape->BendCenterMode == CVegetableShapeBuild::BendCenterNull)
bendCenterPos= CVector::Null;
else
{
CVector v= *(CVector*)srcPtr;
CVector v= *(CVectorPacked*)srcPtr;
v.z= 0;
bendCenterPos= mat.mulVector(v); // mulVector, because translation in v[center]
}
// copy
deltaPos= relPos-bendCenterPos;
*(CVector*)dstPtr= deltaPos;
*(CVector*)(dstPtr + dstCenterOff)= instancePos + bendCenterPos;
*(CVectorPacked*)dstPtr= deltaPos;
*(CVectorPacked*)(dstPtr + dstCenterOff)= instancePos + bendCenterPos;
// if !destLighted, then VP is different
if(!destLighted)
{
@ -1426,7 +1426,7 @@ void CVegetableManager::addInstance(CVegetableInstanceGroup *ig,
if(destLighted)
{
// normal
*(CVector*)(dstPtr + dstNormalOff)= normalMat.mulVector( *(CVector*)(srcPtr + srcNormalOff) );
*(CVectorPacked*)(dstPtr + dstNormalOff)= normalMat.mulVector( *(CVectorPacked*)(srcPtr + srcNormalOff) );
}
// If destLighted, secondaryRGBA is the ambient
// else secondaryRGBA is used only for Alpha (DLM uv.v).
@ -1437,7 +1437,7 @@ void CVegetableManager::addInstance(CVegetableInstanceGroup *ig,
nlassert(!destLighted);
// compute normal.
CVector rotNormal= normalMat.mulVector( *(CVector*)(srcPtr + srcNormalOff) );
CVector rotNormal= normalMat.mulVector( *(CVectorPacked*)(srcPtr + srcNormalOff) );
// must normalize() because scale is possible.
rotNormal.normalize();
@ -1466,7 +1466,7 @@ void CVegetableManager::addInstance(CVegetableInstanceGroup *ig,
// Bend.
//-------
CVector *dstBendPtr= (CVector*)(dstPtr + dstBendOff);
CVectorPacked *dstBendPtr= (CVectorPacked*)(dstPtr + dstBendOff);
// setup bend Phase.
dstBendPtr->y= bendPhase;
// setup bend Weight.
@ -2704,7 +2704,7 @@ uint CVegetableManager::updateInstanceLighting(CVegetableInstanceGroup *ig, uin
nlassert(!destLighted);
// compute normal.
CVector rotNormal= normalMat.mulVector( *(CVector*)(srcPtr + srcNormalOff) );
CVector rotNormal= normalMat.mulVector( *(CVectorPacked*)(srcPtr + srcNormalOff) );
// must normalize() because scale is possible.
rotNormal.normalize();

@ -674,13 +674,13 @@ void CVertexBuffer::serialOldV1Minus(NLMISC::IStream &f, sint ver)
// XYZ.
if(_Flags & PositionFlag)
{
CVector &vert= *(CVector*)(pointer + stridedId + _Offset[Position]);
CVectorPacked &vert= *(CVectorPacked*)(pointer + stridedId + _Offset[Position]);
f.serial(vert);
}
// Normal
if(_Flags & NormalFlag)
{
CVector &norm= *(CVector*)(pointer + stridedId + _Offset[Normal]);
CVectorPacked &norm= *(CVectorPacked*)(pointer + stridedId + _Offset[Normal]);
f.serial(norm);
}
// Uvs.

Loading…
Cancel
Save