SSE2: Vectorize some code

--HG--
branch : sse2
hg/feature/sse2
kaetemi 11 years ago
parent afa315b1b5
commit 60879d87e4

@ -403,10 +403,10 @@ template<class T, class TKeyVal> inline void copyToValue(T &value, const TKeyVal
// Converts a CVector into an opaque CRGBA colour: each of x/y/z is scaled by 255,
// truncated to a sint, clamped with NLMISC::clamp(i,0,255) and stored as a uint8
// in R/G/B respectively. Alpha is always set to 255.
inline void copyToValue(NLMISC::CRGBA &col, const CVector &v)
{
// Scratch integer reused for every channel.
sint i;
// Per-component form: scale each component on its own.
// NOTE(review): these three stores are overwritten by the mul255 lines below;
// presumably this listing shows the pre- and post-change lines of a diff together — confirm.
i= (sint)(v.x*255); NLMISC::clamp(i,0,255); col.R= (uint8) i;
i= (sint)(v.y*255); NLMISC::clamp(i,0,255); col.G= (uint8) i;
i= (sint)(v.z*255); NLMISC::clamp(i,0,255); col.B= (uint8) i;
// Whole-vector form: scale all components with a single CVector * scalar, then convert each.
CVector mul255 = v * 255;
i= (sint)(mul255.x); NLMISC::clamp(i,0,255); col.R= (uint8) i;
i= (sint)(mul255.y); NLMISC::clamp(i,0,255); col.G= (uint8) i;
i= (sint)(mul255.z); NLMISC::clamp(i,0,255); col.B= (uint8) i;
// The vector carries no alpha information, so the colour is made fully opaque.
col.A=255;
}

@ -152,6 +152,7 @@ public: // Methods.
// friends.
friend CVector operator*(float f, const CVector &v0);
friend CVector operator/(float f, const CVector &v0);
};
class CVectorPacked

@ -136,6 +136,18 @@ inline CVector operator*(float f, const CVector &v)
#endif
}
/// Divides a scalar by a vector, component by component.
/// \param f the numerator applied to every component.
/// \param v the vector whose components are the denominators.
/// \return the vector (f/v.x, f/v.y, f/v.z).
inline CVector operator/(float f, const CVector &v)
{
#ifdef USE_SSE2
	// Broadcast f to every lane and divide lane by lane.
	// NOTE(review): _mm_div_ps also divides by whatever v.mm holds beyond x/y/z — confirm that lane is benign.
	CVector quotient;
	quotient.mm = _mm_div_ps(_mm_set1_ps(f), v.mm);
	return quotient;
#else
	// Scalar fallback: build the result directly from the three quotients.
	return CVector(f/v.x, f/v.y, f/v.z);
#endif
}
#ifdef USE_SSE2
inline __m128 dotsplat(const __m128 &l, const __m128 &r)
{
@ -150,6 +162,19 @@ inline __m128 dotsplat(const __m128 &l, const __m128 &r)
}
#endif
/// Component-wise product of two vectors (not a dot or cross product).
/// \param l left-hand operand.
/// \param r right-hand operand.
/// \return the vector (l.x*r.x, l.y*r.y, l.z*r.z).
inline CVector mul(const CVector &l, const CVector &r)
{
#ifdef USE_SSE2
	// One packed multiply covers every lane at once.
	CVector product;
	product.mm = _mm_mul_ps(l.mm, r.mm);
	return product;
#else
	// Scalar fallback: multiply matching components one by one.
	CVector product;
	product.x = l.x * r.x;
	product.y = l.y * r.y;
	product.z = l.z * r.z;
	return product;
#endif
}
// ============================================================================================
// Advanced Maths.
inline float CVector::operator*(const CVector &v) const

@ -189,9 +189,7 @@ void CBone::compute(CBone *parent, const CMatrix &rootMatrix, CSkeletonModel *sk
// retrieve scale from our father.
parent->getScale(fatherScale);
// inverse this scale.
fatherScale.x= 1.0f / fatherScale.x;
fatherScale.y= 1.0f / fatherScale.y;
fatherScale.z= 1.0f / fatherScale.z;
fatherScale = 1.0f / fatherScale;
// Compute InverseScale compensation:
// with UnheritScale, formula per bone should be T*Sf-1*P*R*S*P-1.
@ -199,9 +197,7 @@ void CBone::compute(CBone *parent, const CMatrix &rootMatrix, CSkeletonModel *sk
// So we must compute T*Sf-1*T-1, in order to get wanted result.
invScaleComp.setScale(fatherScale);
// Faster compute of the translation part: just "trans + fatherScale MUL -trans" where MUL is comp mul
trans.x-= fatherScale.x * trans.x;
trans.y-= fatherScale.y * trans.y;
trans.z-= fatherScale.z * trans.z;
trans -= mul(trans, fatherScale);
invScaleComp.setPos(trans);

@ -487,10 +487,10 @@ void CCloud::dispXYZ (CMaterial *pMat)
rVB.lock (vba);
pVertices = vba.getVertexCoordPointer (0);
*pVertices = CVector(_Pos.x, _Pos.y, _Pos.z+_Size.z*(_NbW*_NbH-d)*oneOverNbWNbH); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
*pVertices = CVector(_Pos.x+_Size.x, _Pos.y, _Pos.z+_Size.z*(_NbW*_NbH-d)*oneOverNbWNbH); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
*pVertices = CVector(_Pos.x+_Size.x, _Pos.y+_Size.y, _Pos.z+_Size.z*(_NbW*_NbH-d)*oneOverNbWNbH); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
*pVertices = CVector(_Pos.x, _Pos.y+_Size.y, _Pos.z+_Size.z*(_NbW*_NbH-d)*oneOverNbWNbH);
*pVertices = CVectorPacked(_Pos.x, _Pos.y, _Pos.z+_Size.z*(_NbW*_NbH-d)*oneOverNbWNbH); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
*pVertices = CVectorPacked(_Pos.x+_Size.x, _Pos.y, _Pos.z+_Size.z*(_NbW*_NbH-d)*oneOverNbWNbH); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
*pVertices = CVectorPacked(_Pos.x+_Size.x, _Pos.y+_Size.y, _Pos.z+_Size.z*(_NbW*_NbH-d)*oneOverNbWNbH); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
*pVertices = CVectorPacked(_Pos.x, _Pos.y+_Size.y, _Pos.z+_Size.z*(_NbW*_NbH-d)*oneOverNbWNbH);
pUV = vba.getTexCoordPointer (0, 0);
pUV->U = i*oneOverNbW; pUV->V = j*oneOverNbH; pUV = (CUV*)( ((uint8*)pUV) + nVSize );
@ -513,10 +513,10 @@ void CCloud::dispXYZ (CMaterial *pMat)
CVertexBufferReadWrite vba;
rVB.lock (vba);
CVectorPacked *pVertices = vba.getVertexCoordPointer (0);
*pVertices = CVector((float)0.25f, 0, (float)0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
*pVertices = CVector((float)0.75f, 0, (float)0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
*pVertices = CVector((float)0.75f, 0, (float)0.75f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
*pVertices = CVector((float)0.25f, 0, (float)0.75f);
*pVertices = CVectorPacked((float)0.25f, 0, (float)0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
*pVertices = CVectorPacked((float)0.75f, 0, (float)0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
*pVertices = CVectorPacked((float)0.75f, 0, (float)0.75f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
*pVertices = CVectorPacked((float)0.25f, 0, (float)0.75f);
}
}
@ -640,7 +640,7 @@ void CCloud::genBill (CCamera *pCam, uint32 nBillSize)
//CMatrix CamMat = pCam->getMatrix();
//CVector Viewer = CamMat.getPos();
CVector Viewer = CVector (0,0,0);
CVector Center = CVector (_Pos.x+_Size.x/2, _Pos.y+_Size.y/2, _Pos.z+_Size.z/2);
CVector Center = _Pos + (_Size / 2); //CVector (_Pos.x+_Size.x/2, _Pos.y+_Size.y/2, _Pos.z+_Size.z/2);
CVector Size = _Size;
CVector I, J, K;
float Left, Right, Top, Bottom, Near, Far;
@ -650,7 +650,7 @@ void CCloud::genBill (CCamera *pCam, uint32 nBillSize)
CMatrix mat;
mat.identity();
mat.setRot(I,J,K, true);
mat.setPos(CVector(Viewer.x, Viewer.y, Viewer.z));
mat.setPos(Viewer);
mat.invert();
// Clear background for cloud creation
@ -708,7 +708,7 @@ void CCloud::dispBill (CCamera *pCam)
// CMatrix CamMat = pCam->getMatrix();
// CVector Viewer = CamMat.getPos();
CVector Viewer = CVector (0,0,0);
CVector Center = CVector (_Pos.x+_Size.x/2, _Pos.y+_Size.y/2, _Pos.z+_Size.z/2);
CVector Center = _Pos + (_Size / 2); //CVector (_Pos.x+_Size.x/2, _Pos.y+_Size.y/2, _Pos.z+_Size.z/2);
CVector Size = _Size;
// Prepare vertices.

Loading…
Cancel
Save