SSE2: Vectorize some code

--HG-- branch : sse2
11 years ago · 60879d87e4
parent afa315b1b5
commit 60879d87e4
5 changed files with 43 additions and 21 deletions
--- a/code/nel/include/nel/3d/track_keyframer.h
+++ b/code/nel/include/nel/3d/track_keyframer.h
@@ -403,10 +403,10 @@ template<class T, class TKeyVal> inline void	copyToValue(T &value, const TKeyVal
 // Writes the x/y/z components of v into the R/G/B channels of col.
 // Each component is scaled by 255, converted to sint, clamped to [0,255]
 // and stored as uint8; the alpha channel is always set to 255.
 // The '+' lines scale the whole vector once (v * 255) instead of
 // multiplying each component separately as the '-' lines did.
 inline void	copyToValue(NLMISC::CRGBA &col, const CVector &v)
 {
 	sint	i;
-
+	CVector mul255 = v * 255;
-	i= (sint)(v.x*255); NLMISC::clamp(i,0,255); col.R= (uint8) i;
+	i= (sint)(mul255.x); NLMISC::clamp(i,0,255); col.R= (uint8) i;
-	i= (sint)(v.y*255); NLMISC::clamp(i,0,255); col.G= (uint8) i;
+	i= (sint)(mul255.y); NLMISC::clamp(i,0,255); col.G= (uint8) i;
-	i= (sint)(v.z*255); NLMISC::clamp(i,0,255); col.B= (uint8) i;
+	i= (sint)(mul255.z); NLMISC::clamp(i,0,255); col.B= (uint8) i;
 	// Alpha is not taken from v: always fully set.
 	col.A=255;
 }
--- a/code/nel/include/nel/misc/vector.h
+++ b/code/nel/include/nel/misc/vector.h
@@ -152,6 +152,7 @@ public:		// Methods.
 	// friends.
 	friend	CVector	operator*(float f, const CVector &v0);
 	friend	CVector	operator/(float f, const CVector &v0);
 };
 class CVectorPacked
--- a/code/nel/include/nel/misc/vector_inline.h
+++ b/code/nel/include/nel/misc/vector_inline.h
@@ -136,6 +136,18 @@ inline CVector	operator*(float f, const CVector &v)
 #endif
 }
 // Scalar-over-vector division: returns (f/v.x, f/v.y, f/v.z).
 // No check is made for zero components of v.
 inline CVector	operator/(float f, const CVector &v)
 {
 #ifdef USE_SSE2
 	// Broadcast f to every lane and divide by v.mm with one packed divide.
 	// NOTE(review): _mm_div_ps also divides by the fourth lane of v.mm, whose
 	// contents are not visible here -- confirm that lane is harmless.
 	CVector res;
 	res.mm = _mm_div_ps(_mm_set1_ps(f), v.mm);
 	return res;
 #else
 	// Scalar fallback: one division per component.
 	CVector	ret(f/v.x, f/v.y, f/v.z);
 	return ret;
 #endif
 }
 #ifdef USE_SSE2
 inline __m128 dotsplat(const __m128 &l, const __m128 &r)
 {
@@ -150,6 +162,19 @@ inline __m128 dotsplat(const __m128 &l, const __m128 &r)
 }
 #endif
 // Component-wise product of two vectors:
 // returns (l.x*r.x, l.y*r.y, l.z*r.z). This is neither a dot nor a cross product.
 inline CVector mul(const CVector &l, const CVector &r)
 {
 	CVector res;
 #ifdef USE_SSE2
 	// One packed multiply over the whole register.
 	res.mm = _mm_mul_ps(l.mm, r.mm);
 #else
 	// Scalar fallback: multiply each component individually.
 	res.x = l.x * r.x;
 	res.y = l.y * r.y;
 	res.z = l.z * r.z;
 #endif
 	return res;
 }
 // ============================================================================================
 // Advanced Maths.
 inline	float	CVector::operator*(const CVector &v) const
--- a/code/nel/src/3d/bone.cpp
+++ b/code/nel/src/3d/bone.cpp
@@ -189,9 +189,7 @@ void	CBone::compute(CBone *parent, const CMatrix &rootMatrix, CSkeletonModel *sk
 			// retrieve scale from our father.
 			parent->getScale(fatherScale);
 			// inverse this scale.
-			fatherScale.x= 1.0f / fatherScale.x;
+			fatherScale = 1.0f / fatherScale;
-			fatherScale.y= 1.0f / fatherScale.y;
-			fatherScale.z= 1.0f / fatherScale.z;
 			// Compute InverseScale compensation:
 			// with UnheritScale, formula per bone should be  T*Sf-1*P*R*S*P-1.
@@ -199,9 +197,7 @@ void	CBone::compute(CBone *parent, const CMatrix &rootMatrix, CSkeletonModel *sk
 			// So we must compute T*Sf-1*T-1, in order to get wanted result.
 			invScaleComp.setScale(fatherScale);
 			// Faster compute of the translation part: just "trans + fatherScale MUL -trans" where MUL is comp mul
-			trans.x-= fatherScale.x * trans.x;
+			trans -= mul(trans, fatherScale);
-			trans.y-= fatherScale.y * trans.y;
-			trans.z-= fatherScale.z * trans.z;
 			invScaleComp.setPos(trans);
--- a/code/nel/src/3d/cloud.cpp
+++ b/code/nel/src/3d/cloud.cpp
@@ -487,10 +487,10 @@ void CCloud::dispXYZ (CMaterial *pMat)
 				rVB.lock (vba);
 				pVertices = vba.getVertexCoordPointer (0);
-				*pVertices = CVector(_Pos.x,			_Pos.y,			_Pos.z+_Size.z*(_NbW*_NbH-d)*oneOverNbWNbH); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
+				*pVertices = CVectorPacked(_Pos.x,			_Pos.y,			_Pos.z+_Size.z*(_NbW*_NbH-d)*oneOverNbWNbH); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
-				*pVertices = CVector(_Pos.x+_Size.x,	_Pos.y,			_Pos.z+_Size.z*(_NbW*_NbH-d)*oneOverNbWNbH); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
+				*pVertices = CVectorPacked(_Pos.x+_Size.x,	_Pos.y,			_Pos.z+_Size.z*(_NbW*_NbH-d)*oneOverNbWNbH); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
-				*pVertices = CVector(_Pos.x+_Size.x,	_Pos.y+_Size.y,	_Pos.z+_Size.z*(_NbW*_NbH-d)*oneOverNbWNbH); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
+				*pVertices = CVectorPacked(_Pos.x+_Size.x,	_Pos.y+_Size.y,	_Pos.z+_Size.z*(_NbW*_NbH-d)*oneOverNbWNbH); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
-				*pVertices = CVector(_Pos.x,			_Pos.y+_Size.y,	_Pos.z+_Size.z*(_NbW*_NbH-d)*oneOverNbWNbH);
+				*pVertices = CVectorPacked(_Pos.x,			_Pos.y+_Size.y,	_Pos.z+_Size.z*(_NbW*_NbH-d)*oneOverNbWNbH);
 				pUV = vba.getTexCoordPointer (0, 0);
 				pUV->U = i*oneOverNbW;		pUV->V = j*oneOverNbH;		pUV = (CUV*)( ((uint8*)pUV) + nVSize );
@@ -513,10 +513,10 @@ void CCloud::dispXYZ (CMaterial *pMat)
 		CVertexBufferReadWrite vba;
 		rVB.lock (vba);
 		CVectorPacked *pVertices = vba.getVertexCoordPointer (0);
-		*pVertices = CVector((float)0.25f,	0, (float)0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
+		*pVertices = CVectorPacked((float)0.25f,	0, (float)0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
-		*pVertices = CVector((float)0.75f,	0, (float)0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
+		*pVertices = CVectorPacked((float)0.75f,	0, (float)0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
-		*pVertices = CVector((float)0.75f,	0, (float)0.75f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
+		*pVertices = CVectorPacked((float)0.75f,	0, (float)0.75f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
-		*pVertices = CVector((float)0.25f,	0, (float)0.75f);
+		*pVertices = CVectorPacked((float)0.25f,	0, (float)0.75f);
 	}
 }
@@ -640,7 +640,7 @@ void CCloud::genBill (CCamera *pCam, uint32 nBillSize)
 	//CMatrix CamMat = pCam->getMatrix();
 	//CVector Viewer = CamMat.getPos();
 	CVector Viewer = CVector (0,0,0);
-	CVector Center = CVector (_Pos.x+_Size.x/2, _Pos.y+_Size.y/2, _Pos.z+_Size.z/2);
+	CVector Center = _Pos + (_Size / 2); //CVector (_Pos.x+_Size.x/2, _Pos.y+_Size.y/2, _Pos.z+_Size.z/2);
 	CVector Size = _Size;
 	CVector I, J, K;
 	float Left, Right, Top, Bottom, Near, Far;
@@ -650,7 +650,7 @@ void CCloud::genBill (CCamera *pCam, uint32 nBillSize)
 	CMatrix mat;
 	mat.identity();
 	mat.setRot(I,J,K, true);
-	mat.setPos(CVector(Viewer.x, Viewer.y, Viewer.z));
+	mat.setPos(Viewer);
 	mat.invert();
 	// Clear background for cloud creation
@@ -708,7 +708,7 @@ void CCloud::dispBill (CCamera *pCam)
 //	CMatrix CamMat = pCam->getMatrix();
 //	CVector Viewer = CamMat.getPos();
 	CVector Viewer = CVector (0,0,0);
-	CVector Center = CVector (_Pos.x+_Size.x/2, _Pos.y+_Size.y/2, _Pos.z+_Size.z/2);
+	CVector Center = _Pos + (_Size / 2); //CVector (_Pos.x+_Size.x/2, _Pos.y+_Size.y/2, _Pos.z+_Size.z/2);
 	CVector Size = _Size;
 	// Prepare vertices.