SSE2: Vectorize CVector

--HG--
branch : sse2
hg/feature/sse2
kaetemi 11 years ago
parent 267e544bc4
commit 923e5a3473

@ -64,7 +64,11 @@ public: // Methods.
/// Constructor from the three components.
/// On SSE2 builds the whole register is written, with the unused fourth lane set to 0.0f
/// exactly as set() does, so later full-width operations never read an indeterminate lane.
#ifdef NL_HAS_SSE2
CVector(float _x, float _y, float _z) : mm(_mm_setr_ps(_x, _y, _z, 0.0f)) { /* if (((uintptr_t)(void *)(this) & 0xF) != 0) nlerror("Vector alignment error"); */ }
#else
CVector(float _x, float _y, float _z) : x(_x), y(_y), z(_z) { /* if (((uintptr_t)(void *)(this) & 0xF) != 0) nlerror("Vector alignment error"); */ }
#endif
/// Copy constructor: duplicates v, either as one register copy (SSE2) or component by component.
#ifndef NL_HAS_SSE2
CVector(const CVector &v)
	: x(v.x), y(v.y), z(v.z)
{
	// Disabled alignment check:
	// if (((uintptr_t)(void *)(this) & 0xF) != 0) nlerror("Vector alignment error");
}
#else
CVector(const CVector &v)
	: mm(v.mm)
{
	// Disabled alignment check:
	// if (((uintptr_t)(void *)(this) & 0xF) != 0) nlerror("Vector alignment error");
}
#endif
//@}
/// @name Base Maths.

@ -31,95 +31,196 @@ namespace NLMISC
// Base Maths.
/// Add v to this vector in place and return a reference to this vector.
inline CVector &CVector::operator+=(const CVector &v)
{
#ifndef NL_HAS_SSE2
	// Scalar path: accumulate each component separately.
	x += v.x;
	y += v.y;
	z += v.z;
#else
	// SSE2 path: one packed addition over the whole register.
	mm = _mm_add_ps(mm, v.mm);
#endif
	return *this;
}
/// Subtract v from this vector in place and return a reference to this vector.
inline CVector &CVector::operator-=(const CVector &v)
{
#ifndef NL_HAS_SSE2
	// Scalar path: subtract each component separately.
	x -= v.x;
	y -= v.y;
	z -= v.z;
#else
	// SSE2 path: one packed subtraction over the whole register.
	mm = _mm_sub_ps(mm, v.mm);
#endif
	return *this;
}
/// Scale this vector in place by f and return a reference to this vector.
inline CVector &CVector::operator*=(float f)
{
#ifndef NL_HAS_SSE2
	// Scalar path: scale each component separately.
	x *= f;
	y *= f;
	z *= f;
#else
	// SSE2 path: broadcast f to every lane, then multiply once.
	const __m128 factor = _mm_set1_ps(f);
	mm = _mm_mul_ps(mm, factor);
#endif
	return *this;
}
/// Divide this vector in place by f and return a reference to this vector.
/// Both paths multiply by the reciprocal 1.0f / f rather than dividing each component.
inline CVector &CVector::operator/=(float f)
{
	const float reciprocal = 1.0f / f;
#ifdef NL_HAS_SSE2
	mm = _mm_mul_ps(mm, _mm_set1_ps(reciprocal));
#else
	*this *= reciprocal;
#endif
	return *this;
}
/// Return the component-wise sum of this vector and v.
inline CVector CVector::operator+(const CVector &v) const
{
#ifndef NL_HAS_SSE2
	return CVector(x + v.x, y + v.y, z + v.z);
#else
	// Packed addition written straight into the result register.
	CVector sum;
	sum.mm = _mm_add_ps(mm, v.mm);
	return sum;
#endif
}
/// Return the component-wise difference of this vector and v (this - v).
inline CVector CVector::operator-(const CVector &v) const
{
#ifndef NL_HAS_SSE2
	return CVector(x - v.x, y - v.y, z - v.z);
#else
	// Packed subtraction written straight into the result register.
	CVector diff;
	diff.mm = _mm_sub_ps(mm, v.mm);
	return diff;
#endif
}
/// Return this vector scaled by f.
inline CVector CVector::operator*(float f) const
{
#ifndef NL_HAS_SSE2
	return CVector(x * f, y * f, z * f);
#else
	// Broadcast f to every lane, then multiply once.
	CVector scaled;
	const __m128 factor = _mm_set1_ps(f);
	scaled.mm = _mm_mul_ps(mm, factor);
	return scaled;
#endif
}
/// Return this vector divided by f.
/// Both paths multiply by the reciprocal 1.0f / f rather than dividing each component.
inline CVector CVector::operator/(float f) const
{
	const float reciprocal = 1.0f / f;
#ifdef NL_HAS_SSE2
	CVector quotient;
	quotient.mm = _mm_mul_ps(mm, _mm_set1_ps(reciprocal));
	return quotient;
#else
	return *this * reciprocal;
#endif
}
/// Return the negation of this vector.
inline CVector CVector::operator-() const
{
#ifndef NL_HAS_SSE2
	return CVector(-x, -y, -z);
#else
	// Negation is done by multiplying every lane by -1.0f.
	const __m128 minusOne = _mm_set1_ps(-1.0f);
	CVector negated;
	negated.mm = _mm_mul_ps(mm, minusOne);
	return negated;
#endif
}
/// Scalar-on-the-left multiplication: return v scaled by f.
inline CVector operator*(float f, const CVector &v)
{
#ifndef NL_HAS_SSE2
	return CVector(v.x * f, v.y * f, v.z * f);
#else
	// Broadcast f to every lane, then multiply once (scalar operand first).
	CVector scaled;
	const __m128 factor = _mm_set1_ps(f);
	scaled.mm = _mm_mul_ps(factor, v.mm);
	return scaled;
#endif
}
#ifdef NL_HAS_SSE2
/// Dot product of the first three lanes of l and r, replicated into all four lanes of the result.
/// The fourth lane of the inputs does not contribute to the sum.
inline __m128 dotsplat(const __m128 &l, const __m128 &r)
{
	// TODO: _mm_hadd_ps SSE3
	const __m128 prod = _mm_mul_ps(l, r);
	// Broadcast each of the first three lane products across a full register.
	const __m128 xxxx = _mm_shuffle_ps(prod, prod, _MM_SHUFFLE(0, 0, 0, 0));
	const __m128 yyyy = _mm_shuffle_ps(prod, prod, _MM_SHUFFLE(1, 1, 1, 1));
	const __m128 zzzz = _mm_shuffle_ps(prod, prod, _MM_SHUFFLE(2, 2, 2, 2));
	// Summation order is (x + y) + z.
	return _mm_add_ps(_mm_add_ps(xxxx, yyyy), zzzz);
}
#endif
// ============================================================================================
// Advanced Maths.
/// Dot product of this vector and v.
inline float CVector::operator*(const CVector &v) const
{
#ifndef NL_HAS_SSE2
	return x*v.x + y*v.y + z*v.z;
#else
	// dotsplat() leaves the sum in every lane; read it from the lowest one.
	const __m128 splat = dotsplat(mm, v.mm);
	return _mm_cvtss_f32(splat);
#endif
}
/// Cross product of this vector and v (this ^ v).
inline CVector CVector::operator^(const CVector &v) const
{
	CVector ret;
#ifdef NL_HAS_SSE2
	// First products: (y, z, x) of this times (z, x, y) of v.
	const __m128 lhsYZX = _mm_shuffle_ps(mm, mm, _MM_SHUFFLE(3, 0, 2, 1));
	const __m128 rhsZXY = _mm_shuffle_ps(v.mm, v.mm, _MM_SHUFFLE(3, 1, 0, 2));
	const __m128 positive = _mm_mul_ps(lhsYZX, rhsZXY);
	// Second products: (z, x, y) of this times (y, z, x) of v.
	const __m128 lhsZXY = _mm_shuffle_ps(mm, mm, _MM_SHUFFLE(3, 1, 0, 2));
	const __m128 rhsYZX = _mm_shuffle_ps(v.mm, v.mm, _MM_SHUFFLE(3, 0, 2, 1));
	const __m128 negative = _mm_mul_ps(lhsZXY, rhsYZX);
	ret.mm = _mm_sub_ps(positive, negative);
#else
	ret.x = y*v.z - z*v.y;
	ret.y = z*v.x - x*v.z;
	ret.z = x*v.y - y*v.x;
#endif
	return ret;
}
/// Squared length of this vector (dot product with itself).
inline float CVector::sqrnorm() const
{
#ifndef NL_HAS_SSE2
	return (float)(x*x + y*y + z*z);
#else
	// dotsplat() leaves the sum in every lane; read it from the lowest one.
	const __m128 splat = dotsplat(mm, mm);
	return _mm_cvtss_f32(splat);
#endif
}
/// Length of this vector: the square root of the sum of squared components.
inline float CVector::norm() const
{
#ifndef NL_HAS_SSE2
	return (float)sqrt(x*x + y*y + z*z);
#else
	// Squared length is taken from the lowest lane of the splatted dot product.
	const float squared = _mm_cvtss_f32(dotsplat(mm, mm));
	return sqrt(squared);
#endif
}
/// Scale this vector in place to unit length; a vector whose length is zero is left untouched.
inline void CVector::normalize()
{
#ifdef NL_HAS_SSE2
	// Length replicated in every lane so the division below is a single packed op.
	const __m128 lengthSplat = _mm_sqrt_ps(dotsplat(mm, mm));
	if (_mm_cvtss_f32(lengthSplat) != 0.0f)
	{
		mm = _mm_div_ps(mm, lengthSplat);
	}
#else
	const float length = norm();
	if (length != 0.0f)
	{
		*this /= length;
	}
#endif
}
/// Return a unit-length copy of this vector.
/// A vector whose length is zero is returned unchanged, on both the SSE2 and the scalar path.
inline CVector CVector::normed() const
{
#ifdef NL_HAS_SSE2
	// Start from a copy of this vector: when the length is zero the division is skipped,
	// and the result must still hold defined values (the scalar path returns *this then).
	CVector res(*this);
	__m128 normsplat = _mm_sqrt_ps(dotsplat(mm, mm));
	if (_mm_cvtss_f32(normsplat))
		res.mm = _mm_div_ps(mm, normsplat);
	return res;
#else
	CVector ret;
	ret= *this;
	ret.normalize();
	return ret;
#endif
}
@ -127,19 +228,35 @@ inline CVector CVector::normed() const
// Misc.
/// Assign the three components of this vector.
inline void CVector::set(float _x, float _y, float _z)
{
#ifndef NL_HAS_SSE2
	x = _x;
	y = _y;
	z = _z;
#else
	// Build the whole register at once; the fourth lane is set to 0.0f.
	mm = _mm_setr_ps(_x, _y, _z, 0.0f);
#endif
}
/// Exact equality: true when x, y and z all compare equal to those of v.
inline bool CVector::operator==(const CVector &v) const
{
#ifdef NL_HAS_SSE2
	// One bit per lane; the 0x07 mask keeps x, y, z and drops the fourth lane.
	const int equalLanes = _mm_movemask_ps(_mm_cmpeq_ps(mm, v.mm));
	return (equalLanes & 0x07) == 0x07;
#else
	if (!(x == v.x)) return false;
	if (!(y == v.y)) return false;
	return z == v.z;
#endif
}
/// Inequality: true when at least one of x, y, z differs from that of v.
inline bool CVector::operator!=(const CVector &v) const
{
#ifdef NL_HAS_SSE2
	// One bit per lane; the 0x07 mask keeps x, y, z and drops the fourth lane.
	const int differingLanes = _mm_movemask_ps(_mm_cmpneq_ps(mm, v.mm));
	return (differingLanes & 0x07) != 0;
#else
	const bool same = (*this == v);
	return !same;
#endif
}
/// True when this vector compares equal to the null vector on x, y and z.
inline bool CVector::isNull() const
{
#ifdef NL_HAS_SSE2
	// Compare every lane against zero; the 0x07 mask keeps x, y, z and drops the fourth lane.
	const __m128 zero = _mm_setzero_ps();
	const int zeroLanes = _mm_movemask_ps(_mm_cmpeq_ps(mm, zero));
	return (zeroLanes & 0x07) == 0x07;
#else
	return *this == CVector::Null;
#endif
}
inline bool CVector::operator<(const CVector &v) const
{
@ -177,15 +294,23 @@ inline void CVector::sphericToCartesian(float r, float theta,float phi)
}
/// Set each component of this vector to the smaller of the matching components of a and b.
inline void CVector::minof(const CVector &a, const CVector &b)
{
#ifdef NL_HAS_SSE2
	// MINPS yields its second operand when a lane pair is unordered (NaN) or both zero,
	// whereas std::min(a, b) yields a in those cases. Passing (b, a) computes
	// (b < a) ? b : a per lane, which is exactly what the scalar path below does.
	mm = _mm_min_ps(b.mm, a.mm);
#else
	x= std::min(a.x, b.x);
	y= std::min(a.y, b.y);
	z= std::min(a.z, b.z);
#endif
}
/// Set each component of this vector to the larger of the matching components of a and b.
inline void CVector::maxof(const CVector &a, const CVector &b)
{
#ifdef NL_HAS_SSE2
	// MAXPS yields its second operand when a lane pair is unordered (NaN) or both zero,
	// whereas std::max(a, b) yields a in those cases. Passing (b, a) computes
	// (b > a) ? b : a per lane, which is exactly what the scalar path below does.
	mm = _mm_max_ps(b.mm, a.mm);
#else
	x= std::max(a.x, b.x);
	y= std::max(a.y, b.y);
	z= std::max(a.z, b.z);
#endif
}
inline void CVector::serial(IStream &f)
{

Loading…
Cancel
Save