From 923e5a34738a975a3c3833057feaaf302271f50f Mon Sep 17 00:00:00 2001 From: kaetemi Date: Fri, 20 Jun 2014 06:37:47 +0200 Subject: [PATCH] SSE2: Vectorize CVector --HG-- branch : sse2 --- code/nel/include/nel/misc/vector.h | 4 + code/nel/include/nel/misc/vector_inline.h | 129 +++++++++++++++++++++- 2 files changed, 131 insertions(+), 2 deletions(-) diff --git a/code/nel/include/nel/misc/vector.h b/code/nel/include/nel/misc/vector.h index ebf56c048..6c1e27479 100644 --- a/code/nel/include/nel/misc/vector.h +++ b/code/nel/include/nel/misc/vector.h @@ -64,7 +64,11 @@ public: // Methods. /// Constructor . CVector(float _x, float _y, float _z) : x(_x), y(_y), z(_z) { /* if (((uintptr_t)(void *)(this) & 0xF) != 0) nlerror("Vector alignment error"); */ } /// Copy Constructor. +#ifdef NL_HAS_SSE2 + CVector(const CVector &v) : mm(v.mm) { /* if (((uintptr_t)(void *)(this) & 0xF) != 0) nlerror("Vector alignment error"); */ } /* SSE2 build: copies the packed mm member as a whole instead of x, y and z one by one */ +#else CVector(const CVector &v) : x(v.x), y(v.y), z(v.z) { /* if (((uintptr_t)(void *)(this) & 0xF) != 0) nlerror("Vector alignment error"); */ } +#endif //@} /// @name Base Maths. diff --git a/code/nel/include/nel/misc/vector_inline.h b/code/nel/include/nel/misc/vector_inline.h index 9f890f637..299e4c148 100644 --- a/code/nel/include/nel/misc/vector_inline.h +++ b/code/nel/include/nel/misc/vector_inline.h @@ -31,95 +31,196 @@ namespace NLMISC // Base Maths. 
inline CVector &CVector::operator+=(const CVector &v) { +#ifdef NL_HAS_SSE2 + mm = _mm_add_ps(mm, v.mm); +#else x+=v.x; y+=v.y; z+=v.z; +#endif return *this; } inline CVector &CVector::operator-=(const CVector &v) { +#ifdef NL_HAS_SSE2 + mm = _mm_sub_ps(mm, v.mm); +#else x-=v.x; y-=v.y; z-=v.z; +#endif return *this; } inline CVector &CVector::operator*=(float f) { +#ifdef NL_HAS_SSE2 + mm = _mm_mul_ps(mm, _mm_set1_ps(f)); +#else x*=f; y*=f; z*=f; +#endif return *this; } inline CVector &CVector::operator/=(float f) { +#ifdef NL_HAS_SSE2 + mm = _mm_mul_ps(mm, _mm_set1_ps(1.0f / f)); /* one reciprocal followed by a multiply, same as the scalar path */ + return *this; +#else return *this*= (1.0f/f); +#endif } inline CVector CVector::operator+(const CVector &v) const { +#ifdef NL_HAS_SSE2 + CVector ret; + ret.mm = _mm_add_ps(mm, v.mm); + return ret; +#else CVector ret(x+v.x, y+v.y, z+v.z); return ret; +#endif } inline CVector CVector::operator-(const CVector &v) const { +#ifdef NL_HAS_SSE2 + CVector ret; + ret.mm = _mm_sub_ps(mm, v.mm); + return ret; +#else CVector ret(x-v.x, y-v.y, z-v.z); return ret; +#endif } inline CVector CVector::operator*(float f) const { +#ifdef NL_HAS_SSE2 + CVector ret; + ret.mm = _mm_mul_ps(mm, _mm_set1_ps(f)); + return ret; +#else CVector ret(x*f, y*f, z*f); return ret; +#endif } inline CVector CVector::operator/(float f) const { +#ifdef NL_HAS_SSE2 + CVector ret; + ret.mm = _mm_mul_ps(mm, _mm_set1_ps(1.0f / f)); + return ret; +#else return *this*(1.0f/f); +#endif } inline CVector CVector::operator-() const { +#ifdef NL_HAS_SSE2 + CVector ret; + ret.mm = _mm_mul_ps(mm, _mm_set1_ps(-1.0f)); + return ret; +#else return CVector(-x,-y,-z); +#endif } inline CVector operator*(float f, const CVector &v) { +#ifdef NL_HAS_SSE2 + CVector ret; + ret.mm = _mm_mul_ps(_mm_set1_ps(f), v.mm); + return ret; +#else CVector ret(v.x*f, v.y*f, v.z*f); return ret; +#endif } +#ifdef NL_HAS_SSE2 +inline __m128 dotsplat(const __m128 &l, const __m128 &r) +{ /* Returns the sum of the lane 0, 1 and 2 products of l and r, replicated into all four lanes; lane 3 does not contribute. */ + // TODO: _mm_hadd_ps SSE3 + __m128 mult = _mm_mul_ps(l, r); + __m128 vx = 
_mm_shuffle_ps(mult, mult, _MM_SHUFFLE(0, 0, 0, 0)); + __m128 vy = _mm_shuffle_ps(mult, mult, _MM_SHUFFLE(1, 1, 1, 1)); + __m128 vz = _mm_shuffle_ps(mult, mult, _MM_SHUFFLE(2, 2, 2, 2)); + __m128 result = _mm_add_ps(_mm_add_ps(vx, vy), vz); + return result; +} +#endif // ============================================================================================ // Advanced Maths. inline float CVector::operator*(const CVector &v) const { +#ifdef NL_HAS_SSE2 + return _mm_cvtss_f32(dotsplat(mm, v.mm)); +#else return x*v.x + y*v.y + z*v.z; +#endif } inline CVector CVector::operator^(const CVector &v) const { +#ifdef NL_HAS_SSE2 + CVector ret; + __m128 l = _mm_shuffle_ps(mm, mm, _MM_SHUFFLE(3, 0, 2, 1)); /* lanes reordered to (y, z, x, w) */ + __m128 r = _mm_shuffle_ps(v.mm, v.mm, _MM_SHUFFLE(3, 1, 0, 2)); /* lanes reordered to (z, x, y, w) */ + __m128 mul1 = _mm_mul_ps(l, r); + l = _mm_shuffle_ps(mm, mm, _MM_SHUFFLE(3, 1, 0, 2)); + r = _mm_shuffle_ps(v.mm, v.mm, _MM_SHUFFLE(3, 0, 2, 1)); + __m128 mul2 = _mm_mul_ps(l, r); + ret.mm = _mm_sub_ps(mul1, mul2); + return ret; +#else CVector ret; - ret.x= y*v.z - z*v.y; ret.y= z*v.x - x*v.z; ret.z= x*v.y - y*v.x; - return ret; +#endif } inline float CVector::sqrnorm() const { +#ifdef NL_HAS_SSE2 + return _mm_cvtss_f32(dotsplat(mm, mm)); +#else return (float)(x*x + y*y + z*z); +#endif } inline float CVector::norm() const { +#ifdef NL_HAS_SSE2 + return sqrt(_mm_cvtss_f32(dotsplat(mm, mm))); +#else return (float)sqrt(x*x + y*y + z*z); +#endif } inline void CVector::normalize() { +#ifdef NL_HAS_SSE2 + __m128 normsplat = _mm_sqrt_ps(dotsplat(mm, mm)); + if (_mm_cvtss_f32(normsplat)) /* a zero length vector is left untouched */ + mm = _mm_div_ps(mm, normsplat); +#else float n=norm(); if(n) *this/=n; +#endif } inline CVector CVector::normed() const { +#ifdef NL_HAS_SSE2 + CVector res(*this); /* start from a copy so that a zero length vector is returned unchanged, as the scalar path does, instead of an unassigned res */ + __m128 normsplat = _mm_sqrt_ps(dotsplat(mm, mm)); + if (_mm_cvtss_f32(normsplat)) + res.mm = _mm_div_ps(mm, normsplat); + return res; +#else CVector ret; ret= *this; ret.normalize(); return ret; +#endif } @@ -127,19 +228,35 @@ inline CVector CVector::normed() const // 
Misc. inline void CVector::set(float _x, float _y, float _z) { +#ifdef NL_HAS_SSE2 + mm = _mm_setr_ps(_x, _y, _z, 0.0f); /* lane 3 is set to zero */ +#else x=_x; y=_y; z=_z; +#endif } inline bool CVector::operator==(const CVector &v) const { +#ifdef NL_HAS_SSE2 + return (_mm_movemask_ps(_mm_cmpeq_ps(mm, v.mm)) & 0x07) == 0x07; /* mask 0x07 keeps the x, y, z lane bits and drops lane 3 */ +#else return x==v.x && y==v.y && z==v.z; +#endif } inline bool CVector::operator!=(const CVector &v) const { +#ifdef NL_HAS_SSE2 + return (_mm_movemask_ps(_mm_cmpneq_ps(mm, v.mm)) & 0x07) != 0; +#else return !(*this==v); +#endif } inline bool CVector::isNull() const { +#ifdef NL_HAS_SSE2 + return (_mm_movemask_ps(_mm_cmpeq_ps(mm, _mm_setzero_ps())) & 0x07) == 0x07; +#else return *this==CVector::Null; +#endif } inline bool CVector::operator<(const CVector &v) const { @@ -177,15 +294,23 @@ inline void CVector::sphericToCartesian(float r, float theta,float phi) } inline void CVector::minof(const CVector &a, const CVector &b) { +#ifdef NL_HAS_SSE2 + mm = _mm_min_ps(b.mm, a.mm); /* b first: the intrinsic yields its second operand on NaN or equal zeros, so a wins those cases exactly like std::min(a.x, b.x) */ +#else x= std::min(a.x, b.x); y= std::min(a.y, b.y); z= std::min(a.z, b.z); +#endif } inline void CVector::maxof(const CVector &a, const CVector &b) { +#ifdef NL_HAS_SSE2 + mm = _mm_max_ps(b.mm, a.mm); /* b first: the intrinsic yields its second operand on NaN or equal zeros, so a wins those cases exactly like std::max(a.x, b.x) */ +#else x= std::max(a.x, b.x); y= std::max(a.y, b.y); z= std::max(a.z, b.z); +#endif } inline void CVector::serial(IStream &f) {