diff --git a/code/nel/include/nel/misc/types_nl.h b/code/nel/include/nel/misc/types_nl.h
index b94ffe50f..21cd8b39e 100644
--- a/code/nel/include/nel/misc/types_nl.h
+++ b/code/nel/include/nel/misc/types_nl.h
@@ -336,14 +336,84 @@ typedef unsigned int uint; // at least 32bits (depend of processor)
 #endif
 
 #ifdef USE_SSE2
+
 extern void *operator new(size_t size) throw(std::bad_alloc);
 extern void *operator new[](size_t size) throw(std::bad_alloc);
 extern void operator delete(void *p) throw();
 extern void operator delete[](void *p) throw();
+
 #define NL_ALIGN_SSE2(nb) NL_ALIGN(nb)
+
+# ifdef NL_COMP_VC
+
+inline void *aligned_malloc(size_t size, size_t alignment)
+{
+	return _aligned_malloc(size, alignment);
+}
+
+inline void aligned_free(void *ptr)
+{
+	_aligned_free(ptr);
+}
+
+# else
+
+inline void *aligned_malloc(size_t size, size_t alignment)
+{
+	return memalign(alignment, size);
+}
+
+inline void aligned_free(void *ptr)
+{
+	free(ptr);
+}
+
+# endif /* NL_COMP_ */
+
+// std::allocator variant whose allocate/deallocate go through aligned_malloc/aligned_free with NL_DEFAULT_MEMORY_ALIGNMENT.
+template <class T>
+class aligned_allocator : public std::allocator<T>
+{
+public:
+	typedef size_t size_type;
+	typedef std::ptrdiff_t difference_type;
+	typedef T* pointer;
+	typedef const T* const_pointer;
+	typedef T& reference;
+	typedef const T& const_reference;
+	typedef T value_type;
+
+	template <class U>
+	struct rebind
+	{
+		typedef aligned_allocator<U> other;
+	};
+
+	aligned_allocator() : std::allocator<T>() {}
+
+	aligned_allocator(const aligned_allocator& other) : std::allocator<T>(other) {}
+
+	template <class U>
+	aligned_allocator(const aligned_allocator<U>& other) : std::allocator<T>(other) {}
+
+	~aligned_allocator() {}
+
+	pointer allocate(size_type num, const void* /*hint*/ = 0)
+	{
+		// aligned_malloc is declared as (size, alignment): byte count first, alignment second.
+		return static_cast<pointer>(aligned_malloc(num * sizeof(T), NL_DEFAULT_MEMORY_ALIGNMENT));
+	}
+
+	void deallocate(pointer p, size_type /*num*/)
+	{
+		aligned_free(p);
+	}
+};
+
 #else
 #define NL_ALIGN_SSE2(nb)
-#endif
+#endif /* USE_SSE2 */
+
 
 // CHashMap, CHashSet and CHashMultiMap definitions
 #if defined(_STLPORT_VERSION) // STLport detected
diff --git a/code/nel/include/nel/misc/vector.h b/code/nel/include/nel/misc/vector.h
index b1e2573d5..f11137764 100644
--- a/code/nel/include/nel/misc/vector.h
+++ b/code/nel/include/nel/misc/vector.h
@@ -35,15 +35,24 @@ class IStream;
  * \author Lionel Berenguier
  * \author Nevrax France
  * \date 2000
+ * \author Jan Boon
+ * \date 2014
  */
 NL_ALIGN_SSE2(16)
 class CVector
 {
 public: // Attributes.
-	float x,y,z;
-
 #ifdef USE_SSE2
-	float w; // Padding
+	union
+	{
+		struct
+		{
+			float x, y, z, P;
+		};
+		__m128 mm;
+	};
+#else
+	float x,y,z;
 #endif
 
 public: // const.
@@ -187,9 +196,21 @@ public:
 inline CVector blend(const CVector &v0, const CVector &v1, float lambda)
 {
 	float invLambda = 1.f - lambda;
+#ifdef USE_SSE2
+	CVector res;
+	__m128 mLambda = _mm_set1_ps(lambda);
+	__m128 mInvLambda = _mm_set1_ps(invLambda);
+	__m128 mv0 = v0.mm;
+	__m128 mv1 = v1.mm;
+	mv0 = _mm_mul_ps(mv0, mInvLambda);
+	mv1 = _mm_mul_ps(mv1, mLambda);
+	res.mm = _mm_add_ps(mv0, mv1);
+	return res;
+#else
 	return CVector(invLambda * v0.x + lambda * v1.x,
 		invLambda * v0.y + lambda * v1.y,
 		invLambda * v0.z + lambda * v1.z);
+#endif
 }
 
 
diff --git a/code/nel/include/nel/misc/vector_inline.h b/code/nel/include/nel/misc/vector_inline.h
index 9f890f637..61f20e367 100644
--- a/code/nel/include/nel/misc/vector_inline.h
+++ b/code/nel/include/nel/misc/vector_inline.h
@@ -31,23 +31,35 @@ namespace NLMISC
 // Base Maths.
 inline CVector &CVector::operator+=(const CVector &v)
 {
+#ifdef USE_SSE2
+	mm = _mm_add_ps(mm, v.mm);
+#else
 	x+=v.x;
 	y+=v.y;
 	z+=v.z;
+#endif
 	return *this;
 }
 inline CVector &CVector::operator-=(const CVector &v)
 {
+#ifdef USE_SSE2
+	mm = _mm_sub_ps(mm, v.mm);
+#else
 	x-=v.x;
 	y-=v.y;
 	z-=v.z;
+#endif
 	return *this;
 }
 inline CVector &CVector::operator*=(float f)
 {
+#ifdef USE_SSE2
+	mm = _mm_mul_ps(mm, _mm_set1_ps(f));
+#else
 	x*=f;
 	y*=f;
 	z*=f;
+#endif
 	return *this;
 }
 inline CVector &CVector::operator/=(float f)
@@ -56,18 +68,36 @@ inline CVector &CVector::operator/=(float f)
 }
 inline CVector CVector::operator+(const CVector &v) const
 {
+#ifdef USE_SSE2
+	CVector res;
+	res.mm = _mm_add_ps(mm, v.mm);
+	return res;
+#else
 	CVector ret(x+v.x, y+v.y, z+v.z);
 	return ret;
+#endif
 }
 inline CVector CVector::operator-(const CVector &v) const
 {
+#ifdef USE_SSE2
+	CVector res;
+	res.mm = _mm_sub_ps(mm, v.mm);
+	return res;
+#else
 	CVector ret(x-v.x, y-v.y, z-v.z);
 	return ret;
+#endif
 }
 inline CVector CVector::operator*(float f) const
 {
+#ifdef USE_SSE2
+	CVector res;
+	res.mm = _mm_mul_ps(mm, _mm_set1_ps(f));
+	return res;
+#else
 	CVector ret(x*f, y*f, z*f);
 	return ret;
+#endif
 }
 inline CVector CVector::operator/(float f) const
 {
@@ -75,12 +105,24 @@ inline CVector CVector::operator/(float f) const
 }
 inline CVector CVector::operator-() const
 {
+#ifdef USE_SSE2
+	CVector res;
+	res.mm = _mm_mul_ps(mm, _mm_set1_ps(-1.0f));
+	return res;
+#else
 	return CVector(-x,-y,-z);
+#endif
 }
 inline CVector operator*(float f, const CVector &v)
 {
+#ifdef USE_SSE2
+	CVector res;
+	res.mm = _mm_mul_ps(v.mm, _mm_set1_ps(f));
+	return res;
+#else
 	CVector ret(v.x*f, v.y*f, v.z*f);
 	return ret;
+#endif
 }
 
 
diff --git a/code/nel/src/misc/common.cpp b/code/nel/src/misc/common.cpp
index b58792a65..dd244667b 100644
--- a/code/nel/src/misc/common.cpp
+++ b/code/nel/src/misc/common.cpp
@@ -71,33 +71,7 @@ extern "C" long _ftol2( double dblSource ) { return _ftol( dblSource ); }
 #endif // NL_OS_WINDOWS
 
 
-#ifdef HAS_SSE2
-
-# ifdef NL_COMP_VC
-
-inline void *aligned_malloc(size_t size, size_t alignment)
-{
-	return _aligned_malloc(size, alignment);
-}
-
-inline void aligned_free(void *p)
-{
-	_aligned_free(ptr);
-}
-
-# else
-
-inline void *aligned_malloc(size_t size, size_t alignment)
-{
-	return memalign(alignment, size);
-}
-
-inline void aligned_free(void *ptr)
-{
-	free(ptr);
-}
-
-# endif /* NL_COMP_ */
+#ifdef USE_SSE2
 
 void *operator new(size_t size) throw(std::bad_alloc)
 {