SSE2: Some initial CVector SSE2 math

--HG--
branch : sse2
hg/feature/sse2
kaetemi 11 years ago
parent d94a49b3d8
commit ba2231f068

@ -336,14 +336,82 @@ typedef unsigned int uint; // at least 32bits (depend of processor)
#endif
#ifdef USE_SSE2
// Global allocation and deallocation operators, declared here when USE_SSE2 is
// enabled; their definitions are provided elsewhere.
// NOTE(review): presumably replaced so heap objects satisfy SSE2 alignment —
// confirm against the definitions.
extern void *operator new(size_t size) throw(std::bad_alloc);
extern void *operator new[](size_t size) throw(std::bad_alloc);
extern void operator delete(void *p) throw();
extern void operator delete[](void *p) throw();
// With USE_SSE2 the SSE2 alignment macro forwards its argument to NL_ALIGN.
#define NL_ALIGN_SSE2(nb) NL_ALIGN(nb)
# ifdef NL_COMP_VC
/**
 * Visual C++ flavour of the aligned allocation wrapper.
 * Forwards \p size and \p alignment, in that order, to the CRT's
 * _aligned_malloc and returns whatever it returns.
 */
inline void *aligned_malloc(size_t size, size_t alignment)
{
	void *const block = _aligned_malloc(size, alignment);
	return block;
}
/**
 * Visual C++ flavour of the aligned release wrapper.
 * Hands \p ptr to the CRT's _aligned_free.
 */
inline void aligned_free(void *ptr)
{
	_aligned_free(ptr);
}
# else
/**
 * Non-Visual-C++ flavour of the aligned allocation wrapper.
 * Returns the result of memalign for \p size bytes at \p alignment.
 */
inline void *aligned_malloc(size_t size, size_t alignment)
{
	// memalign expects (alignment, size) — the reverse of this wrapper's order.
	void *const block = memalign(alignment, size);
	return block;
}
/**
 * Non-Visual-C++ flavour of the aligned release wrapper.
 * Passes \p ptr straight to free().
 */
inline void aligned_free(void *ptr)
{
	free(ptr);
}
# endif /* NL_COMP_ */
/**
 * Allocator for standard containers that obtains its storage through
 * aligned_malloc() with NL_DEFAULT_MEMORY_ALIGNMENT and releases it through
 * aligned_free(). All other members are inherited from std::allocator<T>.
 */
template<class T>
class aligned_allocator : public std::allocator<T>
{
public:
	typedef size_t size_type;
	typedef std::ptrdiff_t difference_type;
	typedef T* pointer;
	typedef const T* const_pointer;
	typedef T& reference;
	typedef const T& const_reference;
	typedef T value_type;

	/// Yields the aligned_allocator for another element type U.
	template<class U>
	struct rebind
	{
		typedef aligned_allocator<U> other;
	};

	aligned_allocator() : std::allocator<T>() {}
	aligned_allocator(const aligned_allocator& other) : std::allocator<T>(other) {}
	template<class U>
	aligned_allocator(const aligned_allocator<U>& other) : std::allocator<T>(other) {}
	~aligned_allocator() {}

	/**
	 * Returns uninitialized storage for \p num objects of type T, requested
	 * with NL_DEFAULT_MEMORY_ALIGNMENT. The hint argument is ignored.
	 * NOTE(review): the pointer from aligned_malloc is returned unchecked; a
	 * failed allocation is not turned into std::bad_alloc here.
	 */
	pointer allocate(size_type num, const void* /*hint*/ = 0)
	{
		// aligned_malloc takes (size, alignment): byte count first, alignment second.
		return static_cast<pointer>(aligned_malloc(num * sizeof(T), NL_DEFAULT_MEMORY_ALIGNMENT));
	}

	/// Releases storage previously returned by allocate(); the count is unused.
	void deallocate(pointer p, size_type /*num*/)
	{
		aligned_free(p);
	}
};
#else
#define NL_ALIGN_SSE2(nb)
#endif
#endif /* USE_SSE2 */
// CHashMap, CHashSet and CHashMultiMap definitions
#if defined(_STLPORT_VERSION) // STLport detected

@ -35,15 +35,24 @@ class IStream;
* \author Lionel Berenguier
* \author Nevrax France
* \date 2000
* \author Jan Boon
* \date 2014
*/
NL_ALIGN_SSE2(16)
class CVector
{
public: // Attributes.
float x,y,z;
#ifdef USE_SSE2
float w; // Padding
union
{
struct
{
float x, y, z, P;
};
__m128 mm;
};
#else
float x,y,z;
#endif
public: // const.
@ -187,9 +196,21 @@ public:
/**
 * Linear blend of two vectors: (1 - lambda) * v0 + lambda * v1.
 * With USE_SSE2 the blend is computed on the packed __m128 member; otherwise
 * it is computed per component on x, y and z.
 */
inline CVector blend(const CVector &v0, const CVector &v1, float lambda)
{
	const float invLambda = 1.f - lambda;
#ifdef USE_SSE2
	// Broadcast each weight to every lane, scale both inputs, then sum.
	const __m128 weighted0 = _mm_mul_ps(v0.mm, _mm_set1_ps(invLambda));
	const __m128 weighted1 = _mm_mul_ps(v1.mm, _mm_set1_ps(lambda));
	CVector blended;
	blended.mm = _mm_add_ps(weighted0, weighted1);
	return blended;
#else
	return CVector(
		invLambda * v0.x + lambda * v1.x,
		invLambda * v0.y + lambda * v1.y,
		invLambda * v0.z + lambda * v1.z);
#endif
}

@ -31,23 +31,35 @@ namespace NLMISC
// Base Maths.
/// Adds \p v to this vector in place and returns *this.
inline CVector &CVector::operator+=(const CVector &v)
{
#ifdef USE_SSE2
	// A single packed add updates the whole __m128 member.
	const __m128 sum = _mm_add_ps(mm, v.mm);
	mm = sum;
#else
	x = x + v.x;
	y = y + v.y;
	z = z + v.z;
#endif
	return *this;
}
/// Subtracts \p v from this vector in place and returns *this.
inline CVector &CVector::operator-=(const CVector &v)
{
#ifdef USE_SSE2
	// A single packed subtract updates the whole __m128 member.
	const __m128 difference = _mm_sub_ps(mm, v.mm);
	mm = difference;
#else
	x = x - v.x;
	y = y - v.y;
	z = z - v.z;
#endif
	return *this;
}
/// Scales this vector in place by \p f and returns *this.
inline CVector &CVector::operator*=(float f)
{
#ifdef USE_SSE2
	// Broadcast the scalar to every lane, then multiply once.
	const __m128 factor = _mm_set1_ps(f);
	mm = _mm_mul_ps(mm, factor);
#else
	x = x * f;
	y = y * f;
	z = z * f;
#endif
	return *this;
}
inline CVector &CVector::operator/=(float f)
@ -56,18 +68,36 @@ inline CVector &CVector::operator/=(float f)
}
/// Returns the sum of this vector and \p v as a new vector.
inline CVector CVector::operator+(const CVector &v) const
{
#ifdef USE_SSE2
	CVector sum;
	sum.mm = _mm_add_ps(mm, v.mm);
	return sum;
#else
	return CVector(x + v.x, y + v.y, z + v.z);
#endif
}
/// Returns this vector minus \p v as a new vector.
inline CVector CVector::operator-(const CVector &v) const
{
#ifdef USE_SSE2
	CVector difference;
	difference.mm = _mm_sub_ps(mm, v.mm);
	return difference;
#else
	return CVector(x - v.x, y - v.y, z - v.z);
#endif
}
/// Returns this vector scaled by \p f as a new vector.
inline CVector CVector::operator*(float f) const
{
#ifdef USE_SSE2
	// Broadcast the scalar to every lane, then multiply once.
	const __m128 factor = _mm_set1_ps(f);
	CVector scaled;
	scaled.mm = _mm_mul_ps(mm, factor);
	return scaled;
#else
	return CVector(x * f, y * f, z * f);
#endif
}
inline CVector CVector::operator/(float f) const
{
@ -75,12 +105,24 @@ inline CVector CVector::operator/(float f) const
}
/// Returns the negation of this vector as a new vector.
inline CVector CVector::operator-() const
{
#ifdef USE_SSE2
	// The SSE2 path negates by multiplying every lane by -1.
	const __m128 minusOne = _mm_set1_ps(-1.0f);
	CVector negated;
	negated.mm = _mm_mul_ps(mm, minusOne);
	return negated;
#else
	return CVector(-x, -y, -z);
#endif
}
/// Scalar-on-the-left multiplication: returns \p v scaled by \p f.
inline CVector operator*(float f, const CVector &v)
{
#ifdef USE_SSE2
	// Broadcast the scalar to every lane, then multiply once.
	const __m128 factor = _mm_set1_ps(f);
	CVector scaled;
	scaled.mm = _mm_mul_ps(v.mm, factor);
	return scaled;
#else
	return CVector(v.x * f, v.y * f, v.z * f);
#endif
}

@ -71,33 +71,7 @@ extern "C" long _ftol2( double dblSource ) { return _ftol( dblSource ); }
#endif // NL_OS_WINDOWS
#ifdef HAS_SSE2
# ifdef NL_COMP_VC
/**
 * Aligned allocation wrapper for Visual C++ builds.
 * Passes \p size and \p alignment through to the CRT's _aligned_malloc.
 */
inline void *aligned_malloc(size_t size, size_t alignment)
{
	void *const block = _aligned_malloc(size, alignment);
	return block;
}
/**
 * Aligned release wrapper for Visual C++ builds.
 * Hands \p p to the CRT's _aligned_free.
 */
inline void aligned_free(void *p)
{
	// Pass the declared parameter; no identifier named `ptr` exists in this scope.
	_aligned_free(p);
}
# else
/**
 * Aligned allocation wrapper for non-Visual-C++ builds.
 * Returns the result of memalign for \p size bytes at \p alignment.
 */
inline void *aligned_malloc(size_t size, size_t alignment)
{
	// memalign's argument order is (alignment, size).
	void *const block = memalign(alignment, size);
	return block;
}
/**
 * Aligned release wrapper for non-Visual-C++ builds.
 * Passes \p ptr straight to free().
 */
inline void aligned_free(void *ptr)
{
	free(ptr);
}
# endif /* NL_COMP_ */
#ifdef USE_SSE2
void *operator new(size_t size) throw(std::bad_alloc)
{

Loading…
Cancel
Save