SSE2: Link regular matrix class with fast matrix

--HG--
branch : sse2
hg/feature/sse2
kaetemi 11 years ago
parent c5d7664ce5
commit 3ec065c9a2

@ -20,7 +20,11 @@
#include "vector.h" #include "vector.h"
#include "vector_h.h" #include "vector_h.h"
#include "quat.h" #include "quat.h"
#include "vectorf.h"
#ifdef NL_HAS_SSE2
# include "matrixf.h"
#endif
namespace NLMISC namespace NLMISC
{ {
@ -358,9 +362,24 @@ public:
/// Plane (line vector) multiplication. /// Plane (line vector) multiplication.
friend CPlane operator*(const CPlane &p, const CMatrix &m); friend CPlane operator*(const CPlane &p, const CMatrix &m);
// Accessors exposing this matrix as a CMatrix44F (const and non-const overloads).
#ifdef NL_HAS_SSE2
// SSE2 build: calls testExpandRot() and testExpandProj() on this matrix, then returns the MF
// member, which shares its storage with M[16] through the anonymous union declared below.
// NOTE(review): presumably the testExpand* calls fill in the rotation and projection parts
// that StateBit marks as not yet valid, so that all 16 floats are meaningful - confirm
// against their definitions.
// NOTE(review): nothing here updates StateBit or Scale33 when a caller writes through the
// non-const reference - confirm callers keep them consistent.
inline CMatrix44F &getMatrix() { testExpandRot(); testExpandProj(); return MF; }
inline const CMatrix44F &getMatrix() const { testExpandRot(); testExpandProj(); return MF; }
#else
// Non-SSE2 build: reinterprets the whole CMatrix object as a CMatrix44F; unlike the SSE2
// branch, no testExpandRot()/testExpandProj() call is made before returning.
// NOTE(review): this assumes a CMatrix44F can alias the start of a CMatrix (layout and
// aliasing rules) - confirm. Also note that this header only includes "matrixf.h" when
// NL_HAS_SSE2 is defined - confirm CMatrix44F is declared in this configuration.
inline CMatrix44F &getMatrix() { return reinterpret_cast<CMatrix44F &>(*this); }
inline const CMatrix44F &getMatrix() const { return reinterpret_cast<const CMatrix44F &>(*this); }
#endif
private: private:
// Storage for the 16 matrix coefficients.
#ifdef NL_HAS_SSE2
// SSE2 build: an anonymous union overlays the float array M with the CMatrix44F member MF,
// so the same memory can be addressed either as M[i] or through MF.
// NOTE(review): reading one union member after writing the other is type punning - confirm
// the supported compilers allow it, and that CMatrix44F's alignment requirement (presumably
// 16 bytes for SSE2) is compatible with how CMatrix objects are allocated.
union
{
float M[16];
CMatrix44F MF;
};
#else
// Non-SSE2 build: plain array of 16 floats.
float M[16]; float M[16];
#endif
float Scale33; float Scale33;
uint32 StateBit; // BitVector. 0<=>identity. uint32 StateBit; // BitVector. 0<=>identity.

@ -27,6 +27,11 @@ using namespace std;
#define new DEBUG_NEW #define new DEBUG_NEW
#endif #endif
// Compile-time switches selecting the SSE2 code paths in this file.
// They are only considered when NL_HAS_SSE2 is defined.
#ifdef NL_HAS_SSE2
// NL_MATRIX_MUL_SSE2 guards the mul(m1.MF, m2.MF) paths in setMulMatrixNoProj() and
// setMulMatrix(). The define below is commented out, so those paths are currently disabled.
// #define NL_MATRIX_MUL_SSE2
// NL_MATRIX_COPY_SSE2 guards the MF-based copy path in CMatrix::operator=.
#define NL_MATRIX_COPY_SSE2
#endif
namespace NLMISC namespace NLMISC
{ {
@ -161,6 +166,20 @@ CMatrix::CMatrix(const CMatrix &m)
// ====================================================================================================== // ======================================================================================================
CMatrix &CMatrix::operator=(const CMatrix &m) CMatrix &CMatrix::operator=(const CMatrix &m)
{ {
#ifdef NL_MATRIX_COPY_SSE2
if (m.StateBit & ((~MAT_VALIDALL) & (~MAT_TRANS)))
{
MF = m.MF;
StateBit = m.StateBit;
Scale33 = m.Scale33;
}
else
{
// Must always copy Trans part.
MF.d = m.MF.d;
StateBit = m.StateBit & MAT_TRANS;
}
#else
StateBit= m.StateBit & ~MAT_VALIDALL; StateBit= m.StateBit & ~MAT_VALIDALL;
if(hasAll()) if(hasAll())
{ {
@ -186,6 +205,7 @@ CMatrix &CMatrix::operator=(const CMatrix &m)
// Must always copy Trans part. // Must always copy Trans part.
memcpy(&a14, &m.a14, 3*sizeof(float)); memcpy(&a14, &m.a14, 3*sizeof(float));
} }
#endif
return *this; return *this;
} }
@ -694,6 +714,13 @@ void CMatrix::scale(const CVector &v)
// *************************************************************************** // ***************************************************************************
void CMatrix::setMulMatrixNoProj(const CMatrix &m1, const CMatrix &m2) void CMatrix::setMulMatrixNoProj(const CMatrix &m1, const CMatrix &m2)
{ {
#ifdef NL_MATRIX_MUL_SSE2
m1.testExpandRot();
m1.testExpandProj();
m2.testExpandRot();
m2.testExpandProj();
MF = mul(m1.MF, m2.MF);
#else
/* /*
For a fast MulMatrix, it appears to be better to not take State bits into account (no test/if() overhead) For a fast MulMatrix, it appears to be better to not take State bits into account (no test/if() overhead)
Just do heavy mul all the time (common case, and not so slow) Just do heavy mul all the time (common case, and not so slow)
@ -720,6 +747,7 @@ void CMatrix::setMulMatrixNoProj(const CMatrix &m1, const CMatrix &m2)
a14= m1.a11*m2.a14 + m1.a12*m2.a24 + m1.a13*m2.a34 + m1.a14; a14= m1.a11*m2.a14 + m1.a12*m2.a24 + m1.a13*m2.a34 + m1.a14;
a24= m1.a21*m2.a14 + m1.a22*m2.a24 + m1.a23*m2.a34 + m1.a24; a24= m1.a21*m2.a14 + m1.a22*m2.a24 + m1.a23*m2.a34 + m1.a24;
a34= m1.a31*m2.a14 + m1.a32*m2.a24 + m1.a33*m2.a34 + m1.a34; a34= m1.a31*m2.a14 + m1.a32*m2.a24 + m1.a33*m2.a34 + m1.a34;
#endif
// Setup no proj at all, and force valid rot (still may be identity, but 0/1 are filled) // Setup no proj at all, and force valid rot (still may be identity, but 0/1 are filled)
StateBit= (m1.StateBit | m2.StateBit | MAT_VALIDROT) & ~(MAT_PROJ|MAT_VALIDPROJ); StateBit= (m1.StateBit | m2.StateBit | MAT_VALIDROT) & ~(MAT_PROJ|MAT_VALIDPROJ);
@ -736,6 +764,17 @@ void CMatrix::setMulMatrixNoProj(const CMatrix &m1, const CMatrix &m2)
// *************************************************************************** // ***************************************************************************
void CMatrix::setMulMatrix(const CMatrix &m1, const CMatrix &m2) void CMatrix::setMulMatrix(const CMatrix &m1, const CMatrix &m2)
{ {
#ifdef NL_MATRIX_MUL_SSE2
m1.testExpandRot();
m1.testExpandProj();
m2.testExpandRot();
m2.testExpandProj();
MF = mul(m1.MF, m2.MF);
StateBit = m1.StateBit | m2.StateBit;
StateBit |= MAT_VALIDALL;
if (m1.hasTrans() && m2.hasProj())
StateBit |= MAT_ROT | MAT_SCALEANY;
#else
// Do *this= m1*m2 // Do *this= m1*m2
identity(); identity();
StateBit= m1.StateBit | m2.StateBit; StateBit= m1.StateBit | m2.StateBit;
@ -824,18 +863,22 @@ void CMatrix::setMulMatrix(const CMatrix &m1, const CMatrix &m2)
a32+= m1.a34*m2.a42; a32+= m1.a34*m2.a42;
a33+= m1.a34*m2.a43; a33+= m1.a34*m2.a43;
} }
#endif
// Modify Scale. // Modify Scale.
if( (StateBit & MAT_SCALEUNI) && !(StateBit & MAT_SCALEANY) ) if( (StateBit & MAT_SCALEUNI) && !(StateBit & MAT_SCALEANY) )
{ {
// Must have correct Scale33 // Must have correct Scale33
#ifndef NL_MATRIX_MUL_SSE2
m1.testExpandRot(); m1.testExpandRot();
m2.testExpandRot(); m2.testExpandRot();
#endif
Scale33= m1.Scale33*m2.Scale33; Scale33= m1.Scale33*m2.Scale33;
} }
else else
Scale33=1; Scale33=1;
#ifndef NL_MATRIX_MUL_SSE2
// In every case, I am valid now! // In every case, I am valid now!
StateBit|=MAT_VALIDROT; StateBit|=MAT_VALIDROT;
@ -902,6 +945,7 @@ void CMatrix::setMulMatrix(const CMatrix &m1, const CMatrix &m2)
{ {
// Don't copy proj part, and leave MAT_VALIDPROJ not set // Don't copy proj part, and leave MAT_VALIDPROJ not set
} }
#endif
} }
// ====================================================================================================== // ======================================================================================================
void CMatrix::invert() void CMatrix::invert()

Loading…
Cancel
Save