From 5f54f75802910517b4248e632d3d8431092707d3 Mon Sep 17 00:00:00 2001 From: kaetemi Date: Thu, 12 Jun 2014 21:57:34 +0200 Subject: [PATCH 01/21] SSE2: Initial testing implementation --HG-- branch : sse2 --- code/CMakeLists.txt | 4 + code/CMakeModules/nel.cmake | 2 + code/nel/include/nel/3d/computed_string.h | 2 +- .../nel/3d/ps_attrib_maker_bin_op_inline.h | 20 +-- .../include/nel/3d/ps_attrib_maker_helper.h | 2 +- .../include/nel/3d/ps_attrib_maker_template.h | 6 +- code/nel/include/nel/3d/ps_color.h | 4 +- code/nel/include/nel/3d/ps_edit.h | 2 +- code/nel/include/nel/3d/ps_force.h | 2 +- code/nel/include/nel/3d/ps_located.h | 2 +- code/nel/include/nel/3d/ps_zone.h | 4 +- code/nel/include/nel/3d/quad_grid.h | 8 +- .../nel/include/nel/3d/shadow_poly_receiver.h | 19 ++- code/nel/include/nel/3d/static_quad_grid.h | 4 +- code/nel/include/nel/3d/zone_lighter.h | 2 +- code/nel/include/nel/ligo/primitive.h | 2 +- code/nel/include/nel/misc/types_nl.h | 14 ++ code/nel/include/nel/pacs/chain_quad.h | 2 +- code/nel/include/nel/pacs/edge_quad.h | 2 +- code/nel/include/nel/pacs/local_retriever.h | 6 +- code/nel/include/nel/pacs/quad_grid.h | 8 +- .../nel/sound/background_sound_manager.h | 2 +- code/nel/src/3d/computed_string.cpp | 4 +- code/nel/src/3d/mesh_mrm_skin_template.cpp | 2 +- code/nel/src/3d/mesh_mrm_skinned_template.cpp | 2 +- code/nel/src/3d/ps_zone.cpp | 4 +- code/nel/src/3d/zone_lighter.cpp | 2 +- code/nel/src/ligo/primitive.cpp | 2 +- code/nel/src/misc/common.cpp | 55 +++++++ code/nel/src/misc/matrix.cpp | 146 +++++++++++++++++- code/nel/src/misc/polygon.cpp | 2 +- code/nel/src/pacs/chain_quad.cpp | 5 +- code/nel/src/pacs/edge_quad.cpp | 5 +- code/nel/src/pacs/local_retriever.cpp | 6 +- code/ryzom/client/src/decal.cpp | 10 +- 35 files changed, 307 insertions(+), 57 deletions(-) diff --git a/code/CMakeLists.txt b/code/CMakeLists.txt index 4f0439dfd..f2fb9ac81 100644 --- a/code/CMakeLists.txt +++ b/code/CMakeLists.txt @@ -131,6 +131,10 @@ IF(FINAL_VERSION) 
ADD_DEFINITIONS(-DFINAL_VERSION=1) ENDIF(FINAL_VERSION) +IF(WITH_SSE2) + ADD_DEFINITIONS(-DUSE_SSE2) +ENDIF(WITH_SSE2) + IF(WITH_QT) FIND_PACKAGE(Qt4 COMPONENTS QtCore QtGui QtXml QtOpenGL REQUIRED) ENDIF(WITH_QT) diff --git a/code/CMakeModules/nel.cmake b/code/CMakeModules/nel.cmake index b194b5ff9..5a4002ed4 100644 --- a/code/CMakeModules/nel.cmake +++ b/code/CMakeModules/nel.cmake @@ -324,6 +324,8 @@ MACRO(NL_SETUP_NEL_DEFAULT_OPTIONS) OPTION(WITH_LIBOVR "With LibOVR support" OFF) OPTION(WITH_LIBVR "With LibVR support" OFF) OPTION(WITH_PERFHUD "With NVIDIA PerfHUD support" OFF) + + OPTION(WITH_SSE2 "With SSE2" ON ) ENDMACRO(NL_SETUP_NEL_DEFAULT_OPTIONS) MACRO(NL_SETUP_NELNS_DEFAULT_OPTIONS) diff --git a/code/nel/include/nel/3d/computed_string.h b/code/nel/include/nel/3d/computed_string.h index fcb758da4..517200383 100644 --- a/code/nel/include/nel/3d/computed_string.h +++ b/code/nel/include/nel/3d/computed_string.h @@ -290,7 +290,7 @@ public: * \param matrix transformation matrix * \param hotspot position of string origine */ - void render3D (IDriver& driver,CMatrix matrix,THotSpot hotspot = MiddleMiddle); + void render3D (IDriver& driver, const CMatrix &matrix, THotSpot hotspot = MiddleMiddle); }; diff --git a/code/nel/include/nel/3d/ps_attrib_maker_bin_op_inline.h b/code/nel/include/nel/3d/ps_attrib_maker_bin_op_inline.h index 2a9cbff45..0070ffb38 100644 --- a/code/nel/include/nel/3d/ps_attrib_maker_bin_op_inline.h +++ b/code/nel/include/nel/3d/ps_attrib_maker_bin_op_inline.h @@ -33,14 +33,14 @@ namespace NL3D { */ template -inline T PSBinOpModulate(T arg1, T arg2) { return arg1 * arg2; } +inline T PSBinOpModulate(const T &arg1, const T &arg2) { return arg1 * arg2; } template -inline T PSBinOpAdd(T arg1, T arg2) { return arg1 + arg2; } +inline T PSBinOpAdd(const T &arg1, const T &arg2) { return arg1 + arg2; } template -inline T PSBinOpSubtract(T arg1, T arg2) { return arg1 - arg2; } +inline T PSBinOpSubtract(const T &arg1, const T &arg2) { return arg1 - arg2; 
} template <> -inline CPlaneBasis PSBinOpModulate(CPlaneBasis p1, CPlaneBasis p2) +inline CPlaneBasis PSBinOpModulate(const CPlaneBasis &p1, const CPlaneBasis &p2) { // we compute p1 * p2 NLMISC::CVector z = p1.X ^ p1.Y; @@ -57,13 +57,13 @@ inline CPlaneBasis PSBinOpModulate(CPlaneBasis p1, CPlaneBasis p2) } template <> -inline CPlaneBasis PSBinOpAdd(CPlaneBasis /* p1 */, CPlaneBasis /* p2 */) +inline CPlaneBasis PSBinOpAdd(const CPlaneBasis &/* p1 */, const CPlaneBasis &/* p2 */) { nlassert(0); // not allowed for now return CPlaneBasis(NLMISC::CVector::Null); } template <> -inline CPlaneBasis PSBinOpSubtract(CPlaneBasis /* p1 */, CPlaneBasis /* p2 */) +inline CPlaneBasis PSBinOpSubtract(const CPlaneBasis &/* p1 */, const CPlaneBasis &/* p2 */) { nlassert(0); // not allowed for now return CPlaneBasis(NLMISC::CVector::Null); @@ -71,21 +71,21 @@ inline CPlaneBasis PSBinOpSubtract(CPlaneBasis /* p1 */, CPlaneBasis /* p2 */) template <> -inline uint32 PSBinOpSubtract(uint32 lhs, uint32 rhs) +inline uint32 PSBinOpSubtract(const uint32 &lhs, const uint32 &rhs) { return rhs > lhs ? 
0 : lhs - rhs; // avoid overflow } template <> -inline NLMISC::CRGBA PSBinOpModulate(NLMISC::CRGBA t1, NLMISC::CRGBA t2) +inline NLMISC::CRGBA PSBinOpModulate(const NLMISC::CRGBA &t1, const NLMISC::CRGBA &t2) { NLMISC::CRGBA result; result.modulateFromColor(t1, t2); return result; } template <> -inline NLMISC::CRGBA PSBinOpAdd(NLMISC::CRGBA t1, NLMISC::CRGBA t2) +inline NLMISC::CRGBA PSBinOpAdd(const NLMISC::CRGBA &t1, const NLMISC::CRGBA &t2) { NLMISC::CRGBA r; uint S = t1.R + t2.R; if (S > 255) S = 255; r.R = (uint8) S; @@ -94,7 +94,7 @@ inline NLMISC::CRGBA PSBinOpAdd(NLMISC::CRGBA t1, NLMISC::CRGBA t2) return r; } template <> -inline NLMISC::CRGBA PSBinOpSubtract(NLMISC::CRGBA t1, NLMISC::CRGBA t2) +inline NLMISC::CRGBA PSBinOpSubtract(const NLMISC::CRGBA &t1, const NLMISC::CRGBA &t2) { NLMISC::CRGBA r; sint S = t1.R - t2.R; if (S < 0) S = 0; r.R = (uint8) S; diff --git a/code/nel/include/nel/3d/ps_attrib_maker_helper.h b/code/nel/include/nel/3d/ps_attrib_maker_helper.h index 10d29fe52..147d1ae5d 100644 --- a/code/nel/include/nel/3d/ps_attrib_maker_helper.h +++ b/code/nel/include/nel/3d/ps_attrib_maker_helper.h @@ -1432,7 +1432,7 @@ public: * */ - virtual void setDefaultValue(T defaultValue) { _DefaultValue = defaultValue;} + virtual void setDefaultValue(const T &defaultValue) { _DefaultValue = defaultValue;} /// get the default value : virtual T getDefaultValue(void) const { return _DefaultValue; } diff --git a/code/nel/include/nel/3d/ps_attrib_maker_template.h b/code/nel/include/nel/3d/ps_attrib_maker_template.h index 92953b86f..72bc62df9 100644 --- a/code/nel/include/nel/3d/ps_attrib_maker_template.h +++ b/code/nel/include/nel/3d/ps_attrib_maker_template.h @@ -68,7 +68,7 @@ template struct CPSValueBlendFuncBase { virtual ~CPSValueBlendFuncBase() {} virtual void getValues(T &startValue, T &endValue) const = 0; - virtual void setValues(T startValue, T endValue) = 0; + virtual void setValues(const T &startValue, const T &endValue) = 0; }; @@ -122,7 +122,7 @@ 
public: } /// Set the Values between which to blend. - virtual void setValues(T startValue, T endValue) + virtual void setValues(const T &startValue, const T &endValue) { _StartValue = startValue; _EndValue = endValue; @@ -210,7 +210,7 @@ public: /// set the Values - virtual void setValues(T startValue, T endValue) + virtual void setValues(const T &startValue, const T &endValue) { float step = 1.f / n; float alpha = 0.0f; diff --git a/code/nel/include/nel/3d/ps_color.h b/code/nel/include/nel/3d/ps_color.h index d05d9cf11..bd92bcbe6 100644 --- a/code/nel/include/nel/3d/ps_color.h +++ b/code/nel/include/nel/3d/ps_color.h @@ -62,7 +62,7 @@ public: endValue = convertVBColor(endValue, _ColorType); } - virtual void setValues(NLMISC::CRGBA startValue, NLMISC::CRGBA endValue) + virtual void setValues(const NLMISC::CRGBA &startValue, const NLMISC::CRGBA &endValue) { CPSValueBlendFunc::setValues(convertVBColor(startValue, _ColorType), convertVBColor(endValue, _ColorType)); } @@ -96,7 +96,7 @@ public: endValue = convertVBColor(endValue, _ColorType); } - virtual void setValues(NLMISC::CRGBA startValue, NLMISC::CRGBA endValue) + virtual void setValues(const NLMISC::CRGBA &startValue, const NLMISC::CRGBA &endValue) { CPSValueBlendSampleFunc::setValues(convertVBColor(startValue, _ColorType), convertVBColor(endValue, _ColorType)); } diff --git a/code/nel/include/nel/3d/ps_edit.h b/code/nel/include/nel/3d/ps_edit.h index de7957f54..0c2da9e71 100644 --- a/code/nel/include/nel/3d/ps_edit.h +++ b/code/nel/include/nel/3d/ps_edit.h @@ -82,7 +82,7 @@ struct IPSMover virtual NLMISC::CVector getNormal(uint32 /* index */) { NL_PS_FUNC(getNormal); return NLMISC::CVector::Null ; } /// if the object only stores a normal, this set the normal of the object. 
Otherwise it has no effect - virtual void setNormal(uint32 /* index */, NLMISC::CVector /* n */) { NL_PS_FUNC(setNormal); } + virtual void setNormal(uint32 /* index */, const NLMISC::CVector &/* n */) { NL_PS_FUNC(setNormal); } // set a new orthogonal matrix for the object virtual void setMatrix(uint32 index, const NLMISC::CMatrix &m) = 0 ; diff --git a/code/nel/include/nel/3d/ps_force.h b/code/nel/include/nel/3d/ps_force.h index e93c21361..76f22f40b 100644 --- a/code/nel/include/nel/3d/ps_force.h +++ b/code/nel/include/nel/3d/ps_force.h @@ -741,7 +741,7 @@ public: virtual NLMISC::CVector getScale(uint32 k) const { return NLMISC::CVector(_Radius[k], _Radius[k], _Radius[k]); } virtual bool onlyStoreNormal(void) const { return true; } virtual NLMISC::CVector getNormal(uint32 index) { return _Normal[index]; } - virtual void setNormal(uint32 index, NLMISC::CVector n) { _Normal[index] = n; } + virtual void setNormal(uint32 index, const NLMISC::CVector &n) { _Normal[index] = n; } virtual void setMatrix(uint32 index, const NLMISC::CMatrix &m); virtual NLMISC::CMatrix getMatrix(uint32 index) const; diff --git a/code/nel/include/nel/3d/ps_located.h b/code/nel/include/nel/3d/ps_located.h index 30fa7defa..2c4862b63 100644 --- a/code/nel/include/nel/3d/ps_located.h +++ b/code/nel/include/nel/3d/ps_located.h @@ -613,7 +613,7 @@ public: struct CParametricInfo { CParametricInfo() {} - CParametricInfo(NLMISC::CVector pos, NLMISC::CVector speed, float date) + CParametricInfo(const NLMISC::CVector &pos, const NLMISC::CVector &speed, float date) : Pos(pos), Speed(speed), Date(date) { } diff --git a/code/nel/include/nel/3d/ps_zone.h b/code/nel/include/nel/3d/ps_zone.h index 7289e64e0..cf29bc258 100644 --- a/code/nel/include/nel/3d/ps_zone.h +++ b/code/nel/include/nel/3d/ps_zone.h @@ -153,7 +153,7 @@ class CPSZonePlane : public CPSZone, public IPSMover virtual NLMISC::CMatrix getMatrix(uint32 index) const; virtual bool onlyStoreNormal(void) const { return true; } virtual 
NLMISC::CVector getNormal(uint32 index); - virtual void setNormal(uint32 index, NLMISC::CVector n); + virtual void setNormal(uint32 index, const NLMISC::CVector &n); virtual void serial(NLMISC::IStream &f) throw(NLMISC::EStream); @@ -255,7 +255,7 @@ class CPSZoneDisc : public CPSZone, public IPSMover virtual NLMISC::CVector getScale(uint32 k) const; virtual bool onlyStoreNormal(void) const { return true; } virtual NLMISC::CVector getNormal(uint32 index); - virtual void setNormal(uint32 index, NLMISC::CVector n); + virtual void setNormal(uint32 index, const NLMISC::CVector &n); virtual void serial(NLMISC::IStream &f) throw(NLMISC::EStream); diff --git a/code/nel/include/nel/3d/quad_grid.h b/code/nel/include/nel/3d/quad_grid.h index 12160b540..e97543896 100644 --- a/code/nel/include/nel/3d/quad_grid.h +++ b/code/nel/include/nel/3d/quad_grid.h @@ -314,11 +314,11 @@ private:// Methods. } // return the coordinates on the grid of what include the bbox. - void selectQuads(CVector bmin, CVector bmax, sint &x0, sint &x1, sint &y0, sint &y1) + void selectQuads(const CVector &bminp, const CVector &bmaxp, sint &x0, sint &x1, sint &y0, sint &y1) { - CVector bminp, bmaxp; - bminp= bmin; - bmaxp= bmax; + CVector bmin, bmax; + bmin= bminp; + bmax= bmaxp; bmin.minof(bminp, bmaxp); bmax.maxof(bminp, bmaxp); bmin/= _EltSize; diff --git a/code/nel/include/nel/3d/shadow_poly_receiver.h b/code/nel/include/nel/3d/shadow_poly_receiver.h index 5c9476849..c781578ea 100644 --- a/code/nel/include/nel/3d/shadow_poly_receiver.h +++ b/code/nel/include/nel/3d/shadow_poly_receiver.h @@ -85,10 +85,27 @@ public: // a vertex struct CRGBAVertex { +#if USE_SSE2 + float X, Y, Z; +#else CVector V; +#endif CRGBA Color; CRGBAVertex() {} - CRGBAVertex(const CVector &v, CRGBA c) : V(v), Color(c) {} +#if USE_SSE2 + CRGBAVertex(const CVector &v, CRGBA c) : X(v.x), Y(v.y), Z(v.z), Color(c) {} + const CVector &asVector() const + { + //nlctassert(sizeof(CVector) == sizeof(CRGBAVertex)); + 
nlctassert(sizeof(CVector) + 4 == sizeof(CRGBAVertex)); + *reinterpret_cast(this); + } +#else + const CVector &asVector() const + { + return V; + } +#endif }; /** Compute list of clipped tri under the shadow mat diff --git a/code/nel/include/nel/3d/static_quad_grid.h b/code/nel/include/nel/3d/static_quad_grid.h index 568ae3c0e..0bc171a74 100644 --- a/code/nel/include/nel/3d/static_quad_grid.h +++ b/code/nel/include/nel/3d/static_quad_grid.h @@ -102,8 +102,10 @@ private:// Atttributes. // return the coordinates on the grid of what include the bbox. - void selectPoint(CVector point, sint &x0, sint &y0) + void selectPoint(const CVector &pointp, sint &x0, sint &y0) { + CVector point = pointp; + point/= _EltSize; x0= (sint)(floor(point.x)); y0= (sint)(floor(point.y)); diff --git a/code/nel/include/nel/3d/zone_lighter.h b/code/nel/include/nel/3d/zone_lighter.h index 4f2910c52..52ef66199 100644 --- a/code/nel/include/nel/3d/zone_lighter.h +++ b/code/nel/include/nel/3d/zone_lighter.h @@ -421,7 +421,7 @@ private: * The vector of water shapes is released then * \param bbox the bbox of the zone containing the water shapes */ - void makeQuadGridFromWaterShapes(NLMISC::CAABBox zoneBBox); + void makeQuadGridFromWaterShapes(const NLMISC::CAABBox &zoneBBox); /** For each tile of the current zone, check whether it below or above water. 
diff --git a/code/nel/include/nel/ligo/primitive.h b/code/nel/include/nel/ligo/primitive.h index c050f14b6..b2f703015 100644 --- a/code/nel/include/nel/ligo/primitive.h +++ b/code/nel/include/nel/ligo/primitive.h @@ -523,7 +523,7 @@ public: std::vector VPoints; - static float getSegmentDist(const NLMISC::CVector v, const NLMISC::CVector &p1, const NLMISC::CVector &p2, NLMISC::CVector &nearPos); + static float getSegmentDist(const NLMISC::CVector &v, const NLMISC::CVector &p1, const NLMISC::CVector &p2, NLMISC::CVector &nearPos); public: diff --git a/code/nel/include/nel/misc/types_nl.h b/code/nel/include/nel/misc/types_nl.h index 5c3b80475..b5aa77e68 100644 --- a/code/nel/include/nel/misc/types_nl.h +++ b/code/nel/include/nel/misc/types_nl.h @@ -328,6 +328,20 @@ typedef unsigned int uint; // at least 32bits (depend of processor) #endif // NL_OS_UNIX +#define NL_DEFAULT_MEMORY_ALIGNMENT 16 +#ifdef NL_COMP_VC +#define NL_ALIGN(nb) __declspec(align(nb)) +#else +#define NL_ALIGN(nb) __attribute__((aligned(nb))) +#endif + +#ifdef USE_SSE2 +extern void *operator new(size_t size) throw(std::bad_alloc); +extern void *operator new[](size_t size) throw(std::bad_alloc); +extern void operator delete(void *p) throw(); +extern void operator delete[](void *p) throw(); +#endif + // CHashMap, CHashSet and CHashMultiMap definitions #if defined(_STLPORT_VERSION) // STLport detected # include diff --git a/code/nel/include/nel/pacs/chain_quad.h b/code/nel/include/nel/pacs/chain_quad.h index 0a9df779e..fb332f1f9 100644 --- a/code/nel/include/nel/pacs/chain_quad.h +++ b/code/nel/include/nel/pacs/chain_quad.h @@ -81,7 +81,7 @@ public: * \param cst the array of CEdgeChainEntry to fill. contain also OChainLUT, an array for internal use. In: must be filled with 0xFFFF. Out: still filled with 0xFFFF. * \return number of edgechain found. stored in cst.EdgeChainEntries (array cleared first). 
*/ - sint selectEdges(CVector start, CVector end, CCollisionSurfaceTemp &cst) const; + sint selectEdges(const CVector &start, const CVector &end, CCollisionSurfaceTemp &cst) const; /// serial. diff --git a/code/nel/include/nel/pacs/edge_quad.h b/code/nel/include/nel/pacs/edge_quad.h index 71785cb13..95e57042c 100644 --- a/code/nel/include/nel/pacs/edge_quad.h +++ b/code/nel/include/nel/pacs/edge_quad.h @@ -92,7 +92,7 @@ public: * \param cst the array of CExteriorEdgeEntry to fill. contain also OChainLUT, an array for internal use. In: must be filled with 0xFFFF. Out: still filled with 0xFFFF. * \return number of exterioredge found. stored in cst.ExteriorEdgeEntries (array cleared first). */ - sint selectEdges(CVector start, CVector end, CCollisionSurfaceTemp &cst) const; + sint selectEdges(const CVector &start, const CVector &end, CCollisionSurfaceTemp &cst) const; /// Get the whole set of edge entries diff --git a/code/nel/include/nel/pacs/local_retriever.h b/code/nel/include/nel/pacs/local_retriever.h index d76aa0891..d90ed6dc8 100644 --- a/code/nel/include/nel/pacs/local_retriever.h +++ b/code/nel/include/nel/pacs/local_retriever.h @@ -548,12 +548,12 @@ public: /** * Check all surfaces integrity */ - bool checkSurfacesIntegrity(NLMISC::CVector translation = NLMISC::CVector::Null, bool verbose = false) const; + bool checkSurfacesIntegrity(const NLMISC::CVector &translation = NLMISC::CVector::Null, bool verbose = false) const; /** * Check surface integrity */ - bool checkSurfaceIntegrity(uint surf, NLMISC::CVector translation = NLMISC::CVector::Null, bool verbose = false) const; + bool checkSurfaceIntegrity(uint surf, const NLMISC::CVector &translation = NLMISC::CVector::Null, bool verbose = false) const; // @} @@ -565,7 +565,7 @@ protected: bool insurePosition(ULocalPosition &local) const; /// Retrieves a position inside the retriever (from the local position), returns true if the position is close to a border - void retrievePosition(NLMISC::CVector estimated, 
CCollisionSurfaceTemp &cst) const; + void retrievePosition(const NLMISC::CVector &estimated, CCollisionSurfaceTemp &cst) const; /// Retrieves a position inside the retriever (from the local position), returns true if the position is close to a border void retrieveAccuratePosition(CVector2s estimated, CCollisionSurfaceTemp &cst, bool &onBorder) const; diff --git a/code/nel/include/nel/pacs/quad_grid.h b/code/nel/include/nel/pacs/quad_grid.h index aa2b383b0..61cf76c6f 100644 --- a/code/nel/include/nel/pacs/quad_grid.h +++ b/code/nel/include/nel/pacs/quad_grid.h @@ -187,11 +187,11 @@ private:// Atttributes. private:// Methods. // return the coordinates on the grid of what include the bbox. - void selectQuads(CVector bmin, CVector bmax, sint &x0, sint &x1, sint &y0, sint &y1) + void selectQuads(const CVector &bminp, const CVector &bmaxp, sint &x0, sint &x1, sint &y0, sint &y1) { - CVector bminp, bmaxp; - bminp= bmin; - bmaxp= bmax; + CVector bmin, bmax; + bmin= bminp; + bmax= bmaxp; bmin.minof(bminp, bmaxp); bmax.maxof(bminp, bmaxp); bmin/= _EltSize; diff --git a/code/nel/include/nel/sound/background_sound_manager.h b/code/nel/include/nel/sound/background_sound_manager.h index 11f33d2be..326ece0f3 100644 --- a/code/nel/include/nel/sound/background_sound_manager.h +++ b/code/nel/include/nel/sound/background_sound_manager.h @@ -270,7 +270,7 @@ private: /// flag if inside a sound zone bool Inside; /// Constructor. 
- TSoundStatus(TSoundData &sd, NLMISC::CVector position, float gain, float distance, bool inside) + TSoundStatus(TSoundData &sd, const NLMISC::CVector &position, float gain, float distance, bool inside) : SoundData(sd), Position(position), Gain(gain), Distance(distance), Inside(inside) {} }; diff --git a/code/nel/src/3d/computed_string.cpp b/code/nel/src/3d/computed_string.cpp index a57191cc0..1c8962f5e 100644 --- a/code/nel/src/3d/computed_string.cpp +++ b/code/nel/src/3d/computed_string.cpp @@ -143,11 +143,13 @@ void CComputedString::render2D (IDriver& driver, /*------------------------------------------------------------------*\ render3D() \*------------------------------------------------------------------*/ -void CComputedString::render3D (IDriver& driver,CMatrix matrix,THotSpot hotspot) +void CComputedString::render3D (IDriver& driver,const CMatrix &matrixp,THotSpot hotspot) { if (Vertices.getNumVertices() == 0) return; + CMatrix matrix = matrixp; + // get window size uint32 wndWidth, wndHeight; driver.getWindowSize(wndWidth, wndHeight); diff --git a/code/nel/src/3d/mesh_mrm_skin_template.cpp b/code/nel/src/3d/mesh_mrm_skin_template.cpp index 1958cae90..808dce31a 100644 --- a/code/nel/src/3d/mesh_mrm_skin_template.cpp +++ b/code/nel/src/3d/mesh_mrm_skin_template.cpp @@ -494,7 +494,7 @@ void CMeshMRMGeom::applySkinWithTangentSpace(CLod &lod, const CSkeletonModel *sk On a P4-2.4Ghz, for 40000 vertices skinned, both no precaching and asm saves 27% of execution time in the applyRawSkinNormal*() loop (ie 1 ms) */ -#if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM) +#if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM) &&!defined(USE_SSE2) //#define NL3D_RAWSKIN_PRECACHE #define NL3D_RAWSKIN_ASM #endif diff --git a/code/nel/src/3d/mesh_mrm_skinned_template.cpp b/code/nel/src/3d/mesh_mrm_skinned_template.cpp index 5d1b2f582..be072713f 100644 --- a/code/nel/src/3d/mesh_mrm_skinned_template.cpp +++ b/code/nel/src/3d/mesh_mrm_skinned_template.cpp @@ -43,7 +43,7 @@ On a 
P4-2.4Ghz, for 40000 vertices skinned, both no precaching and asm saves 27% of execution time in the applyRawSkinNormal*() loop (ie 1 ms) */ -#if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM) +#if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM) &&!defined(USE_SSE2) //#define NL3D_RAWSKIN_PRECACHE #define NL3D_RAWSKIN_ASM #endif diff --git a/code/nel/src/3d/ps_zone.cpp b/code/nel/src/3d/ps_zone.cpp index 30349906f..813103896 100644 --- a/code/nel/src/3d/ps_zone.cpp +++ b/code/nel/src/3d/ps_zone.cpp @@ -267,7 +267,7 @@ CVector CPSZonePlane::getNormal(uint32 index) NL_PS_FUNC(CPSZonePlane_getNormal) return _Normal[index]; } -void CPSZonePlane::setNormal(uint32 index, CVector n) +void CPSZonePlane::setNormal(uint32 index, const CVector &n) { NL_PS_FUNC(CPSZonePlane_setNormal) _Normal[index] = n; @@ -576,7 +576,7 @@ CVector CPSZoneDisc::getNormal(uint32 index) NL_PS_FUNC(CPSZoneDisc_getNormal) return _Normal[index]; } -void CPSZoneDisc::setNormal(uint32 index, CVector n) +void CPSZoneDisc::setNormal(uint32 index, const CVector &n) { NL_PS_FUNC(CPSZoneDisc_setNormal) _Normal[index] = n; diff --git a/code/nel/src/3d/zone_lighter.cpp b/code/nel/src/3d/zone_lighter.cpp index 1d7ec5a66..b78fa5635 100644 --- a/code/nel/src/3d/zone_lighter.cpp +++ b/code/nel/src/3d/zone_lighter.cpp @@ -3109,7 +3109,7 @@ void CZoneLighter::addWaterShape(CWaterShape *shape, const NLMISC::CMatrix &MT) } // *********************************************************** -void CZoneLighter::makeQuadGridFromWaterShapes(NLMISC::CAABBox zoneBBox) +void CZoneLighter::makeQuadGridFromWaterShapes(const NLMISC::CAABBox &zoneBBox) { if (!_WaterShapes.size()) return; diff --git a/code/nel/src/ligo/primitive.cpp b/code/nel/src/ligo/primitive.cpp index 9cf7df13f..34b650f2a 100644 --- a/code/nel/src/ligo/primitive.cpp +++ b/code/nel/src/ligo/primitive.cpp @@ -875,7 +875,7 @@ bool CPrimZone::contains (const NLMISC::CVector &v, const std::vector & // 
*************************************************************************** -float CPrimZone::getSegmentDist(const NLMISC::CVector v, const NLMISC::CVector &p1, const NLMISC::CVector &p2, NLMISC::CVector &nearPos) +float CPrimZone::getSegmentDist(const NLMISC::CVector &v, const NLMISC::CVector &p1, const NLMISC::CVector &p2, NLMISC::CVector &nearPos) { // two points, compute distance to the segment. CVector V = (p2-p1).normed(); diff --git a/code/nel/src/misc/common.cpp b/code/nel/src/misc/common.cpp index 36e167260..b58792a65 100644 --- a/code/nel/src/misc/common.cpp +++ b/code/nel/src/misc/common.cpp @@ -71,6 +71,61 @@ extern "C" long _ftol2( double dblSource ) { return _ftol( dblSource ); } #endif // NL_OS_WINDOWS +#ifdef HAS_SSE2 + +# ifdef NL_COMP_VC + +inline void *aligned_malloc(size_t size, size_t alignment) +{ + return _aligned_malloc(size, alignment); +} + +inline void aligned_free(void *p) +{ + _aligned_free(ptr); +} + +# else + +inline void *aligned_malloc(size_t size, size_t alignment) +{ + return memalign(alignment, size); +} + +inline void aligned_free(void *ptr) +{ + free(ptr); +} + +# endif /* NL_COMP_ */ + +void *operator new(size_t size) throw(std::bad_alloc) +{ + void *p = aligned_malloc(size, NL_DEFAULT_MEMORY_ALIGNMENT); + if (p == NULL) throw std::bad_alloc(); + return p; +} + +void *operator new[](size_t size) throw(std::bad_alloc) +{ + void *p = aligned_malloc(size, NL_DEFAULT_MEMORY_ALIGNMENT); + if (p == NULL) throw std::bad_alloc(); + return p; +} + +void operator delete(void *p) throw() +{ + aligned_free(p); +} + +void operator delete[](void *p) throw() +{ + aligned_free(p); +} + +#endif /* HAS_SSE2 */ + + #ifdef DEBUG_NEW #define new DEBUG_NEW #endif diff --git a/code/nel/src/misc/matrix.cpp b/code/nel/src/misc/matrix.cpp index dd884f4d5..e99e04304 100644 --- a/code/nel/src/misc/matrix.cpp +++ b/code/nel/src/misc/matrix.cpp @@ -16,6 +16,11 @@ #include "stdmisc.h" +#if (USE_SSE2) +# include +# include +#endif + #include 
"nel/misc/matrix.h" #include "nel/misc/plane.h" #include "nel/misc/debug.h" @@ -690,10 +695,86 @@ void CMatrix::scale(const CVector &v) // ====================================================================================================== // ====================================================================================================== +void CMatrix::setMulMatrixSSE2(const CMatrix &m1, const CMatrix &m2) +{ + m1.testExpandRot(); + m1.testExpandProj(); + m2.testExpandRot(); + m2.testExpandProj(); + + // Use exactly the 8 MMX registers we have + register __m128 in1a = _mm_loadu_ps(&m1.M[0]); + register __m128 in1b = _mm_loadu_ps(&m1.M[4]); + register __m128 in1c = _mm_loadu_ps(&m1.M[8]); + register __m128 in1d = _mm_loadu_ps(&m1.M[12]); + register __m128 in2; + register __m128 outrow; + register __m128 tempsplat; + register __m128 tempmul; + + in2 = _mm_loadu_ps(&m2.M[0]); + tempsplat = _mm_shuffle_ps(in2, in2, _MM_SHUFFLE(0, 0, 0, 0)); + outrow = _mm_mul_ps(in1a, tempsplat); + tempsplat = _mm_shuffle_ps(in2, in2, _MM_SHUFFLE(1, 1, 1, 1)); + tempmul = _mm_mul_ps(in1b, tempsplat); + outrow = _mm_add_ps(outrow, tempmul); + tempsplat = _mm_shuffle_ps(in2, in2, _MM_SHUFFLE(2, 2, 2, 2)); + tempmul = _mm_mul_ps(in1c, tempsplat); + outrow = _mm_add_ps(outrow, tempmul); + tempsplat = _mm_shuffle_ps(in2, in2, _MM_SHUFFLE(3, 3, 3, 3)); + tempmul = _mm_mul_ps(in1d, tempsplat); + outrow = _mm_add_ps(outrow, tempmul); + _mm_storeu_ps(&M[0], outrow); + + in2 = _mm_loadu_ps(&m2.M[4]); + tempsplat = _mm_shuffle_ps(in2, in2, _MM_SHUFFLE(0, 0, 0, 0)); + outrow = _mm_mul_ps(in1a, tempsplat); + tempsplat = _mm_shuffle_ps(in2, in2, _MM_SHUFFLE(1, 1, 1, 1)); + tempmul = _mm_mul_ps(in1b, tempsplat); + outrow = _mm_add_ps(outrow, tempmul); + tempsplat = _mm_shuffle_ps(in2, in2, _MM_SHUFFLE(2, 2, 2, 2)); + tempmul = _mm_mul_ps(in1c, tempsplat); + outrow = _mm_add_ps(outrow, tempmul); + tempsplat = _mm_shuffle_ps(in2, in2, _MM_SHUFFLE(3, 3, 3, 3)); + tempmul = _mm_mul_ps(in1d, 
tempsplat); + outrow = _mm_add_ps(outrow, tempmul); + _mm_storeu_ps(&M[4], outrow); + + in2 = _mm_loadu_ps(&m2.M[8]); + tempsplat = _mm_shuffle_ps(in2, in2, _MM_SHUFFLE(0, 0, 0, 0)); + outrow = _mm_mul_ps(in1a, tempsplat); + tempsplat = _mm_shuffle_ps(in2, in2, _MM_SHUFFLE(1, 1, 1, 1)); + tempmul = _mm_mul_ps(in1b, tempsplat); + outrow = _mm_add_ps(outrow, tempmul); + tempsplat = _mm_shuffle_ps(in2, in2, _MM_SHUFFLE(2, 2, 2, 2)); + tempmul = _mm_mul_ps(in1c, tempsplat); + outrow = _mm_add_ps(outrow, tempmul); + tempsplat = _mm_shuffle_ps(in2, in2, _MM_SHUFFLE(3, 3, 3, 3)); + tempmul = _mm_mul_ps(in1d, tempsplat); + outrow = _mm_add_ps(outrow, tempmul); + _mm_storeu_ps(&M[8], outrow); + + in2 = _mm_loadu_ps(&m2.M[12]); + tempsplat = _mm_shuffle_ps(in2, in2, _MM_SHUFFLE(0, 0, 0, 0)); + outrow = _mm_mul_ps(in1a, tempsplat); + tempsplat = _mm_shuffle_ps(in2, in2, _MM_SHUFFLE(1, 1, 1, 1)); + tempmul = _mm_mul_ps(in1b, tempsplat); + outrow = _mm_add_ps(outrow, tempmul); + tempsplat = _mm_shuffle_ps(in2, in2, _MM_SHUFFLE(2, 2, 2, 2)); + tempmul = _mm_mul_ps(in1c, tempsplat); + outrow = _mm_add_ps(outrow, tempmul); + tempsplat = _mm_shuffle_ps(in2, in2, _MM_SHUFFLE(3, 3, 3, 3)); + tempmul = _mm_mul_ps(in1d, tempsplat); + outrow = _mm_add_ps(outrow, tempmul); + _mm_storeu_ps(&M[12], outrow); +} // *************************************************************************** void CMatrix::setMulMatrixNoProj(const CMatrix &m1, const CMatrix &m2) { +#if USE_SSE2 + setMulMatrixSSE2(m1, m2); +#else /* For a fast MulMatrix, it appears to be better to not take State bits into account (no test/if() overhead) Just do heavy mul all the time (common case, and not so slow) @@ -720,6 +801,7 @@ void CMatrix::setMulMatrixNoProj(const CMatrix &m1, const CMatrix &m2) a14= m1.a11*m2.a14 + m1.a12*m2.a24 + m1.a13*m2.a34 + m1.a14; a24= m1.a21*m2.a14 + m1.a22*m2.a24 + m1.a23*m2.a34 + m1.a24; a34= m1.a31*m2.a14 + m1.a32*m2.a24 + m1.a33*m2.a34 + m1.a34; +#endif // Setup no proj at all, and force 
valid rot (still may be identity, but 0/1 are filled) StateBit= (m1.StateBit | m2.StateBit | MAT_VALIDROT) & ~(MAT_PROJ|MAT_VALIDPROJ); @@ -737,6 +819,13 @@ void CMatrix::setMulMatrixNoProj(const CMatrix &m1, const CMatrix &m2) void CMatrix::setMulMatrix(const CMatrix &m1, const CMatrix &m2) { // Do *this= m1*m2 +#ifdef USE_SSE2 + setMulMatrixSSE2(m1, m2); + StateBit = m1.StateBit | m2.StateBit; + StateBit |= MAT_VALIDALL; + if (m1.hasTrans() && m2.hasProj()) + StateBit |= MAT_ROT | MAT_SCALEANY; +#else identity(); StateBit= m1.StateBit | m2.StateBit; StateBit&= ~MAT_VALIDALL; @@ -824,18 +913,22 @@ void CMatrix::setMulMatrix(const CMatrix &m1, const CMatrix &m2) a32+= m1.a34*m2.a42; a33+= m1.a34*m2.a43; } +#endif // Modify Scale. if( (StateBit & MAT_SCALEUNI) && !(StateBit & MAT_SCALEANY) ) { // Must have correct Scale33 +#ifndef USE_SSE2 m1.testExpandRot(); m2.testExpandRot(); +#endif Scale33= m1.Scale33*m2.Scale33; } else Scale33=1; +#ifndef USE_SSE2 // In every case, I am valid now! StateBit|=MAT_VALIDROT; @@ -902,6 +995,7 @@ void CMatrix::setMulMatrix(const CMatrix &m1, const CMatrix &m2) { // Don't copy proj part, and leave MAT_VALIDPROJ not set } +#endif } // ====================================================================================================== void CMatrix::invert() @@ -1237,11 +1331,36 @@ bool CMatrix::normalize(TRotOrder ro) // ====================================================================================================== // ====================================================================================================== - // ====================================================================================================== CVector CMatrix::mulVector(const CVector &v) const { - +#ifdef USE_SSE2 + if (hasRot()) + { + CVector ret; + register __m128 in1a = _mm_loadu_ps(&M[0]); + register __m128 in1b = _mm_loadu_ps(&M[4]); + register __m128 in1c = _mm_loadu_ps(&M[8]); + register __m128 in2 = _mm_loadu_ps(&v.x); // WARNING: Read 
goes past size of CVector! + register __m128 tempsplat; + register __m128 tempmul; + register __m128 out; + tempsplat = _mm_shuffle_ps(in2, in2, _MM_SHUFFLE(0, 0, 0, 0)); + out = _mm_mul_ps(in1a, tempsplat); + tempsplat = _mm_shuffle_ps(in2, in2, _MM_SHUFFLE(1, 1, 1, 1)); + tempmul = _mm_mul_ps(in1b, tempsplat); + out = _mm_add_ps(out, tempmul); + tempsplat = _mm_shuffle_ps(in2, in2, _MM_SHUFFLE(2, 2, 2, 2)); + tempmul = _mm_mul_ps(in1c, tempsplat); + out = _mm_add_ps(out, tempmul); + _mm_storeu_ps(&ret.x, out); + return ret; // WARNING: Write goes past size of CVector (this occurs on the stack)! + } + else + { + return v; + } +#else CVector ret; if( hasRot() ) @@ -1253,6 +1372,7 @@ CVector CMatrix::mulVector(const CVector &v) const } else return v; +#endif } // ====================================================================================================== @@ -1263,9 +1383,31 @@ CVector CMatrix::mulPoint(const CVector &v) const if( hasRot() ) { +#ifdef USE_SSE2 + register __m128 in1a = _mm_loadu_ps(&M[0]); + register __m128 in1b = _mm_loadu_ps(&M[4]); + register __m128 in1c = _mm_loadu_ps(&M[8]); + register __m128 in1d = _mm_loadu_ps(&M[12]); + register __m128 in2 = _mm_loadu_ps(&v.x); // WARNING: Read goes past size of CVector! + register __m128 tempsplat; + register __m128 tempmul; + register __m128 out; + tempsplat = _mm_shuffle_ps(in2, in2, _MM_SHUFFLE(0, 0, 0, 0)); + out = _mm_mul_ps(in1a, tempsplat); + tempsplat = _mm_shuffle_ps(in2, in2, _MM_SHUFFLE(1, 1, 1, 1)); + tempmul = _mm_mul_ps(in1b, tempsplat); + out = _mm_add_ps(out, tempmul); + tempsplat = _mm_shuffle_ps(in2, in2, _MM_SHUFFLE(2, 2, 2, 2)); + tempmul = _mm_mul_ps(in1c, tempsplat); + out = _mm_add_ps(out, tempmul); + out = _mm_add_ps(out, in1d); + _mm_storeu_ps(&ret.x, out); + return ret; // WARNING: Write goes past size of CVector (this occurs on the stack)! 
+#else ret.x= a11*v.x + a12*v.y + a13*v.z; ret.y= a21*v.x + a22*v.y + a23*v.z; ret.z= a31*v.x + a32*v.y + a33*v.z; +#endif } else { diff --git a/code/nel/src/misc/polygon.cpp b/code/nel/src/misc/polygon.cpp index b541d2eba..2cd60058d 100644 --- a/code/nel/src/misc/polygon.cpp +++ b/code/nel/src/misc/polygon.cpp @@ -249,7 +249,7 @@ public: Back = NULL; Front = NULL; } - CBSPNode2v ( const CPlane &plane, CVector p0, CVector p1, uint v0, uint v1 ) : Plane (plane), P0 (p0), P1 (p1) + CBSPNode2v ( const CPlane &plane, const CVector &p0, const CVector &p1, uint v0, uint v1 ) : Plane (plane), P0 (p0), P1 (p1) { Back = NULL; Front = NULL; diff --git a/code/nel/src/pacs/chain_quad.cpp b/code/nel/src/pacs/chain_quad.cpp index 321761953..c7af9785b 100644 --- a/code/nel/src/pacs/chain_quad.cpp +++ b/code/nel/src/pacs/chain_quad.cpp @@ -344,8 +344,11 @@ sint CChainQuad::selectEdges(const NLMISC::CAABBox &bbox, CCollisionSurfaceTem return nRes; } -sint CChainQuad::selectEdges(CVector start, CVector end, CCollisionSurfaceTemp &cst) const +sint CChainQuad::selectEdges(const CVector &startp, const CVector &endp, CCollisionSurfaceTemp &cst) const { + CVector start = startp; + CVector end = endp; + sint nRes=0; sint i; uint16 *ochainLUT= cst.OChainLUT; diff --git a/code/nel/src/pacs/edge_quad.cpp b/code/nel/src/pacs/edge_quad.cpp index 1515af075..14082a3b2 100644 --- a/code/nel/src/pacs/edge_quad.cpp +++ b/code/nel/src/pacs/edge_quad.cpp @@ -453,8 +453,11 @@ sint CEdgeQuad::selectEdges(const NLMISC::CAABBox &bbox, CCollisionSurfaceTemp return nRes; } -sint CEdgeQuad::selectEdges(CVector start, CVector end, CCollisionSurfaceTemp &cst) const +sint CEdgeQuad::selectEdges(const CVector &startp, const CVector &endp, CCollisionSurfaceTemp &cst) const { + CVector start = startp; + CVector end = endp; + sint nRes=0; sint i; uint16 *indexLUT= cst.OChainLUT; diff --git a/code/nel/src/pacs/local_retriever.cpp b/code/nel/src/pacs/local_retriever.cpp index 7158cee0a..1b18a6052 100644 --- 
a/code/nel/src/pacs/local_retriever.cpp +++ b/code/nel/src/pacs/local_retriever.cpp @@ -1052,7 +1052,7 @@ bool NLPACS::CLocalRetriever::testPosition(NLPACS::ULocalPosition &local, CColli } -void NLPACS::CLocalRetriever::retrievePosition(CVector estimated, CCollisionSurfaceTemp &cst) const +void NLPACS::CLocalRetriever::retrievePosition(const CVector &estimated, CCollisionSurfaceTemp &cst) const { if (!_Loaded) return; @@ -2200,7 +2200,7 @@ void NLPACS::CLocalRetriever::replaceChain(uint32 chainId, const std::vector= _Surfaces.size()) return false; diff --git a/code/ryzom/client/src/decal.cpp b/code/ryzom/client/src/decal.cpp index 1454d9f59..bfcf4dc4b 100644 --- a/code/ryzom/client/src/decal.cpp +++ b/code/ryzom/client/src/decal.cpp @@ -433,10 +433,16 @@ void CDecal::renderTriCache(NL3D::IDriver &drv, NL3D::CShadowPolyReceiver &/* float bottomBlendBias = bottomBlendScale * (_RefPosition.z - _BottomBlendZMin); do { +#if USE_SSE2 + dest->X = srcVert->X; + dest->Y = srcVert->Y; + dest->Z = srcVert->Z; +#else dest->V = srcVert->V; - float dist = (camPos - srcVert->V).norm(); +#endif + float dist = (camPos - srcVert->asVector()).norm(); float intensity = scale * dist + bias; - float bottomBlend = srcVert->V.z * bottomBlendScale + bottomBlendBias; + float bottomBlend = srcVert->asVector().z * bottomBlendScale + bottomBlendBias; clamp(bottomBlend, 0.f, 1.f); clamp(intensity, 0.f, 255.f); intensity *= bottomBlend; From e8852d630ed353eb822d2b140f7eed096f01bd07 Mon Sep 17 00:00:00 2001 From: kaetemi Date: Thu, 12 Jun 2014 22:02:15 +0200 Subject: [PATCH 02/21] SSE2: Add FIXME_SSE2 notes --HG-- branch : sse2 --- code/nel/include/nel/3d/shadow_poly_receiver.h | 2 +- code/nel/include/nel/misc/matrix.h | 5 +++++ code/nel/include/nel/misc/vector.h | 5 +++++ code/nel/src/3d/mesh_mrm_skin_template.cpp | 2 +- code/nel/src/3d/mesh_mrm_skinned_template.cpp | 2 +- 5 files changed, 13 insertions(+), 3 deletions(-) diff --git a/code/nel/include/nel/3d/shadow_poly_receiver.h 
b/code/nel/include/nel/3d/shadow_poly_receiver.h index c781578ea..0d97a00ad 100644 --- a/code/nel/include/nel/3d/shadow_poly_receiver.h +++ b/code/nel/include/nel/3d/shadow_poly_receiver.h @@ -83,7 +83,7 @@ public: ); // a vertex - struct CRGBAVertex + struct CRGBAVertex // FIXME_SSE2 { #if USE_SSE2 float X, Y, Z; diff --git a/code/nel/include/nel/misc/matrix.h b/code/nel/include/nel/misc/matrix.h index 700eb4a14..7c7d7d666 100644 --- a/code/nel/include/nel/misc/matrix.h +++ b/code/nel/include/nel/misc/matrix.h @@ -53,6 +53,7 @@ class CPlane; * \author Nevrax France * \date 2000 */ +NL_ALIGN(16) class CMatrix { public: @@ -363,6 +364,10 @@ private: float Scale33; uint32 StateBit; // BitVector. 0<=>identity. +#if USE_SSE2 + void setMulMatrixSSE2(const CMatrix &m1, const CMatrix &m2); +#endif + // Methods For inversion. bool fastInvert33(CMatrix &ret) const; bool slowInvert33(CMatrix &ret) const; diff --git a/code/nel/include/nel/misc/vector.h b/code/nel/include/nel/misc/vector.h index bbf7001b7..46df7edce 100644 --- a/code/nel/include/nel/misc/vector.h +++ b/code/nel/include/nel/misc/vector.h @@ -36,11 +36,16 @@ class IStream; * \author Nevrax France * \date 2000 */ +// NL_ALIGN(16) // FIXME_SSE2 class CVector { public: // Attributes. float x,y,z; +/*#ifdef USE_SSE2 // FIXME_SSE2 + float w; // Padding +#endif*/ + public: // const. /// Null vector (0,0,0). 
static const CVector Null; diff --git a/code/nel/src/3d/mesh_mrm_skin_template.cpp b/code/nel/src/3d/mesh_mrm_skin_template.cpp index 808dce31a..6e6c160ae 100644 --- a/code/nel/src/3d/mesh_mrm_skin_template.cpp +++ b/code/nel/src/3d/mesh_mrm_skin_template.cpp @@ -494,7 +494,7 @@ void CMeshMRMGeom::applySkinWithTangentSpace(CLod &lod, const CSkeletonModel *sk On a P4-2.4Ghz, for 40000 vertices skinned, both no precaching and asm saves 27% of execution time in the applyRawSkinNormal*() loop (ie 1 ms) */ -#if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM) &&!defined(USE_SSE2) +#if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM) &&!defined(USE_SSE2) // FIXME_SSE2 //#define NL3D_RAWSKIN_PRECACHE #define NL3D_RAWSKIN_ASM #endif diff --git a/code/nel/src/3d/mesh_mrm_skinned_template.cpp b/code/nel/src/3d/mesh_mrm_skinned_template.cpp index be072713f..e60a5632b 100644 --- a/code/nel/src/3d/mesh_mrm_skinned_template.cpp +++ b/code/nel/src/3d/mesh_mrm_skinned_template.cpp @@ -43,7 +43,7 @@ On a P4-2.4Ghz, for 40000 vertices skinned, both no precaching and asm saves 27% of execution time in the applyRawSkinNormal*() loop (ie 1 ms) */ -#if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM) &&!defined(USE_SSE2) +#if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM) &&!defined(USE_SSE2) // FIXME_SSE2 //#define NL3D_RAWSKIN_PRECACHE #define NL3D_RAWSKIN_ASM #endif From 4c86f536ae571c07e2096640cdf5857a42833254 Mon Sep 17 00:00:00 2001 From: kaetemi Date: Thu, 12 Jun 2014 22:54:13 +0200 Subject: [PATCH 03/21] Disable outdated assembly --HG-- branch : sse2 --- code/nel/src/3d/mesh_mrm_skin_template.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/code/nel/src/3d/mesh_mrm_skin_template.cpp b/code/nel/src/3d/mesh_mrm_skin_template.cpp index 6e6c160ae..6b85326ee 100644 --- a/code/nel/src/3d/mesh_mrm_skin_template.cpp +++ b/code/nel/src/3d/mesh_mrm_skin_template.cpp @@ -39,7 +39,7 @@ static void applyArraySkinNormalT(uint numMatrixes, uint32 *infPtr, CMesh::CSkin { /* 
Prefetch all vertex/normal before, it is to be faster. */ -#if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM) +#if 0// defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM) { uint nInfTmp= nInf; uint32 *infTmpPtr= infPtr; @@ -176,7 +176,7 @@ static void applyArraySkinTangentSpaceT(uint numMatrixes, uint32 *infPtr, CMesh: { /* Prefetch all vertex/normal/tgSpace before, it is faster. */ -#if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM) +#if 0 // defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM) { uint nInfTmp= nInf; uint32 *infTmpPtr= infPtr; From dbb966c8a5e81ce8f6367cf436e50c0453ad6da3 Mon Sep 17 00:00:00 2001 From: kaetemi Date: Thu, 12 Jun 2014 22:54:36 +0200 Subject: [PATCH 04/21] SSE2: Some reference --HG-- branch : sse2 --- code/nel/include/nel/3d/matrix_3x4.h | 201 ++++++++++++++++++++++++++- 1 file changed, 200 insertions(+), 1 deletion(-) diff --git a/code/nel/include/nel/3d/matrix_3x4.h b/code/nel/include/nel/3d/matrix_3x4.h index d7ed660fc..94aee3a25 100644 --- a/code/nel/include/nel/3d/matrix_3x4.h +++ b/code/nel/include/nel/3d/matrix_3x4.h @@ -116,7 +116,7 @@ public: // *************************************************************************** -#if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM) +#if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM) && !defined(USE_SSE2) /** For fast vector/point multiplication. Special usage for Skinning. @@ -376,6 +376,205 @@ public: }; +#elif 0 + +NL_ALIGN(16) +struct CVectorSSEAligned // struct (not class) so that f is accessible to CMatrix3x4SSE +{ + float f[4]; +}; + +/** For fast vector/point multiplication. Special usage for Skinning. + */ +NL_ALIGN(16) +class CMatrix3x4SSE +{ +public: + __m128 c1, c2, c3, c4; + + // Copy from a matrix. + inline void set(const CMatrix &mat) + { + const float *m = mat.get(); + register __m128 xmm0 = _mm_loadu_ps(&m[0]); + register __m128 xmm1 = _mm_loadu_ps(&m[4]); + register __m128 xmm2 = _mm_loadu_ps(&m[8]); + register __m128 xmm3 = _mm_loadu_ps(&m[12]); + c1 = xmm0; + c2 = xmm1; + c3 = xmm2; + c4 = xmm3; + } + + // mulSetvector.
NB: in should be different as v!! (else don't work). + inline void mulSetVector(const CVector &vin, CVector &vout) + { + CVectorSSEAligned outf; // FIXME_SSE2 + + register __m128 xmm0 = _mm_loadu_ps(&vin.x); // WARNING: Reads beyond CVector size! // FIXME_SSE2: Align CVector + + register __m128 xmm1 = _mm_shuffle_ps(xmm0, xmm0, _MM_SHUFFLE(1, 1, 1, 1)); + register __m128 xmm2 = _mm_shuffle_ps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2)); + xmm0 = _mm_shuffle_ps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); + + xmm0 = _mm_mul_ps(xmm0, c1); + xmm1 = _mm_mul_ps(xmm1, c2); + xmm2 = _mm_mul_ps(xmm2, c3); + + xmm0 = _mm_add_ps(xmm0, xmm1); + xmm0 = _mm_add_ps(xmm0, xmm2); + + _mm_store_ps(&outf.f[0], xmm0); + vout.x = outf.f[0]; // FIXME_SSE2 + vout.y = outf.f[1]; + vout.z = outf.f[2]; + } + + // mulSetpoint. NB: in should be different as v!! (else don't work). + inline void mulSetPoint(const CVector &vin, CVector &vout) + { + CVectorSSEAligned outf; // FIXME_SSE2 + + register __m128 xmm0 = _mm_loadu_ps(&vin.x); // WARNING: Reads beyond CVector size! // FIXME_SSE2: Align CVector + + register __m128 xmm1 = _mm_shuffle_ps(xmm0, xmm0, _MM_SHUFFLE(1, 1, 1, 1)); + register __m128 xmm2 = _mm_shuffle_ps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2)); + xmm0 = _mm_shuffle_ps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); + + xmm0 = _mm_mul_ps(xmm0, c1); + xmm1 = _mm_mul_ps(xmm1, c2); + xmm2 = _mm_mul_ps(xmm2, c3); + + xmm0 = _mm_add_ps(xmm0, xmm1); + xmm0 = _mm_add_ps(xmm0, xmm2); + + xmm0 = _mm_add_ps(xmm0, c4); + + _mm_store_ps(&outf.f[0], xmm0); + vout.x = outf.f[0]; // FIXME_SSE2 + vout.y = outf.f[1]; + vout.z = outf.f[2]; + } + + + // mulSetvector. NB: vin should be different as v!! (else don't work). + inline void mulSetVector(const CVector &vin, float scale, CVector &vout) + { + CVectorSSEAligned outf; // FIXME_SSE2 + + register __m128 xmm0 = _mm_loadu_ps(&vin.x); // WARNING: Reads beyond CVector size!
// FIXME_SSE2: Align CVector + + register __m128 xmm1 = _mm_shuffle_ps(xmm0, xmm0, _MM_SHUFFLE(1, 1, 1, 1)); + register __m128 xmm2 = _mm_shuffle_ps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2)); + xmm0 = _mm_shuffle_ps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); + + register __m128 xmm3 = _mm_set1_ps(scale); + + xmm0 = _mm_mul_ps(xmm0, c1); + xmm1 = _mm_mul_ps(xmm1, c2); + xmm2 = _mm_mul_ps(xmm2, c3); + + xmm0 = _mm_add_ps(xmm0, xmm1); + xmm0 = _mm_add_ps(xmm0, xmm2); + + xmm0 = _mm_mul_ps(xmm0, xmm3); + + _mm_store_ps(&outf.f[0], xmm0); + vout.x = outf.f[0]; // FIXME_SSE2 + vout.y = outf.f[1]; + vout.z = outf.f[2]; + } + // mulSetpoint. NB: vin should be different as v!! (else don't work). + inline void mulSetPoint(const CVector &vin, float scale, CVector &vout) + { + CVectorSSEAligned outf; // FIXME_SSE2 + + register __m128 xmm0 = _mm_loadu_ps(&vin.x); // WARNING: Reads beyond CVector size! // FIXME_SSE2: Align CVector + + register __m128 xmm1 = _mm_shuffle_ps(xmm0, xmm0, _MM_SHUFFLE(1, 1, 1, 1)); + register __m128 xmm2 = _mm_shuffle_ps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2)); + xmm0 = _mm_shuffle_ps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); + + register __m128 xmm3 = _mm_set1_ps(scale); + + xmm0 = _mm_mul_ps(xmm0, c1); + xmm1 = _mm_mul_ps(xmm1, c2); + xmm2 = _mm_mul_ps(xmm2, c3); + + xmm0 = _mm_add_ps(xmm0, xmm1); + xmm0 = _mm_add_ps(xmm0, xmm2); + + xmm0 = _mm_add_ps(xmm0, c4); + + xmm0 = _mm_mul_ps(xmm0, xmm3); + + _mm_store_ps(&outf.f[0], xmm0); + vout.x = outf.f[0]; // FIXME_SSE2 + vout.y = outf.f[1]; + vout.z = outf.f[2]; + } + + + // mulAddvector. NB: vin should be different as v!! (else don't work). + inline void mulAddVector(const CVector &vin, float scale, CVector &vout) + { + CVectorSSEAligned outf; // FIXME_SSE2 + + register __m128 xmm0 = _mm_loadu_ps(&vin.x); // WARNING: Reads beyond CVector size!
// FIXME_SSE2: Align CVector + + register __m128 xmm1 = _mm_shuffle_ps(xmm0, xmm0, _MM_SHUFFLE(1, 1, 1, 1)); + register __m128 xmm2 = _mm_shuffle_ps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2)); + xmm0 = _mm_shuffle_ps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); + + register __m128 xmm3 = _mm_set1_ps(scale); + + xmm0 = _mm_mul_ps(xmm0, c1); + xmm1 = _mm_mul_ps(xmm1, c2); + xmm2 = _mm_mul_ps(xmm2, c3); + + xmm0 = _mm_add_ps(xmm0, xmm1); + xmm0 = _mm_add_ps(xmm0, xmm2); + + xmm0 = _mm_mul_ps(xmm0, xmm3); + + _mm_store_ps(&outf.f[0], xmm0); + vout.x += outf.f[0]; // FIXME_SSE2 + vout.y += outf.f[1]; + vout.z += outf.f[2]; + } + + // mulAddpoint. NB: vin should be different as v!! (else don't work). + inline void mulAddPoint(const CVector &vin, float scale, CVector &vout) + { + CVectorSSEAligned outf; // FIXME_SSE2 + + register __m128 xmm0 = _mm_loadu_ps(&vin.x); // WARNING: Reads beyond CVector size! // FIXME_SSE2: Align CVector + + register __m128 xmm1 = _mm_shuffle_ps(xmm0, xmm0, _MM_SHUFFLE(1, 1, 1, 1)); + register __m128 xmm2 = _mm_shuffle_ps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2)); + xmm0 = _mm_shuffle_ps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); + + register __m128 xmm3 = _mm_set1_ps(scale); + + xmm0 = _mm_mul_ps(xmm0, c1); + xmm1 = _mm_mul_ps(xmm1, c2); + xmm2 = _mm_mul_ps(xmm2, c3); + + xmm0 = _mm_add_ps(xmm0, xmm1); + xmm0 = _mm_add_ps(xmm0, xmm2); + + xmm0 = _mm_add_ps(xmm0, c4); + + xmm0 = _mm_mul_ps(xmm0, xmm3); + + _mm_store_ps(&outf.f[0], xmm0); + vout.x += outf.f[0]; // FIXME_SSE2 + vout.y += outf.f[1]; + vout.z += outf.f[2]; + } + +}; + #else // NL_OS_WINDOWS /// dummy CMatrix3x4SSE for non windows platform class CMatrix3x4SSE : public CMatrix3x4 { }; From 95fb48fbfc6bccf315955397144747ab64e67500 Mon Sep 17 00:00:00 2001 From: kaetemi Date: Thu, 12 Jun 2014 23:40:12 +0200 Subject: [PATCH 05/21] SSE2: Prepare for CVector alignment --HG-- branch : sse2 --- code/nel/include/nel/3d/mesh_mrm_skinned.h | 17 ++ code/nel/include/nel/3d/packed_zone.h | 2 + code/nel/include/nel/3d/raw_skin.h |
12 +- code/nel/include/nel/3d/vertex_buffer.h | 8 +- code/nel/include/nel/misc/vector.h | 37 ++++ code/nel/src/3d/cloud.cpp | 56 ++--- code/nel/src/3d/mesh_morpher.cpp | 18 +- code/nel/src/3d/mesh_mrm_skin_template.cpp | 205 +++++++++++------- code/nel/src/3d/mesh_mrm_skinned_template.cpp | 23 +- code/nel/src/3d/noise_3d.cpp | 52 ++--- code/nel/src/3d/packed_zone.cpp | 41 +++- code/nel/src/3d/vegetable_shape.cpp | 8 +- code/nel/src/3d/vertex_buffer.cpp | 16 +- .../client/src/landscape_poly_drawer.cpp | 2 +- 14 files changed, 320 insertions(+), 177 deletions(-) diff --git a/code/nel/include/nel/3d/mesh_mrm_skinned.h b/code/nel/include/nel/3d/mesh_mrm_skinned.h index 9e43f8fb9..e3cec562b 100644 --- a/code/nel/include/nel/3d/mesh_mrm_skinned.h +++ b/code/nel/include/nel/3d/mesh_mrm_skinned.h @@ -43,6 +43,7 @@ namespace NL3D using NLMISC::CVector; +using NLMISC::CVectorPacked; using NLMISC::CPlane; using NLMISC::CMatrix; class CMRMBuilder; @@ -405,12 +406,24 @@ public: uint8 Weights[NL3D_MESH_MRM_SKINNED_MAX_MATRIX]; // Decompact it + inline void getPos (CVectorPacked &dest, float factor) const + { + dest.x = (float)X * factor; + dest.y = (float)Y * factor; + dest.z = (float)Z * factor; + } inline void getPos (CVector &dest, float factor) const { dest.x = (float)X * factor; dest.y = (float)Y * factor; dest.z = (float)Z * factor; } + inline void getNormal (CVectorPacked &dest) const + { + dest.x = (float)Nx * (1.f/NL3D_MESH_MRM_SKINNED_NORMAL_FACTOR); + dest.y = (float)Ny * (1.f/NL3D_MESH_MRM_SKINNED_NORMAL_FACTOR); + dest.z = (float)Nz * (1.f/NL3D_MESH_MRM_SKINNED_NORMAL_FACTOR); + } inline void getNormal (CVector &dest) const { dest.x = (float)Nx * (1.f/NL3D_MESH_MRM_SKINNED_NORMAL_FACTOR); @@ -480,6 +493,10 @@ public: } // Decompact position + inline void getPos (CVectorPacked &dest, const CPackedVertex &src) const + { + src.getPos (dest, _DecompactScale); + } inline void getPos (CVector &dest, const CPackedVertex &src) const { src.getPos (dest, _DecompactScale); 
diff --git a/code/nel/include/nel/3d/packed_zone.h b/code/nel/include/nel/3d/packed_zone.h index 2beb5f5ed..191eea0df 100644 --- a/code/nel/include/nel/3d/packed_zone.h +++ b/code/nel/include/nel/3d/packed_zone.h @@ -164,6 +164,7 @@ private: void addInstance(const CShapeInfo &si, const NLMISC::CMatrix &matrix, TVertexGrid &vertexGrid, TTriListGrid &triListGrid); public: // PRIVATE : unpack a packed tri + void unpackTri(const CPackedTri &src, NLMISC::CVectorPacked dest[3]) const; void unpackTri(const CPackedTri &src, NLMISC::CVector dest[3]) const; }; @@ -197,6 +198,7 @@ private: NLMISC::CVector _PackedLocalToWorld; public: // PRIVATE : unpack a packed tri + void unpackTri(const CPackedTri16 &src, NLMISC::CVectorPacked dest[3]) const; void unpackTri(const CPackedTri16 &src, NLMISC::CVector dest[3]) const; }; diff --git a/code/nel/include/nel/3d/raw_skin.h b/code/nel/include/nel/3d/raw_skin.h index dbf263326..59c3e2c16 100644 --- a/code/nel/include/nel/3d/raw_skin.h +++ b/code/nel/include/nel/3d/raw_skin.h @@ -30,15 +30,21 @@ namespace NL3D using NLMISC::CVector; +using NLMISC::CVectorPacked; using NLMISC::CUV; /// A simple Vertex Pos/Normal/Uv class CRawSkinVertex { public: - CVector Pos; - CVector Normal; - CUV UV; +#if USE_SSE2 + CVectorPacked Pos; + CVectorPacked Normal; +#else + CVector Pos; + CVector Normal; +#endif + CUV UV; }; /// Vertices influenced by 1 matrix only. diff --git a/code/nel/include/nel/3d/vertex_buffer.h b/code/nel/include/nel/3d/vertex_buffer.h index 6c269ec6c..fbce363d9 100644 --- a/code/nel/include/nel/3d/vertex_buffer.h +++ b/code/nel/include/nel/3d/vertex_buffer.h @@ -790,8 +790,8 @@ public: * A call to IDriver::activeVertexBuffer() will change this format to the format returned by IDriver::getVertexColorFormat(). * So, before each write of vertex color in the vertex buffer, the vertex color format must be checked with CVertexBuffer::getVertexColorFormat(). 
*/ - NLMISC::CVector* getVertexCoordPointer(uint idx=0); - NLMISC::CVector* getNormalCoordPointer(uint idx=0); + NLMISC::CVectorPacked* getVertexCoordPointer(uint idx=0); + NLMISC::CVectorPacked* getNormalCoordPointer(uint idx=0); NLMISC::CUV* getTexCoordPointer(uint idx=0, uint8 stage=0); void* getColorPointer(uint idx=0); void* getSpecularPointer(uint idx=0); @@ -854,8 +854,8 @@ public: * A call to IDriver::activeVertexBuffer() will change this format to the format returned by IDriver::getVertexColorFormat(). * So, before each write of vertex color in the vertex buffer, the vertex color format must be checked with CVertexBuffer::getVertexColorFormat(). */ - const NLMISC::CVector* getVertexCoordPointer(uint idx=0) const; - const NLMISC::CVector* getNormalCoordPointer(uint idx=0) const; + const NLMISC::CVectorPacked* getVertexCoordPointer(uint idx=0) const; + const NLMISC::CVectorPacked* getNormalCoordPointer(uint idx=0) const; const NLMISC::CUV* getTexCoordPointer(uint idx=0, uint8 stage=0) const; const void* getColorPointer(uint idx=0) const; const void* getSpecularPointer(uint idx=0) const; diff --git a/code/nel/include/nel/misc/vector.h b/code/nel/include/nel/misc/vector.h index 46df7edce..ff3db1312 100644 --- a/code/nel/include/nel/misc/vector.h +++ b/code/nel/include/nel/misc/vector.h @@ -141,6 +141,43 @@ public: // Methods. friend CVector operator*(float f, const CVector &v0); }; +class CVectorPacked +{ +public: // Attributes. + float x,y,z; + +public: + /// @name Object. + //@{ + /// Constructor which does nothing. + CVectorPacked() { } + /// Constructor . + CVectorPacked(float _x, float _y, float _z) : x(_x), y(_y), z(_z) {} + /// Copy Constructor. 
+ CVectorPacked(const CVector &v) : x(v.x), y(v.y), z(v.z) {} + //@} + + void set(float _x, float _y, float _z) + { + x = _x; + y = _y; + z = _z; + } + + CVectorPacked &operator += (const CVector &v) + { + x += v.x; + y += v.y; + z += v.z; + return *this; + } + + operator CVector () const + { + return CVector(x, y, z); + } +}; + // blend (faster version than the generic version found in algo.h) inline CVector blend(const CVector &v0, const CVector &v1, float lambda) { diff --git a/code/nel/src/3d/cloud.cpp b/code/nel/src/3d/cloud.cpp index 2606f9ad5..280ba2f04 100644 --- a/code/nel/src/3d/cloud.cpp +++ b/code/nel/src/3d/cloud.cpp @@ -120,10 +120,10 @@ void CCloud::generate (CNoise3d &noise) { CVertexBufferReadWrite vba; rVB.lock (vba); - CVector *pVertices = vba.getVertexCoordPointer (0); - *pVertices = CVector(0.0f, 0.0f, 0.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize ); - *pVertices = CVector((float)_NbW*_Width,0.0f, 0.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize ); - *pVertices = CVector((float)_NbW*_Width,(float)_NbH*_Height,0.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize ); + CVectorPacked *pVertices = vba.getVertexCoordPointer (0); + *pVertices = CVector(0.0f, 0.0f, 0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); + *pVertices = CVector((float)_NbW*_Width,0.0f, 0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); + *pVertices = CVector((float)_NbW*_Width,(float)_NbH*_Height,0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); *pVertices = CVector(0.0f, (float)_NbH*_Height,0.0f); _CloudScape->_MatClear.setColor (CRGBA(0,0,0,0)); } @@ -197,10 +197,10 @@ void CCloud::light () { CVertexBufferReadWrite vba; rVB.lock (vba); - CVector *pVertices = vba.getVertexCoordPointer (0); - *pVertices = CVector((float)0.0f, (float)0.0f, 0.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize ); - *pVertices = CVector((float)1.f, (float)0.0f, 0.0f); pVertices = (CVector*)( 
((uint8*)pVertices) + nVSize ); - *pVertices = CVector((float)1.f, (float)1.f, 0.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize ); + CVectorPacked *pVertices = vba.getVertexCoordPointer (0); + *pVertices = CVector((float)0.0f, (float)0.0f, 0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); + *pVertices = CVector((float)1.f, (float)0.0f, 0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); + *pVertices = CVector((float)1.f, (float)1.f, 0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); *pVertices = CVector((float)0.0f, (float)1.f, 0.0f); } @@ -340,10 +340,10 @@ void CCloud::reset (NL3D::CCamera *pViewer) CVertexBufferReadWrite vba; rVB.lock (vba); uint32 nVSize = rVB.getVertexSize (); - CVector *pVertices = vba.getVertexCoordPointer (0); - *pVertices = CVector(0.0f, 0.0f, 0.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize ); - *pVertices = CVector(5.0f, 0.0f, 0.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize ); - *pVertices = CVector(5.0f, 5.0f, 0.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize ); + CVectorPacked *pVertices = vba.getVertexCoordPointer (0); + *pVertices = CVector(0.0f, 0.0f, 0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); + *pVertices = CVector(5.0f, 0.0f, 0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); + *pVertices = CVector(5.0f, 5.0f, 0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); *pVertices = CVector(0.0f, 5.0f, 0.0f); _CloudScape->_MatClear.setColor (CRGBA(0,0,0,0)); } @@ -469,7 +469,7 @@ void CCloud::dispXYZ (CMaterial *pMat) float oneOverNbWNbH = 1.0f / (_NbW*_NbH); CVertexBuffer &rVB = _CloudScape->_VertexBuffer; uint32 nVSize = rVB.getVertexSize (); - CVector *pVertices; + CVectorPacked *pVertices; CUV *pUV; _Driver->activeVertexBuffer (rVB); @@ -487,9 +487,9 @@ void CCloud::dispXYZ (CMaterial *pMat) rVB.lock (vba); pVertices = vba.getVertexCoordPointer (0); - *pVertices = CVector(_Pos.x, 
_Pos.y, _Pos.z+_Size.z*(_NbW*_NbH-d)*oneOverNbWNbH); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize ); - *pVertices = CVector(_Pos.x+_Size.x, _Pos.y, _Pos.z+_Size.z*(_NbW*_NbH-d)*oneOverNbWNbH); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize ); - *pVertices = CVector(_Pos.x+_Size.x, _Pos.y+_Size.y, _Pos.z+_Size.z*(_NbW*_NbH-d)*oneOverNbWNbH); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize ); + *pVertices = CVector(_Pos.x, _Pos.y, _Pos.z+_Size.z*(_NbW*_NbH-d)*oneOverNbWNbH); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); + *pVertices = CVector(_Pos.x+_Size.x, _Pos.y, _Pos.z+_Size.z*(_NbW*_NbH-d)*oneOverNbWNbH); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); + *pVertices = CVector(_Pos.x+_Size.x, _Pos.y+_Size.y, _Pos.z+_Size.z*(_NbW*_NbH-d)*oneOverNbWNbH); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); *pVertices = CVector(_Pos.x, _Pos.y+_Size.y, _Pos.z+_Size.z*(_NbW*_NbH-d)*oneOverNbWNbH); pUV = vba.getTexCoordPointer (0, 0); @@ -512,10 +512,10 @@ void CCloud::dispXYZ (CMaterial *pMat) { CVertexBufferReadWrite vba; rVB.lock (vba); - CVector *pVertices = vba.getVertexCoordPointer (0); - *pVertices = CVector((float)0.25f, 0, (float)0.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize ); - *pVertices = CVector((float)0.75f, 0, (float)0.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize ); - *pVertices = CVector((float)0.75f, 0, (float)0.75f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize ); + CVectorPacked *pVertices = vba.getVertexCoordPointer (0); + *pVertices = CVector((float)0.25f, 0, (float)0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); + *pVertices = CVector((float)0.75f, 0, (float)0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); + *pVertices = CVector((float)0.75f, 0, (float)0.75f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); *pVertices = CVector((float)0.25f, 0, (float)0.75f); } } @@ -664,10 +664,10 @@ void 
CCloud::genBill (CCamera *pCam, uint32 nBillSize) CVertexBufferReadWrite vba; rVB.lock (vba); { - CVector *pVertices = vba.getVertexCoordPointer (0); - *pVertices = CVector(0.0f, 0.0f, 0.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize ); - *pVertices = CVector(1.0f, 0.0f, 0.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize ); - *pVertices = CVector(1.0f, 0.0f, 1.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize ); + CVectorPacked *pVertices = vba.getVertexCoordPointer (0); + *pVertices = CVector(0.0f, 0.0f, 0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); + *pVertices = CVector(1.0f, 0.0f, 0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); + *pVertices = CVector(1.0f, 0.0f, 1.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); *pVertices = CVector(0.0f, 0.0f, 1.0f); } } @@ -782,10 +782,10 @@ void CCloud::dispBill (CCamera *pCam) rVB.lock (vba); uint32 nVSize = rVB.getVertexSize (); - CVector *pVertices = vba.getVertexCoordPointer (0); - *pVertices = qc.V0; pVertices = (CVector*)( ((uint8*)pVertices) + nVSize ); - *pVertices = qc.V1; pVertices = (CVector*)( ((uint8*)pVertices) + nVSize ); - *pVertices = qc.V2; pVertices = (CVector*)( ((uint8*)pVertices) + nVSize ); + CVectorPacked *pVertices = vba.getVertexCoordPointer (0); + *pVertices = qc.V0; pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); + *pVertices = qc.V1; pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); + *pVertices = qc.V2; pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); *pVertices = qc.V3; CUV *pUV = vba.getTexCoordPointer (0, 0); diff --git a/code/nel/src/3d/mesh_morpher.cpp b/code/nel/src/3d/mesh_morpher.cpp index 4d7ed8255..bfca4b7c7 100644 --- a/code/nel/src/3d/mesh_morpher.cpp +++ b/code/nel/src/3d/mesh_morpher.cpp @@ -163,14 +163,14 @@ void CMeshMorpher::update (std::vector *pBSFactor) if (_VBDst->getVertexFormat() & CVertexBuffer::PositionFlag) if (rBS.deltaPos.size() > 0) { - 
CVector *pV = dstvba.getVertexCoordPointer (vp); + CVectorPacked *pV = dstvba.getVertexCoordPointer (vp); *pV += rBS.deltaPos[j] * rFactor; } if (_VBDst->getVertexFormat() & CVertexBuffer::NormalFlag) if (rBS.deltaNorm.size() > 0) { - CVector *pV = dstvba.getNormalCoordPointer (vp); + CVectorPacked *pV = dstvba.getNormalCoordPointer (vp); *pV += rBS.deltaNorm[j] * rFactor; } @@ -264,13 +264,13 @@ void CMeshMorpher::updateSkinned (std::vector *pBSFactor) pDst[j+i*VBVertexSize] = pOri[j+i*VBVertexSize]; if (_Vertices != NULL) - _Vertices->operator[](i) = ((CVector*)(pOri+i*VBVertexSize))[0]; + _Vertices->operator[](i) = ((CVectorPacked*)(pOri+i*VBVertexSize))[0]; if (_Normals != NULL) - _Normals->operator[](i) = ((CVector*)(pOri+i*VBVertexSize))[1]; + _Normals->operator[](i) = ((CVectorPacked*)(pOri+i*VBVertexSize))[1]; if (_TgSpace != NULL) - (*_TgSpace)[i] = * (CVector*)(pOri + i * VBVertexSize + tgSpaceOff); + (*_TgSpace)[i] = * (CVectorPacked*)(pOri + i * VBVertexSize + tgSpaceOff); _Flags[i] = OriginalVBDst; } @@ -388,8 +388,8 @@ void CMeshMorpher::updateRawSkin (CVertexBuffer *vbOri, { if(*vRemap) { - (*vRemap)->Pos= *(CVector*)(pOri); - (*vRemap)->Normal= *(CVector*)(pOri + NL3D_RAWSKIN_NORMAL_OFF); + (*vRemap)->Pos= *(CVectorPacked*)(pOri); + (*vRemap)->Normal= *(CVectorPacked*)(pOri + NL3D_RAWSKIN_NORMAL_OFF); (*vRemap)->UV= *(CUV*)(pOri + NL3D_RAWSKIN_UV_OFF); } pOri+= NL3D_RAWSKIN_VERTEX_SIZE; @@ -420,9 +420,9 @@ void CMeshMorpher::updateRawSkin (CVertexBuffer *vbOri, // If exist in this Lod RawSkin, apply if(rsVert) { - if(hasPos) + if(hasPos) // FIXME_SSE2: += rsVert->Pos+= rBS.deltaPos[j] * rFactor; - if(hasNorm) + if(hasNorm) // FIXME_SSE2: += rsVert->Normal+= rBS.deltaNorm[j] * rFactor; if(hasUV) rsVert->UV+= rBS.deltaUV[j] * rFactor; diff --git a/code/nel/src/3d/mesh_mrm_skin_template.cpp b/code/nel/src/3d/mesh_mrm_skin_template.cpp index 6b85326ee..bda804ebc 100644 --- a/code/nel/src/3d/mesh_mrm_skin_template.cpp +++ 
b/code/nel/src/3d/mesh_mrm_skin_template.cpp @@ -78,14 +78,18 @@ static void applyArraySkinNormalT(uint numMatrixes, uint32 *infPtr, CMesh::CSkin CVector *srcVertex= srcVertexPtr + index; CVector *srcNormal= srcNormalPtr + index; uint8 *dstVertexVB= destVertexPtr + index * vertexSize; - CVector *dstVertex= (CVector*)(dstVertexVB); - CVector *dstNormal= (CVector*)(dstVertexVB + normalOff); + CVectorPacked *dstVertex= (CVectorPacked*)(dstVertexVB); + CVectorPacked *dstNormal= (CVectorPacked*)(dstVertexVB + normalOff); + CVector tempVertex; + CVector tempNormal; // Vertex. - boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, *dstVertex); + boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, tempVertex); + *dstVertex = tempVertex; // Normal. - boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcNormal, *dstNormal); + boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcNormal, tempNormal); + *dstNormal = tempNormal; } break; @@ -99,16 +103,20 @@ static void applyArraySkinNormalT(uint numMatrixes, uint32 *infPtr, CMesh::CSkin CVector *srcVertex= srcVertexPtr + index; CVector *srcNormal= srcNormalPtr + index; uint8 *dstVertexVB= destVertexPtr + index * vertexSize; - CVector *dstVertex= (CVector*)(dstVertexVB); - CVector *dstNormal= (CVector*)(dstVertexVB + normalOff); + CVectorPacked *dstVertex= (CVectorPacked*)(dstVertexVB); + CVectorPacked *dstNormal= (CVectorPacked*)(dstVertexVB + normalOff); + CVector tempVertex; + CVector tempNormal; // Vertex. - boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], *dstVertex); - boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], *dstVertex); + boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], tempVertex); + boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], tempVertex); + *dstVertex = tempVertex; // Normal. 
- boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcNormal, srcSkin->Weights[0], *dstNormal); - boneMat3x4[ srcSkin->MatrixId[1] ].mulAddVector( *srcNormal, srcSkin->Weights[1], *dstNormal); + boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcNormal, srcSkin->Weights[0], tempNormal); + boneMat3x4[ srcSkin->MatrixId[1] ].mulAddVector( *srcNormal, srcSkin->Weights[1], tempNormal); + *dstNormal = tempNormal; } break; @@ -122,18 +130,22 @@ static void applyArraySkinNormalT(uint numMatrixes, uint32 *infPtr, CMesh::CSkin CVector *srcVertex= srcVertexPtr + index; CVector *srcNormal= srcNormalPtr + index; uint8 *dstVertexVB= destVertexPtr + index * vertexSize; - CVector *dstVertex= (CVector*)(dstVertexVB); - CVector *dstNormal= (CVector*)(dstVertexVB + normalOff); + CVectorPacked *dstVertex= (CVectorPacked*)(dstVertexVB); + CVectorPacked *dstNormal= (CVectorPacked*)(dstVertexVB + normalOff); + CVector tempVertex; + CVector tempNormal; // Vertex. - boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], *dstVertex); - boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], *dstVertex); - boneMat3x4[ srcSkin->MatrixId[2] ].mulAddPoint( *srcVertex, srcSkin->Weights[2], *dstVertex); + boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], tempVertex); + boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], tempVertex); + boneMat3x4[ srcSkin->MatrixId[2] ].mulAddPoint( *srcVertex, srcSkin->Weights[2], tempVertex); + *dstVertex = tempVertex; // Normal. 
- boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcNormal, srcSkin->Weights[0], *dstNormal); - boneMat3x4[ srcSkin->MatrixId[1] ].mulAddVector( *srcNormal, srcSkin->Weights[1], *dstNormal); - boneMat3x4[ srcSkin->MatrixId[2] ].mulAddVector( *srcNormal, srcSkin->Weights[2], *dstNormal); + boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcNormal, srcSkin->Weights[0], tempNormal); + boneMat3x4[ srcSkin->MatrixId[1] ].mulAddVector( *srcNormal, srcSkin->Weights[1], tempNormal); + boneMat3x4[ srcSkin->MatrixId[2] ].mulAddVector( *srcNormal, srcSkin->Weights[2], tempNormal); + *dstNormal = tempNormal; } break; @@ -147,20 +159,24 @@ static void applyArraySkinNormalT(uint numMatrixes, uint32 *infPtr, CMesh::CSkin CVector *srcVertex= srcVertexPtr + index; CVector *srcNormal= srcNormalPtr + index; uint8 *dstVertexVB= destVertexPtr + index * vertexSize; - CVector *dstVertex= (CVector*)(dstVertexVB); - CVector *dstNormal= (CVector*)(dstVertexVB + normalOff); + CVectorPacked *dstVertex= (CVectorPacked*)(dstVertexVB); + CVectorPacked *dstNormal= (CVectorPacked*)(dstVertexVB + normalOff); + CVector tempVertex; + CVector tempNormal; // Vertex. - boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], *dstVertex); - boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], *dstVertex); - boneMat3x4[ srcSkin->MatrixId[2] ].mulAddPoint( *srcVertex, srcSkin->Weights[2], *dstVertex); - boneMat3x4[ srcSkin->MatrixId[3] ].mulAddPoint( *srcVertex, srcSkin->Weights[3], *dstVertex); + boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], tempVertex); + boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], tempVertex); + boneMat3x4[ srcSkin->MatrixId[2] ].mulAddPoint( *srcVertex, srcSkin->Weights[2], tempVertex); + boneMat3x4[ srcSkin->MatrixId[3] ].mulAddPoint( *srcVertex, srcSkin->Weights[3], tempVertex); + *dstVertex = tempVertex; // Normal. 
- boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcNormal, srcSkin->Weights[0], *dstNormal); - boneMat3x4[ srcSkin->MatrixId[1] ].mulAddVector( *srcNormal, srcSkin->Weights[1], *dstNormal); - boneMat3x4[ srcSkin->MatrixId[2] ].mulAddVector( *srcNormal, srcSkin->Weights[2], *dstNormal); - boneMat3x4[ srcSkin->MatrixId[3] ].mulAddVector( *srcNormal, srcSkin->Weights[3], *dstNormal); + boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcNormal, srcSkin->Weights[0], tempNormal); + boneMat3x4[ srcSkin->MatrixId[1] ].mulAddVector( *srcNormal, srcSkin->Weights[1], tempNormal); + boneMat3x4[ srcSkin->MatrixId[2] ].mulAddVector( *srcNormal, srcSkin->Weights[2], tempNormal); + boneMat3x4[ srcSkin->MatrixId[3] ].mulAddVector( *srcNormal, srcSkin->Weights[3], tempNormal); + *dstNormal = tempNormal; } break; @@ -220,18 +236,24 @@ static void applyArraySkinTangentSpaceT(uint numMatrixes, uint32 *infPtr, CMesh: CVector *srcTgSpace= tgSpacePtr + index; // uint8 *dstVertexVB= destVertexPtr + index * vertexSize; - CVector *dstVertex= (CVector*)(dstVertexVB); - CVector *dstNormal= (CVector*)(dstVertexVB + normalOff); - CVector *dstTgSpace= (CVector*)(dstVertexVB + tgSpaceOff); + CVectorPacked *dstVertex= (CVectorPacked*)(dstVertexVB); + CVectorPacked *dstNormal= (CVectorPacked*)(dstVertexVB + normalOff); + CVectorPacked *dstTgSpace= (CVectorPacked*)(dstVertexVB + tgSpaceOff); + CVector tempVertex; + CVector tempNormal; + CVector tempTgSpace; // Vertex. - boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, *dstVertex); + boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, tempVertex); + *dstVertex = tempVertex; // Normal. 
- boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcNormal, *dstNormal); + boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcNormal, tempNormal); + *dstNormal = tempNormal; // Tg space - boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcTgSpace, *dstTgSpace); + boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcTgSpace, tempTgSpace); + *dstTgSpace = tempTgSpace; } break; @@ -248,19 +270,25 @@ static void applyArraySkinTangentSpaceT(uint numMatrixes, uint32 *infPtr, CMesh: CVector *srcTgSpace= tgSpacePtr + index; // uint8 *dstVertexVB= destVertexPtr + index * vertexSize; - CVector *dstVertex= (CVector*)(dstVertexVB); - CVector *dstNormal= (CVector*)(dstVertexVB + normalOff); - CVector *dstTgSpace= (CVector*)(dstVertexVB + tgSpaceOff); + CVectorPacked *dstVertex= (CVectorPacked*)(dstVertexVB); + CVectorPacked *dstNormal= (CVectorPacked*)(dstVertexVB + normalOff); + CVectorPacked *dstTgSpace= (CVectorPacked*)(dstVertexVB + tgSpaceOff); + CVector tempVertex; + CVector tempNormal; + CVector tempTgSpace; // Vertex. - boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], *dstVertex); - boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], *dstVertex); + boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], tempVertex); + boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], tempVertex); + *dstVertex = tempVertex; // Normal. 
- boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcNormal, srcSkin->Weights[0], *dstNormal); - boneMat3x4[ srcSkin->MatrixId[1] ].mulAddVector( *srcNormal, srcSkin->Weights[1], *dstNormal); + boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcNormal, srcSkin->Weights[0], tempNormal); + boneMat3x4[ srcSkin->MatrixId[1] ].mulAddVector( *srcNormal, srcSkin->Weights[1], tempNormal); + *dstNormal = tempNormal; // Tg space - boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcTgSpace, srcSkin->Weights[0], *dstTgSpace); - boneMat3x4[ srcSkin->MatrixId[1] ].mulAddVector( *srcTgSpace, srcSkin->Weights[1], *dstTgSpace); + boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcTgSpace, srcSkin->Weights[0], tempTgSpace); + boneMat3x4[ srcSkin->MatrixId[1] ].mulAddVector( *srcTgSpace, srcSkin->Weights[1], tempTgSpace); + *dstTgSpace = tempTgSpace; } break; @@ -276,22 +304,28 @@ static void applyArraySkinTangentSpaceT(uint numMatrixes, uint32 *infPtr, CMesh: CVector *srcTgSpace= tgSpacePtr + index; // uint8 *dstVertexVB= destVertexPtr + index * vertexSize; - CVector *dstVertex= (CVector*)(dstVertexVB); - CVector *dstNormal= (CVector*)(dstVertexVB + normalOff); - CVector *dstTgSpace= (CVector*)(dstVertexVB + tgSpaceOff); + CVectorPacked *dstVertex= (CVectorPacked*)(dstVertexVB); + CVectorPacked *dstNormal= (CVectorPacked*)(dstVertexVB + normalOff); + CVectorPacked *dstTgSpace= (CVectorPacked*)(dstVertexVB + tgSpaceOff); + CVector tempVertex; + CVector tempNormal; + CVector tempTgSpace; // Vertex. 
- boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], *dstVertex); - boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], *dstVertex); - boneMat3x4[ srcSkin->MatrixId[2] ].mulAddPoint( *srcVertex, srcSkin->Weights[2], *dstVertex); + boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], tempVertex); + boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], tempVertex); + boneMat3x4[ srcSkin->MatrixId[2] ].mulAddPoint( *srcVertex, srcSkin->Weights[2], tempVertex); + *dstVertex = tempVertex; // Normal. - boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcNormal, srcSkin->Weights[0], *dstNormal); - boneMat3x4[ srcSkin->MatrixId[1] ].mulAddVector( *srcNormal, srcSkin->Weights[1], *dstNormal); - boneMat3x4[ srcSkin->MatrixId[2] ].mulAddVector( *srcNormal, srcSkin->Weights[2], *dstNormal); + boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcNormal, srcSkin->Weights[0], tempNormal); + boneMat3x4[ srcSkin->MatrixId[1] ].mulAddVector( *srcNormal, srcSkin->Weights[1], tempNormal); + boneMat3x4[ srcSkin->MatrixId[2] ].mulAddVector( *srcNormal, srcSkin->Weights[2], tempNormal); + *dstNormal = tempNormal; // Tg space - boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcTgSpace, srcSkin->Weights[0], *dstTgSpace); - boneMat3x4[ srcSkin->MatrixId[1] ].mulAddVector( *srcTgSpace, srcSkin->Weights[1], *dstTgSpace); - boneMat3x4[ srcSkin->MatrixId[2] ].mulAddVector( *srcTgSpace, srcSkin->Weights[2], *dstTgSpace); + boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcTgSpace, srcSkin->Weights[0], tempTgSpace); + boneMat3x4[ srcSkin->MatrixId[1] ].mulAddVector( *srcTgSpace, srcSkin->Weights[1], tempTgSpace); + boneMat3x4[ srcSkin->MatrixId[2] ].mulAddVector( *srcTgSpace, srcSkin->Weights[2], tempTgSpace); + *dstTgSpace = tempTgSpace; } break; @@ -307,25 +341,33 @@ static void applyArraySkinTangentSpaceT(uint numMatrixes, uint32 *infPtr, CMesh: CVector *srcTgSpace= 
tgSpacePtr + index; // uint8 *dstVertexVB= destVertexPtr + index * vertexSize; - CVector *dstVertex= (CVector*)(dstVertexVB); - CVector *dstNormal= (CVector*)(dstVertexVB + normalOff); - CVector *dstTgSpace= (CVector*)(dstVertexVB + tgSpaceOff); + CVectorPacked *dstVertex= (CVectorPacked*)(dstVertexVB); + CVectorPacked *dstNormal= (CVectorPacked*)(dstVertexVB + normalOff); + CVectorPacked *dstTgSpace= (CVectorPacked*)(dstVertexVB + tgSpaceOff); + + CVector tempVertex; + CVector tempNormal; + CVector tempTgSpace; // Vertex. - boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], *dstVertex); - boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], *dstVertex); - boneMat3x4[ srcSkin->MatrixId[2] ].mulAddPoint( *srcVertex, srcSkin->Weights[2], *dstVertex); - boneMat3x4[ srcSkin->MatrixId[3] ].mulAddPoint( *srcVertex, srcSkin->Weights[3], *dstVertex); + boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], tempVertex); + boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], tempVertex); + boneMat3x4[ srcSkin->MatrixId[2] ].mulAddPoint( *srcVertex, srcSkin->Weights[2], tempVertex); + boneMat3x4[ srcSkin->MatrixId[3] ].mulAddPoint( *srcVertex, srcSkin->Weights[3], tempVertex); // Normal. 
- boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcNormal, srcSkin->Weights[0], *dstNormal); - boneMat3x4[ srcSkin->MatrixId[1] ].mulAddVector( *srcNormal, srcSkin->Weights[1], *dstNormal); - boneMat3x4[ srcSkin->MatrixId[2] ].mulAddVector( *srcNormal, srcSkin->Weights[2], *dstNormal); - boneMat3x4[ srcSkin->MatrixId[3] ].mulAddVector( *srcNormal, srcSkin->Weights[3], *dstNormal); + boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcNormal, srcSkin->Weights[0], tempNormal); + boneMat3x4[ srcSkin->MatrixId[1] ].mulAddVector( *srcNormal, srcSkin->Weights[1], tempNormal); + boneMat3x4[ srcSkin->MatrixId[2] ].mulAddVector( *srcNormal, srcSkin->Weights[2], tempNormal); + boneMat3x4[ srcSkin->MatrixId[3] ].mulAddVector( *srcNormal, srcSkin->Weights[3], tempNormal); // Tg space - boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcTgSpace, srcSkin->Weights[0], *dstTgSpace); - boneMat3x4[ srcSkin->MatrixId[1] ].mulAddVector( *srcTgSpace, srcSkin->Weights[1], *dstTgSpace); - boneMat3x4[ srcSkin->MatrixId[2] ].mulAddVector( *srcTgSpace, srcSkin->Weights[2], *dstTgSpace); - boneMat3x4[ srcSkin->MatrixId[3] ].mulAddVector( *srcTgSpace, srcSkin->Weights[3], *dstTgSpace); + boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcTgSpace, srcSkin->Weights[0], tempTgSpace); + boneMat3x4[ srcSkin->MatrixId[1] ].mulAddVector( *srcTgSpace, srcSkin->Weights[1], tempTgSpace); + boneMat3x4[ srcSkin->MatrixId[2] ].mulAddVector( *srcTgSpace, srcSkin->Weights[2], tempTgSpace); + boneMat3x4[ srcSkin->MatrixId[3] ].mulAddVector( *srcTgSpace, srcSkin->Weights[3], tempTgSpace); + + *dstVertex = tempVertex; + *dstNormal = tempNormal; + *dstTgSpace = tempTgSpace; } break; @@ -530,16 +572,19 @@ void CMeshMRMGeom::applyArrayRawSkinNormal1(CRawVertexNormalSkin1 *src, uint8 * #ifndef NL3D_RAWSKIN_ASM // for all InfluencedVertices only. 
+ CVector tmp; for(;nBlockInf>0;nBlockInf--, src++, destVertexPtr+=NL3D_RAWSKIN_VERTEX_SIZE) { - CVector *dstVertex= (CVector*)(destVertexPtr); - CVector *dstNormal= (CVector*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF); + CVectorPacked *dstVertex= (CVectorPacked*)(destVertexPtr); + CVectorPacked *dstNormal= (CVectorPacked*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF); // For 1 matrix, can write directly to AGP (if destVertexPtr is AGP...) // Vertex. - boneMat3x4[ src->MatrixId[0] ].mulSetPoint( src->Vertex.Pos, *(CVector*)(destVertexPtr) ); + boneMat3x4[ src->MatrixId[0] ].mulSetPoint( src->Vertex.Pos, tmp ); + *(CVectorPacked*)(destVertexPtr) = tmp; // Normal. - boneMat3x4[ src->MatrixId[0] ].mulSetVector( src->Vertex.Normal, *(CVector*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF) ); + boneMat3x4[ src->MatrixId[0] ].mulSetVector( src->Vertex.Normal, tmp ); + *(CVectorPacked*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF) = tmp; // UV copy. *(CUV*)(destVertexPtr + NL3D_RAWSKIN_UV_OFF)= src->Vertex.UV; } @@ -717,11 +762,11 @@ void CMeshMRMGeom::applyArrayRawSkinNormal2(CRawVertexNormalSkin2 *src, uint8 * // Vertex. boneMat3x4[ src->MatrixId[0] ].mulSetPoint( src->Vertex.Pos, src->Weights[0], tmpVert); boneMat3x4[ src->MatrixId[1] ].mulAddPoint( src->Vertex.Pos, src->Weights[1], tmpVert); - *(CVector*)(destVertexPtr)= tmpVert; + *(CVectorPacked*)(destVertexPtr)= tmpVert; // Normal. boneMat3x4[ src->MatrixId[0] ].mulSetVector( src->Vertex.Normal, src->Weights[0], tmpVert); boneMat3x4[ src->MatrixId[1] ].mulAddVector( src->Vertex.Normal, src->Weights[1], tmpVert); - *(CVector*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF)= tmpVert; + *(CVectorPacked*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF)= tmpVert; // UV copy. 
*(CUV*)(destVertexPtr + NL3D_RAWSKIN_UV_OFF)= src->Vertex.UV; } @@ -1021,12 +1066,12 @@ void CMeshMRMGeom::applyArrayRawSkinNormal3(CRawVertexNormalSkin3 *src, uint8 * boneMat3x4[ src->MatrixId[0] ].mulSetPoint( src->Vertex.Pos, src->Weights[0], tmpVert); boneMat3x4[ src->MatrixId[1] ].mulAddPoint( src->Vertex.Pos, src->Weights[1], tmpVert); boneMat3x4[ src->MatrixId[2] ].mulAddPoint( src->Vertex.Pos, src->Weights[2], tmpVert); - *(CVector*)(destVertexPtr)= tmpVert; + *(CVectorPacked*)(destVertexPtr)= tmpVert; // Normal. boneMat3x4[ src->MatrixId[0] ].mulSetVector( src->Vertex.Normal, src->Weights[0], tmpVert); boneMat3x4[ src->MatrixId[1] ].mulAddVector( src->Vertex.Normal, src->Weights[1], tmpVert); boneMat3x4[ src->MatrixId[2] ].mulAddVector( src->Vertex.Normal, src->Weights[2], tmpVert); - *(CVector*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF)= tmpVert; + *(CVectorPacked*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF)= tmpVert; // UV copy. *(CUV*)(destVertexPtr + NL3D_RAWSKIN_UV_OFF)= src->Vertex.UV; } @@ -1414,13 +1459,13 @@ void CMeshMRMGeom::applyArrayRawSkinNormal4(CRawVertexNormalSkin4 *src, uint8 * boneMat3x4[ src->MatrixId[1] ].mulAddPoint( src->Vertex.Pos, src->Weights[1], tmpVert); boneMat3x4[ src->MatrixId[2] ].mulAddPoint( src->Vertex.Pos, src->Weights[2], tmpVert); boneMat3x4[ src->MatrixId[3] ].mulAddPoint( src->Vertex.Pos, src->Weights[3], tmpVert); - *(CVector*)(destVertexPtr)= tmpVert; + *(CVectorPacked*)(destVertexPtr)= tmpVert; // Normal. boneMat3x4[ src->MatrixId[0] ].mulSetVector( src->Vertex.Normal, src->Weights[0], tmpVert); boneMat3x4[ src->MatrixId[1] ].mulAddVector( src->Vertex.Normal, src->Weights[1], tmpVert); boneMat3x4[ src->MatrixId[2] ].mulAddVector( src->Vertex.Normal, src->Weights[2], tmpVert); boneMat3x4[ src->MatrixId[3] ].mulAddVector( src->Vertex.Normal, src->Weights[3], tmpVert); - *(CVector*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF)= tmpVert; + *(CVectorPacked*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF)= tmpVert; // UV copy. 
*(CUV*)(destVertexPtr + NL3D_RAWSKIN_UV_OFF)= src->Vertex.UV; } diff --git a/code/nel/src/3d/mesh_mrm_skinned_template.cpp b/code/nel/src/3d/mesh_mrm_skinned_template.cpp index e60a5632b..afacd48fb 100644 --- a/code/nel/src/3d/mesh_mrm_skinned_template.cpp +++ b/code/nel/src/3d/mesh_mrm_skinned_template.cpp @@ -79,16 +79,19 @@ void CMeshMRMSkinnedGeom::applyArrayRawSkinNormal1(CRawVertexNormalSkinned1 *sr #ifndef NL3D_RAWSKIN_ASM // for all InfluencedVertices only. + CVector tmp; for(;nBlockInf>0;nBlockInf--, src++, destVertexPtr+=NL3D_RAWSKIN_VERTEX_SIZE) { - CVector *dstVertex= (CVector*)(destVertexPtr); - CVector *dstNormal= (CVector*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF); + CVectorPacked *dstVertex= (CVectorPacked*)(destVertexPtr); + CVectorPacked *dstNormal= (CVectorPacked*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF); // For 1 matrix, can write directly to AGP (if destVertexPtr is AGP...) // Vertex. - boneMat3x4[ src->MatrixId[0] ].mulSetPoint( src->Vertex, *(CVector*)(destVertexPtr) ); + boneMat3x4[ src->MatrixId[0] ].mulSetPoint( src->Vertex, tmp ); + *(CVectorPacked*)(destVertexPtr) = tmp; // Normal. - boneMat3x4[ src->MatrixId[0] ].mulSetVector( src->Normal, *(CVector*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF) ); + boneMat3x4[ src->MatrixId[0] ].mulSetVector( src->Normal, tmp ); + *(CVectorPacked*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF) = tmp; // UV copy. *(CUV*)(destVertexPtr + NL3D_RAWSKIN_UV_OFF)= src->UV; } @@ -266,11 +269,11 @@ void CMeshMRMSkinnedGeom::applyArrayRawSkinNormal2(CRawVertexNormalSkinned2 *sr // Vertex. boneMat3x4[ src->MatrixId[0] ].mulSetPoint( src->Vertex, src->Weights[0], tmpVert); boneMat3x4[ src->MatrixId[1] ].mulAddPoint( src->Vertex, src->Weights[1], tmpVert); - *(CVector*)(destVertexPtr)= tmpVert; + *(CVectorPacked*)(destVertexPtr)= tmpVert; // Normal. 
boneMat3x4[ src->MatrixId[0] ].mulSetVector( src->Normal, src->Weights[0], tmpVert); boneMat3x4[ src->MatrixId[1] ].mulAddVector( src->Normal, src->Weights[1], tmpVert); - *(CVector*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF)= tmpVert; + *(CVectorPacked*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF)= tmpVert; // UV copy. *(CUV*)(destVertexPtr + NL3D_RAWSKIN_UV_OFF)= src->UV; } @@ -570,12 +573,12 @@ void CMeshMRMSkinnedGeom::applyArrayRawSkinNormal3(CRawVertexNormalSkinned3 *sr boneMat3x4[ src->MatrixId[0] ].mulSetPoint( src->Vertex, src->Weights[0], tmpVert); boneMat3x4[ src->MatrixId[1] ].mulAddPoint( src->Vertex, src->Weights[1], tmpVert); boneMat3x4[ src->MatrixId[2] ].mulAddPoint( src->Vertex, src->Weights[2], tmpVert); - *(CVector*)(destVertexPtr)= tmpVert; + *(CVectorPacked*)(destVertexPtr)= tmpVert; // Normal. boneMat3x4[ src->MatrixId[0] ].mulSetVector( src->Normal, src->Weights[0], tmpVert); boneMat3x4[ src->MatrixId[1] ].mulAddVector( src->Normal, src->Weights[1], tmpVert); boneMat3x4[ src->MatrixId[2] ].mulAddVector( src->Normal, src->Weights[2], tmpVert); - *(CVector*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF)= tmpVert; + *(CVectorPacked*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF)= tmpVert; // UV copy. *(CUV*)(destVertexPtr + NL3D_RAWSKIN_UV_OFF)= src->UV; } @@ -963,13 +966,13 @@ void CMeshMRMSkinnedGeom::applyArrayRawSkinNormal4(CRawVertexNormalSkinned4 *sr boneMat3x4[ src->MatrixId[1] ].mulAddPoint( src->Vertex, src->Weights[1], tmpVert); boneMat3x4[ src->MatrixId[2] ].mulAddPoint( src->Vertex, src->Weights[2], tmpVert); boneMat3x4[ src->MatrixId[3] ].mulAddPoint( src->Vertex, src->Weights[3], tmpVert); - *(CVector*)(destVertexPtr)= tmpVert; + *(CVectorPacked*)(destVertexPtr)= tmpVert; // Normal. 
boneMat3x4[ src->MatrixId[0] ].mulSetVector( src->Normal, src->Weights[0], tmpVert); boneMat3x4[ src->MatrixId[1] ].mulAddVector( src->Normal, src->Weights[1], tmpVert); boneMat3x4[ src->MatrixId[2] ].mulAddVector( src->Normal, src->Weights[2], tmpVert); boneMat3x4[ src->MatrixId[3] ].mulAddVector( src->Normal, src->Weights[3], tmpVert); - *(CVector*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF)= tmpVert; + *(CVectorPacked*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF)= tmpVert; // UV copy. *(CUV*)(destVertexPtr + NL3D_RAWSKIN_UV_OFF)= src->UV; } diff --git a/code/nel/src/3d/noise_3d.cpp b/code/nel/src/3d/noise_3d.cpp index 5900e4750..d8e27677b 100644 --- a/code/nel/src/3d/noise_3d.cpp +++ b/code/nel/src/3d/noise_3d.cpp @@ -151,14 +151,14 @@ void CNoise3d::render2passes (CQuadUV &qc, float wpos, float alpha) _VertexBuffer.lock (vba); uint32 nVSize = _VertexBuffer.getVertexSize (); - CVector *pVertices = vba.getVertexCoordPointer(_NbVertices); - *pVertices = qc.V0; pVertices = (CVector*)( ((uint8*)pVertices) + nVSize ); - *pVertices = qc.V1; pVertices = (CVector*)( ((uint8*)pVertices) + nVSize ); - *pVertices = qc.V2; pVertices = (CVector*)( ((uint8*)pVertices) + nVSize ); - *pVertices = qc.V3; pVertices = (CVector*)( ((uint8*)pVertices) + nVSize ); - *pVertices = qc.V0; pVertices = (CVector*)( ((uint8*)pVertices) + nVSize ); - *pVertices = qc.V1; pVertices = (CVector*)( ((uint8*)pVertices) + nVSize ); - *pVertices = qc.V2; pVertices = (CVector*)( ((uint8*)pVertices) + nVSize ); + CVectorPacked *pVertices = vba.getVertexCoordPointer(_NbVertices); + *pVertices = qc.V0; pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); + *pVertices = qc.V1; pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); + *pVertices = qc.V2; pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); + *pVertices = qc.V3; pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); + *pVertices = qc.V0; pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); + *pVertices = 
qc.V1; pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); + *pVertices = qc.V2; pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); *pVertices = qc.V3; CUV *pUV = vba.getTexCoordPointer (_NbVertices, 0); @@ -232,10 +232,10 @@ void CNoise3d::render (CQuadUV &qc, float wpos, float intensity) CVertexBufferReadWrite vba; _VertexBuffer.lock (vba); - CVector *pVertices = vba.getVertexCoordPointer(_NbVertices); - *pVertices = qc.V0; pVertices = (CVector*)( ((uint8*)pVertices) + nVSize ); - *pVertices = qc.V1; pVertices = (CVector*)( ((uint8*)pVertices) + nVSize ); - *pVertices = qc.V2; pVertices = (CVector*)( ((uint8*)pVertices) + nVSize ); + CVectorPacked *pVertices = vba.getVertexCoordPointer(_NbVertices); + *pVertices = qc.V0; pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); + *pVertices = qc.V1; pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); + *pVertices = qc.V2; pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); *pVertices = qc.V3; CUV *pUV = vba.getTexCoordPointer (_NbVertices, 0); @@ -281,7 +281,7 @@ void CNoise3d::renderGrid (uint32 nbw, uint32 nbh, uint32 w, uint32 h, uint32 i, j, nSlice1, nSlice2; float wpos, oneOverNbWNbH = 1.0f / (nbw*nbh); - CVector *pVertices; + CVectorPacked *pVertices; CUV *pUV0, *pUV1; uint8 *pColA, nAlphaPos; uint32 nVSize = _VertexBuffer.getVertexSize (); @@ -319,10 +319,10 @@ void CNoise3d::renderGrid (uint32 nbw, uint32 nbh, uint32 w, uint32 h, // If wpos is just on slice1 alpha must be one nAlphaPos = (uint8)( 255*(1.0f - _Depth*(wpos - (((float)nSlice1) / _Depth))) ); - *pVertices = CVector((float)i*w, (float)j*h, 0.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize ); - *pVertices = CVector((float)(i+1)*w, (float)j*h, 0.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize ); - *pVertices = CVector((float)(i+1)*w, (float)(j+1)*h, 0.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize ); - *pVertices = CVector((float)i*w, (float)(j+1)*h, 0.0f); pVertices = 
(CVector*)( ((uint8*)pVertices) + nVSize ); + *pVertices = CVector((float)i*w, (float)j*h, 0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); + *pVertices = CVector((float)(i+1)*w, (float)j*h, 0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); + *pVertices = CVector((float)(i+1)*w, (float)(j+1)*h, 0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); + *pVertices = CVector((float)i*w, (float)(j+1)*h, 0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); pUV0->U = UStart+_OffS[nSlice1].U; pUV0->V = VStart+_OffS[nSlice1].V; pUV0 = (CUV*)( ((uint8*)pUV0) + nVSize ); pUV0->U = dU+_OffS[nSlice1].U; pUV0->V = VStart+_OffS[nSlice1].V; pUV0 = (CUV*)( ((uint8*)pUV0) + nVSize ); @@ -350,7 +350,7 @@ void CNoise3d::renderGrid2passes (uint32 nbw, uint32 nbh, uint32 w, uint32 h, { uint32 i, j, nSlice1, nSlice2; float wpos, oneOverNbWNbH = 1.0f / (nbw*nbh); - CVector *pVertices; + CVectorPacked *pVertices; CUV *pUV0; uint8 *pColA, nFinalAlpha; uint32 nVSize = _VertexBuffer.getVertexSize (); @@ -387,14 +387,14 @@ void CNoise3d::renderGrid2passes (uint32 nbw, uint32 nbh, uint32 w, uint32 h, // If wpos is just on slice1 alpha must be one float alphaPos = 1.0f - _Depth*(wpos - (((float)nSlice1) / _Depth)); - *pVertices = CVector((float)i*w, (float)j*h, 0.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize ); - *pVertices = CVector((float)(i+1)*w, (float)j*h, 0.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize ); - *pVertices = CVector((float)(i+1)*w, (float)(j+1)*h, 0.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize ); - *pVertices = CVector((float)i*w, (float)(j+1)*h, 0.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize ); - *pVertices = CVector((float)i*w, (float)j*h, 0.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize ); - *pVertices = CVector((float)(i+1)*w, (float)j*h, 0.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize ); - *pVertices = CVector((float)(i+1)*w, 
(float)(j+1)*h, 0.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize ); - *pVertices = CVector((float)i*w, (float)(j+1)*h, 0.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize ); + *pVertices = CVector((float)i*w, (float)j*h, 0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); + *pVertices = CVector((float)(i+1)*w, (float)j*h, 0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); + *pVertices = CVector((float)(i+1)*w, (float)(j+1)*h, 0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); + *pVertices = CVector((float)i*w, (float)(j+1)*h, 0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); + *pVertices = CVector((float)i*w, (float)j*h, 0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); + *pVertices = CVector((float)(i+1)*w, (float)j*h, 0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); + *pVertices = CVector((float)(i+1)*w, (float)(j+1)*h, 0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); + *pVertices = CVector((float)i*w, (float)(j+1)*h, 0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); pUV0->U = UStart+_OffS[nSlice1].U; pUV0->V = VStart+_OffS[nSlice1].V; pUV0 = (CUV*)( ((uint8*)pUV0) + nVSize ); pUV0->U = dU+_OffS[nSlice1].U; pUV0->V = VStart+_OffS[nSlice1].V; pUV0 = (CUV*)( ((uint8*)pUV0) + nVSize ); diff --git a/code/nel/src/3d/packed_zone.cpp b/code/nel/src/3d/packed_zone.cpp index 1c6bf4817..944e9c8d1 100644 --- a/code/nel/src/3d/packed_zone.cpp +++ b/code/nel/src/3d/packed_zone.cpp @@ -576,6 +576,22 @@ void CPackedZone32::unpackTri(const CPackedTri &src, CVector dest[3]) const } +// *************************************************************************************** +void CPackedZone32::unpackTri(const CPackedTri &src, CVectorPacked dest[3]) const +{ + // TODO: add 'multiply-add' operator + dest[0].set(Verts[src.V0].X * _PackedLocalToWorld.x + _Origin.x, + Verts[src.V0].Y * _PackedLocalToWorld.y + _Origin.y, 
+ Verts[src.V0].Z * _PackedLocalToWorld.z + _Origin.z); + dest[1].set(Verts[src.V1].X * _PackedLocalToWorld.x + _Origin.x, + Verts[src.V1].Y * _PackedLocalToWorld.y + _Origin.y, + Verts[src.V1].Z * _PackedLocalToWorld.z + _Origin.z); + dest[2].set(Verts[src.V2].X * _PackedLocalToWorld.x + _Origin.x, + Verts[src.V2].Y * _PackedLocalToWorld.y + _Origin.y, + Verts[src.V2].Z * _PackedLocalToWorld.z + _Origin.z); + +} + uint32 CPackedZone32::UndefIndex = 0xffffffff; // *************************************************************************************** @@ -973,8 +989,8 @@ void CPackedZone32::render(CVertexBuffer &vb, IDriver &drv, CMaterial &material, CVertexBufferReadWrite vba; vb.setNumVertices(batchSize * 3); vb.lock(vba); - CVector *dest = vba.getVertexCoordPointer(0); - const CVector *endDest = dest + batchSize * 3; + CVectorPacked *dest = vba.getVertexCoordPointer(0); + const CVectorPacked *endDest = dest + batchSize * 3; for(sint y = 0; y < (sint) silhouette.size(); ++y) { sint gridY = y + minY; @@ -1196,8 +1212,8 @@ void CPackedZone16::render(CVertexBuffer &vb, IDriver &drv, CMaterial &material, CVertexBufferReadWrite vba; vb.setNumVertices(batchSize * 3); vb.lock(vba); - CVector *dest = vba.getVertexCoordPointer(0); - const CVector *endDest = dest + batchSize * 3; + CVectorPacked *dest = vba.getVertexCoordPointer(0); + const CVectorPacked *endDest = dest + batchSize * 3; for(sint y = 0; y < (sint) silhouette.size(); ++y) { sint gridY = y + minY; @@ -1254,6 +1270,23 @@ void CPackedZone16::render(CVertexBuffer &vb, IDriver &drv, CMaterial &material, +// *************************************************************************************** +void CPackedZone16::unpackTri(const CPackedTri16 &src, CVectorPacked dest[3]) const +{ + // yes this is ugly code duplication of CPackedZone32::unpackTri but this code is temporary anyway... 
+ // TODO: add 'multiply-add' operator + dest[0].set(Verts[src.V0].X * _PackedLocalToWorld.x + _Origin.x, + Verts[src.V0].Y * _PackedLocalToWorld.y + _Origin.y, + Verts[src.V0].Z * _PackedLocalToWorld.z + _Origin.z); + dest[1].set(Verts[src.V1].X * _PackedLocalToWorld.x + _Origin.x, + Verts[src.V1].Y * _PackedLocalToWorld.y + _Origin.y, + Verts[src.V1].Z * _PackedLocalToWorld.z + _Origin.z); + dest[2].set(Verts[src.V2].X * _PackedLocalToWorld.x + _Origin.x, + Verts[src.V2].Y * _PackedLocalToWorld.y + _Origin.y, + Verts[src.V2].Z * _PackedLocalToWorld.z + _Origin.z); + +} + // *************************************************************************************** void CPackedZone16::unpackTri(const CPackedTri16 &src, CVector dest[3]) const { diff --git a/code/nel/src/3d/vegetable_shape.cpp b/code/nel/src/3d/vegetable_shape.cpp index 7d9991b65..5b08d32ea 100644 --- a/code/nel/src/3d/vegetable_shape.cpp +++ b/code/nel/src/3d/vegetable_shape.cpp @@ -128,15 +128,15 @@ void CVegetableShape::build(CVegetableShapeBuild &vbuild) for(i=0;icheckLockedBuffer()); uint8* ptr; ptr=_Parent->_LockedBuffer; ptr+=(idx*_Parent->_VertexSize); - return((NLMISC::CVector*)ptr); + return((NLMISC::CVectorPacked*)ptr); } // -------------------------------------------------- -NLMISC::CVector* CVertexBufferReadWrite::getNormalCoordPointer(uint idx) +NLMISC::CVectorPacked* CVertexBufferReadWrite::getNormalCoordPointer(uint idx) { nlassert (_Parent->checkLockedBuffer()); uint8* ptr; @@ -1179,7 +1179,7 @@ NLMISC::CVector* CVertexBufferReadWrite::getNormalCoordPointer(uint idx) ptr=_Parent->_LockedBuffer; ptr+=_Parent->_Offset[CVertexBuffer::Normal]; ptr+=idx*_Parent->_VertexSize; - return((NLMISC::CVector*)ptr); + return((NLMISC::CVectorPacked*)ptr); } // -------------------------------------------------- @@ -1280,19 +1280,19 @@ void CVertexBufferReadWrite::touchVertices (uint first, uint last) // CVertexBufferRead // -------------------------------------------------- -const NLMISC::CVector* 
CVertexBufferRead::getVertexCoordPointer(uint idx) const +const NLMISC::CVectorPacked* CVertexBufferRead::getVertexCoordPointer(uint idx) const { nlassert (_Parent->checkLockedBuffer()); const uint8* ptr; ptr=_Parent->_LockedBuffer; ptr+=(idx*_Parent->_VertexSize); - return((const NLMISC::CVector*)ptr); + return((const NLMISC::CVectorPacked*)ptr); } // -------------------------------------------------- -const NLMISC::CVector* CVertexBufferRead::getNormalCoordPointer(uint idx) const +const NLMISC::CVectorPacked* CVertexBufferRead::getNormalCoordPointer(uint idx) const { nlassert (_Parent->checkLockedBuffer()); const uint8* ptr; @@ -1304,7 +1304,7 @@ const NLMISC::CVector* CVertexBufferRead::getNormalCoordPointer(uint idx) const ptr=_Parent->_LockedBuffer; ptr+=_Parent->_Offset[CVertexBuffer::Normal]; ptr+=idx*_Parent->_VertexSize; - return((const NLMISC::CVector*)ptr); + return((const NLMISC::CVectorPacked*)ptr); } // -------------------------------------------------- diff --git a/code/ryzom/client/src/landscape_poly_drawer.cpp b/code/ryzom/client/src/landscape_poly_drawer.cpp index e98f17dc7..e8d5ba154 100644 --- a/code/ryzom/client/src/landscape_poly_drawer.cpp +++ b/code/ryzom/client/src/landscape_poly_drawer.cpp @@ -488,7 +488,7 @@ void CLandscapePolyDrawer::drawShadowVolume(uint poly, bool firstPass) uint i; CVector2f vertex; - CVector * vertexVB = NULL; + CVectorPacked * vertexVB = NULL; const CVector cameraPos = Scene->getCam().getPos(); float height = 2000.0; From 31b2141b129259908575d8c2bf7f261c6b74afa6 Mon Sep 17 00:00:00 2001 From: kaetemi Date: Fri, 13 Jun 2014 00:38:24 +0200 Subject: [PATCH 06/21] SSE2: CVector alignment fixes for particle systems --HG-- branch : sse2 --- code/nel/include/nel/3d/mesh.h | 5 +- code/nel/include/nel/3d/ps_force.h | 12 +- code/nel/include/nel/3d/ps_located.h | 11 +- code/nel/include/nel/3d/ps_misc.h | 6 +- code/nel/include/nel/3d/ps_ribbon.h | 2 +- code/nel/include/nel/3d/ps_ribbon_base.h | 20 +- 
.../nel/include/nel/3d/shadow_poly_receiver.h | 4 +- code/nel/include/nel/misc/matrix.h | 2 +- code/nel/include/nel/misc/types_nl.h | 3 + code/nel/include/nel/misc/vector.h | 11 +- code/nel/src/3d/computed_string.cpp | 81 ++--- code/nel/src/3d/driver_user.cpp | 88 ++--- code/nel/src/3d/mesh.cpp | 32 +- code/nel/src/3d/mesh_morpher.cpp | 2 +- code/nel/src/3d/packed_zone.cpp | 8 +- code/nel/src/3d/patch_render.cpp | 36 +-- code/nel/src/3d/ps_dot.cpp | 8 +- code/nel/src/3d/ps_emitter.cpp | 16 +- code/nel/src/3d/ps_face.cpp | 50 +-- code/nel/src/3d/ps_face_look_at.cpp | 305 +++++++++--------- code/nel/src/3d/ps_fan_light.cpp | 11 +- code/nel/src/3d/ps_force.cpp | 14 +- code/nel/src/3d/ps_located.cpp | 4 +- code/nel/src/3d/ps_mesh.cpp | 18 +- code/nel/src/3d/ps_ribbon.cpp | 40 +-- code/nel/src/3d/ps_ribbon_base.cpp | 38 +-- code/nel/src/3d/ps_ribbon_look_at.cpp | 26 +- code/nel/src/3d/ps_shockwave.cpp | 5 +- code/nel/src/3d/ps_tail_dot.cpp | 6 +- code/nel/src/3d/ps_util.cpp | 1 + code/nel/src/3d/seg_remanence.cpp | 5 +- code/nel/src/3d/water_model.cpp | 37 +-- 32 files changed, 470 insertions(+), 437 deletions(-) diff --git a/code/nel/include/nel/3d/mesh.h b/code/nel/include/nel/3d/mesh.h index 780a455aa..2d31adbd7 100644 --- a/code/nel/include/nel/3d/mesh.h +++ b/code/nel/include/nel/3d/mesh.h @@ -41,6 +41,7 @@ namespace NL3D using NLMISC::CVector; +using NLMISC::CVectorPacked; using NLMISC::CPlane; using NLMISC::CMatrix; @@ -842,8 +843,8 @@ private: void flagSkinVerticesForMatrixBlock(uint8 *skinFlags, CMatrixBlock &mb); void computeSkinMatrixes(CSkeletonModel *skeleton, CMatrix3x4 *matrixes, CMatrixBlock *prevBlock, CMatrixBlock &curBlock); - void computeSoftwarePointSkinning(CMatrix3x4 *matrixes, CVector *srcVector, CPaletteSkin *srcPal, float *srcWgt, CVector *dstVector); - void computeSoftwareVectorSkinning(CMatrix3x4 *matrixes, CVector *srcVector, CPaletteSkin *srcPal, float *srcWgt, CVector *dstVector); + void computeSoftwarePointSkinning(CMatrix3x4 *matrixes, 
CVector *srcVector, CPaletteSkin *srcPal, float *srcWgt, CVectorPacked *dstVector); + void computeSoftwareVectorSkinning(CMatrix3x4 *matrixes, CVector *srcVector, CPaletteSkin *srcPal, float *srcWgt, CVectorPacked *dstVector); // Shadow mapping and CMesh. NB: not serialized, but created at each load CShadowSkin _ShadowSkin; diff --git a/code/nel/include/nel/3d/ps_force.h b/code/nel/include/nel/3d/ps_force.h index 76f22f40b..76cf90ee4 100644 --- a/code/nel/include/nel/3d/ps_force.h +++ b/code/nel/include/nel/3d/ps_force.h @@ -102,9 +102,9 @@ public: */ virtual void integrateSingle(float /* startDate */, float /* deltaT */, uint /* numStep */, const CPSLocated * /* src */, uint32 /* indexInLocated */, - NLMISC::CVector * /* destPos */, + NLMISC::CVectorPacked * /* destPos */, bool /* accumulate */ = false, - uint /* posStride */ = sizeof(NLMISC::CVector)) const + uint /* posStride */ = sizeof(NLMISC::CVectorPacked)) const { nlassert(0); // not an integrable force } @@ -412,9 +412,9 @@ public: virtual void integrateSingle(float startDate, float deltaT, uint numStep, const CPSLocated *src, uint32 indexInLocated, - NLMISC::CVector *destPos, + NLMISC::CVectorPacked *destPos, bool accumulate = false, - uint posStride = sizeof(NLMISC::CVector)) const; + uint posStride = sizeof(NLMISC::CVectorPacked)) const; protected: /// inherited from CPSForceIntensityHelper @@ -583,9 +583,9 @@ public: virtual void integrateSingle(float startDate, float deltaT, uint numStep, const CPSLocated *src, uint32 indexInLocated, - NLMISC::CVector *destPos, + NLMISC::CVectorPacked *destPos, bool accumulate = false, - uint posStride = sizeof(NLMISC::CVector)) const; + uint posStride = sizeof(NLMISC::CVectorPacked)) const; /// perform initialisations static void initPrecalc(); diff --git a/code/nel/include/nel/3d/ps_located.h b/code/nel/include/nel/3d/ps_located.h index 2c4862b63..ca1c86a7b 100644 --- a/code/nel/include/nel/3d/ps_located.h +++ b/code/nel/include/nel/3d/ps_located.h @@ -508,11 
+508,12 @@ public: */ void integrateSingle(float startDate, float deltaT, uint numStep, uint32 indexInLocated, - NLMISC::CVector *destPos, - uint posStride = sizeof(NLMISC::CVector)) const; + NLMISC::CVectorPacked *destPos, + uint posStride = sizeof(NLMISC::CVectorPacked)) const; // compute position for a single element at the given date // NB : only works with object that have parametric trajectories + inline void computeParametricPos(float date, uint indexInLocated, NLMISC::CVectorPacked &dest) const; inline void computeParametricPos(float date, uint indexInLocated, NLMISC::CVector &dest) const; @@ -1052,6 +1053,12 @@ inline TAnimationTime CPSLocated::getAgeInSeconds(uint elementIndex) const // ***************************************************************************************************** inline void CPSLocated::computeParametricPos(float date, uint indexInLocated, NLMISC::CVector &dest) const +{ + NLMISC::CVectorPacked temp; + integrateSingle(date, 1.f, 1, indexInLocated, &temp); + dest = temp; +} +inline void CPSLocated::computeParametricPos(float date, uint indexInLocated, NLMISC::CVectorPacked &dest) const { integrateSingle(date, 1.f, 1, indexInLocated, &dest); } diff --git a/code/nel/include/nel/3d/ps_misc.h b/code/nel/include/nel/3d/ps_misc.h index d425f908a..993df7625 100644 --- a/code/nel/include/nel/3d/ps_misc.h +++ b/code/nel/include/nel/3d/ps_misc.h @@ -51,12 +51,12 @@ inline uint ScaleFloatGE(float f, float deltaT, float clampValue, uint numStep) * \param destPos The destination, that will be filled with the given value * \param stride Number of byte between each value to be copied */ -inline NLMISC::CVector *FillBufUsingSubdiv(const NLMISC::CVector &value, +inline NLMISC::CVectorPacked *FillBufUsingSubdiv(const NLMISC::CVector &value, float clampValue, float &startValue, float deltaT, uint &maxNumStep, - NLMISC::CVector *destPos, + NLMISC::CVectorPacked *destPos, uint32 stride ) { @@ -68,7 +68,7 @@ inline NLMISC::CVector 
*FillBufUsingSubdiv(const NLMISC::CVector &value, while (numToFill--) { *destPos = value; - destPos = (NLMISC::CVector *) ( (uint8 *) destPos + stride); + destPos = (NLMISC::CVectorPacked *) ( (uint8 *) destPos + stride); } return destPos; diff --git a/code/nel/include/nel/3d/ps_ribbon.h b/code/nel/include/nel/3d/ps_ribbon.h index ccda62021..9c6350d9c 100644 --- a/code/nel/include/nel/3d/ps_ribbon.h +++ b/code/nel/include/nel/3d/ps_ribbon.h @@ -247,7 +247,7 @@ private: //@} CSmartPtr _Tex; - CPSVector::V _Shape; + CPSVector::V _Shape; float _UFactor, _VFactor; TOrientation _Orientation; diff --git a/code/nel/include/nel/3d/ps_ribbon_base.h b/code/nel/include/nel/3d/ps_ribbon_base.h index ea3443060..793e22142 100644 --- a/code/nel/include/nel/3d/ps_ribbon_base.h +++ b/code/nel/include/nel/3d/ps_ribbon_base.h @@ -120,8 +120,8 @@ protected: * The dest tab must have at least nbSegs + 1 entries. */ void computeRibbon( uint index, - NLMISC::CVector *dest, - uint stride = sizeof(NLMISC::CVector) + NLMISC::CVectorPacked *dest, + uint stride = sizeof(NLMISC::CVectorPacked) ); /// Called each time the time of the system change in order to update the ribbons positions @@ -168,26 +168,26 @@ private: /// Compute the ribbon points using linear interpolation between each sampling point. void computeLinearRibbon( uint index, - NLMISC::CVector *dest, - uint stride = sizeof(NLMISC::CVector) + NLMISC::CVectorPacked *dest, + uint stride = sizeof(NLMISC::CVectorPacked) ); /// The same as compute linear ribbon but try to make its length constant void computeLinearCstSizeRibbon( uint index, - NLMISC::CVector *dest, - uint stride = sizeof(NLMISC::CVector) + NLMISC::CVectorPacked *dest, + uint stride = sizeof(NLMISC::CVectorPacked) ); /// Compute the ribbon points using hermitte splines between each sampling point. 
void computeHermitteRibbon( uint index, - NLMISC::CVector *dest, - uint stride = sizeof(NLMISC::CVector) + NLMISC::CVectorPacked *dest, + uint stride = sizeof(NLMISC::CVectorPacked) ); /** Compute the ribbon points using hermitte splines between each sampling point, * and make a rough approximation to get a constant length */ void computeHermitteCstSizeRibbon( uint index, - NLMISC::CVector *dest, - uint stride = sizeof(NLMISC::CVector) + NLMISC::CVectorPacked *dest, + uint stride = sizeof(NLMISC::CVectorPacked) ); // called by the system when its date has been manually changed virtual void systemDateChanged(); diff --git a/code/nel/include/nel/3d/shadow_poly_receiver.h b/code/nel/include/nel/3d/shadow_poly_receiver.h index 0d97a00ad..ccf379638 100644 --- a/code/nel/include/nel/3d/shadow_poly_receiver.h +++ b/code/nel/include/nel/3d/shadow_poly_receiver.h @@ -83,6 +83,7 @@ public: ); // a vertex + NL_ALIGN_SSE2(16) struct CRGBAVertex // FIXME_SSE2 { #if USE_SSE2 @@ -96,8 +97,7 @@ public: CRGBAVertex(const CVector &v, CRGBA c) : X(v.x), Y(v.y), Z(v.z), Color(c) {} const CVector &asVector() const { - //nlctassert(sizeof(CVector) == sizeof(CRGBAVertex)); - nlctassert(sizeof(CVector) + 4 == sizeof(CRGBAVertex)); + nlctassert(sizeof(CVector) == sizeof(CRGBAVertex)); *reinterpret_cast(this); } #else diff --git a/code/nel/include/nel/misc/matrix.h b/code/nel/include/nel/misc/matrix.h index 7c7d7d666..611ca3882 100644 --- a/code/nel/include/nel/misc/matrix.h +++ b/code/nel/include/nel/misc/matrix.h @@ -53,7 +53,7 @@ class CPlane; * \author Nevrax France * \date 2000 */ -NL_ALIGN(16) +NL_ALIGN_SSE2(16) class CMatrix { public: diff --git a/code/nel/include/nel/misc/types_nl.h b/code/nel/include/nel/misc/types_nl.h index b5aa77e68..b94ffe50f 100644 --- a/code/nel/include/nel/misc/types_nl.h +++ b/code/nel/include/nel/misc/types_nl.h @@ -340,6 +340,9 @@ extern void *operator new(size_t size) throw(std::bad_alloc); extern void *operator new[](size_t size) throw(std::bad_alloc); 
extern void operator delete(void *p) throw(); extern void operator delete[](void *p) throw(); +#define NL_ALIGN_SSE2(nb) NL_ALIGN(nb) +#else +#define NL_ALIGN_SSE2(nb) #endif // CHashMap, CHashSet and CHashMultiMap definitions diff --git a/code/nel/include/nel/misc/vector.h b/code/nel/include/nel/misc/vector.h index ff3db1312..b1e2573d5 100644 --- a/code/nel/include/nel/misc/vector.h +++ b/code/nel/include/nel/misc/vector.h @@ -36,15 +36,15 @@ class IStream; * \author Nevrax France * \date 2000 */ -// NL_ALIGN(16) // FIXME_SSE2 +NL_ALIGN_SSE2(16) class CVector { public: // Attributes. float x,y,z; -/*#ifdef USE_SSE2 // FIXME_SSE2 +#ifdef USE_SSE2 float w; // Padding -#endif*/ +#endif public: // const. /// Null vector (0,0,0). @@ -176,6 +176,11 @@ public: { return CVector(x, y, z); } + + void serial(IStream &f) + { + f.serial(x,y,z); + } }; // blend (faster version than the generic version found in algo.h) diff --git a/code/nel/src/3d/computed_string.cpp b/code/nel/src/3d/computed_string.cpp index 1c8962f5e..ff09c6df8 100644 --- a/code/nel/src/3d/computed_string.cpp +++ b/code/nel/src/3d/computed_string.cpp @@ -30,6 +30,7 @@ #include "nel/misc/fast_mem.h" using namespace std; +using NLMISC::CVectorPacked; namespace NL3D { @@ -270,9 +271,9 @@ void CComputedString::render2DClip (IDriver& driver, CRenderStringBuffer &rdrBuf // copy and translate pos CHECK_VBA_RANGE(srcvba, srcPtr, Vertices.getVertexSize()); CHECK_VBA_RANGE(dstvba, dstPtr, rdrBuffer.Vertices.getVertexSize()) - ((CVector*)dstPtr)->x= x + ((CVector*)srcPtr)->x; - ((CVector*)dstPtr)->y= ((CVector*)srcPtr)->y; - ((CVector*)dstPtr)->z= z + ((CVector*)srcPtr)->z; + ((CVectorPacked*)dstPtr)->x= x + ((CVectorPacked*)srcPtr)->x; + ((CVectorPacked*)dstPtr)->y= ((CVectorPacked*)srcPtr)->y; + ((CVectorPacked*)dstPtr)->z= z + ((CVectorPacked*)srcPtr)->z; // uv *((CUV*)(dstPtr+ofsDstUV))= *((CUV*)(srcPtr+ofsSrcUV)); // color @@ -298,12 +299,12 @@ void CComputedString::render2DClip (IDriver& driver, 
CRenderStringBuffer &rdrBuf uint numVerts= nNumQuadSrc*4; // clip into VerticesClipped - CVector *pIniPos0 = (CVector*)srcPtr; - CVector *pIniPos2 = (CVector*)(((uint8*)pIniPos0) + srcSize*2); - CVector *pClipPos0 = (CVector*)dstPtr; - CVector *pClipPos1 = (CVector*)(((uint8*)pClipPos0) + dstSize); - CVector *pClipPos2 = (CVector*)(((uint8*)pClipPos1) + dstSize); - CVector *pClipPos3 = (CVector*)(((uint8*)pClipPos2) + dstSize); + CVectorPacked *pIniPos0 = (CVectorPacked*)srcPtr; + CVectorPacked *pIniPos2 = (CVectorPacked*)(((uint8*)pIniPos0) + srcSize*2); + CVectorPacked *pClipPos0 = (CVectorPacked*)dstPtr; + CVectorPacked *pClipPos1 = (CVectorPacked*)(((uint8*)pClipPos0) + dstSize); + CVectorPacked *pClipPos2 = (CVectorPacked*)(((uint8*)pClipPos1) + dstSize); + CVectorPacked *pClipPos3 = (CVectorPacked*)(((uint8*)pClipPos2) + dstSize); CUV *pClipUV0 = (CUV*)(dstPtr + ofsDstUV ); CUV *pClipUV1 = (CUV*)(((uint8*)pClipUV0) + dstSize); CUV *pClipUV2 = (CUV*)(((uint8*)pClipUV1) + dstSize); @@ -336,28 +337,28 @@ void CComputedString::render2DClip (IDriver& driver, CRenderStringBuffer &rdrBuf // copy with no clip // v0 - *((CVector*) (dstPtr + dstSize*0))= *((CVector*) (srcPtr + srcSize*0)); + *((CVectorPacked*) (dstPtr + dstSize*0))= *((CVectorPacked*) (srcPtr + srcSize*0)); *((CUV*) (dstPtr + dstSize*0 + ofsDstUV))= *((CUV*)(srcPtr + srcSize*0 + ofsSrcUV)); if (vtype == CVertexBuffer::TRGBA) *((CRGBA*) (dstPtr + dstSize*0 + ofsDstColor))= mCol; else *((CBGRA*) (dstPtr + dstSize*0 + ofsDstColor))= mCol; // v1 - *((CVector*) (dstPtr + dstSize*1))= *((CVector*) (srcPtr + srcSize*1)); + *((CVectorPacked*) (dstPtr + dstSize*1))= *((CVectorPacked*) (srcPtr + srcSize*1)); *((CUV*) (dstPtr + dstSize*1 + ofsDstUV))= *((CUV*)(srcPtr + srcSize*1 + ofsSrcUV)); if (vtype == CVertexBuffer::TRGBA) *((CRGBA*) (dstPtr + dstSize*1 + ofsDstColor))= mCol; else *((CBGRA*) (dstPtr + dstSize*1 + ofsDstColor))= mCol; // v2 - *((CVector*) (dstPtr + dstSize*2))= *((CVector*) (srcPtr + 
srcSize*2)); + *((CVectorPacked*) (dstPtr + dstSize*2))= *((CVectorPacked*) (srcPtr + srcSize*2)); *((CUV*) (dstPtr + dstSize*2 + ofsDstUV))= *((CUV*)(srcPtr + srcSize*2 + ofsSrcUV)); if (vtype == CVertexBuffer::TRGBA) *((CRGBA*) (dstPtr + dstSize*2 + ofsDstColor))= mCol; else *((CBGRA*) (dstPtr + dstSize*2 + ofsDstColor))= mCol; // v3 - *((CVector*) (dstPtr + dstSize*3))= *((CVector*) (srcPtr + srcSize*3)); + *((CVectorPacked*) (dstPtr + dstSize*3))= *((CVectorPacked*) (srcPtr + srcSize*3)); *((CUV*) (dstPtr + dstSize*3 + ofsDstUV))= *((CUV*)(srcPtr + srcSize*3 + ofsSrcUV)); if (vtype == CVertexBuffer::TRGBA) *((CRGBA*) (dstPtr + dstSize*3 + ofsDstColor))= mCol; @@ -410,10 +411,10 @@ void CComputedString::render2DClip (IDriver& driver, CRenderStringBuffer &rdrBuf // next quad out ++nNumQuadClipped; - pClipPos0 = (CVector*)(((uint8*)pClipPos0) + dstSize*4); - pClipPos1 = (CVector*)(((uint8*)pClipPos0) + dstSize); - pClipPos2 = (CVector*)(((uint8*)pClipPos1) + dstSize); - pClipPos3 = (CVector*)(((uint8*)pClipPos2) + dstSize); + pClipPos0 = (CVectorPacked*)(((uint8*)pClipPos0) + dstSize*4); + pClipPos1 = (CVectorPacked*)(((uint8*)pClipPos0) + dstSize); + pClipPos2 = (CVectorPacked*)(((uint8*)pClipPos1) + dstSize); + pClipPos3 = (CVectorPacked*)(((uint8*)pClipPos2) + dstSize); pClipUV0 = (CUV*)( ((uint8*)pClipUV0) + dstSize*4 ); pClipUV1 = (CUV*)(((uint8*)pClipUV0) + dstSize); pClipUV2 = (CUV*)(((uint8*)pClipUV1) + dstSize); @@ -421,8 +422,8 @@ void CComputedString::render2DClip (IDriver& driver, CRenderStringBuffer &rdrBuf dstPtr+= 4*dstSize; } // next quad in - pIniPos0 = (CVector*)(((uint8*)pIniPos0) + srcSize*4); - pIniPos2 = (CVector*)(((uint8*)pIniPos0) + srcSize*2); + pIniPos0 = (CVectorPacked*)(((uint8*)pIniPos0) + srcSize*4); + pIniPos2 = (CVectorPacked*)(((uint8*)pIniPos0) + srcSize*2); srcPtr+= 4*srcSize; } @@ -506,8 +507,8 @@ void CComputedString::render2DUnProjected (IDriver& driver, CRenderStringBuffer // copy and translate pos CHECK_VBA_RANGE(dstvba, 
dstPtr, Vertices.getVertexSize()); CHECK_VBA_RANGE(srcvba, srcPtr, rdrBuffer.Vertices.getVertexSize()); - ((CVector*)dstPtr)->x= x + ((CVector*)srcPtr)->x; - ((CVector*)dstPtr)->z= z + ((CVector*)srcPtr)->z; + ((CVectorPacked*)dstPtr)->x= x + ((CVectorPacked*)srcPtr)->x; + ((CVectorPacked*)dstPtr)->z= z + ((CVectorPacked*)srcPtr)->z; // uv *((CUV*)(dstPtr+ofsDstUV))= *((CUV*)(srcPtr+ofsSrcUV)); @@ -533,12 +534,12 @@ void CComputedString::render2DUnProjected (IDriver& driver, CRenderStringBuffer uint numVerts= nNumQuadSrc*4; // clip into VerticesClipped - CVector *pIniPos0 = (CVector*)srcPtr; - CVector *pIniPos2 = (CVector*)(((uint8*)pIniPos0) + srcSize*2); - CVector *pClipPos0 = (CVector*)dstPtr; - CVector *pClipPos1 = (CVector*)(((uint8*)pClipPos0) + dstSize); - CVector *pClipPos2 = (CVector*)(((uint8*)pClipPos1) + dstSize); - CVector *pClipPos3 = (CVector*)(((uint8*)pClipPos2) + dstSize); + CVectorPacked *pIniPos0 = (CVectorPacked*)srcPtr; + CVectorPacked *pIniPos2 = (CVectorPacked*)(((uint8*)pIniPos0) + srcSize*2); + CVectorPacked *pClipPos0 = (CVectorPacked*)dstPtr; + CVectorPacked *pClipPos1 = (CVectorPacked*)(((uint8*)pClipPos0) + dstSize); + CVectorPacked *pClipPos2 = (CVectorPacked*)(((uint8*)pClipPos1) + dstSize); + CVectorPacked *pClipPos3 = (CVectorPacked*)(((uint8*)pClipPos2) + dstSize); CUV *pClipUV0 = (CUV*)(dstPtr + ofsDstUV ); CUV *pClipUV1 = (CUV*)(((uint8*)pClipUV0) + dstSize); CUV *pClipUV2 = (CUV*)(((uint8*)pClipUV1) + dstSize); @@ -555,28 +556,28 @@ void CComputedString::render2DUnProjected (IDriver& driver, CRenderStringBuffer { // copy with no clip // v0 - *((CVector*) (dstPtr + dstSize*0))= *((CVector*) (srcPtr + srcSize*0)); + *((CVectorPacked*) (dstPtr + dstSize*0))= *((CVectorPacked*) (srcPtr + srcSize*0)); *((CUV*) (dstPtr + dstSize*0 + ofsDstUV))= *((CUV*)(srcPtr + srcSize*0 + ofsSrcUV)); if (vtype == CVertexBuffer::TRGBA) *((CRGBA*) (dstPtr + dstSize*0 + ofsDstColor))= Color; else *((CBGRA*) (dstPtr + dstSize*0 + ofsDstColor))= Color; 
// v1 - *((CVector*) (dstPtr + dstSize*1))= *((CVector*) (srcPtr + srcSize*1)); + *((CVectorPacked*) (dstPtr + dstSize*1))= *((CVectorPacked*) (srcPtr + srcSize*1)); *((CUV*) (dstPtr + dstSize*1 + ofsDstUV))= *((CUV*)(srcPtr + srcSize*1 + ofsSrcUV)); if (vtype == CVertexBuffer::TRGBA) *((CRGBA*) (dstPtr + dstSize*1 + ofsDstColor))= Color; else *((CBGRA*) (dstPtr + dstSize*1 + ofsDstColor))= Color; // v2 - *((CVector*) (dstPtr + dstSize*2))= *((CVector*) (srcPtr + srcSize*2)); + *((CVectorPacked*) (dstPtr + dstSize*2))= *((CVectorPacked*) (srcPtr + srcSize*2)); *((CUV*) (dstPtr + dstSize*2 + ofsDstUV))= *((CUV*)(srcPtr + srcSize*2 + ofsSrcUV)); if (vtype == CVertexBuffer::TRGBA) *((CRGBA*) (dstPtr + dstSize*2 + ofsDstColor))= Color; else *((CBGRA*) (dstPtr + dstSize*2 + ofsDstColor))= Color; // v3 - *((CVector*) (dstPtr + dstSize*3))= *((CVector*) (srcPtr + srcSize*3)); + *((CVectorPacked*) (dstPtr + dstSize*3))= *((CVectorPacked*) (srcPtr + srcSize*3)); *((CUV*) (dstPtr + dstSize*3 + ofsDstUV))= *((CUV*)(srcPtr + srcSize*3 + ofsSrcUV)); if (vtype == CVertexBuffer::TRGBA) *((CRGBA*) (dstPtr + dstSize*3 + ofsDstColor))= Color; @@ -630,10 +631,10 @@ void CComputedString::render2DUnProjected (IDriver& driver, CRenderStringBuffer // next quad out ++nNumQuadClipped; - pClipPos0 = (CVector*)(((uint8*)pClipPos0) + dstSize*4); - pClipPos1 = (CVector*)(((uint8*)pClipPos0) + dstSize); - pClipPos2 = (CVector*)(((uint8*)pClipPos1) + dstSize); - pClipPos3 = (CVector*)(((uint8*)pClipPos2) + dstSize); + pClipPos0 = (CVectorPacked*)(((uint8*)pClipPos0) + dstSize*4); + pClipPos1 = (CVectorPacked*)(((uint8*)pClipPos0) + dstSize); + pClipPos2 = (CVectorPacked*)(((uint8*)pClipPos1) + dstSize); + pClipPos3 = (CVectorPacked*)(((uint8*)pClipPos2) + dstSize); pClipUV0 = (CUV*)( ((uint8*)pClipUV0) + dstSize*4 ); pClipUV1 = (CUV*)(((uint8*)pClipUV0) + dstSize); pClipUV2 = (CUV*)(((uint8*)pClipUV1) + dstSize); @@ -641,8 +642,8 @@ void CComputedString::render2DUnProjected (IDriver& driver, 
CRenderStringBuffer dstPtr+= 4*dstSize; } // next quad in - pIniPos0 = (CVector*)(((uint8*)pIniPos0) + srcSize*4); - pIniPos2 = (CVector*)(((uint8*)pIniPos0) + srcSize*2); + pIniPos0 = (CVectorPacked*)(((uint8*)pIniPos0) + srcSize*4); + pIniPos2 = (CVectorPacked*)(((uint8*)pIniPos0) + srcSize*2); srcPtr+= 4*srcSize; } @@ -657,13 +658,13 @@ void CComputedString::render2DUnProjected (IDriver& driver, CRenderStringBuffer { // preset unprojection CVector tmp; - tmp.x = ((CVector*)dstPtrBackup)->x * OOW; - tmp.y = ((CVector*)dstPtrBackup)->z * OOH; + tmp.x = ((CVectorPacked*)dstPtrBackup)->x * OOW; + tmp.y = ((CVectorPacked*)dstPtrBackup)->z * OOH; tmp.z = depth; // mul by user scale matrix tmp= scaleMatrix * tmp; // Unproject it - *((CVector*)dstPtrBackup) = frustum.unProjectZ(tmp); + *((CVectorPacked*)dstPtrBackup) = frustum.unProjectZ(tmp); dstPtrBackup += dstSize; } diff --git a/code/nel/src/3d/driver_user.cpp b/code/nel/src/3d/driver_user.cpp index e5d814755..cdfce0ce0 100644 --- a/code/nel/src/3d/driver_user.cpp +++ b/code/nel/src/3d/driver_user.cpp @@ -920,29 +920,29 @@ void CDriverUser::drawQuads(const NLMISC::CQuadColorUV *quads, uint32 nbQuads, for (uint32 i = 0; i < nbQuads; ++i) { const NLMISC::CQuadColorUV &qcuv = quads[i]; - CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVector)) - *(CVector*)(dstPtr+0)= qcuv.V0; + CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVectorPacked)) + *(CVectorPacked*)(dstPtr+0)= qcuv.V0; CHECK_VBA_RANGE(vba, dstPtr+uvOfs, sizeof(CUV)) *(CUV*)(dstPtr+uvOfs)= qcuv.Uv0; CHECK_VBA_RANGE(vba, dstPtr+colorOfs, sizeof(CRGBA)) *(CRGBA*)(dstPtr+colorOfs)= qcuv.Color0; dstPtr+= vSize; - CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVector)) - *(CVector*)(dstPtr+0)= qcuv.V1; + CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVectorPacked)) + *(CVectorPacked*)(dstPtr+0)= qcuv.V1; CHECK_VBA_RANGE(vba, dstPtr+uvOfs, sizeof(CUV)) *(CUV*)(dstPtr+uvOfs)= qcuv.Uv1; CHECK_VBA_RANGE(vba, dstPtr+colorOfs, sizeof(CRGBA)) *(CRGBA*)(dstPtr+colorOfs)= qcuv.Color1; dstPtr+= vSize; - 
CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVector)) - *(CVector*)(dstPtr+0)= qcuv.V2; + CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVectorPacked)) + *(CVectorPacked*)(dstPtr+0)= qcuv.V2; CHECK_VBA_RANGE(vba, dstPtr+uvOfs, sizeof(CUV)) *(CUV*)(dstPtr+uvOfs)= qcuv.Uv2; CHECK_VBA_RANGE(vba, dstPtr+colorOfs, sizeof(CRGBA)) *(CRGBA*)(dstPtr+colorOfs)= qcuv.Color2; dstPtr+= vSize; - CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVector)) - *(CVector*)(dstPtr+0)= qcuv.V3; + CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVectorPacked)) + *(CVectorPacked*)(dstPtr+0)= qcuv.V3; CHECK_VBA_RANGE(vba, dstPtr+uvOfs, sizeof(CUV)) *(CUV*)(dstPtr+uvOfs)= qcuv.Uv3; CHECK_VBA_RANGE(vba, dstPtr+colorOfs, sizeof(CRGBA)) @@ -955,29 +955,29 @@ void CDriverUser::drawQuads(const NLMISC::CQuadColorUV *quads, uint32 nbQuads, for (uint32 i = 0; i < nbQuads; ++i) { const NLMISC::CQuadColorUV &qcuv = quads[i]; - CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVector)) - *(CVector*)(dstPtr+0)= qcuv.V0; + CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVectorPacked)) + *(CVectorPacked*)(dstPtr+0)= qcuv.V0; CHECK_VBA_RANGE(vba, dstPtr+uvOfs, sizeof(CUV)) *(CUV*)(dstPtr+uvOfs)= qcuv.Uv0; CHECK_VBA_RANGE(vba, dstPtr+colorOfs, sizeof(CRGBA)) *(CBGRA*)(dstPtr+colorOfs)= qcuv.Color0; dstPtr+= vSize; - CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVector)) - *(CVector*)(dstPtr+0)= qcuv.V1; + CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVectorPacked)) + *(CVectorPacked*)(dstPtr+0)= qcuv.V1; CHECK_VBA_RANGE(vba, dstPtr+uvOfs, sizeof(CUV)) *(CUV*)(dstPtr+uvOfs)= qcuv.Uv1; CHECK_VBA_RANGE(vba, dstPtr+colorOfs, sizeof(CRGBA)) *(CBGRA*)(dstPtr+colorOfs)= qcuv.Color1; dstPtr+= vSize; - CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVector)) - *(CVector*)(dstPtr+0)= qcuv.V2; + CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVectorPacked)) + *(CVectorPacked*)(dstPtr+0)= qcuv.V2; CHECK_VBA_RANGE(vba, dstPtr+uvOfs, sizeof(CUV)) *(CUV*)(dstPtr+uvOfs)= qcuv.Uv2; CHECK_VBA_RANGE(vba, dstPtr+colorOfs, sizeof(CBGRA)) *(CBGRA*)(dstPtr+colorOfs)= qcuv.Color2; dstPtr+= vSize; - 
CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVector)) - *(CVector*)(dstPtr+0)= qcuv.V3; + CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVectorPacked)) + *(CVectorPacked*)(dstPtr+0)= qcuv.V3; CHECK_VBA_RANGE(vba, dstPtr+uvOfs, sizeof(CUV)) *(CUV*)(dstPtr+uvOfs)= qcuv.Uv3; CHECK_VBA_RANGE(vba, dstPtr+colorOfs, sizeof(CRGBA)) @@ -1014,8 +1014,8 @@ void CDriverUser::drawQuads(const NLMISC::CQuadColorUV2 *quads, uint32 nbQuads for (uint32 i = 0; i < nbQuads; ++i) { const NLMISC::CQuadColorUV2 &qcuv = quads[i]; - CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVector)) - *(CVector*)(dstPtr+0)= qcuv.V0; + CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVectorPacked)) + *(CVectorPacked*)(dstPtr+0)= qcuv.V0; CHECK_VBA_RANGE(vba, dstPtr+uvOfs0, sizeof(CUV)) *(CUV*)(dstPtr+uvOfs0)= qcuv.Uv0; CHECK_VBA_RANGE(vba, dstPtr+uvOfs1, sizeof(CUV)) @@ -1023,8 +1023,8 @@ void CDriverUser::drawQuads(const NLMISC::CQuadColorUV2 *quads, uint32 nbQuads CHECK_VBA_RANGE(vba, dstPtr+colorOfs, sizeof(CRGBA)) *(CRGBA*)(dstPtr+colorOfs)= qcuv.Color0; dstPtr+= vSize; - CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVector)) - *(CVector*)(dstPtr+0)= qcuv.V1; + CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVectorPacked)) + *(CVectorPacked*)(dstPtr+0)= qcuv.V1; CHECK_VBA_RANGE(vba, dstPtr+uvOfs0, sizeof(CUV)) *(CUV*)(dstPtr+uvOfs0)= qcuv.Uv1; CHECK_VBA_RANGE(vba, dstPtr+uvOfs1, sizeof(CUV)) @@ -1032,8 +1032,8 @@ void CDriverUser::drawQuads(const NLMISC::CQuadColorUV2 *quads, uint32 nbQuads CHECK_VBA_RANGE(vba, dstPtr+colorOfs, sizeof(CRGBA)) *(CRGBA*)(dstPtr+colorOfs)= qcuv.Color1; dstPtr+= vSize; - CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVector)) - *(CVector*)(dstPtr+0)= qcuv.V2; + CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVectorPacked)) + *(CVectorPacked*)(dstPtr+0)= qcuv.V2; CHECK_VBA_RANGE(vba, dstPtr+uvOfs0, sizeof(CUV)) *(CUV*)(dstPtr+uvOfs0)= qcuv.Uv2; CHECK_VBA_RANGE(vba, dstPtr+uvOfs1, sizeof(CUV)) @@ -1041,8 +1041,8 @@ void CDriverUser::drawQuads(const NLMISC::CQuadColorUV2 *quads, uint32 nbQuads CHECK_VBA_RANGE(vba, dstPtr+colorOfs, 
sizeof(CRGBA)) *(CRGBA*)(dstPtr+colorOfs)= qcuv.Color2; dstPtr+= vSize; - CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVector)) - *(CVector*)(dstPtr+0)= qcuv.V3; + CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVectorPacked)) + *(CVectorPacked*)(dstPtr+0)= qcuv.V3; CHECK_VBA_RANGE(vba, dstPtr+uvOfs0, sizeof(CUV)) *(CUV*)(dstPtr+uvOfs0)= qcuv.Uv3; CHECK_VBA_RANGE(vba, dstPtr+uvOfs1, sizeof(CUV)) @@ -1057,8 +1057,8 @@ void CDriverUser::drawQuads(const NLMISC::CQuadColorUV2 *quads, uint32 nbQuads for (uint32 i = 0; i < nbQuads; ++i) { const NLMISC::CQuadColorUV2 &qcuv = quads[i]; - CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVector)) - *(CVector*)(dstPtr+0)= qcuv.V0; + CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVectorPacked)) + *(CVectorPacked*)(dstPtr+0)= qcuv.V0; CHECK_VBA_RANGE(vba, dstPtr+uvOfs0, sizeof(CUV)) *(CUV*)(dstPtr+uvOfs0)= qcuv.Uv0; CHECK_VBA_RANGE(vba, dstPtr+uvOfs1, sizeof(CUV)) @@ -1066,8 +1066,8 @@ void CDriverUser::drawQuads(const NLMISC::CQuadColorUV2 *quads, uint32 nbQuads CHECK_VBA_RANGE(vba, dstPtr+colorOfs, sizeof(CRGBA)) *(CBGRA*)(dstPtr+colorOfs)= qcuv.Color0; dstPtr+= vSize; - CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVector)) - *(CVector*)(dstPtr+0)= qcuv.V1; + CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVectorPacked)) + *(CVectorPacked*)(dstPtr+0)= qcuv.V1; CHECK_VBA_RANGE(vba, dstPtr+uvOfs0, sizeof(CUV)) *(CUV*)(dstPtr+uvOfs0)= qcuv.Uv1; CHECK_VBA_RANGE(vba, dstPtr+uvOfs1, sizeof(CUV)) @@ -1075,8 +1075,8 @@ void CDriverUser::drawQuads(const NLMISC::CQuadColorUV2 *quads, uint32 nbQuads CHECK_VBA_RANGE(vba, dstPtr+colorOfs, sizeof(CRGBA)) *(CBGRA*)(dstPtr+colorOfs)= qcuv.Color1; dstPtr+= vSize; - CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVector)) - *(CVector*)(dstPtr+0)= qcuv.V2; + CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVectorPacked)) + *(CVectorPacked*)(dstPtr+0)= qcuv.V2; CHECK_VBA_RANGE(vba, dstPtr+uvOfs0, sizeof(CUV)) *(CUV*)(dstPtr+uvOfs0)= qcuv.Uv2; CHECK_VBA_RANGE(vba, dstPtr+uvOfs1, sizeof(CUV)) @@ -1084,8 +1084,8 @@ void CDriverUser::drawQuads(const 
NLMISC::CQuadColorUV2 *quads, uint32 nbQuads CHECK_VBA_RANGE(vba, dstPtr+colorOfs, sizeof(CRGBA)) *(CBGRA*)(dstPtr+colorOfs)= qcuv.Color2; dstPtr+= vSize; - CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVector)) - *(CVector*)(dstPtr+0)= qcuv.V3; + CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVectorPacked)) + *(CVectorPacked*)(dstPtr+0)= qcuv.V3; CHECK_VBA_RANGE(vba, dstPtr+uvOfs0, sizeof(CUV)) *(CUV*)(dstPtr+uvOfs0)= qcuv.Uv3; CHECK_VBA_RANGE(vba, dstPtr+uvOfs1, sizeof(CUV)) @@ -1127,24 +1127,24 @@ void CDriverUser::drawTriangles(const NLMISC::CTriangleColorUV *tris, uint32 nbT do { // - CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVector)) - *(CVector*)(dstPtr+0)= tris->V0; + CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVectorPacked)) + *(CVectorPacked*)(dstPtr+0)= tris->V0; CHECK_VBA_RANGE(vba, dstPtr+uvOfs0, sizeof(CUV)) *(CUV*)(dstPtr+uvOfs0)= tris->Uv0; CHECK_VBA_RANGE(vba, dstPtr+colorOfs, sizeof(CRGBA)) *(CRGBA*)(dstPtr+colorOfs)= tris->Color0; dstPtr+= vSize; // - CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVector)) - *(CVector*)(dstPtr+0)= tris->V1; + CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVectorPacked)) + *(CVectorPacked*)(dstPtr+0)= tris->V1; CHECK_VBA_RANGE(vba, dstPtr+uvOfs0, sizeof(CUV)) *(CUV*)(dstPtr+uvOfs0)= tris->Uv1; CHECK_VBA_RANGE(vba, dstPtr+colorOfs, sizeof(CRGBA)) *(CRGBA*)(dstPtr+colorOfs)= tris->Color1; dstPtr+= vSize; // - CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVector)) - *(CVector*)(dstPtr+0)= tris->V2; + CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVectorPacked)) + *(CVectorPacked*)(dstPtr+0)= tris->V2; CHECK_VBA_RANGE(vba, dstPtr+uvOfs0, sizeof(CUV)) *(CUV*)(dstPtr+uvOfs0)= tris->Uv2; CHECK_VBA_RANGE(vba, dstPtr+colorOfs, sizeof(CRGBA)) @@ -1159,24 +1159,24 @@ void CDriverUser::drawTriangles(const NLMISC::CTriangleColorUV *tris, uint32 nbT do { // - CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVector)) - *(CVector*)(dstPtr+0)= tris->V0; + CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVectorPacked)) + *(CVectorPacked*)(dstPtr+0)= tris->V0; CHECK_VBA_RANGE(vba, dstPtr+uvOfs0, 
sizeof(CUV)) *(CUV*)(dstPtr+uvOfs0)= tris->Uv0; CHECK_VBA_RANGE(vba, dstPtr+colorOfs, sizeof(CRGBA)) *(CBGRA*)(dstPtr+colorOfs)= tris->Color0; dstPtr+= vSize; // - CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVector)) - *(CVector*)(dstPtr+0)= tris->V1; + CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVectorPacked)) + *(CVectorPacked*)(dstPtr+0)= tris->V1; CHECK_VBA_RANGE(vba, dstPtr+uvOfs0, sizeof(CUV)) *(CUV*)(dstPtr+uvOfs0)= tris->Uv1; CHECK_VBA_RANGE(vba, dstPtr+colorOfs, sizeof(CRGBA)) *(CBGRA*)(dstPtr+colorOfs)= tris->Color1; dstPtr+= vSize; // - CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVector)) - *(CVector*)(dstPtr+0)= tris->V2; + CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVectorPacked)) + *(CVectorPacked*)(dstPtr+0)= tris->V2; CHECK_VBA_RANGE(vba, dstPtr+uvOfs0, sizeof(CUV)) *(CUV*)(dstPtr+uvOfs0)= tris->Uv2; CHECK_VBA_RANGE(vba, dstPtr+colorOfs, sizeof(CRGBA)) diff --git a/code/nel/src/3d/mesh.cpp b/code/nel/src/3d/mesh.cpp index a533632c5..dfaed0ce4 100644 --- a/code/nel/src/3d/mesh.cpp +++ b/code/nel/src/3d/mesh.cpp @@ -1870,15 +1870,15 @@ void CMeshGeom::applySkin(CSkeletonModel *skeleton) nlassert(psPal->MatrixId[3]=SkinWithNormal) - computeSoftwareVectorSkinning(matrixes, srcNormal, psPal, (float*)srcWgt, (CVector*)dstNormal); + computeSoftwareVectorSkinning(matrixes, srcNormal, psPal, (float*)srcWgt, (CVectorPacked*)dstNormal); // compute tg part. if(skinType>=SkinWithTgSpace) - computeSoftwareVectorSkinning(matrixes, srcTgSpace, psPal, (float*)srcWgt, (CVector*)dstTgSpace); + computeSoftwareVectorSkinning(matrixes, srcTgSpace, psPal, (float*)srcWgt, (CVectorPacked*)dstTgSpace); } // inc flags. 
@@ -1938,42 +1938,48 @@ void CMeshGeom::flagSkinVerticesForMatrixBlock(uint8 *skinFlags, CMatrixBlock &m // *************************************************************************** -void CMeshGeom::computeSoftwarePointSkinning(CMatrix3x4 *matrixes, CVector *srcVec, CPaletteSkin *srcPal, float *srcWgt, CVector *pDst) +void CMeshGeom::computeSoftwarePointSkinning(CMatrix3x4 *matrixes, CVector *srcVec, CPaletteSkin *srcPal, float *srcWgt, CVectorPacked *pDst) { CMatrix3x4 *pMat; + CVector temp; // 0th matrix influence. pMat= matrixes + srcPal->MatrixId[0]; - pMat->mulSetPoint(*srcVec, srcWgt[0], *pDst); + pMat->mulSetPoint(*srcVec, srcWgt[0], temp); // 1th matrix influence. pMat= matrixes + srcPal->MatrixId[1]; - pMat->mulAddPoint(*srcVec, srcWgt[1], *pDst); + pMat->mulAddPoint(*srcVec, srcWgt[1], temp); // 2th matrix influence. pMat= matrixes + srcPal->MatrixId[2]; - pMat->mulAddPoint(*srcVec, srcWgt[2], *pDst); + pMat->mulAddPoint(*srcVec, srcWgt[2], temp); // 3th matrix influence. pMat= matrixes + srcPal->MatrixId[3]; - pMat->mulAddPoint(*srcVec, srcWgt[3], *pDst); + pMat->mulAddPoint(*srcVec, srcWgt[3], temp); + + *pDst = temp; } // *************************************************************************** -void CMeshGeom::computeSoftwareVectorSkinning(CMatrix3x4 *matrixes, CVector *srcVec, CPaletteSkin *srcPal, float *srcWgt, CVector *pDst) +void CMeshGeom::computeSoftwareVectorSkinning(CMatrix3x4 *matrixes, CVector *srcVec, CPaletteSkin *srcPal, float *srcWgt, CVectorPacked *pDst) { CMatrix3x4 *pMat; + CVector temp; // 0th matrix influence. pMat= matrixes + srcPal->MatrixId[0]; - pMat->mulSetVector(*srcVec, srcWgt[0], *pDst); + pMat->mulSetVector(*srcVec, srcWgt[0], temp); // 1th matrix influence. pMat= matrixes + srcPal->MatrixId[1]; - pMat->mulAddVector(*srcVec, srcWgt[1], *pDst); + pMat->mulAddVector(*srcVec, srcWgt[1], temp); // 2th matrix influence. 
pMat= matrixes + srcPal->MatrixId[2]; - pMat->mulAddVector(*srcVec, srcWgt[2], *pDst); + pMat->mulAddVector(*srcVec, srcWgt[2], temp); // 3th matrix influence. pMat= matrixes + srcPal->MatrixId[3]; - pMat->mulAddVector(*srcVec, srcWgt[3], *pDst); + pMat->mulAddVector(*srcVec, srcWgt[3], temp); + + *pDst = temp; } diff --git a/code/nel/src/3d/mesh_morpher.cpp b/code/nel/src/3d/mesh_morpher.cpp index bfca4b7c7..14224af2b 100644 --- a/code/nel/src/3d/mesh_morpher.cpp +++ b/code/nel/src/3d/mesh_morpher.cpp @@ -177,7 +177,7 @@ void CMeshMorpher::update (std::vector *pBSFactor) if (_UseTgSpace) if (rBS.deltaTgSpace.size() > 0) { - CVector *pV = (CVector*)dstvba.getTexCoordPointer (vp, tgSpaceStage); + CVectorPacked *pV = (CVectorPacked*)dstvba.getTexCoordPointer (vp, tgSpaceStage); *pV += rBS.deltaTgSpace[j] * rFactor; } diff --git a/code/nel/src/3d/packed_zone.cpp b/code/nel/src/3d/packed_zone.cpp index 944e9c8d1..5b522a656 100644 --- a/code/nel/src/3d/packed_zone.cpp +++ b/code/nel/src/3d/packed_zone.cpp @@ -418,7 +418,7 @@ void serialPackedVector12(std::vector &v, NLMISC::IStream &f) } // some function to ease writing of some primitives into a vertex buffer -static inline void pushVBLine2D(NLMISC::CVector *&dest, const NLMISC::CVector &v0, const NLMISC::CVector &v1) +static inline void pushVBLine2D(NLMISC::CVectorPacked *&dest, const NLMISC::CVector &v0, const NLMISC::CVector &v1) { dest->x = v0.x; dest->y = v0.y; @@ -434,7 +434,7 @@ static inline void pushVBLine2D(NLMISC::CVector *&dest, const NLMISC::CVector &v ++ dest; } -static inline void pushVBTri2D(NLMISC::CVector *&dest, const NLMISC::CTriangle &tri) +static inline void pushVBTri2D(NLMISC::CVectorPacked *&dest, const NLMISC::CTriangle &tri) { dest->x = tri.V0.x; dest->y = tri.V0.y; @@ -451,7 +451,7 @@ static inline void pushVBTri2D(NLMISC::CVector *&dest, const NLMISC::CTriangle & } -static inline void pushVBQuad2D(NLMISC::CVector *&dest, const NLMISC::CQuad &quad) +static inline void 
pushVBQuad2D(NLMISC::CVectorPacked *&dest, const NLMISC::CQuad &quad) { dest->x = quad.V0.x; dest->y = quad.V0.y; @@ -471,7 +471,7 @@ static inline void pushVBQuad2D(NLMISC::CVector *&dest, const NLMISC::CQuad &qua ++ dest; } -static inline void pushVBQuad(NLMISC::CVector *&dest, const NLMISC::CQuad &quad) +static inline void pushVBQuad(NLMISC::CVectorPacked *&dest, const NLMISC::CQuad &quad) { *dest++ = quad.V0; *dest++ = quad.V1; diff --git a/code/nel/src/3d/patch_render.cpp b/code/nel/src/3d/patch_render.cpp index bc74648d3..135b9fdb3 100644 --- a/code/nel/src/3d/patch_render.cpp +++ b/code/nel/src/3d/patch_render.cpp @@ -1026,8 +1026,8 @@ inline void CPatch::fillFar0VertexVB(CTessFarVertex *pVert) if( !CLandscapeGlobals::VertexProgramEnabled ) { // Set Pos. Set it local to the current center of landscape - CHECK_VBA_RANGE(CLandscapeGlobals::CurrentFar0VBInfo.Accessor, CurVBPtr, sizeof(CVector)); - *(CVector*)CurVBPtr= pVert->Src->Pos - CLandscapeGlobals::PZBModelPosition; + CHECK_VBA_RANGE(CLandscapeGlobals::CurrentFar0VBInfo.Accessor, CurVBPtr, sizeof(CVectorPacked)); + *(CVectorPacked*)CurVBPtr= pVert->Src->Pos - CLandscapeGlobals::PZBModelPosition; // Set Uvs. CHECK_VBA_RANGE(CLandscapeGlobals::CurrentFar0VBInfo.Accessor, CurVBPtr + CLandscapeGlobals::CurrentFar0VBInfo.TexCoordOff0, sizeof(CUV)); *(CUV*)(CurVBPtr + CLandscapeGlobals::CurrentFar0VBInfo.TexCoordOff0)= uv; @@ -1038,8 +1038,8 @@ inline void CPatch::fillFar0VertexVB(CTessFarVertex *pVert) { // Else must setup Vertex program inputs // v[0]== StartPos. 
- CHECK_VBA_RANGE(CLandscapeGlobals::CurrentFar0VBInfo.Accessor, CurVBPtr, sizeof(CVector)); - *(CVector*)CurVBPtr= pVert->Src->StartPos; + CHECK_VBA_RANGE(CLandscapeGlobals::CurrentFar0VBInfo.Accessor, CurVBPtr, sizeof(CVectorPacked)); + *(CVectorPacked*)CurVBPtr= pVert->Src->StartPos; // v[8]== Tex0 CHECK_VBA_RANGE(CLandscapeGlobals::CurrentFar0VBInfo.Accessor, CurVBPtr + CLandscapeGlobals::CurrentFar0VBInfo.TexCoordOff0, sizeof(CUV)); *(CUV*)(CurVBPtr + CLandscapeGlobals::CurrentFar0VBInfo.TexCoordOff0)= uv; @@ -1110,8 +1110,8 @@ inline void CPatch::fillFar1VertexVB(CTessFarVertex *pVert) if( !CLandscapeGlobals::VertexProgramEnabled ) { // Set Pos. Set it local to the current center of landscape - CHECK_VBA_RANGE(CLandscapeGlobals::CurrentFar1VBInfo.Accessor, CurVBPtr, sizeof(CVector)); - *(CVector*)CurVBPtr= pVert->Src->Pos - CLandscapeGlobals::PZBModelPosition; + CHECK_VBA_RANGE(CLandscapeGlobals::CurrentFar1VBInfo.Accessor, CurVBPtr, sizeof(CVectorPacked)); + *(CVectorPacked*)CurVBPtr= pVert->Src->Pos - CLandscapeGlobals::PZBModelPosition; // Set Uvs. CHECK_VBA_RANGE(CLandscapeGlobals::CurrentFar1VBInfo.Accessor, CurVBPtr + CLandscapeGlobals::CurrentFar1VBInfo.TexCoordOff0, sizeof(CUV)); *(CUV*)(CurVBPtr + CLandscapeGlobals::CurrentFar1VBInfo.TexCoordOff0)= uv; @@ -1126,8 +1126,8 @@ inline void CPatch::fillFar1VertexVB(CTessFarVertex *pVert) { // Else must setup Vertex program inputs // v[0]== StartPos. 
- CHECK_VBA_RANGE(CLandscapeGlobals::CurrentFar1VBInfo.Accessor, CurVBPtr, sizeof(CVector)); - *(CVector*)CurVBPtr= pVert->Src->StartPos; + CHECK_VBA_RANGE(CLandscapeGlobals::CurrentFar1VBInfo.Accessor, CurVBPtr, sizeof(CVectorPacked)); + *(CVectorPacked*)CurVBPtr= pVert->Src->StartPos; // v[8]== Tex0 CHECK_VBA_RANGE(CLandscapeGlobals::CurrentFar1VBInfo.Accessor, CurVBPtr + CLandscapeGlobals::CurrentFar1VBInfo.TexCoordOff0, sizeof(CUV)); *(CUV*)(CurVBPtr + CLandscapeGlobals::CurrentFar1VBInfo.TexCoordOff0)= uv; @@ -1179,8 +1179,8 @@ inline void CPatch::fillTileVertexVB(CTessNearVertex *pVert) if( !CLandscapeGlobals::VertexProgramEnabled ) { // Set Pos. Set it local to the current center of landscape - CHECK_VBA_RANGE(CLandscapeGlobals::CurrentTileVBInfo.Accessor, CurVBPtr, sizeof(CVector)) - *(CVector*)CurVBPtr= pVert->Src->Pos - CLandscapeGlobals::PZBModelPosition; + CHECK_VBA_RANGE(CLandscapeGlobals::CurrentTileVBInfo.Accessor, CurVBPtr, sizeof(CVectorPacked)) + *(CVectorPacked*)CurVBPtr= pVert->Src->Pos - CLandscapeGlobals::PZBModelPosition; // Set Uvs. CHECK_VBA_RANGE(CLandscapeGlobals::CurrentTileVBInfo.Accessor, CurVBPtr + CLandscapeGlobals::CurrentTileVBInfo.TexCoordOff0, sizeof(CUV)) *(CUV*)(CurVBPtr + CLandscapeGlobals::CurrentTileVBInfo.TexCoordOff0)= pVert->PUv0; @@ -1193,8 +1193,8 @@ inline void CPatch::fillTileVertexVB(CTessNearVertex *pVert) { // Else must setup Vertex program inputs // v[0]== StartPos. 
- CHECK_VBA_RANGE(CLandscapeGlobals::CurrentTileVBInfo.Accessor, CurVBPtr, sizeof(CVector)) - *(CVector*)CurVBPtr= pVert->Src->StartPos; + CHECK_VBA_RANGE(CLandscapeGlobals::CurrentTileVBInfo.Accessor, CurVBPtr, sizeof(CVectorPacked)) + *(CVectorPacked*)CurVBPtr= pVert->Src->StartPos; // v[8]== Tex0 CHECK_VBA_RANGE(CLandscapeGlobals::CurrentTileVBInfo.Accessor, CurVBPtr + CLandscapeGlobals::CurrentTileVBInfo.TexCoordOff0, sizeof(CUV)) *(CUV*)(CurVBPtr + CLandscapeGlobals::CurrentTileVBInfo.TexCoordOff0)= pVert->PUv0; @@ -1383,8 +1383,8 @@ void CPatch::computeGeomorphFar0VertexListVB(CTessList &vertLi CurVBPtr+= pVert->Index0 * CLandscapeGlobals::CurrentFar0VBInfo.VertexSize; // Set Geomorphed Position. Set it local to the current center of landscape - CHECK_VBA_RANGE(CLandscapeGlobals::CurrentFar0VBInfo.Accessor, CurVBPtr, sizeof(CVector)) - *(CVector*)CurVBPtr= pVert->Src->Pos - CLandscapeGlobals::PZBModelPosition; + CHECK_VBA_RANGE(CLandscapeGlobals::CurrentFar0VBInfo.Accessor, CurVBPtr, sizeof(CVectorPacked)) + *(CVectorPacked*)CurVBPtr= pVert->Src->Pos - CLandscapeGlobals::PZBModelPosition; } } @@ -1404,8 +1404,8 @@ void CPatch::computeGeomorphAlphaFar1VertexListVB(CTessList &v // NB: the filling order of data is important, for AGP write combiners. // Set Geomorphed Position. Set it local to the current center of landscape - CHECK_VBA_RANGE(CLandscapeGlobals::CurrentFar1VBInfo.Accessor, CurVBPtr, sizeof(CVector)) - *(CVector*)CurVBPtr= pVert->Src->Pos - CLandscapeGlobals::PZBModelPosition; + CHECK_VBA_RANGE(CLandscapeGlobals::CurrentFar1VBInfo.Accessor, CurVBPtr, sizeof(CVectorPacked)) + *(CVectorPacked*)CurVBPtr= pVert->Src->Pos - CLandscapeGlobals::PZBModelPosition; // Set Alpha color. static CRGBA col(255,255,255,255); @@ -1434,8 +1434,8 @@ void CPatch::computeGeomorphTileVertexListVB(CTessList &vertL CurVBPtr+= pVert->Index * CLandscapeGlobals::CurrentTileVBInfo.VertexSize; // Set Geomorphed Position. 
Set it local to the current center of landscape - CHECK_VBA_RANGE(CLandscapeGlobals::CurrentTileVBInfo.Accessor, CurVBPtr, sizeof(CVector)) - *(CVector*)CurVBPtr= pVert->Src->Pos - CLandscapeGlobals::PZBModelPosition; + CHECK_VBA_RANGE(CLandscapeGlobals::CurrentTileVBInfo.Accessor, CurVBPtr, sizeof(CVectorPacked)) + *(CVectorPacked*)CurVBPtr= pVert->Src->Pos - CLandscapeGlobals::PZBModelPosition; } } diff --git a/code/nel/src/3d/ps_dot.cpp b/code/nel/src/3d/ps_dot.cpp index 516cac8f1..6219605f7 100644 --- a/code/nel/src/3d/ps_dot.cpp +++ b/code/nel/src/3d/ps_dot.cpp @@ -23,6 +23,8 @@ #include "nel/3d/particle_system.h" #include "nel/misc/fast_mem.h" +using NLMISC::CVectorPacked; + namespace NL3D { @@ -84,7 +86,7 @@ inline void DrawDot(T it, do { CHECK_VERTEX_BUFFER(vb, currPos); - *((CVector *) currPos) = *it; + *((CVectorPacked *) currPos) = *it; ++it ; currPos += stride; } @@ -93,7 +95,7 @@ inline void DrawDot(T it, else if (srcStep == (1 << 16)) // make sure we haven't got auto-lod and that the step is 1.0 { // there's no color information in the buffer, so we can copy it directly - NLMISC::CFastMem::memcpy(vba.getVertexCoordPointer(), &(*it), sizeof(NLMISC::CVector) * toProcess); + NLMISC::CFastMem::memcpy(vba.getVertexCoordPointer(), &(*it), sizeof(NLMISC::CVectorPacked) * toProcess); it += toProcess; } else @@ -103,7 +105,7 @@ inline void DrawDot(T it, do { CHECK_VERTEX_BUFFER(vb, currPos); - *((CVector *) currPos) = *it; + *((CVectorPacked *) currPos) = *it; ++it ; currPos += sizeof(float[3]); } diff --git a/code/nel/src/3d/ps_emitter.cpp b/code/nel/src/3d/ps_emitter.cpp index c9806722f..0084111a0 100644 --- a/code/nel/src/3d/ps_emitter.cpp +++ b/code/nel/src/3d/ps_emitter.cpp @@ -914,7 +914,7 @@ uint GenEmitterPositions(CPSLocated *emitter, uint numStep, TAnimationTime deltaT, /* fraction of time needed to reach the first emission */ TAnimationTime step, - std::vector &dest + std::vector &dest ) { NL_PS_FUNC(GenEmitterPositions) @@ -930,8 +930,8 @@ uint 
GenEmitterPositions(CPSLocated *emitter, } else { - std::vector::iterator outIt = dest.end(); - std::vector::iterator endIt = dest.begin(); + std::vector::iterator outIt = dest.end(); + std::vector::iterator endIt = dest.begin(); NLMISC::CVector pos = emitter->getPos()[emitterIndex] - deltaT * emitter->getSpeed()[emitterIndex]; NLMISC::CVector speed = step * emitter->getSpeed()[emitterIndex]; do @@ -966,7 +966,7 @@ static inline uint GenEmitterPositionsWithLOD(CPSLocated *emitter, TAnimationTime deltaT, /* fraction of time needed to reach the first emission */ TAnimationTime step, float invLODRatio, - std::vector &dest + std::vector &dest ) { NL_PS_FUNC(GenEmitterPositionsWithLOD) @@ -982,8 +982,8 @@ static inline uint GenEmitterPositionsWithLOD(CPSLocated *emitter, } else { - std::vector::iterator outIt = dest.end(); - std::vector::iterator endIt = dest.begin(); + std::vector::iterator outIt = dest.end(); + std::vector::iterator endIt = dest.begin(); NLMISC::CVector pos = emitter->getPos()[emitterIndex] - deltaT * emitter->getSpeed()[emitterIndex]; NLMISC::CVector speed = step * invLODRatio * emitter->getSpeed()[emitterIndex]; do @@ -1021,7 +1021,7 @@ void CPSEmitter::processRegularEmissionConsistent(uint firstInstanceIndex, float // - static std::vector emitterPositions; + static std::vector emitterPositions; // Positions for the emitter. They are computed by using a parametric trajectory or by using integration const uint size = _Owner->getSize(); @@ -1454,7 +1454,7 @@ void CPSEmitter::processRegularEmissionConsistentWithNoLOD(uint firstInstanceInd // - static std::vector emitterPositions; + static std::vector emitterPositions; // Positions for the emitter. 
They are computed by using a parametric trajectory or by using integration const uint size = _Owner->getSize(); diff --git a/code/nel/src/3d/ps_face.cpp b/code/nel/src/3d/ps_face.cpp index 63909287d..bc60fb313 100644 --- a/code/nel/src/3d/ps_face.cpp +++ b/code/nel/src/3d/ps_face.cpp @@ -23,7 +23,7 @@ #include "nel/3d/particle_system.h" #include "nel/misc/quat.h" - +using NLMISC::CVectorPacked; namespace NL3D { @@ -96,27 +96,27 @@ public: { const CPlaneBasis &currBasis = f._PrecompBasis[*indexIt].Basis; CHECK_VERTEX_BUFFER(vb, currVertex); - ((CVector *) currVertex)->x = (*posIt).x + *ptSize * currBasis.X.x; - ((CVector *) currVertex)->y = (*posIt).y + *ptSize * currBasis.X.y; - ((CVector *) currVertex)->z = (*posIt).z + *ptSize * currBasis.X.z; + ((CVectorPacked *) currVertex)->x = (*posIt).x + *ptSize * currBasis.X.x; + ((CVectorPacked *) currVertex)->y = (*posIt).y + *ptSize * currBasis.X.y; + ((CVectorPacked *) currVertex)->z = (*posIt).z + *ptSize * currBasis.X.z; currVertex += stride; CHECK_VERTEX_BUFFER(vb, currVertex); - ((CVector *) currVertex)->x = (*posIt).x + *ptSize * currBasis.Y.x; - ((CVector *) currVertex)->y = (*posIt).y + *ptSize * currBasis.Y.y; - ((CVector *) currVertex)->z = (*posIt).z + *ptSize * currBasis.Y.z; + ((CVectorPacked *) currVertex)->x = (*posIt).x + *ptSize * currBasis.Y.x; + ((CVectorPacked *) currVertex)->y = (*posIt).y + *ptSize * currBasis.Y.y; + ((CVectorPacked *) currVertex)->z = (*posIt).z + *ptSize * currBasis.Y.z; currVertex += stride; CHECK_VERTEX_BUFFER(vb, currVertex); - ((CVector *) currVertex)->x = (*posIt).x - *ptSize * currBasis.X.x; - ((CVector *) currVertex)->y = (*posIt).y - *ptSize * currBasis.X.y; - ((CVector *) currVertex)->z = (*posIt).z - *ptSize * currBasis.X.z; + ((CVectorPacked *) currVertex)->x = (*posIt).x - *ptSize * currBasis.X.x; + ((CVectorPacked *) currVertex)->y = (*posIt).y - *ptSize * currBasis.X.y; + ((CVectorPacked *) currVertex)->z = (*posIt).z - *ptSize * currBasis.X.z; currVertex += stride; 
CHECK_VERTEX_BUFFER(vb, currVertex); - ((CVector *) currVertex)->x = (*posIt).x - *ptSize * currBasis.Y.x; - ((CVector *) currVertex)->y = (*posIt).y - *ptSize * currBasis.Y.y; - ((CVector *) currVertex)->z = (*posIt).z - *ptSize * currBasis.Y.z; + ((CVectorPacked *) currVertex)->x = (*posIt).x - *ptSize * currBasis.Y.x; + ((CVectorPacked *) currVertex)->y = (*posIt).y - *ptSize * currBasis.Y.y; + ((CVectorPacked *) currVertex)->z = (*posIt).z - *ptSize * currBasis.Y.z; currVertex += stride; ptSize += ptSizeIncrement; ++indexIt; @@ -168,27 +168,27 @@ public: { // we use this instead of the + operator, because we avoid 4 constructor calls this way CHECK_VERTEX_BUFFER(vb, currVertex); - ((CVector *) currVertex)->x = (*posIt).x + *ptSize * currBasis->X.x; - ((CVector *) currVertex)->y = (*posIt).y + *ptSize * currBasis->X.y; - ((CVector *) currVertex)->z = (*posIt).z + *ptSize * currBasis->X.z; + ((CVectorPacked *) currVertex)->x = (*posIt).x + *ptSize * currBasis->X.x; + ((CVectorPacked *) currVertex)->y = (*posIt).y + *ptSize * currBasis->X.y; + ((CVectorPacked *) currVertex)->z = (*posIt).z + *ptSize * currBasis->X.z; currVertex += vSize; CHECK_VERTEX_BUFFER(vb, currVertex); - ((CVector *) currVertex)->x = (*posIt).x + *ptSize * currBasis->Y.x; - ((CVector *) currVertex)->y = (*posIt).y + *ptSize * currBasis->Y.y; - ((CVector *) currVertex)->z = (*posIt).z + *ptSize * currBasis->Y.z; + ((CVectorPacked *) currVertex)->x = (*posIt).x + *ptSize * currBasis->Y.x; + ((CVectorPacked *) currVertex)->y = (*posIt).y + *ptSize * currBasis->Y.y; + ((CVectorPacked *) currVertex)->z = (*posIt).z + *ptSize * currBasis->Y.z; currVertex += vSize; CHECK_VERTEX_BUFFER(vb, currVertex); - ((CVector *) currVertex)->x = (*posIt).x - *ptSize * currBasis->X.x; - ((CVector *) currVertex)->y = (*posIt).y - *ptSize * currBasis->X.y; - ((CVector *) currVertex)->z = (*posIt).z - *ptSize * currBasis->X.z; + ((CVectorPacked *) currVertex)->x = (*posIt).x - *ptSize * currBasis->X.x; + 
((CVectorPacked *) currVertex)->y = (*posIt).y - *ptSize * currBasis->X.y; + ((CVectorPacked *) currVertex)->z = (*posIt).z - *ptSize * currBasis->X.z; currVertex += vSize; CHECK_VERTEX_BUFFER(vb, currVertex); - ((CVector *) currVertex)->x = (*posIt).x - *ptSize * currBasis->Y.x; - ((CVector *) currVertex)->y = (*posIt).y - *ptSize * currBasis->Y.y; - ((CVector *) currVertex)->z = (*posIt).z - *ptSize * currBasis->Y.z; + ((CVectorPacked *) currVertex)->x = (*posIt).x - *ptSize * currBasis->Y.x; + ((CVectorPacked *) currVertex)->y = (*posIt).y - *ptSize * currBasis->Y.y; + ((CVectorPacked *) currVertex)->z = (*posIt).z - *ptSize * currBasis->Y.z; currVertex += vSize; ptSize += ptSizeIncrement; ++posIt; diff --git a/code/nel/src/3d/ps_face_look_at.cpp b/code/nel/src/3d/ps_face_look_at.cpp index 5b06f6eb5..ccc5907c5 100644 --- a/code/nel/src/3d/ps_face_look_at.cpp +++ b/code/nel/src/3d/ps_face_look_at.cpp @@ -23,6 +23,7 @@ #include "nel/3d/particle_system.h" #include "nel/misc/fast_floor.h" +using NLMISC::CVectorPacked; namespace NL3D { @@ -147,27 +148,27 @@ public: v1 = rotTable[tabIndex] * currAlign->I + rotTable[tabIndex + 1] * currAlign->K; v2 = rotTable[tabIndex + 2] * currAlign->I + rotTable[tabIndex + 3] * currAlign->K; CHECK_VERTEX_BUFFER(vb, ptPos); - ((CVector *) ptPos)->x = (*it).x + *currentSize * v1.x; - ((CVector *) ptPos)->y = (*it).y + *currentSize * v1.y; - ((CVector *) ptPos)->z = (*it).z + *currentSize * v1.z; + ((CVectorPacked *) ptPos)->x = (*it).x + *currentSize * v1.x; + ((CVectorPacked *) ptPos)->y = (*it).y + *currentSize * v1.y; + ((CVectorPacked *) ptPos)->z = (*it).z + *currentSize * v1.z; ptPos += stride; CHECK_VERTEX_BUFFER(vb, ptPos); - ((CVector *) ptPos)->x = (*it).x + *currentSize * v2.x; - ((CVector *) ptPos)->y = (*it).y + *currentSize * v2.y; - ((CVector *) ptPos)->z = (*it).z + *currentSize * v2.z; + ((CVectorPacked *) ptPos)->x = (*it).x + *currentSize * v2.x; + ((CVectorPacked *) ptPos)->y = (*it).y + *currentSize * v2.y; + 
((CVectorPacked *) ptPos)->z = (*it).z + *currentSize * v2.z; ptPos += stride; CHECK_VERTEX_BUFFER(vb, ptPos); - ((CVector *) ptPos)->x = (*it).x - *currentSize * v1.x; - ((CVector *) ptPos)->y = (*it).y - *currentSize * v1.y; - ((CVector *) ptPos)->z = (*it).z - *currentSize * v1.z; + ((CVectorPacked *) ptPos)->x = (*it).x - *currentSize * v1.x; + ((CVectorPacked *) ptPos)->y = (*it).y - *currentSize * v1.y; + ((CVectorPacked *) ptPos)->z = (*it).z - *currentSize * v1.z; ptPos += stride; CHECK_VERTEX_BUFFER(vb, ptPos); - ((CVector *) ptPos)->x = (*it).x - *currentSize * v2.x; - ((CVector *) ptPos)->y = (*it).y - *currentSize * v2.y; - ((CVector *) ptPos)->z = (*it).z - *currentSize * v2.z; + ((CVectorPacked *) ptPos)->x = (*it).x - *currentSize * v2.x; + ((CVectorPacked *) ptPos)->y = (*it).y - *currentSize * v2.y; + ((CVectorPacked *) ptPos)->z = (*it).z - *currentSize * v2.z; ptPos += stride; ++it; @@ -199,27 +200,27 @@ public: v1 = CPSUtil::getCos((sint32) la._Angle2D) * currAlign->I + CPSUtil::getSin((sint32) la._Angle2D) * currAlign->K; v2 = - CPSUtil::getSin((sint32) la._Angle2D) * currAlign->I + CPSUtil::getCos((sint32) la._Angle2D) * currAlign->K; CHECK_VERTEX_BUFFER(vb, ptPos); - ((CVector *) ptPos)->x = (*it).x - *currentSize * v1.x + *currentSize2 * v2.x; - ((CVector *) ptPos)->y = (*it).y - *currentSize * v1.y + *currentSize2 * v2.y; - ((CVector *) ptPos)->z = (*it).z - *currentSize * v1.z + *currentSize2 * v2.z; + ((CVectorPacked *) ptPos)->x = (*it).x - *currentSize * v1.x + *currentSize2 * v2.x; + ((CVectorPacked *) ptPos)->y = (*it).y - *currentSize * v1.y + *currentSize2 * v2.y; + ((CVectorPacked *) ptPos)->z = (*it).z - *currentSize * v1.z + *currentSize2 * v2.z; ptPos += stride; CHECK_VERTEX_BUFFER(vb, ptPos); - ((CVector *) ptPos)->x = (*it).x + *currentSize * v1.x + *currentSize2 * v2.x; - ((CVector *) ptPos)->y = (*it).y + *currentSize * v1.y + *currentSize2 * v2.y; - ((CVector *) ptPos)->z = (*it).z + *currentSize * v1.z + *currentSize2 * 
v2.z; + ((CVectorPacked *) ptPos)->x = (*it).x + *currentSize * v1.x + *currentSize2 * v2.x; + ((CVectorPacked *) ptPos)->y = (*it).y + *currentSize * v1.y + *currentSize2 * v2.y; + ((CVectorPacked *) ptPos)->z = (*it).z + *currentSize * v1.z + *currentSize2 * v2.z; ptPos += stride; CHECK_VERTEX_BUFFER(vb, ptPos); - ((CVector *) ptPos)->x = (*it).x + *currentSize * v1.x - *currentSize2 * v2.x; - ((CVector *) ptPos)->y = (*it).y + *currentSize * v1.y - *currentSize2 * v2.y; - ((CVector *) ptPos)->z = (*it).z + *currentSize * v1.z - *currentSize2 * v2.z; + ((CVectorPacked *) ptPos)->x = (*it).x + *currentSize * v1.x - *currentSize2 * v2.x; + ((CVectorPacked *) ptPos)->y = (*it).y + *currentSize * v1.y - *currentSize2 * v2.y; + ((CVectorPacked *) ptPos)->z = (*it).z + *currentSize * v1.z - *currentSize2 * v2.z; ptPos += stride; CHECK_VERTEX_BUFFER(vb, ptPos); - ((CVector *) ptPos)->x = (*it).x - *currentSize * v1.x - *currentSize2 * v2.x; - ((CVector *) ptPos)->y = (*it).y - *currentSize * v1.y - *currentSize2 * v2.y; - ((CVector *) ptPos)->z = (*it).z - *currentSize * v1.z - *currentSize2 * v2.z; + ((CVectorPacked *) ptPos)->x = (*it).x - *currentSize * v1.x - *currentSize2 * v2.x; + ((CVectorPacked *) ptPos)->y = (*it).y - *currentSize * v1.y - *currentSize2 * v2.y; + ((CVectorPacked *) ptPos)->z = (*it).z - *currentSize * v1.z - *currentSize2 * v2.z; ptPos += stride; ++it; ++currAlign; @@ -283,24 +284,24 @@ public: CHECK_VERTEX_BUFFER(vb, ptPos + stride2); CHECK_VERTEX_BUFFER(vb, ptPos + stride3); - ((CVector *) ptPos)->x = (*it).x + v1.x; - ((CVector *) ptPos)->y = (*it).y + v1.y; - ((CVector *) ptPos)->z = (*it).z + v1.z; + ((CVectorPacked *) ptPos)->x = (*it).x + v1.x; + ((CVectorPacked *) ptPos)->y = (*it).y + v1.y; + ((CVectorPacked *) ptPos)->z = (*it).z + v1.z; ptPos += stride; - ((CVector *) ptPos)->x = (*it).x + v2.x; - ((CVector *) ptPos)->y = (*it).y + v2.y; - ((CVector *) ptPos)->z = (*it).z + v2.z; + ((CVectorPacked *) ptPos)->x = (*it).x + v2.x; + 
((CVectorPacked *) ptPos)->y = (*it).y + v2.y; + ((CVectorPacked *) ptPos)->z = (*it).z + v2.z; ptPos += stride; - ((CVector *) ptPos)->x = (*it).x - v1.x; - ((CVector *) ptPos)->y = (*it).y - v1.y; - ((CVector *) ptPos)->z = (*it).z - v1.z; + ((CVectorPacked *) ptPos)->x = (*it).x - v1.x; + ((CVectorPacked *) ptPos)->y = (*it).y - v1.y; + ((CVectorPacked *) ptPos)->z = (*it).z - v1.z; ptPos += stride; - ((CVector *) ptPos)->x = (*it).x - v2.x; - ((CVector *) ptPos)->y = (*it).y - v2.y; - ((CVector *) ptPos)->z = (*it).z - v2.z; + ((CVectorPacked *) ptPos)->x = (*it).x - v2.x; + ((CVectorPacked *) ptPos)->y = (*it).y - v2.y; + ((CVectorPacked *) ptPos)->z = (*it).z - v2.z; ptPos += stride; ++it; @@ -336,27 +337,27 @@ public: v2 = - sinAngle * currAlign->I + cosAngle * currAlign->K; CHECK_VERTEX_BUFFER(vb, ptPos); - ((CVector *) ptPos)->x = (*it).x - *currentSize * v1.x + *currentSize2 * v2.x; - ((CVector *) ptPos)->y = (*it).y - *currentSize * v1.y + *currentSize2 * v2.y; - ((CVector *) ptPos)->z = (*it).z - *currentSize * v1.z + *currentSize2 * v2.z; + ((CVectorPacked *) ptPos)->x = (*it).x - *currentSize * v1.x + *currentSize2 * v2.x; + ((CVectorPacked *) ptPos)->y = (*it).y - *currentSize * v1.y + *currentSize2 * v2.y; + ((CVectorPacked *) ptPos)->z = (*it).z - *currentSize * v1.z + *currentSize2 * v2.z; ptPos += stride; CHECK_VERTEX_BUFFER(vb, ptPos); - ((CVector *) ptPos)->x = (*it).x + *currentSize * v1.x + *currentSize2 * v2.x; - ((CVector *) ptPos)->y = (*it).y + *currentSize * v1.y + *currentSize2 * v2.y; - ((CVector *) ptPos)->z = (*it).z + *currentSize * v1.z + *currentSize2 * v2.z; + ((CVectorPacked *) ptPos)->x = (*it).x + *currentSize * v1.x + *currentSize2 * v2.x; + ((CVectorPacked *) ptPos)->y = (*it).y + *currentSize * v1.y + *currentSize2 * v2.y; + ((CVectorPacked *) ptPos)->z = (*it).z + *currentSize * v1.z + *currentSize2 * v2.z; ptPos += stride; CHECK_VERTEX_BUFFER(vb, ptPos); - ((CVector *) ptPos)->x = (*it).x + *currentSize * v1.x - 
*currentSize2 * v2.x; - ((CVector *) ptPos)->y = (*it).y + *currentSize * v1.y - *currentSize2 * v2.y; - ((CVector *) ptPos)->z = (*it).z + *currentSize * v1.z - *currentSize2 * v2.z; + ((CVectorPacked *) ptPos)->x = (*it).x + *currentSize * v1.x - *currentSize2 * v2.x; + ((CVectorPacked *) ptPos)->y = (*it).y + *currentSize * v1.y - *currentSize2 * v2.y; + ((CVectorPacked *) ptPos)->z = (*it).z + *currentSize * v1.z - *currentSize2 * v2.z; ptPos += stride; CHECK_VERTEX_BUFFER(vb, ptPos); - ((CVector *) ptPos)->x = (*it).x - *currentSize * v1.x - *currentSize2 * v2.x; - ((CVector *) ptPos)->y = (*it).y - *currentSize * v1.y - *currentSize2 * v2.y; - ((CVector *) ptPos)->z = (*it).z - *currentSize * v1.z - *currentSize2 * v2.z; + ((CVectorPacked *) ptPos)->x = (*it).x - *currentSize * v1.x - *currentSize2 * v2.x; + ((CVectorPacked *) ptPos)->y = (*it).y - *currentSize * v1.y - *currentSize2 * v2.y; + ((CVectorPacked *) ptPos)->z = (*it).z - *currentSize * v1.z - *currentSize2 * v2.z; ptPos += stride; ++it; ++currentAngle; @@ -462,27 +463,27 @@ public: while (it != endIt) { CHECK_VERTEX_BUFFER(vb, ptPos); - ((CVector *) ptPos)->x = (*it).x + *currentSize * v1.x; - ((CVector *) ptPos)->y = (*it).y + *currentSize * v1.y; - ((CVector *) ptPos)->z = (*it).z + *currentSize * v1.z; + ((CVectorPacked *) ptPos)->x = (*it).x + *currentSize * v1.x; + ((CVectorPacked *) ptPos)->y = (*it).y + *currentSize * v1.y; + ((CVectorPacked *) ptPos)->z = (*it).z + *currentSize * v1.z; ptPos += stride; CHECK_VERTEX_BUFFER(vb, ptPos); - ((CVector *) ptPos)->x = (*it).x + *currentSize * v2.x; - ((CVector *) ptPos)->y = (*it).y + *currentSize * v2.y; - ((CVector *) ptPos)->z = (*it).z + *currentSize * v2.z; + ((CVectorPacked *) ptPos)->x = (*it).x + *currentSize * v2.x; + ((CVectorPacked *) ptPos)->y = (*it).y + *currentSize * v2.y; + ((CVectorPacked *) ptPos)->z = (*it).z + *currentSize * v2.z; ptPos += stride; CHECK_VERTEX_BUFFER(vb, ptPos); - ((CVector *) ptPos)->x = (*it).x - 
*currentSize * v1.x; - ((CVector *) ptPos)->y = (*it).y - *currentSize * v1.y; - ((CVector *) ptPos)->z = (*it).z - *currentSize * v1.z; + ((CVectorPacked *) ptPos)->x = (*it).x - *currentSize * v1.x; + ((CVectorPacked *) ptPos)->y = (*it).y - *currentSize * v1.y; + ((CVectorPacked *) ptPos)->z = (*it).z - *currentSize * v1.z; ptPos += stride; CHECK_VERTEX_BUFFER(vb, ptPos); - ((CVector *) ptPos)->x = (*it).x - *currentSize * v2.x; - ((CVector *) ptPos)->y = (*it).y - *currentSize * v2.y; - ((CVector *) ptPos)->z = (*it).z - *currentSize * v2.z; + ((CVectorPacked *) ptPos)->x = (*it).x - *currentSize * v2.x; + ((CVectorPacked *) ptPos)->y = (*it).y - *currentSize * v2.y; + ((CVectorPacked *) ptPos)->z = (*it).z - *currentSize * v2.z; ptPos += stride; ++it; @@ -498,27 +499,27 @@ public: while (it != endIt) { CHECK_VERTEX_BUFFER(vb, ptPos); - ((CVector *) ptPos)->x = (*it).x + myV1.x; - ((CVector *) ptPos)->y = (*it).y + myV1.y; - ((CVector *) ptPos)->z = (*it).z + myV1.z; + ((CVectorPacked *) ptPos)->x = (*it).x + myV1.x; + ((CVectorPacked *) ptPos)->y = (*it).y + myV1.y; + ((CVectorPacked *) ptPos)->z = (*it).z + myV1.z; ptPos += stride; CHECK_VERTEX_BUFFER(vb, ptPos); - ((CVector *) ptPos)->x = (*it).x + myV2.x; - ((CVector *) ptPos)->y = (*it).y + myV2.y; - ((CVector *) ptPos)->z = (*it).z + myV2.z; + ((CVectorPacked *) ptPos)->x = (*it).x + myV2.x; + ((CVectorPacked *) ptPos)->y = (*it).y + myV2.y; + ((CVectorPacked *) ptPos)->z = (*it).z + myV2.z; ptPos += stride; CHECK_VERTEX_BUFFER(vb, ptPos); - ((CVector *) ptPos)->x = (*it).x - myV1.x; - ((CVector *) ptPos)->y = (*it).y - myV1.y; - ((CVector *) ptPos)->z = (*it).z - myV1.z; + ((CVectorPacked *) ptPos)->x = (*it).x - myV1.x; + ((CVectorPacked *) ptPos)->y = (*it).y - myV1.y; + ((CVectorPacked *) ptPos)->z = (*it).z - myV1.z; ptPos += stride; CHECK_VERTEX_BUFFER(vb, ptPos); - ((CVector *) ptPos)->x = (*it).x - myV2.x; - ((CVector *) ptPos)->y = (*it).y - myV2.y; - ((CVector *) ptPos)->z = (*it).z - myV2.z; + 
((CVectorPacked *) ptPos)->x = (*it).x - myV2.x; + ((CVectorPacked *) ptPos)->y = (*it).y - myV2.y; + ((CVectorPacked *) ptPos)->z = (*it).z - myV2.z; ptPos += stride; ++it; } @@ -548,27 +549,27 @@ public: while (it != endIt) { CHECK_VERTEX_BUFFER(vb, ptPos); - ((CVector *) ptPos)->x = (*it).x - *currentSize * v1.x + *currentSize2 * v2.x; - ((CVector *) ptPos)->y = (*it).y - *currentSize * v1.y + *currentSize2 * v2.y; - ((CVector *) ptPos)->z = (*it).z - *currentSize * v1.z + *currentSize2 * v2.z; + ((CVectorPacked *) ptPos)->x = (*it).x - *currentSize * v1.x + *currentSize2 * v2.x; + ((CVectorPacked *) ptPos)->y = (*it).y - *currentSize * v1.y + *currentSize2 * v2.y; + ((CVectorPacked *) ptPos)->z = (*it).z - *currentSize * v1.z + *currentSize2 * v2.z; ptPos += stride; CHECK_VERTEX_BUFFER(vb, ptPos); - ((CVector *) ptPos)->x = (*it).x + *currentSize * v1.x + *currentSize2 * v2.x; - ((CVector *) ptPos)->y = (*it).y + *currentSize * v1.y + *currentSize2 * v2.y; - ((CVector *) ptPos)->z = (*it).z + *currentSize * v1.z + *currentSize2 * v2.z; + ((CVectorPacked *) ptPos)->x = (*it).x + *currentSize * v1.x + *currentSize2 * v2.x; + ((CVectorPacked *) ptPos)->y = (*it).y + *currentSize * v1.y + *currentSize2 * v2.y; + ((CVectorPacked *) ptPos)->z = (*it).z + *currentSize * v1.z + *currentSize2 * v2.z; ptPos += stride; CHECK_VERTEX_BUFFER(vb, ptPos); - ((CVector *) ptPos)->x = (*it).x + *currentSize * v1.x - *currentSize2 * v2.x; - ((CVector *) ptPos)->y = (*it).y + *currentSize * v1.y - *currentSize2 * v2.y; - ((CVector *) ptPos)->z = (*it).z + *currentSize * v1.z - *currentSize2 * v2.z; + ((CVectorPacked *) ptPos)->x = (*it).x + *currentSize * v1.x - *currentSize2 * v2.x; + ((CVectorPacked *) ptPos)->y = (*it).y + *currentSize * v1.y - *currentSize2 * v2.y; + ((CVectorPacked *) ptPos)->z = (*it).z + *currentSize * v1.z - *currentSize2 * v2.z; ptPos += stride; CHECK_VERTEX_BUFFER(vb, ptPos); - ((CVector *) ptPos)->x = (*it).x - *currentSize * v1.x - *currentSize2 * v2.x; 
- ((CVector *) ptPos)->y = (*it).y - *currentSize * v1.y - *currentSize2 * v2.y; - ((CVector *) ptPos)->z = (*it).z - *currentSize * v1.z - *currentSize2 * v2.z; + ((CVectorPacked *) ptPos)->x = (*it).x - *currentSize * v1.x - *currentSize2 * v2.x; + ((CVectorPacked *) ptPos)->y = (*it).y - *currentSize * v1.y - *currentSize2 * v2.y; + ((CVectorPacked *) ptPos)->z = (*it).z - *currentSize * v1.z - *currentSize2 * v2.z; ptPos += stride; ++it; currentSize += currentSizeStep; @@ -637,81 +638,81 @@ public: mbv12 = -*currentSize * mbv1n; mbv1 *= *currentSize * (1 + la._MotionBlurCoeff * n * n) / n; - *(CVector *) ptPos = *it - mbv2; - *(CVector *) (ptPos + stride) = *it + mbv1; - *(CVector *) (ptPos + stride2) = *it + mbv2; - *(CVector *) (ptPos + stride3) = *it + mbv12; + *(CVectorPacked *) ptPos = *it - mbv2; + *(CVectorPacked *) (ptPos + stride) = *it + mbv1; + *(CVectorPacked *) (ptPos + stride2) = *it + mbv2; + *(CVectorPacked *) (ptPos + stride3) = *it + mbv12; CHECK_VERTEX_BUFFER(vb, ptPos); - ((CVector *) ptPos)->x = (*it).x - mbv2.x; - ((CVector *) ptPos)->y = (*it).y - mbv2.y; - ((CVector *) ptPos)->z = (*it).z - mbv2.z; + ((CVectorPacked *) ptPos)->x = (*it).x - mbv2.x; + ((CVectorPacked *) ptPos)->y = (*it).y - mbv2.y; + ((CVectorPacked *) ptPos)->z = (*it).z - mbv2.z; CHECK_VERTEX_BUFFER(vb, ptPos + stride); - ((CVector *) (ptPos + stride))->x = (*it).x + mbv1.x; - ((CVector *) (ptPos + stride))->y = (*it).y + mbv1.y; - ((CVector *) (ptPos + stride))->z = (*it).z + mbv1.z; + ((CVectorPacked *) (ptPos + stride))->x = (*it).x + mbv1.x; + ((CVectorPacked *) (ptPos + stride))->y = (*it).y + mbv1.y; + ((CVectorPacked *) (ptPos + stride))->z = (*it).z + mbv1.z; CHECK_VERTEX_BUFFER(vb, ptPos + stride2); - ((CVector *) (ptPos + stride2))->x = (*it).x + mbv2.x; - ((CVector *) (ptPos + stride2))->y = (*it).y + mbv2.y; - ((CVector *) (ptPos + stride2))->z = (*it).z + mbv2.z; + ((CVectorPacked *) (ptPos + stride2))->x = (*it).x + mbv2.x; + ((CVectorPacked *) (ptPos + 
stride2))->y = (*it).y + mbv2.y; + ((CVectorPacked *) (ptPos + stride2))->z = (*it).z + mbv2.z; CHECK_VERTEX_BUFFER(vb, ptPos + stride3); - ((CVector *) (ptPos + stride3))->x = (*it).x + mbv12.x; - ((CVector *) (ptPos + stride3))->y = (*it).y + mbv12.y; - ((CVector *) (ptPos + stride3))->z = (*it).z + mbv12.z; + ((CVectorPacked *) (ptPos + stride3))->x = (*it).x + mbv12.x; + ((CVectorPacked *) (ptPos + stride3))->y = (*it).y + mbv12.y; + ((CVectorPacked *) (ptPos + stride3))->z = (*it).z + mbv12.z; } else // speed too small, we must avoid imprecision { CHECK_VERTEX_BUFFER(vb, ptPos); - ((CVector *) ptPos)->x = (*it).x - *currentSize * v2.x; - ((CVector *) ptPos)->y = (*it).y - *currentSize * v2.y; - ((CVector *) ptPos)->z = (*it).z - *currentSize * v2.z; + ((CVectorPacked *) ptPos)->x = (*it).x - *currentSize * v2.x; + ((CVectorPacked *) ptPos)->y = (*it).y - *currentSize * v2.y; + ((CVectorPacked *) ptPos)->z = (*it).z - *currentSize * v2.z; CHECK_VERTEX_BUFFER(vb, ptPos + stride); - ((CVector *) (ptPos + stride))->x = (*it).x + *currentSize * v1.x; - ((CVector *) (ptPos + stride))->y = (*it).y + *currentSize * v1.y; - ((CVector *) (ptPos + stride))->z = (*it).z + *currentSize * v1.z; + ((CVectorPacked *) (ptPos + stride))->x = (*it).x + *currentSize * v1.x; + ((CVectorPacked *) (ptPos + stride))->y = (*it).y + *currentSize * v1.y; + ((CVectorPacked *) (ptPos + stride))->z = (*it).z + *currentSize * v1.z; CHECK_VERTEX_BUFFER(vb, ptPos + stride2); - ((CVector *) (ptPos + stride2))->x = (*it).x + *currentSize * v2.x; - ((CVector *) (ptPos + stride2))->y = (*it).y + *currentSize * v2.y; - ((CVector *) (ptPos + stride2))->z = (*it).z + *currentSize * v2.z; + ((CVectorPacked *) (ptPos + stride2))->x = (*it).x + *currentSize * v2.x; + ((CVectorPacked *) (ptPos + stride2))->y = (*it).y + *currentSize * v2.y; + ((CVectorPacked *) (ptPos + stride2))->z = (*it).z + *currentSize * v2.z; CHECK_VERTEX_BUFFER(vb, ptPos + stride3); - ((CVector *) (ptPos + stride3))->x = (*it).x 
- *currentSize * v1.x; - ((CVector *) (ptPos + stride3))->y = (*it).y - *currentSize * v1.y; - ((CVector *) (ptPos + stride3))->z = (*it).z - *currentSize * v1.z; + ((CVectorPacked *) (ptPos + stride3))->x = (*it).x - *currentSize * v1.x; + ((CVectorPacked *) (ptPos + stride3))->y = (*it).y - *currentSize * v1.y; + ((CVectorPacked *) (ptPos + stride3))->z = (*it).z - *currentSize * v1.z; } } else { CHECK_VERTEX_BUFFER(vb, ptPos); - ((CVector *) ptPos)->x = (*it).x - *currentSize * v2.x; - ((CVector *) ptPos)->y = (*it).y - *currentSize * v2.y; - ((CVector *) ptPos)->z = (*it).z - *currentSize * v2.z; + ((CVectorPacked *) ptPos)->x = (*it).x - *currentSize * v2.x; + ((CVectorPacked *) ptPos)->y = (*it).y - *currentSize * v2.y; + ((CVectorPacked *) ptPos)->z = (*it).z - *currentSize * v2.z; CHECK_VERTEX_BUFFER(vb, ptPos + stride); - ((CVector *) (ptPos + stride))->x = (*it).x + *currentSize * v1.x; - ((CVector *) (ptPos + stride))->y = (*it).y + *currentSize * v1.y; - ((CVector *) (ptPos + stride))->z = (*it).z + *currentSize * v1.z; + ((CVectorPacked *) (ptPos + stride))->x = (*it).x + *currentSize * v1.x; + ((CVectorPacked *) (ptPos + stride))->y = (*it).y + *currentSize * v1.y; + ((CVectorPacked *) (ptPos + stride))->z = (*it).z + *currentSize * v1.z; CHECK_VERTEX_BUFFER(vb, ptPos + stride2); - ((CVector *) (ptPos + stride2))->x = (*it).x + *currentSize * v2.x; - ((CVector *) (ptPos + stride2))->y = (*it).y + *currentSize * v2.y; - ((CVector *) (ptPos + stride2))->z = (*it).z + *currentSize * v2.z; + ((CVectorPacked *) (ptPos + stride2))->x = (*it).x + *currentSize * v2.x; + ((CVectorPacked *) (ptPos + stride2))->y = (*it).y + *currentSize * v2.y; + ((CVectorPacked *) (ptPos + stride2))->z = (*it).z + *currentSize * v2.z; CHECK_VERTEX_BUFFER(vb, ptPos + stride3); - ((CVector *) (ptPos + stride3))->x = (*it).x - *currentSize * v1.x; - ((CVector *) (ptPos + stride3))->y = (*it).y - *currentSize * v1.y; - ((CVector *) (ptPos + stride3))->z = (*it).z - *currentSize * 
v1.z; + ((CVectorPacked *) (ptPos + stride3))->x = (*it).x - *currentSize * v1.x; + ((CVectorPacked *) (ptPos + stride3))->y = (*it).y - *currentSize * v1.y; + ((CVectorPacked *) (ptPos + stride3))->z = (*it).z - *currentSize * v1.z; } ptPos += stride4; @@ -793,30 +794,30 @@ public: CHECK_VERTEX_BUFFER(vb, ptPos + stride2); CHECK_VERTEX_BUFFER(vb, ptPos + stride3); - ((CVector *) ptPos)->x = (*it).x + v1.x; - ((CVector *) ptPos)->y = (*it).y + v1.y; - ((CVector *) ptPos)->z = (*it).z + v1.z; - //nlinfo("** %f, %f, %f", ((CVector *) ptPos)->x, ((CVector *) ptPos)->y, ((CVector *) ptPos)->z); + ((CVectorPacked *) ptPos)->x = (*it).x + v1.x; + ((CVectorPacked *) ptPos)->y = (*it).y + v1.y; + ((CVectorPacked *) ptPos)->z = (*it).z + v1.z; + //nlinfo("** %f, %f, %f", ((CVectorPacked *) ptPos)->x, ((CVectorPacked *) ptPos)->y, ((CVectorPacked *) ptPos)->z); ptPos += stride; - ((CVector *) ptPos)->x = (*it).x + v2.x; - ((CVector *) ptPos)->y = (*it).y + v2.y; - ((CVector *) ptPos)->z = (*it).z + v2.z; - //nlinfo("%f, %f, %f", ((CVector *) ptPos)->x, ((CVector *) ptPos)->y, ((CVector *) ptPos)->z); + ((CVectorPacked *) ptPos)->x = (*it).x + v2.x; + ((CVectorPacked *) ptPos)->y = (*it).y + v2.y; + ((CVectorPacked *) ptPos)->z = (*it).z + v2.z; + //nlinfo("%f, %f, %f", ((CVectorPacked *) ptPos)->x, ((CVectorPacked *) ptPos)->y, ((CVectorPacked *) ptPos)->z); ptPos += stride; - ((CVector *) ptPos)->x = (*it).x - v1.x; - ((CVector *) ptPos)->y = (*it).y - v1.y; - ((CVector *) ptPos)->z = (*it).z - v1.z; - //nlinfo("%f, %f, %f", ((CVector *) ptPos)->x, ((CVector *) ptPos)->y, ((CVector *) ptPos)->z); + ((CVectorPacked *) ptPos)->x = (*it).x - v1.x; + ((CVectorPacked *) ptPos)->y = (*it).y - v1.y; + ((CVectorPacked *) ptPos)->z = (*it).z - v1.z; + //nlinfo("%f, %f, %f", ((CVectorPacked *) ptPos)->x, ((CVectorPacked *) ptPos)->y, ((CVectorPacked *) ptPos)->z); ptPos += stride; - ((CVector *) ptPos)->x = (*it).x - v2.x; - ((CVector *) ptPos)->y = (*it).y - v2.y; - ((CVector *) 
ptPos)->z = (*it).z - v2.z; - //nlinfo("%f, %f, %f", ((CVector *) ptPos)->x, ((CVector *) ptPos)->y, ((CVector *) ptPos)->z); + ((CVectorPacked *) ptPos)->x = (*it).x - v2.x; + ((CVectorPacked *) ptPos)->y = (*it).y - v2.y; + ((CVectorPacked *) ptPos)->z = (*it).z - v2.z; + //nlinfo("%f, %f, %f", ((CVectorPacked *) ptPos)->x, ((CVectorPacked *) ptPos)->y, ((CVectorPacked *) ptPos)->z); ptPos += stride; ++it; @@ -851,27 +852,27 @@ public: v2 = - sinAngle * I + cosAngle * K; CHECK_VERTEX_BUFFER(vb, ptPos); - ((CVector *) ptPos)->x = (*it).x - *currentSize * v1.x + *currentSize2 * v2.x; - ((CVector *) ptPos)->y = (*it).y - *currentSize * v1.y + *currentSize2 * v2.y; - ((CVector *) ptPos)->z = (*it).z - *currentSize * v1.z + *currentSize2 * v2.z; + ((CVectorPacked *) ptPos)->x = (*it).x - *currentSize * v1.x + *currentSize2 * v2.x; + ((CVectorPacked *) ptPos)->y = (*it).y - *currentSize * v1.y + *currentSize2 * v2.y; + ((CVectorPacked *) ptPos)->z = (*it).z - *currentSize * v1.z + *currentSize2 * v2.z; ptPos += stride; CHECK_VERTEX_BUFFER(vb, ptPos); - ((CVector *) ptPos)->x = (*it).x + *currentSize * v1.x + *currentSize2 * v2.x; - ((CVector *) ptPos)->y = (*it).y + *currentSize * v1.y + *currentSize2 * v2.y; - ((CVector *) ptPos)->z = (*it).z + *currentSize * v1.z + *currentSize2 * v2.z; + ((CVectorPacked *) ptPos)->x = (*it).x + *currentSize * v1.x + *currentSize2 * v2.x; + ((CVectorPacked *) ptPos)->y = (*it).y + *currentSize * v1.y + *currentSize2 * v2.y; + ((CVectorPacked *) ptPos)->z = (*it).z + *currentSize * v1.z + *currentSize2 * v2.z; ptPos += stride; CHECK_VERTEX_BUFFER(vb, ptPos); - ((CVector *) ptPos)->x = (*it).x + *currentSize * v1.x - *currentSize2 * v2.x; - ((CVector *) ptPos)->y = (*it).y + *currentSize * v1.y - *currentSize2 * v2.y; - ((CVector *) ptPos)->z = (*it).z + *currentSize * v1.z - *currentSize2 * v2.z; + ((CVectorPacked *) ptPos)->x = (*it).x + *currentSize * v1.x - *currentSize2 * v2.x; + ((CVectorPacked *) ptPos)->y = (*it).y + 
*currentSize * v1.y - *currentSize2 * v2.y; + ((CVectorPacked *) ptPos)->z = (*it).z + *currentSize * v1.z - *currentSize2 * v2.z; ptPos += stride; CHECK_VERTEX_BUFFER(vb, ptPos); - ((CVector *) ptPos)->x = (*it).x - *currentSize * v1.x - *currentSize2 * v2.x; - ((CVector *) ptPos)->y = (*it).y - *currentSize * v1.y - *currentSize2 * v2.y; - ((CVector *) ptPos)->z = (*it).z - *currentSize * v1.z - *currentSize2 * v2.z; + ((CVectorPacked *) ptPos)->x = (*it).x - *currentSize * v1.x - *currentSize2 * v2.x; + ((CVectorPacked *) ptPos)->y = (*it).y - *currentSize * v1.y - *currentSize2 * v2.y; + ((CVectorPacked *) ptPos)->z = (*it).z - *currentSize * v1.z - *currentSize2 * v2.z; ptPos += stride; ++it; ++currentAngle; diff --git a/code/nel/src/3d/ps_fan_light.cpp b/code/nel/src/3d/ps_fan_light.cpp index cb33fcd20..4ead1f362 100644 --- a/code/nel/src/3d/ps_fan_light.cpp +++ b/code/nel/src/3d/ps_fan_light.cpp @@ -23,6 +23,7 @@ #include "nel/3d/particle_system.h" #include "nel/3d/driver.h" +using NLMISC::CVectorPacked; namespace NL3D @@ -154,7 +155,7 @@ public: { CHECK_VERTEX_BUFFER(*vb, ptVect); - *(CVector *) ptVect = *posIt; + *(CVectorPacked *) ptVect = *posIt; // the start angle currentAngle = *currentAnglePt; const uint8 phaseAdd = (uint8) (f._PhaseSpeed * (*timeIt)); @@ -163,7 +164,7 @@ public: const float moveIntensity = f._MoveIntensity * fanSize; // compute radius & vect for first fan firstSize = fanSize + (moveIntensity * CPSUtil::getCos(randomPhaseTab[0] + phaseAdd)); - *(CVector *) ptVect = (*posIt) + I * firstSize * (CPSUtil::getCos((sint32) currentAngle)) + *(CVectorPacked *) ptVect = (*posIt) + I * firstSize * (CPSUtil::getCos((sint32) currentAngle)) + K * firstSize * (CPSUtil::getSin((sint32) currentAngle)); currentAngle += angleStep; ptVect += stride; @@ -173,7 +174,7 @@ public: for (k = 1; k <= upperBound; ++k) { fSize = fanSize + (moveIntensity * CPSUtil::getCos(randomPhaseTab[k] + phaseAdd)); - *(CVector *) ptVect = (*posIt) + I * fSize * 
(CPSUtil::getCos((sint32) currentAngle)) + *(CVectorPacked *) ptVect = (*posIt) + I * fSize * (CPSUtil::getCos((sint32) currentAngle)) + K * fSize * (CPSUtil::getSin((sint32) currentAngle)); currentAngle += angleStep; ptVect += stride; @@ -183,14 +184,14 @@ public: sizeStep = sizeStepBase * (firstSize - fSize); for (; k <= (sint32) (f._NbFans - 1); ++k) { - *(CVector *) ptVect = (*posIt) + I * fSize * (CPSUtil::getCos((sint32) currentAngle)) + *(CVectorPacked *) ptVect = (*posIt) + I * fSize * (CPSUtil::getCos((sint32) currentAngle)) + K * fSize * (CPSUtil::getSin((sint32) currentAngle)); currentAngle += angleStep; ptVect += stride; fSize += sizeStep; } // last fan - *(CVector *) ptVect = (*posIt) + I * firstSize * (CPSUtil::getCos((sint32) *currentAnglePt)) + *(CVectorPacked *) ptVect = (*posIt) + I * firstSize * (CPSUtil::getCos((sint32) *currentAnglePt)) + K * firstSize * (CPSUtil::getSin((sint32) *currentAnglePt)); ptVect += stride; currentSizePt += currentSizePtIncrement; diff --git a/code/nel/src/3d/ps_force.cpp b/code/nel/src/3d/ps_force.cpp index cb3445619..7659ce7af 100644 --- a/code/nel/src/3d/ps_force.cpp +++ b/code/nel/src/3d/ps_force.cpp @@ -602,9 +602,9 @@ void CPSGravity::integrate(float date, CPSLocated *src, uint32 startIndex, uint3 void CPSGravity::integrateSingle(float startDate, float deltaT, uint numStep, const CPSLocated *src, uint32 indexInLocated, - NLMISC::CVector *destPos, + NLMISC::CVectorPacked *destPos, bool accumulate /*= false*/, - uint stride/* = sizeof(NLMISC::CVector)*/) const + uint stride/* = sizeof(NLMISC::CVectorPacked)*/) const { NL_PS_FUNC(CPSGravity_CVector ) nlassert(src->isParametricMotionEnabled()); @@ -635,7 +635,7 @@ void CPSGravity::integrateSingle(float startDate, float deltaT, uint numStep, destPos->y = startPos.y + currDate * startSpeed.y; destPos->z = startPos.z + currDate * startSpeed.z - _K * halfTimeSquare; currDate += deltaT; - destPos = (NLMISC::CVector *) ( (uint8 *) destPos + stride); + destPos = 
(NLMISC::CVectorPacked *) ( (uint8 *) destPos + stride); } while (--numStep); } @@ -655,7 +655,7 @@ void CPSGravity::integrateSingle(float startDate, float deltaT, uint numStep, float halfTimeSquare = 0.5f * currDate * currDate; destPos->z -= _K * halfTimeSquare; currDate += deltaT; - destPos = (NLMISC::CVector *) ( (uint8 *) destPos + stride); + destPos = (NLMISC::CVectorPacked *) ( (uint8 *) destPos + stride); } while (--numStep); } @@ -1146,7 +1146,7 @@ void CPSBrownianForce::integrate(float date, CPSLocated *src, ///========================================================== void CPSBrownianForce::integrateSingle(float startDate, float deltaT, uint numStep, const CPSLocated *src, uint32 indexInLocated, - NLMISC::CVector *destPos, + NLMISC::CVectorPacked *destPos, bool accumulate, uint stride) const { @@ -1181,7 +1181,7 @@ void CPSBrownianForce::integrateSingle(float startDate, float deltaT, uint numSt destPos->y = startPos.y + currDate * startSpeed.y + _K * PrecomputedPos[index].y; destPos->z = startPos.z + currDate * startSpeed.z + _K * PrecomputedPos[index].z; currDate += deltaT; - destPos = (NLMISC::CVector *) ( (uint8 *) destPos + stride); + destPos = (NLMISC::CVectorPacked *) ( (uint8 *) destPos + stride); } while (--numStep); } @@ -1203,7 +1203,7 @@ void CPSBrownianForce::integrateSingle(float startDate, float deltaT, uint numSt destPos->y += _K * PrecomputedPos[index].y; destPos->z += _K * PrecomputedPos[index].z; currDate += deltaT; - destPos = (NLMISC::CVector *) ( (uint8 *) destPos + stride); + destPos = (NLMISC::CVectorPacked *) ( (uint8 *) destPos + stride); } while (--numStep); } diff --git a/code/nel/src/3d/ps_located.cpp b/code/nel/src/3d/ps_located.cpp index 9be0baf41..57997e2be 100644 --- a/code/nel/src/3d/ps_located.cpp +++ b/code/nel/src/3d/ps_located.cpp @@ -257,7 +257,7 @@ void CPSLocated::notifyMotionTypeChanged(void) /// *************************************************************************************** void 
CPSLocated::integrateSingle(float startDate, float deltaT, uint numStep, uint32 indexInLocated, - NLMISC::CVector *destPos, + NLMISC::CVectorPacked *destPos, uint stride) const { NL_PS_FUNC(CPSLocated_integrateSingle) @@ -293,7 +293,7 @@ void CPSLocated::integrateSingle(float startDate, float deltaT, uint numStep, destPos->y = pi.Pos.y + currDate * pi.Speed.y; destPos->z = pi.Pos.z + currDate * pi.Speed.z; currDate += deltaT; - destPos = (NLMISC::CVector *) ( (uint8 *) destPos + stride); + destPos = (NLMISC::CVectorPacked *) ( (uint8 *) destPos + stride); } while (--numStep); } diff --git a/code/nel/src/3d/ps_mesh.cpp b/code/nel/src/3d/ps_mesh.cpp index f473a79ee..ddbf024fb 100644 --- a/code/nel/src/3d/ps_mesh.cpp +++ b/code/nel/src/3d/ps_mesh.cpp @@ -660,9 +660,9 @@ public: CHECK_VERTEX_BUFFER(outVb, outVertex + outNormalOff); // translate and resize the vertex (relatively to the mesh origin) - *(CVector *) outVertex = *posIt + sM * *(CVector *) inVertex; + *(CVectorPacked *) outVertex = *posIt + sM * *(CVector *) inVertex; // copy the normal - *(CVector *) (outVertex + outNormalOff) = M * *(CVector *) (inVertex + inNormalOff); + *(CVectorPacked *) (outVertex + outNormalOff) = M * *(CVector *) (inVertex + inNormalOff); inVertex += inVSize; @@ -683,7 +683,7 @@ public: CHECK_VERTEX_BUFFER(outVb, outVertex); // translate and resize the vertex (relatively to the mesh origin) - *(CVector *) outVertex = *posIt + sM * *(CVector *) inVertex; + *(CVectorPacked *) outVertex = *posIt + sM * *(CVector *) inVertex; inVertex += inVSize; outVertex += outVSize; @@ -774,9 +774,9 @@ public: CHECK_VERTEX_BUFFER(outVb, outVertex + outNormalOff); // morph, and transform the vertex - *(CVector *) outVertex = *posIt + sM * (opLambda * *(CVector *) m0 + lambda * *(CVector *) m1); + *(CVectorPacked *) outVertex = *posIt + sM * (opLambda * *(CVector *) m0 + lambda * *(CVector *) m1); // morph, and transform the normal - *(CVector *) (outVertex + outNormalOff) = M * (opLambda * *(CVector *) 
(m0 + inNormalOff) + *(CVectorPacked *) (outVertex + outNormalOff) = M * (opLambda * *(CVector *) (m0 + inNormalOff) + lambda * *(CVector *) (m1 + inNormalOff)).normed(); @@ -799,7 +799,7 @@ public: CHECK_VERTEX_BUFFER((*inVB1), m1); CHECK_VERTEX_BUFFER(outVb, outVertex); // morph, and transform the vertex - *(CVector *) outVertex = *posIt + sM * (opLambda * *(CVector *) m0 + opLambda * *(CVector *) m1); + *(CVectorPacked *) outVertex = *posIt + sM * (opLambda * *(CVector *) m0 + opLambda * *(CVector *) m1); m0 += inVSize; m1 += inVSize; @@ -1684,8 +1684,8 @@ CVertexBuffer &CPSConstraintMesh::makePrerotatedVb(const CVertexBuffer &inVb) CHECK_VERTEX_BUFFER(prerotatedVb, outVertex); CHECK_VERTEX_BUFFER(prerotatedVb, outVertex + pNormalOff); - * (CVector *) outVertex = mat.mulVector(* (CVector *) inVertex); - * (CVector *) (outVertex + normalOff) = mat.mulVector(* (CVector *) (inVertex + pNormalOff) ); + * (CVectorPacked *) outVertex = mat.mulVector(* (CVector *) inVertex); + * (CVectorPacked *) (outVertex + normalOff) = mat.mulVector(* (CVector *) (inVertex + pNormalOff) ); outVertex += vpSize; inVertex += vSize; @@ -1701,7 +1701,7 @@ CVertexBuffer &CPSConstraintMesh::makePrerotatedVb(const CVertexBuffer &inVb) CHECK_VERTEX_BUFFER(prerotatedVb, outVertex); CHECK_VERTEX_BUFFER(inVb, inVertex); - * (CVector *) outVertex = mat.mulVector(* (CVector *) inVertex); + * (CVectorPacked *) outVertex = mat.mulVector(* (CVector *) inVertex); outVertex += vpSize; inVertex += vSize; } diff --git a/code/nel/src/3d/ps_ribbon.cpp b/code/nel/src/3d/ps_ribbon.cpp index cc7d0bcd7..2bf0a74bd 100644 --- a/code/nel/src/3d/ps_ribbon.cpp +++ b/code/nel/src/3d/ps_ribbon.cpp @@ -397,7 +397,7 @@ static inline uint8 *BuildRibbonFirstSlice(const NLMISC::CVector &pos, NL_PS_FUNC(BuildRibbonFirstSlice) do { - * (NLMISC::CVector *) dest = pos; + * (NLMISC::CVectorPacked *) dest = pos; dest += vertexSize; } while (--numVerts); @@ -409,7 +409,7 @@ static inline uint8 *BuildRibbonFirstSlice(const 
NLMISC::CVector &pos, // This compute one slice of a ribbon, and return the next vertex to be filled static inline uint8 *ComputeRibbonSliceFollowPath(const NLMISC::CVector &prev, const NLMISC::CVector &next, - const NLMISC::CVector *shape, + const NLMISC::CVectorPacked *shape, uint numVerts, uint8 *dest, uint vertexSize, @@ -430,10 +430,10 @@ static inline uint8 *ComputeRibbonSliceFollowPath(const NLMISC::CVector &prev, } basis.setPos(next); - const NLMISC::CVector *shapeEnd = shape + numVerts; + const NLMISC::CVectorPacked *shapeEnd = shape + numVerts; do { - *(NLMISC::CVector *) dest = basis * (size * (*shape)); + *(NLMISC::CVectorPacked *) dest = basis * (size * CVector(*shape)); ++shape; dest += vertexSize; } @@ -445,7 +445,7 @@ static inline uint8 *ComputeRibbonSliceFollowPath(const NLMISC::CVector &prev, // This compute one slice of a ribbon, and return the next vertex to be filled static inline uint8 *ComputeRibbonSliceIdentity(const NLMISC::CVector &prev, const NLMISC::CVector &next, - const NLMISC::CVector *shape, + const NLMISC::CVectorPacked *shape, uint numVerts, uint8 *dest, uint vertexSize, @@ -453,10 +453,10 @@ static inline uint8 *ComputeRibbonSliceIdentity(const NLMISC::CVector &prev, ) { NL_PS_FUNC(ComputeRibbonSliceIdentity) - const NLMISC::CVector *shapeEnd = shape + numVerts; + const NLMISC::CVectorPacked *shapeEnd = shape + numVerts; do { - ((NLMISC::CVector *) dest)->set(size * shape->x + next.x, + ((NLMISC::CVectorPacked *) dest)->set(size * shape->x + next.x, size * shape->y + next.y, size * shape->z + next.z); ++shape; @@ -469,7 +469,7 @@ static inline uint8 *ComputeRibbonSliceIdentity(const NLMISC::CVector &prev, ///========================================================================= static inline uint8 *ComputeRibbonSliceFollowPathXY(const NLMISC::CVector &prev, const NLMISC::CVector &next, - const NLMISC::CVector *shape, + const NLMISC::CVectorPacked *shape, uint numVerts, uint8 *dest, uint vertexSize, @@ -492,10 +492,10 @@ static 
inline uint8 *ComputeRibbonSliceFollowPathXY(const NLMISC::CVector &prev, basis.setRot(I, CVector::K, J, true); } basis.setPos(next); - const NLMISC::CVector *shapeEnd = shape + numVerts; + const NLMISC::CVectorPacked *shapeEnd = shape + numVerts; do { - *(NLMISC::CVector *) dest = basis * (size * (*shape)); + *(NLMISC::CVectorPacked *) dest = basis * (size * CVector(*shape)); ++shape; dest += vertexSize; } @@ -511,8 +511,8 @@ static inline uint8 *ComputeRibbonSliceFollowPathXY(const NLMISC::CVector &prev, // This is for untextured versions (no need to duplicate the last vertex of each slice) static inline uint8 *ComputeUntexturedRibbonMesh(uint8 *destVb, uint vertexSize, - const NLMISC::CVector *curve, - const NLMISC::CVector *shape, + const NLMISC::CVectorPacked *curve, + const NLMISC::CVectorPacked *shape, uint numSegs, uint numVerticesInShape, float sizeIncrement, @@ -585,8 +585,8 @@ static inline uint8 *ComputeUntexturedRibbonMesh(uint8 *destVb, // (Textured Version) static inline uint8 *ComputeTexturedRibbonMesh(uint8 *destVb, uint vertexSize, - const NLMISC::CVector *curve, - const NLMISC::CVector *shape, + const NLMISC::CVectorPacked *curve, + const NLMISC::CVectorPacked *shape, uint numSegs, uint numVerticesInShape, float sizeIncrement, @@ -612,7 +612,7 @@ static inline uint8 *ComputeTexturedRibbonMesh(uint8 *destVb, basis ); // duplicate last vertex ( equal first) - * (NLMISC::CVector *) nextDestVb = * (NLMISC::CVector *) destVb; + * (NLMISC::CVectorPacked *) nextDestVb = * (NLMISC::CVectorPacked *) destVb; destVb = nextDestVb + vertexSize; // ++ curve; @@ -633,7 +633,7 @@ static inline uint8 *ComputeTexturedRibbonMesh(uint8 *destVb, basis ); // duplicate last vertex ( equal first) - * (NLMISC::CVector *) nextDestVb = * (NLMISC::CVector *) destVb; + * (NLMISC::CVectorPacked *) nextDestVb = * (NLMISC::CVectorPacked *) destVb; destVb = nextDestVb + vertexSize; // ++ curve; @@ -653,7 +653,7 @@ static inline uint8 *ComputeTexturedRibbonMesh(uint8 *destVb, 
size ); // duplicate last vertex ( equal first) - * (NLMISC::CVector *) nextDestVb = * (NLMISC::CVector *) destVb; + * (NLMISC::CVectorPacked *) nextDestVb = * (NLMISC::CVectorPacked *) destVb; destVb = nextDestVb + vertexSize; // ++ curve; @@ -727,7 +727,7 @@ void CPSRibbon::displayRibbons(uint32 nbRibbons, uint32 srcStep) const uint numVerticesInShape = (uint)_Shape.size(); // static std::vector sizes; - static std::vector ribbonPos; // this is where the position of each ribbon slice center i stored + static std::vector ribbonPos; // this is where the position of each ribbon slice center i stored ribbonPos.resize(_UsedNbSegs + 1); // make sure we have enough room sizes.resize(numRibbonBatch); @@ -782,7 +782,7 @@ void CPSRibbon::displayRibbons(uint32 nbRibbons, uint32 srcStep) const float ribbonSizeIncrement = *ptCurrSize / (float) _UsedNbSegs; ptCurrSize += ptCurrSizeIncrement; // the parent class has a method to get the ribbons positions - computeRibbon((uint) (fpRibbonIndex >> 16), &ribbonPos[0], sizeof(NLMISC::CVector)); + computeRibbon((uint) (fpRibbonIndex >> 16), &ribbonPos[0], sizeof(NLMISC::CVectorPacked)); currVert = ComputeTexturedRibbonMesh(currVert, vertexSize, &ribbonPos[0], @@ -804,7 +804,7 @@ void CPSRibbon::displayRibbons(uint32 nbRibbons, uint32 srcStep) const float ribbonSizeIncrement = *ptCurrSize / (float) _UsedNbSegs; ptCurrSize += ptCurrSizeIncrement; // the parent class has a method to get the ribbons positions - computeRibbon((uint) (fpRibbonIndex >> 16), &ribbonPos[0], sizeof(NLMISC::CVector)); + computeRibbon((uint) (fpRibbonIndex >> 16), &ribbonPos[0], sizeof(NLMISC::CVectorPacked)); currVert = ComputeUntexturedRibbonMesh(currVert, vertexSize, &ribbonPos[0], diff --git a/code/nel/src/3d/ps_ribbon_base.cpp b/code/nel/src/3d/ps_ribbon_base.cpp index 8f2a64932..212456fcf 100644 --- a/code/nel/src/3d/ps_ribbon_base.cpp +++ b/code/nel/src/3d/ps_ribbon_base.cpp @@ -33,7 +33,7 @@ static inline void BuildHermiteVector(const NLMISC::CVector &P0, 
const NLMISC::CVector &P1, const NLMISC::CVector &T0, const NLMISC::CVector &T1, - NLMISC::CVector &dest, + NLMISC::CVectorPacked &dest, float lambda ) { @@ -54,7 +54,7 @@ static inline void BuildHermiteVector(const NLMISC::CVector &P0, /// for test static inline void BuildLinearVector(const NLMISC::CVector &P0, const NLMISC::CVector &P1, - NLMISC::CVector &dest, + NLMISC::CVectorPacked &dest, float lambda, float oneMinusLambda ) @@ -204,7 +204,7 @@ void CPSRibbonBase::updateGlobals() //======================================================= -void CPSRibbonBase::computeHermitteRibbon(uint index, NLMISC::CVector *dest, uint stride /* = sizeof(NLMISC::CVector)*/) +void CPSRibbonBase::computeHermitteRibbon(uint index, NLMISC::CVectorPacked *dest, uint stride /* = sizeof(NLMISC::CVectorPacked)*/) { NL_PS_FUNC(CPSRibbonBase_CVector ) nlassert(!_Parametric); @@ -242,7 +242,7 @@ void CPSRibbonBase::computeHermitteRibbon(uint index, NLMISC::CVector *dest, uin nlassert(NLMISC::isValidDouble(dest->y)); nlassert(NLMISC::isValidDouble(dest->z)); #endif - dest = (NLMISC::CVector *) ((uint8 *) dest + stride); + dest = (NLMISC::CVectorPacked *) ((uint8 *) dest + stride); } while (--leftToDo); return; @@ -262,7 +262,7 @@ void CPSRibbonBase::computeHermitteRibbon(uint index, NLMISC::CVector *dest, uin nlassert(NLMISC::isValidDouble(dest->y)); nlassert(NLMISC::isValidDouble(dest->z)); #endif - dest = (NLMISC::CVector *) ((uint8 *) dest + stride); + dest = (NLMISC::CVectorPacked *) ((uint8 *) dest + stride); -- leftToDo; if (!leftToDo) return; lambda += lambdaStep; @@ -289,7 +289,7 @@ void CPSRibbonBase::computeHermitteRibbon(uint index, NLMISC::CVector *dest, uin } //======================================================= -void CPSRibbonBase::computeLinearRibbon(uint index, NLMISC::CVector *dest, uint stride) +void CPSRibbonBase::computeLinearRibbon(uint index, NLMISC::CVectorPacked *dest, uint stride) { NL_PS_FUNC(CPSRibbonBase_computeLinearRibbon) nlassert(!_Parametric); @@ -321,7 
+321,7 @@ void CPSRibbonBase::computeLinearRibbon(uint index, NLMISC::CVector *dest, uint nlassert(NLMISC::isValidDouble(dest->y)); nlassert(NLMISC::isValidDouble(dest->z)); #endif - dest = (NLMISC::CVector *) ((uint8 *) dest + stride); + dest = (NLMISC::CVectorPacked *) ((uint8 *) dest + stride); } while (--leftToDo); @@ -345,7 +345,7 @@ void CPSRibbonBase::computeLinearRibbon(uint index, NLMISC::CVector *dest, uint nlassert(NLMISC::isValidDouble(dest->y)); nlassert(NLMISC::isValidDouble(dest->z)); #endif - dest = (NLMISC::CVector *) ((uint8 *) dest + stride); + dest = (NLMISC::CVectorPacked *) ((uint8 *) dest + stride); -- leftToDo; if (!leftToDo) return; lambda += lambdaStep; @@ -387,14 +387,14 @@ void CPSRibbonBase::computeLinearRibbon(uint index, NLMISC::CVector *dest, uint do { *dest = *currIt; - dest = (NLMISC::CVector *) ((uint8 *) dest + stride); + dest = (NLMISC::CVectorPacked *) ((uint8 *) dest + stride); } while (--leftToDo); return; } float lambdaStep = _UsedSegDuration / dt; BuildLinearVector(*currIt, *nextIt, *dest, 0.f, 1.f); - dest = (NLMISC::CVector *) ((uint8 *) dest + stride); + dest = (NLMISC::CVectorPacked *) ((uint8 *) dest + stride); -- leftToDo; // snap lambda to nearest time step lambda = lambdaStep * fmodf(date[0], _UsedSegDuration) / _UsedSegDuration; @@ -406,7 +406,7 @@ void CPSRibbonBase::computeLinearRibbon(uint index, NLMISC::CVector *dest, uint if (lambda >= 1.f) break; /// compute a location BuildLinearVector(*currIt, *nextIt, *dest, lambda, oneMinusLambda); - dest = (NLMISC::CVector *) ((uint8 *) dest + stride); + dest = (NLMISC::CVectorPacked *) ((uint8 *) dest + stride); -- leftToDo; if (!leftToDo) return; lambda += lambdaStep; @@ -426,7 +426,7 @@ void CPSRibbonBase::computeLinearRibbon(uint index, NLMISC::CVector *dest, uint do { *dest = *currIt; - dest = (NLMISC::CVector *) ((uint8 *) dest + stride); + dest = (NLMISC::CVectorPacked *) ((uint8 *) dest + stride); } while (--leftToDo); return; @@ -442,7 +442,7 @@ void 
CPSRibbonBase::computeLinearRibbon(uint index, NLMISC::CVector *dest, uint //======================================================= -void CPSRibbonBase::computeLinearCstSizeRibbon(uint index, NLMISC::CVector *dest, uint stride /* = sizeof(NLMISC::CVector)*/) +void CPSRibbonBase::computeLinearCstSizeRibbon(uint index, NLMISC::CVectorPacked *dest, uint stride /* = sizeof(NLMISC::CVectorPacked)*/) { NL_PS_FUNC(CPSRibbonBase_CVector ) nlassert(!_Parametric); @@ -485,7 +485,7 @@ void CPSRibbonBase::computeLinearCstSizeRibbon(uint index, NLMISC::CVector *dest nlassert(NLMISC::isValidDouble(dest->y)); nlassert(NLMISC::isValidDouble(dest->z)); #endif - dest = (NLMISC::CVector *) ((uint8 *) dest + stride); + dest = (NLMISC::CVectorPacked *) ((uint8 *) dest + stride); -- leftToDo; if (!leftToDo) return; lambda += lambdaStep; @@ -512,7 +512,7 @@ void CPSRibbonBase::computeLinearCstSizeRibbon(uint index, NLMISC::CVector *dest nlassert(NLMISC::isValidDouble(dest->y)); nlassert(NLMISC::isValidDouble(dest->z)); #endif - dest = (NLMISC::CVector *) ((uint8 *) dest + stride); + dest = (NLMISC::CVectorPacked *) ((uint8 *) dest + stride); } return; } @@ -520,7 +520,7 @@ void CPSRibbonBase::computeLinearCstSizeRibbon(uint index, NLMISC::CVector *dest } //======================================================= -void CPSRibbonBase::computeHermitteCstSizeRibbon(uint index, NLMISC::CVector *dest, uint stride /* = sizeof(NLMISC::CVector)*/) +void CPSRibbonBase::computeHermitteCstSizeRibbon(uint index, NLMISC::CVectorPacked *dest, uint stride /* = sizeof(NLMISC::CVectorPacked)*/) { NL_PS_FUNC(CPSRibbonBase_CVector ) nlassert(!_Parametric); @@ -567,7 +567,7 @@ void CPSRibbonBase::computeHermitteCstSizeRibbon(uint index, NLMISC::CVector *de nlassert(NLMISC::isValidDouble(dest->z)); #endif - dest = (NLMISC::CVector *) ((uint8 *) dest + stride); + dest = (NLMISC::CVectorPacked *) ((uint8 *) dest + stride); -- leftToDo; if (!leftToDo) return; lambda += lambdaStep; @@ -593,7 +593,7 @@ void 
CPSRibbonBase::computeHermitteCstSizeRibbon(uint index, NLMISC::CVector *de nlassert(NLMISC::isValidDouble(dest->y)); nlassert(NLMISC::isValidDouble(dest->z)); #endif - dest = (NLMISC::CVector *) ((uint8 *) dest + stride); + dest = (NLMISC::CVectorPacked *) ((uint8 *) dest + stride); } return; } @@ -605,7 +605,7 @@ void CPSRibbonBase::computeHermitteCstSizeRibbon(uint index, NLMISC::CVector *de //======================================================= -void CPSRibbonBase::computeRibbon(uint index, NLMISC::CVector *dest, uint stride /* = sizeof(NLMISC::CVector)*/) +void CPSRibbonBase::computeRibbon(uint index, NLMISC::CVectorPacked *dest, uint stride /* = sizeof(NLMISC::CVectorPacked)*/) { NL_PS_FUNC(CPSRibbonBase_CVector ) switch (_InterpolationMode) diff --git a/code/nel/src/3d/ps_ribbon_look_at.cpp b/code/nel/src/3d/ps_ribbon_look_at.cpp index 3b22f561e..e682135fe 100644 --- a/code/nel/src/3d/ps_ribbon_look_at.cpp +++ b/code/nel/src/3d/ps_ribbon_look_at.cpp @@ -34,7 +34,7 @@ const float NormEpsilon = 10E-8f; struct CVectInfo { - NLMISC::CVector Interp; + NLMISC::CVectorPacked Interp; NLMISC::CVector Proj; }; typedef std::vector TRibbonVect; // a vector used for intermediate computations @@ -247,8 +247,8 @@ static inline void BuildSlice(const NLMISC::CMatrix &mat, CVertexBuffer &vb, uin invTgNorm = 1.f; } // build orthogonals vectors to tangent - *(NLMISC::CVector *) currVert = pos->Interp + ribSize * invTgNorm * (tangent.x * K - tangent.z * I); - *(NLMISC::CVector *) (currVert + vertexSize) = pos->Interp + ribSize * invTgNorm * (- tangent.x * K + tangent.z * I); + *(NLMISC::CVectorPacked *) currVert = NLMISC::CVector(pos->Interp) + ribSize * invTgNorm * (tangent.x * K - tangent.z * I); + *(NLMISC::CVectorPacked *) (currVert + vertexSize) = NLMISC::CVector(pos->Interp) + ribSize * invTgNorm * (- tangent.x * K + tangent.z * I); } else if (prev->Proj.y > ZEpsilon) // second point cross the near plane { @@ -263,8 +263,8 @@ static inline void BuildSlice(const 
NLMISC::CMatrix &mat, CVertexBuffer &vb, uin } else // { - *(NLMISC::CVector *) currVert = pos->Interp; - *(NLMISC::CVector *) (currVert + vertexSize) = pos->Interp; + *(NLMISC::CVectorPacked *) currVert = pos->Interp; + *(NLMISC::CVectorPacked *) (currVert + vertexSize) = pos->Interp; return; } @@ -282,8 +282,8 @@ static inline void BuildSlice(const NLMISC::CMatrix &mat, CVertexBuffer &vb, uin } // build orthogonals vectors to tangent - *(NLMISC::CVector *) currVert = inter + ribSize * invTgNorm * (tangent.x * K - tangent.z * I); - *(NLMISC::CVector *) (currVert + vertexSize) = inter + ribSize * invTgNorm * (- tangent.x * K + tangent.z * I); + *(NLMISC::CVectorPacked *) currVert = inter + ribSize * invTgNorm * (tangent.x * K - tangent.z * I); + *(NLMISC::CVectorPacked *) (currVert + vertexSize) = inter + ribSize * invTgNorm * (- tangent.x * K + tangent.z * I); } else if (next->Proj.y > ZEpsilon) // first point cross the near plane { @@ -298,8 +298,8 @@ static inline void BuildSlice(const NLMISC::CMatrix &mat, CVertexBuffer &vb, uin } else // { - *(NLMISC::CVector *) currVert = pos->Interp; - *(NLMISC::CVector *) (currVert + vertexSize) = pos->Interp; + *(NLMISC::CVectorPacked *) currVert = pos->Interp; + *(NLMISC::CVectorPacked *) (currVert + vertexSize) = pos->Interp; return; } @@ -316,14 +316,14 @@ static inline void BuildSlice(const NLMISC::CMatrix &mat, CVertexBuffer &vb, uin } // build orthogonals vectors to tangent - *(NLMISC::CVector *) currVert = inter + ribSize * invTgNorm * (tangent.x * K - tangent.z * I); - *(NLMISC::CVector *) (currVert + vertexSize) = inter + ribSize * invTgNorm * (- tangent.x * K + tangent.z * I); + *(NLMISC::CVectorPacked *) currVert = inter + ribSize * invTgNorm * (tangent.x * K - tangent.z * I); + *(NLMISC::CVectorPacked *) (currVert + vertexSize) = inter + ribSize * invTgNorm * (- tangent.x * K + tangent.z * I); } else // two points are not visible { - *(NLMISC::CVector *) currVert = pos->Interp; - *(NLMISC::CVector *) (currVert 
+ vertexSize) = pos->Interp; + *(NLMISC::CVectorPacked *) currVert = pos->Interp; + *(NLMISC::CVectorPacked *) (currVert + vertexSize) = pos->Interp; } } diff --git a/code/nel/src/3d/ps_shockwave.cpp b/code/nel/src/3d/ps_shockwave.cpp index 607b2d03e..4421faded 100644 --- a/code/nel/src/3d/ps_shockwave.cpp +++ b/code/nel/src/3d/ps_shockwave.cpp @@ -23,6 +23,7 @@ #include "nel/3d/ps_iterator.h" #include "nel/3d/particle_system.h" +using NLMISC::CVectorPacked; namespace NL3D { @@ -159,10 +160,10 @@ public: radVect = *ptCurrSize * (CPSUtil::getCos((sint32) currAngle) * ptCurrBasis->X + CPSUtil::getSin((sint32) currAngle) * ptCurrBasis->Y); innerVect = radiusRatio * radVect; CHECK_VERTEX_BUFFER(*vb, currVertex); - * (CVector *) currVertex = *posIt + radVect; + * (CVectorPacked *) currVertex = *posIt + radVect; currVertex += vSize; CHECK_VERTEX_BUFFER(*vb, currVertex); - * (CVector *) currVertex = *posIt + innerVect; + * (CVectorPacked *) currVertex = *posIt + innerVect; currVertex += vSize; currAngle += angleStep; } diff --git a/code/nel/src/3d/ps_tail_dot.cpp b/code/nel/src/3d/ps_tail_dot.cpp index 623b8e7f7..0568528ad 100644 --- a/code/nel/src/3d/ps_tail_dot.cpp +++ b/code/nel/src/3d/ps_tail_dot.cpp @@ -25,6 +25,8 @@ #include +using NLMISC::CVectorPacked; + namespace NL3D { static NLMISC::CRGBA GradientB2W[] = {NLMISC::CRGBA(0, 0, 0, 0), NLMISC::CRGBA(255, 255, 255, 255) }; @@ -330,7 +332,7 @@ void CPSTailDot::displayRibbons(uint32 nbRibbons, uint32 srcStep) do { // the parent class has a method to get the ribbons positions - computeRibbon((uint) (fpRibbonIndex >> 16), (CVector *) currVert, vertexSize); + computeRibbon((uint) (fpRibbonIndex >> 16), (CVectorPacked *) currVert, vertexSize); currVert += vertexSize * (_UsedNbSegs + 1); fpRibbonIndex += srcStep; } @@ -345,7 +347,7 @@ void CPSTailDot::displayRibbons(uint32 nbRibbons, uint32 srcStep) { // we compute each pos thanks to the parametric curve _Owner->integrateSingle(date - _UsedSegDuration * (_UsedNbSegs + 1), 
_UsedSegDuration, _UsedNbSegs + 1, (uint) (fpRibbonIndex >> 16), - (NLMISC::CVector *) currVert, vertexSize); + (NLMISC::CVectorPacked *) currVert, vertexSize); currVert += vertexSize * (_UsedNbSegs + 1); fpRibbonIndex += srcStep; } diff --git a/code/nel/src/3d/ps_util.cpp b/code/nel/src/3d/ps_util.cpp index 0f7600d0e..211383a61 100644 --- a/code/nel/src/3d/ps_util.cpp +++ b/code/nel/src/3d/ps_util.cpp @@ -44,6 +44,7 @@ namespace NL3D { using NLMISC::CVector; +using NLMISC::CVectorPacked; //#ifdef NL_DEBUG diff --git a/code/nel/src/3d/seg_remanence.cpp b/code/nel/src/3d/seg_remanence.cpp index e0f9a52ae..5340811b2 100644 --- a/code/nel/src/3d/seg_remanence.cpp +++ b/code/nel/src/3d/seg_remanence.cpp @@ -27,6 +27,7 @@ #include "nel/3d/dru.h" +using NLMISC::CVectorPacked; @@ -162,8 +163,8 @@ void CSegRemanence::registerBasic() // helper functions to fill vb static inline void vbPush(uint8 *&dest, const CVector &v) { - *(CVector *) dest = v; - dest +=sizeof(CVector); + *(CVectorPacked *) dest = v; + dest +=sizeof(CVectorPacked); } static inline void vbPush(uint8 *&dest, float f) diff --git a/code/nel/src/3d/water_model.cpp b/code/nel/src/3d/water_model.cpp index eb8ceae27..2a7d80dbb 100644 --- a/code/nel/src/3d/water_model.cpp +++ b/code/nel/src/3d/water_model.cpp @@ -33,6 +33,7 @@ #include "nel/3d/texture_bump.h" #include "nel/3d/water_env_map.h" +using NLMISC::CVectorPacked; using NLMISC::CVector2f; @@ -1450,15 +1451,15 @@ uint CWaterModel::fillVBSoft(void *datas, uint startTri) } for(uint l = 0; l < numVerts - 2; ++l) { - *(CVector *) dest = unprojectedTriSoft[0]; + *(CVectorPacked *) dest = unprojectedTriSoft[0]; dest += sizeof(float[3]); *(CVector2f *) dest = envMap[0]; dest += sizeof(float[2]); - *(CVector *) dest = unprojectedTriSoft[l + 1]; + *(CVectorPacked *) dest = unprojectedTriSoft[l + 1]; dest += sizeof(float[3]); *(CVector2f *) dest = envMap[l + 1]; dest += sizeof(float[2]); - *(CVector *) dest = unprojectedTriSoft[l + 2]; + *(CVectorPacked *) dest = 
unprojectedTriSoft[l + 2]; dest += sizeof(float[3]); *(CVector2f *) dest = envMap[l + 2]; dest += sizeof(float[2]); @@ -1481,27 +1482,27 @@ uint CWaterModel::fillVBSoft(void *datas, uint startTri) computeWaterVertexSoft((float) (x + 1), (float) (y + 1), proj[2], envMap[2], camI, camJ, camK, denom, date, camMat.getPos()); computeWaterVertexSoft((float) x, (float) (y + 1), proj[3], envMap[3], camI, camJ, camK, denom, date, camMat.getPos()); // - *(CVector *) dest = proj[0]; + *(CVectorPacked *) dest = proj[0]; dest += sizeof(float[3]); *(CVector2f *) dest = envMap[0]; dest += sizeof(float[2]); - *(CVector *) dest = proj[2]; + *(CVectorPacked *) dest = proj[2]; dest += sizeof(float[3]); *(CVector2f *) dest = envMap[2]; dest += sizeof(float[2]); - *(CVector *) dest = proj[1]; + *(CVectorPacked *) dest = proj[1]; dest += sizeof(float[3]); *(CVector2f *) dest = envMap[1]; dest += sizeof(float[2]); - *(CVector *) dest = proj[0]; + *(CVectorPacked *) dest = proj[0]; dest += sizeof(float[3]); *(CVector2f *) dest = envMap[0]; dest += sizeof(float[2]); - *(CVector *) dest = proj[3]; + *(CVectorPacked *) dest = proj[3]; dest += sizeof(float[3]); *(CVector2f *) dest = envMap[3]; dest += sizeof(float[2]); - *(CVector *) dest = proj[2]; + *(CVectorPacked *) dest = proj[2]; dest += sizeof(float[3]); *(CVector2f *) dest = envMap[2]; dest += sizeof(float[2]); @@ -1557,11 +1558,11 @@ uint CWaterModel::fillVBHard(void *datas, uint startTri) } for(uint l = 0; l < numVerts - 2; ++l) { - *(CVector *) dest = unprojectedTri[0]; + *(CVectorPacked *) dest = unprojectedTri[0]; dest += WATER_VERTEX_HARD_SIZE; - *(CVector *) dest = unprojectedTri[l + 1]; + *(CVectorPacked *) dest = unprojectedTri[l + 1]; dest += WATER_VERTEX_HARD_SIZE; - *(CVector *) dest = unprojectedTri[l + 2]; + *(CVectorPacked *) dest = unprojectedTri[l + 2]; dest += WATER_VERTEX_HARD_SIZE; } } @@ -1581,17 +1582,17 @@ uint CWaterModel::fillVBHard(void *datas, uint startTri) computeWaterVertexHard((float) (x + 1), (float) (y 
+ 1), proj[2], camI, camJ, camK, denom); computeWaterVertexHard((float) x, (float) (y + 1), proj[3], camI, camJ, camK, denom); // - *(CVector *) dest = proj[0]; + *(CVectorPacked *) dest = proj[0]; dest += WATER_VERTEX_HARD_SIZE; - *(CVector *) dest = proj[2]; + *(CVectorPacked *) dest = proj[2]; dest += WATER_VERTEX_HARD_SIZE; - *(CVector *) dest = proj[1]; + *(CVectorPacked *) dest = proj[1]; dest += WATER_VERTEX_HARD_SIZE; - *(CVector *) dest = proj[0]; + *(CVectorPacked *) dest = proj[0]; dest += WATER_VERTEX_HARD_SIZE; - *(CVector *) dest = proj[3]; + *(CVectorPacked *) dest = proj[3]; dest += WATER_VERTEX_HARD_SIZE; - *(CVector *) dest = proj[2]; + *(CVectorPacked *) dest = proj[2]; dest += WATER_VERTEX_HARD_SIZE; } } From d94a49b3d847aeb235af86ad2b4e07abaf08c767 Mon Sep 17 00:00:00 2001 From: kaetemi Date: Fri, 13 Jun 2014 00:53:13 +0200 Subject: [PATCH 07/21] SSE2: More CVector alignment fixes --HG-- branch : sse2 --- code/nel/include/nel/3d/shadow_skin.h | 1 + code/nel/src/3d/lod_character_manager.cpp | 2 +- code/nel/src/3d/mesh.cpp | 8 ++--- code/nel/src/3d/mesh_mrm.cpp | 12 +++---- code/nel/src/3d/mesh_mrm_skin.cpp | 36 +++++++++++++-------- code/nel/src/3d/mesh_mrm_skinned.cpp | 2 +- code/nel/src/3d/mesh_multi_lod_instance.cpp | 2 +- code/nel/src/3d/patch_render.cpp | 6 ++-- code/nel/src/3d/shadow_skin.cpp | 28 ++++++++++++++++ code/nel/src/3d/vegetable_manager.cpp | 16 ++++----- code/nel/src/3d/vertex_buffer.cpp | 4 +-- 11 files changed, 77 insertions(+), 40 deletions(-) diff --git a/code/nel/include/nel/3d/shadow_skin.h b/code/nel/include/nel/3d/shadow_skin.h index 2b63a635d..3ecc56631 100644 --- a/code/nel/include/nel/3d/shadow_skin.h +++ b/code/nel/include/nel/3d/shadow_skin.h @@ -74,6 +74,7 @@ public: public: // skinning + void applySkin(NLMISC::CVectorPacked *dst, std::vector &boneMat3x4); void applySkin(NLMISC::CVector *dst, std::vector &boneMat3x4); /** return ray intersection. 
diff --git a/code/nel/src/3d/lod_character_manager.cpp b/code/nel/src/3d/lod_character_manager.cpp index 48c2a500f..46a6bacf8 100644 --- a/code/nel/src/3d/lod_character_manager.cpp +++ b/code/nel/src/3d/lod_character_manager.cpp @@ -676,7 +676,7 @@ bool CLodCharacterManager::addRenderCharacterKey(CLodCharacterInstance &instan { // NB: order is important for AGP filling optimisation // transform vertex, and store. - CVector *dstVector= (CVector*)dstPtr; + CVectorPacked *dstVector= (CVectorPacked*)dstPtr; fVect.x= vertPtr->x; fVect.y= vertPtr->y; fVect.z= vertPtr->z; ++vertPtr; dstVector->x= a00 * fVect.x + a01 * fVect.y + a02 * fVect.z + matPos.x; diff --git a/code/nel/src/3d/mesh.cpp b/code/nel/src/3d/mesh.cpp index dfaed0ce4..4bd444fb0 100644 --- a/code/nel/src/3d/mesh.cpp +++ b/code/nel/src/3d/mesh.cpp @@ -1049,7 +1049,7 @@ bool CMeshGeom::retrieveVertices(std::vector &vertices) const uint vSize= vb.getVertexSize(); for(i=0;iMatrixId[0]; diff --git a/code/nel/src/3d/mesh_mrm.cpp b/code/nel/src/3d/mesh_mrm.cpp index 999b3b62d..d0d733283 100644 --- a/code/nel/src/3d/mesh_mrm.cpp +++ b/code/nel/src/3d/mesh_mrm.cpp @@ -2066,7 +2066,7 @@ void CMeshMRMGeom::bkupOriginalSkinVerticesSubset(uint wedgeStart, uint wedgeEnd _OriginalTGSpace.resize(_VBufferFinal.getNumVertices()); for(uint i=wedgeStart; i &vertice // Final remaping of vertex to final index vertexRemap[i]= dstIndex; // copy to dest - *pDstVert= *(CVector*)pSrcVert; + *pDstVert= *(CVectorPacked*)pSrcVert; // next dest pDstVert++; @@ -3467,7 +3467,7 @@ sint CMeshMRMGeom::renderShadowSkinGeom(CMeshMRMInstance *mi, uint remainingVe CLod &lod= _Lods[_Lods.size()-1]; computeBoneMatrixes3x4(boneMat3x4, lod.MatrixInfluences, skeleton); - _ShadowSkin.applySkin((CVector*)vbDest, boneMat3x4); + _ShadowSkin.applySkin((CVectorPacked*)vbDest, boneMat3x4); // How many vertices are added to the VBuffer ??? 
diff --git a/code/nel/src/3d/mesh_mrm_skin.cpp b/code/nel/src/3d/mesh_mrm_skin.cpp index 13e8bdd21..d8460a1a5 100644 --- a/code/nel/src/3d/mesh_mrm_skin.cpp +++ b/code/nel/src/3d/mesh_mrm_skin.cpp @@ -222,11 +222,13 @@ void CMeshMRMGeom::applySkin(CLod &lod, const CSkeletonModel *skeleton) CMesh::CSkinWeight *srcSkin= srcSkinPtr + index; CVector *srcVertex= srcVertexPtr + index; uint8 *dstVertexVB= destVertexPtr + index * vertexSize; - CVector *dstVertex= (CVector*)(dstVertexVB); + CVectorPacked *dstVertex= (CVectorPacked*)(dstVertexVB); // Vertex. - boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, *dstVertex); + CVector temp; + boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, temp); + *dstVertex = temp; } break; @@ -239,12 +241,14 @@ void CMeshMRMGeom::applySkin(CLod &lod, const CSkeletonModel *skeleton) CMesh::CSkinWeight *srcSkin= srcSkinPtr + index; CVector *srcVertex= srcVertexPtr + index; uint8 *dstVertexVB= destVertexPtr + index * vertexSize; - CVector *dstVertex= (CVector*)(dstVertexVB); + CVectorPacked *dstVertex= (CVectorPacked*)(dstVertexVB); // Vertex. - boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], *dstVertex); - boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], *dstVertex); + CVector temp; + boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], temp); + boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], temp); + *dstVertex = temp; } break; @@ -257,13 +261,15 @@ void CMeshMRMGeom::applySkin(CLod &lod, const CSkeletonModel *skeleton) CMesh::CSkinWeight *srcSkin= srcSkinPtr + index; CVector *srcVertex= srcVertexPtr + index; uint8 *dstVertexVB= destVertexPtr + index * vertexSize; - CVector *dstVertex= (CVector*)(dstVertexVB); + CVectorPacked *dstVertex= (CVectorPacked*)(dstVertexVB); // Vertex. 
- boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], *dstVertex); - boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], *dstVertex); - boneMat3x4[ srcSkin->MatrixId[2] ].mulAddPoint( *srcVertex, srcSkin->Weights[2], *dstVertex); + CVector temp; + boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], temp); + boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], temp); + boneMat3x4[ srcSkin->MatrixId[2] ].mulAddPoint( *srcVertex, srcSkin->Weights[2], temp); + *dstVertex = temp; } break; @@ -276,14 +282,16 @@ void CMeshMRMGeom::applySkin(CLod &lod, const CSkeletonModel *skeleton) CMesh::CSkinWeight *srcSkin= srcSkinPtr + index; CVector *srcVertex= srcVertexPtr + index; uint8 *dstVertexVB= destVertexPtr + index * vertexSize; - CVector *dstVertex= (CVector*)(dstVertexVB); + CVectorPacked *dstVertex= (CVectorPacked*)(dstVertexVB); // Vertex. - boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], *dstVertex); - boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], *dstVertex); - boneMat3x4[ srcSkin->MatrixId[2] ].mulAddPoint( *srcVertex, srcSkin->Weights[2], *dstVertex); - boneMat3x4[ srcSkin->MatrixId[3] ].mulAddPoint( *srcVertex, srcSkin->Weights[3], *dstVertex); + CVector temp; + boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], temp); + boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], temp); + boneMat3x4[ srcSkin->MatrixId[2] ].mulAddPoint( *srcVertex, srcSkin->Weights[2], temp); + boneMat3x4[ srcSkin->MatrixId[3] ].mulAddPoint( *srcVertex, srcSkin->Weights[3], temp); + *dstVertex = temp; } break; diff --git a/code/nel/src/3d/mesh_mrm_skinned.cpp b/code/nel/src/3d/mesh_mrm_skinned.cpp index 2b1c3beb6..c4f795c87 100644 --- a/code/nel/src/3d/mesh_mrm_skinned.cpp +++ b/code/nel/src/3d/mesh_mrm_skinned.cpp @@ -1962,7 +1962,7 @@ sint 
CMeshMRMSkinnedGeom::renderShadowSkinGeom(CMeshMRMSkinnedInstance *mi, ui CLod &lod= _Lods[_Lods.size()-1]; computeBoneMatrixes3x4(boneMat3x4, lod.MatrixInfluences, skeleton); - _ShadowSkin.applySkin((CVector*)vbDest, boneMat3x4); + _ShadowSkin.applySkin((CVectorPacked*)vbDest, boneMat3x4); // How many vertices are added to the VBuffer ??? diff --git a/code/nel/src/3d/mesh_multi_lod_instance.cpp b/code/nel/src/3d/mesh_multi_lod_instance.cpp index c6c8fa237..f3dbbab93 100644 --- a/code/nel/src/3d/mesh_multi_lod_instance.cpp +++ b/code/nel/src/3d/mesh_multi_lod_instance.cpp @@ -302,7 +302,7 @@ void CMeshMultiLodInstance::setPosCoarseMesh( CMeshGeom &geom, const CMatrix &m for (uint i=0; i<_LastCoarseMeshNumVertices; i++) { // Transform position - *(CVector*)vDest = matrix.mulPoint (*(const CVector*)vSrc); + *(CVectorPacked*)vDest = matrix.mulPoint (CVector(*(const CVectorPacked*)vSrc)); // Next point vSrc+=vtSrcSize; diff --git a/code/nel/src/3d/patch_render.cpp b/code/nel/src/3d/patch_render.cpp index 135b9fdb3..76687cb38 100644 --- a/code/nel/src/3d/patch_render.cpp +++ b/code/nel/src/3d/patch_render.cpp @@ -1056,7 +1056,7 @@ inline void CPatch::fillFar0VertexVB(CTessFarVertex *pVert) // v[11]== EndPos - StartPos CHECK_VBA_RANGE(CLandscapeGlobals::CurrentFar0VBInfo.Accessor, CurVBPtr + CLandscapeGlobals::CurrentFar0VBInfo.DeltaPosOff, sizeof(CVector)) - *(CVector*)(CurVBPtr + CLandscapeGlobals::CurrentFar0VBInfo.DeltaPosOff)= + *(CVectorPacked*)(CurVBPtr + CLandscapeGlobals::CurrentFar0VBInfo.DeltaPosOff)= pVert->Src->EndPos - pVert->Src->StartPos; } } @@ -1144,7 +1144,7 @@ inline void CPatch::fillFar1VertexVB(CTessFarVertex *pVert) // v[11]== EndPos - StartPos CHECK_VBA_RANGE(CLandscapeGlobals::CurrentFar1VBInfo.Accessor, CurVBPtr + CLandscapeGlobals::CurrentFar1VBInfo.DeltaPosOff, sizeof(CVector)) - *(CVector*)(CurVBPtr + CLandscapeGlobals::CurrentFar1VBInfo.DeltaPosOff)= + *(CVectorPacked*)(CurVBPtr + CLandscapeGlobals::CurrentFar1VBInfo.DeltaPosOff)= 
pVert->Src->EndPos - pVert->Src->StartPos; // v[12]== Alpha information @@ -1214,7 +1214,7 @@ inline void CPatch::fillTileVertexVB(CTessNearVertex *pVert) // v[11]== EndPos - StartPos CHECK_VBA_RANGE(CLandscapeGlobals::CurrentTileVBInfo.Accessor, CurVBPtr + CLandscapeGlobals::CurrentTileVBInfo.DeltaPosOff, sizeof(CVector)) - *(CVector*)(CurVBPtr + CLandscapeGlobals::CurrentTileVBInfo.DeltaPosOff)= + *(CVectorPacked*)(CurVBPtr + CLandscapeGlobals::CurrentTileVBInfo.DeltaPosOff)= pVert->Src->EndPos - pVert->Src->StartPos; } } diff --git a/code/nel/src/3d/shadow_skin.cpp b/code/nel/src/3d/shadow_skin.cpp index 717d81f74..7aef7821e 100644 --- a/code/nel/src/3d/shadow_skin.cpp +++ b/code/nel/src/3d/shadow_skin.cpp @@ -38,6 +38,33 @@ uint CShadowSkin::NumCacheVertexShadow= NL_BlockByteL1 / sizeof(CShadowVertex); // *************************************************************************** +void CShadowSkin::applySkin(CVectorPacked *dst, std::vector &boneMat3x4) +{ + if(Vertices.empty()) + return; + uint numVerts= (uint)Vertices.size(); + CShadowVertex *src= &Vertices[0]; + + // Then do the skin + for(;numVerts>0;) + { + // number of vertices to process for this block. + uint nBlockInf= min(NumCacheVertexShadow, numVerts); + // next block. + numVerts-= nBlockInf; + + // cache the data in L1 cache. + CFastMem::precache(src, nBlockInf * sizeof(CShadowVertex)); + + CVector temp; + // for all InfluencedVertices only. + for(;nBlockInf>0;nBlockInf--, src++, dst++) + { + boneMat3x4[ src->MatrixId ].mulSetPoint( src->Vertex, temp ); + *dst = temp; + } + } +} void CShadowSkin::applySkin(CVector *dst, std::vector &boneMat3x4) { if(Vertices.empty()) @@ -56,6 +83,7 @@ void CShadowSkin::applySkin(CVector *dst, std::vector &boneMat3x4) // cache the data in L1 cache. CFastMem::precache(src, nBlockInf * sizeof(CShadowVertex)); + CVector temp; // for all InfluencedVertices only. 
for(;nBlockInf>0;nBlockInf--, src++, dst++) { diff --git a/code/nel/src/3d/vegetable_manager.cpp b/code/nel/src/3d/vegetable_manager.cpp index ba44a766f..f860b5b59 100644 --- a/code/nel/src/3d/vegetable_manager.cpp +++ b/code/nel/src/3d/vegetable_manager.cpp @@ -1379,21 +1379,21 @@ void CVegetableManager::addInstance(CVegetableInstanceGroup *ig, // Pos. //------- // Separate Center and relative pos. - CVector relPos= mat.mulVector(*(CVector*)srcPtr); // mulVector, because translation in v[center] + CVector relPos= mat.mulVector(*(CVectorPacked*)srcPtr); // mulVector, because translation in v[center] // compute bendCenterPos CVector bendCenterPos; if(shape->BendCenterMode == CVegetableShapeBuild::BendCenterNull) bendCenterPos= CVector::Null; else { - CVector v= *(CVector*)srcPtr; + CVector v= *(CVectorPacked*)srcPtr; v.z= 0; bendCenterPos= mat.mulVector(v); // mulVector, because translation in v[center] } // copy deltaPos= relPos-bendCenterPos; - *(CVector*)dstPtr= deltaPos; - *(CVector*)(dstPtr + dstCenterOff)= instancePos + bendCenterPos; + *(CVectorPacked*)dstPtr= deltaPos; + *(CVectorPacked*)(dstPtr + dstCenterOff)= instancePos + bendCenterPos; // if !destLighted, then VP is different if(!destLighted) { @@ -1426,7 +1426,7 @@ void CVegetableManager::addInstance(CVegetableInstanceGroup *ig, if(destLighted) { // normal - *(CVector*)(dstPtr + dstNormalOff)= normalMat.mulVector( *(CVector*)(srcPtr + srcNormalOff) ); + *(CVectorPacked*)(dstPtr + dstNormalOff)= normalMat.mulVector( *(CVectorPacked*)(srcPtr + srcNormalOff) ); } // If destLighted, secondaryRGBA is the ambient // else secondaryRGBA is used only for Alpha (DLM uv.v). @@ -1437,7 +1437,7 @@ void CVegetableManager::addInstance(CVegetableInstanceGroup *ig, nlassert(!destLighted); // compute normal. - CVector rotNormal= normalMat.mulVector( *(CVector*)(srcPtr + srcNormalOff) ); + CVector rotNormal= normalMat.mulVector( *(CVectorPacked*)(srcPtr + srcNormalOff) ); // must normalize() because scale is possible. 
rotNormal.normalize(); @@ -1466,7 +1466,7 @@ void CVegetableManager::addInstance(CVegetableInstanceGroup *ig, // Bend. //------- - CVector *dstBendPtr= (CVector*)(dstPtr + dstBendOff); + CVectorPacked *dstBendPtr= (CVectorPacked*)(dstPtr + dstBendOff); // setup bend Phase. dstBendPtr->y= bendPhase; // setup bend Weight. @@ -2704,7 +2704,7 @@ uint CVegetableManager::updateInstanceLighting(CVegetableInstanceGroup *ig, uin nlassert(!destLighted); // compute normal. - CVector rotNormal= normalMat.mulVector( *(CVector*)(srcPtr + srcNormalOff) ); + CVector rotNormal= normalMat.mulVector( *(CVectorPacked*)(srcPtr + srcNormalOff) ); // must normalize() because scale is possible. rotNormal.normalize(); diff --git a/code/nel/src/3d/vertex_buffer.cpp b/code/nel/src/3d/vertex_buffer.cpp index 94f269a2a..e8c5ac0c9 100644 --- a/code/nel/src/3d/vertex_buffer.cpp +++ b/code/nel/src/3d/vertex_buffer.cpp @@ -674,13 +674,13 @@ void CVertexBuffer::serialOldV1Minus(NLMISC::IStream &f, sint ver) // XYZ. if(_Flags & PositionFlag) { - CVector &vert= *(CVector*)(pointer + stridedId + _Offset[Position]); + CVectorPacked &vert= *(CVectorPacked*)(pointer + stridedId + _Offset[Position]); f.serial(vert); } // Normal if(_Flags & NormalFlag) { - CVector &norm= *(CVector*)(pointer + stridedId + _Offset[Normal]); + CVectorPacked &norm= *(CVectorPacked*)(pointer + stridedId + _Offset[Normal]); f.serial(norm); } // Uvs. 
From ba2231f0683f41e9c51e299a348e525dcbeae1d8 Mon Sep 17 00:00:00 2001 From: kaetemi Date: Fri, 13 Jun 2014 01:21:00 +0200 Subject: [PATCH 08/21] SSE2: Some initial CVector SSE2 math --HG-- branch : sse2 --- code/nel/include/nel/misc/types_nl.h | 70 ++++++++++++++++++++++- code/nel/include/nel/misc/vector.h | 27 ++++++++- code/nel/include/nel/misc/vector_inline.h | 42 ++++++++++++++ code/nel/src/misc/common.cpp | 28 +-------- 4 files changed, 136 insertions(+), 31 deletions(-) diff --git a/code/nel/include/nel/misc/types_nl.h b/code/nel/include/nel/misc/types_nl.h index b94ffe50f..21cd8b39e 100644 --- a/code/nel/include/nel/misc/types_nl.h +++ b/code/nel/include/nel/misc/types_nl.h @@ -336,14 +336,82 @@ typedef unsigned int uint; // at least 32bits (depend of processor) #endif #ifdef USE_SSE2 + extern void *operator new(size_t size) throw(std::bad_alloc); extern void *operator new[](size_t size) throw(std::bad_alloc); extern void operator delete(void *p) throw(); extern void operator delete[](void *p) throw(); + #define NL_ALIGN_SSE2(nb) NL_ALIGN(nb) + +# ifdef NL_COMP_VC + +inline void *aligned_malloc(size_t size, size_t alignment) +{ + return _aligned_malloc(size, alignment); +} + +inline void aligned_free(void *ptr) +{ + _aligned_free(ptr); +} + +# else + +inline void *aligned_malloc(size_t size, size_t alignment) +{ + return memalign(alignment, size); +} + +inline void aligned_free(void *ptr) +{ + free(ptr); +} + +# endif /* NL_COMP_ */ + +template +class aligned_allocator : public std::allocator +{ +public: + typedef size_t size_type; + typedef std::ptrdiff_t difference_type; + typedef T* pointer; + typedef const T* const_pointer; + typedef T& reference; + typedef const T& const_reference; + typedef T value_type; + + template + struct rebind + { + typedef aligned_allocator other; + }; + + aligned_allocator() : std::allocator() {} + + aligned_allocator(const aligned_allocator& other) : std::allocator(other) {} + + template + aligned_allocator(const 
aligned_allocator& other) : std::allocator(other) {} + + ~aligned_allocator() {} + + pointer allocate(size_type num, const void* /*hint*/ = 0) + { + return static_cast(aligned_malloc(num * sizeof(T), NL_DEFAULT_MEMORY_ALIGNMENT)); /* aligned_malloc takes (size, alignment) */ + } + + void deallocate(pointer p, size_type /*num*/) + { + aligned_free(p); + } +}; + #else #define NL_ALIGN_SSE2(nb) -#endif +#endif /* USE_SSE2 */ + // CHashMap, CHashSet and CHashMultiMap definitions #if defined(_STLPORT_VERSION) // STLport detected diff --git a/code/nel/include/nel/misc/vector.h b/code/nel/include/nel/misc/vector.h index b1e2573d5..f11137764 100644 --- a/code/nel/include/nel/misc/vector.h +++ b/code/nel/include/nel/misc/vector.h @@ -35,15 +35,24 @@ class IStream; * \author Lionel Berenguier * \author Nevrax France * \date 2000 + * \author Jan Boon + * \date 2014 */ NL_ALIGN_SSE2(16) class CVector { public: // Attributes. - float x,y,z; - #ifdef USE_SSE2 - float w; // Padding + union + { + struct + { + float x, y, z, P; + }; + __m128 mm; + }; +#else + float x,y,z; #endif public: // const. @@ -187,9 +196,21 @@ public: inline CVector blend(const CVector &v0, const CVector &v1, float lambda) { float invLambda = 1.f - lambda; +#ifdef USE_SSE2 + CVector res; + __m128 mLambda = _mm_set1_ps(lambda); + __m128 mInvLambda = _mm_set1_ps(invLambda); + __m128 mv0 = v0.mm; + __m128 mv1 = v1.mm; + mv0 = _mm_mul_ps(mv0, mInvLambda); + mv1 = _mm_mul_ps(mv1, mLambda); + res.mm = _mm_add_ps(mv0, mv1); + return res; +#else return CVector(invLambda * v0.x + lambda * v1.x, invLambda * v0.y + lambda * v1.y, invLambda * v0.z + lambda * v1.z); +#endif } diff --git a/code/nel/include/nel/misc/vector_inline.h b/code/nel/include/nel/misc/vector_inline.h index 9f890f637..61f20e367 100644 --- a/code/nel/include/nel/misc/vector_inline.h +++ b/code/nel/include/nel/misc/vector_inline.h @@ -31,23 +31,35 @@ namespace NLMISC // Base Maths.
inline CVector &CVector::operator+=(const CVector &v) { +#ifdef USE_SSE2 + mm = _mm_add_ps(mm, v.mm); +#else x+=v.x; y+=v.y; z+=v.z; +#endif return *this; } inline CVector &CVector::operator-=(const CVector &v) { +#ifdef USE_SSE2 + mm = _mm_sub_ps(mm, v.mm); +#else x-=v.x; y-=v.y; z-=v.z; +#endif return *this; } inline CVector &CVector::operator*=(float f) { +#ifdef USE_SSE2 + mm = _mm_mul_ps(mm, _mm_set1_ps(f)); +#else x*=f; y*=f; z*=f; +#endif return *this; } inline CVector &CVector::operator/=(float f) @@ -56,18 +68,36 @@ inline CVector &CVector::operator/=(float f) } inline CVector CVector::operator+(const CVector &v) const { +#ifdef USE_SSE2 + CVector res; + res.mm = _mm_add_ps(mm, v.mm); + return res; +#else CVector ret(x+v.x, y+v.y, z+v.z); return ret; +#endif } inline CVector CVector::operator-(const CVector &v) const { +#ifdef USE_SSE2 + CVector res; + res.mm = _mm_sub_ps(mm, v.mm); + return res; +#else CVector ret(x-v.x, y-v.y, z-v.z); return ret; +#endif } inline CVector CVector::operator*(float f) const { +#ifdef USE_SSE2 + CVector res; + res.mm = _mm_mul_ps(mm, _mm_set1_ps(f)); + return res; +#else CVector ret(x*f, y*f, z*f); return ret; +#endif } inline CVector CVector::operator/(float f) const { @@ -75,12 +105,24 @@ inline CVector CVector::operator/(float f) const } inline CVector CVector::operator-() const { +#ifdef USE_SSE2 + CVector res; + res.mm = _mm_mul_ps(mm, _mm_set1_ps(-1.0f)); + return res; +#else return CVector(-x,-y,-z); +#endif } inline CVector operator*(float f, const CVector &v) { +#ifdef USE_SSE2 + CVector res; + res.mm = _mm_mul_ps(v.mm, _mm_set1_ps(f)); + return res; +#else CVector ret(v.x*f, v.y*f, v.z*f); return ret; +#endif } diff --git a/code/nel/src/misc/common.cpp b/code/nel/src/misc/common.cpp index b58792a65..dd244667b 100644 --- a/code/nel/src/misc/common.cpp +++ b/code/nel/src/misc/common.cpp @@ -71,33 +71,7 @@ extern "C" long _ftol2( double dblSource ) { return _ftol( dblSource ); } #endif // NL_OS_WINDOWS -#ifdef 
HAS_SSE2 - -# ifdef NL_COMP_VC - -inline void *aligned_malloc(size_t size, size_t alignment) -{ - return _aligned_malloc(size, alignment); -} - -inline void aligned_free(void *p) -{ - _aligned_free(ptr); -} - -# else - -inline void *aligned_malloc(size_t size, size_t alignment) -{ - return memalign(alignment, size); -} - -inline void aligned_free(void *ptr) -{ - free(ptr); -} - -# endif /* NL_COMP_ */ +#ifdef USE_SSE2 void *operator new(size_t size) throw(std::bad_alloc) { From f8b6d81b254486d2a71277a8a81e15e5b0d6e515 Mon Sep 17 00:00:00 2001 From: kaetemi Date: Fri, 13 Jun 2014 03:30:40 +0200 Subject: [PATCH 09/21] SSE2: More alignment workarounds --HG-- branch : sse2 --- code/nel/include/nel/3d/driver.h | 1 + code/nel/include/nel/3d/particle_system.h | 2 +- code/nel/include/nel/3d/ps_attrib.h | 12 ++--- .../include/nel/3d/ps_attrib_maker_helper.h | 12 ++--- .../nel/3d/ps_attrib_maker_iterators.h | 12 ++--- code/nel/include/nel/3d/ps_force.h | 11 +++-- code/nel/include/nel/3d/ps_iterator.h | 4 +- code/nel/include/nel/3d/ps_located.h | 3 +- code/nel/include/nel/3d/ps_util.h | 1 + code/nel/include/nel/3d/ps_zone.h | 12 ++--- code/nel/include/nel/3d/u_driver.h | 1 + code/nel/include/nel/misc/vector.h | 40 ++++++++++++++-- code/nel/src/3d/particle_system.cpp | 2 +- code/nel/src/3d/ps_emitter.cpp | 4 +- code/nel/src/3d/ps_face_look_at.cpp | 2 +- code/nel/src/3d/ps_force.cpp | 8 ++-- code/nel/src/3d/ps_located.cpp | 24 +++++++--- code/nel/src/3d/ps_plane_basis_maker.cpp | 6 +-- code/nel/src/3d/ps_shockwave.cpp | 4 +- code/nel/src/3d/ps_sound.cpp | 4 +- code/nel/src/3d/ps_zone.cpp | 46 +++++++++---------- 21 files changed, 132 insertions(+), 79 deletions(-) diff --git a/code/nel/include/nel/3d/driver.h b/code/nel/include/nel/3d/driver.h index 3eb5823ca..ef846fd2c 100644 --- a/code/nel/include/nel/3d/driver.h +++ b/code/nel/include/nel/3d/driver.h @@ -57,6 +57,7 @@ using NLMISC::CRefCount; using NLMISC::CSmartPtr; using NLMISC::CRGBA; using NLMISC::CVector; +using 
NLMISC::CVectorPacked; using NLMISC::CMatrix; using NLMISC::CSynchronized; diff --git a/code/nel/include/nel/3d/particle_system.h b/code/nel/include/nel/3d/particle_system.h index c1139d070..2cc0f9d79 100644 --- a/code/nel/include/nel/3d/particle_system.h +++ b/code/nel/include/nel/3d/particle_system.h @@ -1244,7 +1244,7 @@ public: static std::vector _ParticleToRemove; // used during the update step, contains the indices of the particles to remove static std::vector _ParticleRemoveListIndex; // for each particle, -1 if it hasn't been removed, or else give the insertion number in _ParticleToRemove static std::vector _CollidingParticles; // index of particle that collided - static std::vector _SpawnPos; // spawn position of newly created particles + static std::vector _SpawnPos; // spawn position of newly created particles public: // current sim steps infos static TAnimationTime EllapsedTime; diff --git a/code/nel/include/nel/3d/ps_attrib.h b/code/nel/include/nel/3d/ps_attrib.h index cd691e719..a70fa7921 100644 --- a/code/nel/include/nel/3d/ps_attrib.h +++ b/code/nel/include/nel/3d/ps_attrib.h @@ -563,12 +563,12 @@ void CPSAttrib::swap(CPSAttrib &other) // here we give some definition for common types -typedef CPSAttrib TPSAttribVector; -typedef CPSAttrib TPSAttribRGBA; -typedef CPSAttrib TPSAttribFloat; -typedef CPSAttrib TPSAttribUInt; -typedef CPSAttrib TPSAttribUInt8; -typedef CPSAttrib TPSAttribTime; +typedef CPSAttrib TPSAttribVector; +typedef CPSAttrib TPSAttribRGBA; +typedef CPSAttrib TPSAttribFloat; +typedef CPSAttrib TPSAttribUInt; +typedef CPSAttrib TPSAttribUInt8; +typedef CPSAttrib TPSAttribTime; } // NL3D diff --git a/code/nel/include/nel/3d/ps_attrib_maker_helper.h b/code/nel/include/nel/3d/ps_attrib_maker_helper.h index 147d1ae5d..4ae6e65b1 100644 --- a/code/nel/include/nel/3d/ps_attrib_maker_helper.h +++ b/code/nel/include/nel/3d/ps_attrib_maker_helper.h @@ -1190,10 +1190,10 @@ T CPSAttribMakerT::get(CPSLocated *loc, uint32 index) result= 
getInternal(loc->getInvMass()[index]); break; case CPSInputType::attrSpeed: - result = getInternal(loc->getSpeed()[index].norm()); + result = getInternal(NLMISC::CVector(loc->getSpeed()[index]).norm()); break; case CPSInputType::attrPosition: - result = getInternal(loc->getPos()[index].norm()); + result = getInternal(NLMISC::CVector(loc->getPos()[index]).norm()); break; case CPSInputType::attrUniformRandom: { @@ -1210,7 +1210,7 @@ T CPSAttribMakerT::get(CPSLocated *loc, uint32 index) static NLMISC::CVector lodVect; float lodOffset; loc->getLODVect(lodVect, lodOffset, loc->getMatrixMode()); - float r = fabsf(loc->getPos()[index] * lodVect + lodOffset); + float r = fabsf(NLMISC::CVector(loc->getPos()[index]) * lodVect + lodOffset); r = this->_NbCycles * r > MaxInputValue ? MaxInputValue : r; if (_Clamp) { @@ -1224,7 +1224,7 @@ T CPSAttribMakerT::get(CPSLocated *loc, uint32 index) static NLMISC::CVector lodVect; float lodOffset; loc->getLODVect(lodVect, lodOffset, loc->getMatrixMode()); - float r = loc->getPos()[index] * lodVect + lodOffset; + float r = NLMISC::CVector(loc->getPos()[index]) * lodVect + lodOffset; r = this->_NbCycles * (r > MaxInputValue ? 
MaxInputValue : r * r); if (_Clamp) @@ -1240,7 +1240,7 @@ T CPSAttribMakerT::get(CPSLocated *loc, uint32 index) float lodOffset; loc->getLODVect(lodVect, lodOffset, loc->getMatrixMode()); - float r = loc->getPos()[index] * lodVect + lodOffset; + float r = NLMISC::CVector(loc->getPos()[index]) * lodVect + lodOffset; if (r < 0) { result = _F(MaxInputValue); @@ -1260,7 +1260,7 @@ T CPSAttribMakerT::get(CPSLocated *loc, uint32 index) float lodOffset; loc->getLODVect(lodVect, lodOffset, loc->getMatrixMode()); - float r = loc->getPos()[index] * lodVect + lodOffset; + float r = NLMISC::CVector(loc->getPos()[index]) * lodVect + lodOffset; if (r < 0) { result = _F(MaxInputValue); diff --git a/code/nel/include/nel/3d/ps_attrib_maker_iterators.h b/code/nel/include/nel/3d/ps_attrib_maker_iterators.h index c2c54d9d8..cbc3231d0 100644 --- a/code/nel/include/nel/3d/ps_attrib_maker_iterators.h +++ b/code/nel/include/nel/3d/ps_attrib_maker_iterators.h @@ -48,7 +48,7 @@ namespace NL3D template struct CVectNormIterator : CPSBaseIterator { - GET_INLINE float get() const { return this->Iter.get().norm(); } + GET_INLINE float get() const { return CVector(this->Iter.get()).norm(); } CVectNormIterator(const TBaseIter &it) : CPSBaseIterator(it) {} }; @@ -76,7 +76,7 @@ namespace NL3D template struct CDistIterator : CPSBaseIterator { - NLMISC::CVector V; + NLMISC::CVectorPacked V; float Offset; CDistIterator(const TBaseIter &it) : CPSBaseIterator(it) {} }; @@ -89,7 +89,7 @@ namespace NL3D GET_INLINE float get() const { - const float r = fabsf(this->Iter.get() * this->V + this->Offset); + const float r = fabsf(CVector(this->Iter.get()) * this->V + this->Offset); return r > MaxInputValue ? MaxInputValue : r; } CFDot3AddIterator(const TBaseIter &it) : CDistIterator(it) {} @@ -101,7 +101,7 @@ namespace NL3D { float get() const { - float r = this->Iter.get() * this->V + this->Offset; + float r = CVector(this->Iter.get()) * this->V + this->Offset; r *= r; return r > MaxInputValue ? 
MaxInputValue : r; } @@ -115,7 +115,7 @@ namespace NL3D GET_INLINE float get() const { - const float r = this->Iter.get() * this->V + this->Offset; + const float r = CVector(this->Iter.get()) * this->V + this->Offset; if (r < 0.f) return MaxInputValue; return r > MaxInputValue ? MaxInputValue : r; } @@ -130,7 +130,7 @@ namespace NL3D GET_INLINE float get() const { - float r = this->Iter.get() * this->V + this->Offset; + float r = CVector(this->Iter.get()) * this->V + this->Offset; if (r < 0) return MaxInputValue; r *= r; return r > MaxInputValue ? MaxInputValue : r; diff --git a/code/nel/include/nel/3d/ps_force.h b/code/nel/include/nel/3d/ps_force.h index 76cf90ee4..1739bfa92 100644 --- a/code/nel/include/nel/3d/ps_force.h +++ b/code/nel/include/nel/3d/ps_force.h @@ -87,9 +87,9 @@ public: * 'accumulate' set to false. * NB : works only with integrable forces */ - virtual void integrate(float /* date */, CPSLocated * /* src */, uint32 /* startIndex */, uint32 /* numObjects */, NLMISC::CVector * /* destPos */ = NULL, NLMISC::CVector * /* destSpeed */ = NULL, + virtual void integrate(float /* date */, CPSLocated * /* src */, uint32 /* startIndex */, uint32 /* numObjects */, NLMISC::CVectorPacked * /* destPos */ = NULL, NLMISC::CVectorPacked * /* destSpeed */ = NULL, bool /* accumulate */ = false, - uint /* posStride */ = sizeof(NLMISC::CVector), uint /* speedStride */ = sizeof(NLMISC::CVector) + uint /* posStride */ = sizeof(NLMISC::CVectorPacked), uint /* speedStride */ = sizeof(NLMISC::CVectorPacked) ) const { nlassert(0); // not an integrable force @@ -325,7 +325,10 @@ template void CIsotropicForceT::computeForces(CPSLocated &target) for (; speedIt != endSpeedIt; ++speedIt, ++posIt, ++invMassIt) { - _F(*posIt, *speedIt, *invMassIt); + const CVector posv = *posIt; + CVector speedv = *speedIt; + _F(posv, speedv, *invMassIt); + *speedIt = speedv; } } } @@ -770,7 +773,7 @@ protected: virtual CPSLocated *getForceIntensityOwner(void) { return _Owner; } // the normal of 
the vortex - CPSAttrib _Normal; + CPSAttrib _Normal; // radius of the vortex TPSAttribFloat _Radius; diff --git a/code/nel/include/nel/3d/ps_iterator.h b/code/nel/include/nel/3d/ps_iterator.h index 5a02cecda..5f850e393 100644 --- a/code/nel/include/nel/3d/ps_iterator.h +++ b/code/nel/include/nel/3d/ps_iterator.h @@ -134,10 +134,10 @@ namespace NL3D /// Some typedefs typedef CAdvance1Iterator TIteratorFloatStep1; typedef CAdvance1Iterator TIteratorFloatStep1; - typedef CAdvance1Iterator TIteratorVectStep1; + typedef CAdvance1Iterator TIteratorVectStep1; typedef CAdvance1616Iterator TIteratorFloatStep1616; typedef CAdvance1616Iterator TIteratorTimeStep1616; - typedef CAdvance1616Iterator TIteratorVectStep1616; + typedef CAdvance1616Iterator TIteratorVectStep1616; } // NL3D diff --git a/code/nel/include/nel/3d/ps_located.h b/code/nel/include/nel/3d/ps_located.h index ca1c86a7b..2fb210544 100644 --- a/code/nel/include/nel/3d/ps_located.h +++ b/code/nel/include/nel/3d/ps_located.h @@ -220,6 +220,7 @@ public: CScene *getScene(void); /// shortcut to the same method of the owning particle system + void getLODVect(NLMISC::CVectorPacked &v, float &offset, TPSMatrixMode matrixMode); void getLODVect(NLMISC::CVector &v, float &offset, TPSMatrixMode matrixMode); @@ -411,7 +412,7 @@ public: void computeForces(); // compute collisions - void computeCollisions(uint firstInstanceIndex, const NLMISC::CVector *posBefore, const NLMISC::CVector *posAfter); + void computeCollisions(uint firstInstanceIndex, const NLMISC::CVectorPacked *posBefore, const NLMISC::CVectorPacked *posAfter); // get a conversion matrix between 2 matrix modes static const NLMISC::CMatrix &getConversionMatrix(const CParticleSystem &ps, TPSMatrixMode to, TPSMatrixMode from); diff --git a/code/nel/include/nel/3d/ps_util.h b/code/nel/include/nel/3d/ps_util.h index 7542b7053..64af8a7fa 100644 --- a/code/nel/include/nel/3d/ps_util.h +++ b/code/nel/include/nel/3d/ps_util.h @@ -28,6 +28,7 @@ namespace NLMISC { class 
CMatrix; class CVector; + class CVectorPacked; }; namespace NL3D diff --git a/code/nel/include/nel/3d/ps_zone.h b/code/nel/include/nel/3d/ps_zone.h index cf29bc258..72d5a5529 100644 --- a/code/nel/include/nel/3d/ps_zone.h +++ b/code/nel/include/nel/3d/ps_zone.h @@ -106,7 +106,7 @@ public: /** Compute collisions for the given target. This will update the collisions infos. * The caller must provide pointer to arrays positions before and after time step. */ - virtual void computeCollisions(CPSLocated &target, uint firstInstanceIndex, const NLMISC::CVector *posBefore, const NLMISC::CVector *posAfter) = 0; + virtual void computeCollisions(CPSLocated &target, uint firstInstanceIndex, const NLMISC::CVectorPacked *posBefore, const NLMISC::CVectorPacked *posAfter) = 0; protected: @@ -141,7 +141,7 @@ protected: class CPSZonePlane : public CPSZone, public IPSMover { public: - virtual void computeCollisions(CPSLocated &target, uint firstInstanceIndex, const NLMISC::CVector *posBefore, const NLMISC::CVector *posAfter); + virtual void computeCollisions(CPSLocated &target, uint firstInstanceIndex, const NLMISC::CVectorPacked *posBefore, const NLMISC::CVectorPacked *posAfter); virtual void show(); @@ -192,7 +192,7 @@ typedef CPSAttrib TPSAttribRadiusPair; class CPSZoneSphere : public CPSZone, public IPSMover { public: - virtual void computeCollisions(CPSLocated &target, uint firstInstanceIndex, const NLMISC::CVector *posBefore, const NLMISC::CVector *posAfter); + virtual void computeCollisions(CPSLocated &target, uint firstInstanceIndex, const NLMISC::CVectorPacked *posBefore, const NLMISC::CVectorPacked *posAfter); virtual void show(); @@ -236,7 +236,7 @@ class CPSZoneSphere : public CPSZone, public IPSMover class CPSZoneDisc : public CPSZone, public IPSMover { public: - virtual void computeCollisions(CPSLocated &target, uint firstInstanceIndex, const NLMISC::CVector *posBefore, const NLMISC::CVector *posAfter); + virtual void computeCollisions(CPSLocated &target, uint 
firstInstanceIndex, const NLMISC::CVectorPacked *posBefore, const NLMISC::CVectorPacked *posAfter); virtual void show(); CPSZoneDisc() @@ -283,7 +283,7 @@ class CPSZoneDisc : public CPSZone, public IPSMover class CPSZoneCylinder : public CPSZone, public IPSMover { public: - virtual void computeCollisions(CPSLocated &target, uint firstInstanceIndex, const NLMISC::CVector *posBefore, const NLMISC::CVector *posAfter); + virtual void computeCollisions(CPSLocated &target, uint firstInstanceIndex, const NLMISC::CVectorPacked *posBefore, const NLMISC::CVectorPacked *posAfter); virtual void show(); CPSZoneCylinder() @@ -335,7 +335,7 @@ class CPSZoneCylinder : public CPSZone, public IPSMover class CPSZoneRectangle : public CPSZone, public IPSMover { public: - virtual void computeCollisions(CPSLocated &target, uint firstInstanceIndex, const NLMISC::CVector *posBefore, const NLMISC::CVector *posAfter); + virtual void computeCollisions(CPSLocated &target, uint firstInstanceIndex, const NLMISC::CVectorPacked *posBefore, const NLMISC::CVectorPacked *posAfter); virtual void show(); CPSZoneRectangle() diff --git a/code/nel/include/nel/3d/u_driver.h b/code/nel/include/nel/3d/u_driver.h index 2e74ae3fe..73b97d9cd 100644 --- a/code/nel/include/nel/3d/u_driver.h +++ b/code/nel/include/nel/3d/u_driver.h @@ -47,6 +47,7 @@ namespace NL3D using NLMISC::CVector; +using NLMISC::CVectorPacked; using NLMISC::CMatrix; using NLMISC::CRGBA; using NLMISC::CBitmap; diff --git a/code/nel/include/nel/misc/vector.h b/code/nel/include/nel/misc/vector.h index f11137764..d499a5dba 100644 --- a/code/nel/include/nel/misc/vector.h +++ b/code/nel/include/nel/misc/vector.h @@ -69,11 +69,11 @@ public: // Methods. /// @name Object. //@{ /// Constructor which does nothing. - CVector() {} + CVector() { if (((uintptr_t)(void *)(this) & 0xF) != 0) nlerror("Vector alignment error"); } /// Constructor . 
- CVector(float _x, float _y, float _z) : x(_x), y(_y), z(_z) {} + CVector(float _x, float _y, float _z) : x(_x), y(_y), z(_z) { if (((uintptr_t)(void *)(this) & 0xF) != 0) nlerror("Vector alignment error"); } /// Copy Constructor. - CVector(const CVector &v) : x(v.x), y(v.y), z(v.z) {} + CVector(const CVector &v) : x(v.x), y(v.y), z(v.z) { if (((uintptr_t)(void *)(this) & 0xF) != 0) nlerror("Vector alignment error"); } //@} /// @name Base Maths. @@ -181,6 +181,14 @@ public: return *this; } + CVectorPacked &operator -= (const CVector &v) + { + x -= v.x; + y -= v.y; + z -= v.z; + return *this; + } + operator CVector () const { return CVector(x, y, z); @@ -190,6 +198,16 @@ public: { f.serial(x,y,z); } + + CVector operator+(const CVector &v) const + { + return CVector(*this) + v; + } + + CVector operator-(const CVector &v) const + { + return CVector(*this) - v; + } }; // blend (faster version than the generic version found in algo.h) @@ -214,9 +232,25 @@ inline CVector blend(const CVector &v0, const CVector &v1, float lambda) } + } +namespace std { + inline void swap(NLMISC::CVectorPacked &v1, NLMISC::CVector &v2) + { + NLMISC::CVectorPacked temp = v2; + v2 = NLMISC::CVector(v1); + v1 = temp; + } + inline void swap(NLMISC::CVector &v1, NLMISC::CVectorPacked &v2) + { + NLMISC::CVectorPacked temp = v1; + v1 = NLMISC::CVector(v2); + v2 = temp; + } +} + #include "vector_inline.h" diff --git a/code/nel/src/3d/particle_system.cpp b/code/nel/src/3d/particle_system.cpp index 3b25c3e1e..ce8fbaae4 100644 --- a/code/nel/src/3d/particle_system.cpp +++ b/code/nel/src/3d/particle_system.cpp @@ -64,7 +64,7 @@ float CParticleSystem::RealEllapsedTimeRatio = 1.f; bool CParticleSystem::InsideSimLoop = false; bool CParticleSystem::InsideRemoveLoop = false; bool CParticleSystem::InsideNewElementsLoop = false;; -std::vector CParticleSystem::_SpawnPos; +std::vector CParticleSystem::_SpawnPos; diff --git a/code/nel/src/3d/ps_emitter.cpp b/code/nel/src/3d/ps_emitter.cpp index 
0084111a0..ef0931eed 100644 --- a/code/nel/src/3d/ps_emitter.cpp +++ b/code/nel/src/3d/ps_emitter.cpp @@ -2790,7 +2790,7 @@ void CPSEmitter::doEmitOnce(uint firstInstanceIndex) CVector startPos; if (!_Owner->isParametricMotionEnabled()) { - startPos = _Owner->getPos()[k] - _Owner->getSpeed()[k] * CParticleSystem::EllapsedTime; + startPos = CVector(_Owner->getPos()[k]) - CVector(_Owner->getSpeed()[k]) * CParticleSystem::EllapsedTime; } else { @@ -2823,7 +2823,7 @@ void CPSEmitter::doEmitOnce(uint firstInstanceIndex) CVector startPos; if (!_Owner->isParametricMotionEnabled()) { - startPos = _Owner->getPos()[k] - _Owner->getSpeed()[k] * CParticleSystem::EllapsedTime; + startPos = CVector(_Owner->getPos()[k]) - CVector(_Owner->getSpeed()[k]) * CParticleSystem::EllapsedTime; } else { diff --git a/code/nel/src/3d/ps_face_look_at.cpp b/code/nel/src/3d/ps_face_look_at.cpp index ccc5907c5..782d5c70d 100644 --- a/code/nel/src/3d/ps_face_look_at.cpp +++ b/code/nel/src/3d/ps_face_look_at.cpp @@ -65,7 +65,7 @@ public: do { // tmp unoptimized slow version - CVector normedSpeed = (*speedIt).normed(); + CVector normedSpeed = CVector(*speedIt).normed(); float iProj = normedSpeed * I; float kProj = normedSpeed * K; dest->I = iProj * I + kProj * K; diff --git a/code/nel/src/3d/ps_force.cpp b/code/nel/src/3d/ps_force.cpp index 7659ce7af..f013dcc3a 100644 --- a/code/nel/src/3d/ps_force.cpp +++ b/code/nel/src/3d/ps_force.cpp @@ -862,8 +862,8 @@ void CPSCylindricVortex::computeForces(CPSLocated &target) p *= 1.f / d; // compute the speed vect that we should have (normalized) realTangentialSpeed = n ^ p; - tangentialSpeed = (*speedIt * realTangentialSpeed) * realTangentialSpeed; - radialSpeed = (p * *speedIt) * p; + tangentialSpeed = (CVector(*speedIt) * realTangentialSpeed) * realTangentialSpeed; + radialSpeed = (p * CVector(*speedIt)) * p; // update radial speed; *speedIt -= _RadialViscosity * CParticleSystem::EllapsedTime * radialSpeed; // update tangential speed @@ -981,7 +981,7 @@ 
void CPSMagneticForce::computeForces(CPSLocated &target) TPSAttribFloat::const_iterator invMassIt = target.getInvMass().begin(); for (; it != itend; ++it, ++invMassIt) { - (*it) += intensity * *invMassIt * (*it ^ toAdd); + (*it) += intensity * *invMassIt * (CVector(*it) ^ toAdd); } } else @@ -989,7 +989,7 @@ void CPSMagneticForce::computeForces(CPSLocated &target) float i = intensity / target.getInitialMass(); for (; it != itend; ++it) { - (*it) += i * (*it ^ toAdd); + (*it) += i * (CVector(*it) ^ toAdd); } } } diff --git a/code/nel/src/3d/ps_located.cpp b/code/nel/src/3d/ps_located.cpp index 57997e2be..0ef57fab1 100644 --- a/code/nel/src/3d/ps_located.cpp +++ b/code/nel/src/3d/ps_located.cpp @@ -502,6 +502,18 @@ bool CPSLocated::hasEmitters(void) const return false; } +/// *************************************************************************************** +void CPSLocated::getLODVect(NLMISC::CVectorPacked &v, float &offset, TPSMatrixMode matrixMode) +{ + NL_PS_FUNC(CPSLocated_getLODVect) + nlassert(_Owner); + CHECK_PS_INTEGRITY + CVector temp; + _Owner->getLODVect(temp, offset, matrixMode); + v = temp; + CHECK_PS_INTEGRITY +} + /// *************************************************************************************** void CPSLocated::getLODVect(NLMISC::CVector &v, float &offset, TPSMatrixMode matrixMode) { @@ -1866,7 +1878,7 @@ void CPSLocated::updateCollisions() if (_Time[currCollision->Index] >= 1.f) { // check whether particles died before the collision. 
If so, just continue (particle has already been inserted in the remove list), and cancel the collision - float timeToCollision = currCollision->Dist / _Speed[currCollision->Index].norm(); + float timeToCollision = currCollision->Dist / CVector(_Speed[currCollision->Index]).norm(); if (_Time[currCollision->Index] / _TimeIncrement[currCollision->Index] - timeToCollision * CParticleSystem::RealEllapsedTimeRatio >= 1.f) { // says that collision did not occurs @@ -2196,12 +2208,12 @@ void CPSLocated::removeOldParticles() if (_LifeScheme) { - _Pos[*it] -= _Speed[*it] * ((_Time[*it] - 1.f) / _TimeIncrement[*it]) * ellapsedTimeRatio; + _Pos[*it] -= CVector(_Speed[*it]) * ((_Time[*it] - 1.f) / _TimeIncrement[*it]) * ellapsedTimeRatio; timeUntilNextSimStep = (_Time[*it] - 1.f) / _TimeIncrement[*it]; } else { - _Pos[*it] -= _Speed[*it] * ((_Time[*it] - 1.f) * _InitialLife) * ellapsedTimeRatio; + _Pos[*it] -= CVector(_Speed[*it]) * ((_Time[*it] - 1.f) * _InitialLife) * ellapsedTimeRatio; timeUntilNextSimStep = (_Time[*it] - 1.f) * _InitialLife; } _Time[*it] = 0.9999f; @@ -2255,7 +2267,7 @@ void CPSLocated::removeOldParticles() { // move position backward (compute its position at death) timeUntilNextSimStep = ((_Time[*it] - 1.f) / _TimeIncrement[*it]) * ellapsedTimeRatio; - _Pos[*it] -= _Speed[*it] * timeUntilNextSimStep; + _Pos[*it] -= CVector(_Speed[*it]) * timeUntilNextSimStep; // force time to 1 because emitter 'on death' may rely on the date of emitter to compute its attributes _Time[*it] = 0.9999f; @@ -2283,7 +2295,7 @@ void CPSLocated::removeOldParticles() { // move position backward timeUntilNextSimStep = (_Time[*it] - 1.f) * _InitialLife * ellapsedTimeRatio; - _Pos[*it] -= _Speed[*it] * timeUntilNextSimStep; + _Pos[*it] -= CVector(_Speed[*it]) * timeUntilNextSimStep; // force time to 1 because emitter 'on death' may rely on the date of emitter to compute its attributes _Time[*it] = 0.9999f; } @@ -3038,7 +3050,7 @@ void CPSLocated::setZBias(float value) } /// 
*************************************************************************************** -void CPSLocated::computeCollisions(uint firstInstanceIndex, const NLMISC::CVector *posBefore, const NLMISC::CVector *posAfter) +void CPSLocated::computeCollisions(uint firstInstanceIndex, const NLMISC::CVectorPacked *posBefore, const NLMISC::CVectorPacked *posAfter) { NL_PS_FUNC(CPSLocated_computeCollisions) for(TDtorObserversVect::iterator it = _DtorObserversVect.begin(); it != _DtorObserversVect.end(); ++it) diff --git a/code/nel/src/3d/ps_plane_basis_maker.cpp b/code/nel/src/3d/ps_plane_basis_maker.cpp index f1ca718b9..2bec4c713 100644 --- a/code/nel/src/3d/ps_plane_basis_maker.cpp +++ b/code/nel/src/3d/ps_plane_basis_maker.cpp @@ -130,7 +130,7 @@ void *CPSPlaneBasisFollowSpeed::make(CPSLocated *loc, case XY: while (numAttrib --) { - const CVector *speedVect = &(*(speedIt + (fpIndex >> 16))); + const NLMISC::CVectorPacked *speedVect = &(*(speedIt + (fpIndex >> 16))); float norm = sqrtf(speedVect->x * speedVect->x + speedVect->y * speedVect->y); float invNorm = (norm != 0.f) ? 1.f / norm : 0.f; CPlaneBasis &pb = *(CPlaneBasis *) ptDat; @@ -143,7 +143,7 @@ void *CPSPlaneBasisFollowSpeed::make(CPSLocated *loc, case XZ: while (numAttrib --) { - const CVector *speedVect = &(*(speedIt + (fpIndex >> 16))); + const NLMISC::CVectorPacked *speedVect = &(*(speedIt + (fpIndex >> 16))); float norm = sqrtf(speedVect->x * speedVect->x + speedVect->z * speedVect->z); float invNorm = (norm != 0.f) ? 1.f / norm : 0.f; CPlaneBasis &pb = *(CPlaneBasis *) ptDat; @@ -156,7 +156,7 @@ void *CPSPlaneBasisFollowSpeed::make(CPSLocated *loc, case YZ: while (numAttrib --) { - const CVector *speedVect = &(*(speedIt + (fpIndex >> 16))); + const NLMISC::CVectorPacked *speedVect = &(*(speedIt + (fpIndex >> 16))); float norm = sqrtf(speedVect->y * speedVect->y + speedVect->z * speedVect->z); float invNorm = (norm != 0.f) ? 
1.f / norm : 0.f; CPlaneBasis &pb = *(CPlaneBasis *) ptDat; diff --git a/code/nel/src/3d/ps_shockwave.cpp b/code/nel/src/3d/ps_shockwave.cpp index 4421faded..169ad2ec6 100644 --- a/code/nel/src/3d/ps_shockwave.cpp +++ b/code/nel/src/3d/ps_shockwave.cpp @@ -160,10 +160,10 @@ public: radVect = *ptCurrSize * (CPSUtil::getCos((sint32) currAngle) * ptCurrBasis->X + CPSUtil::getSin((sint32) currAngle) * ptCurrBasis->Y); innerVect = radiusRatio * radVect; CHECK_VERTEX_BUFFER(*vb, currVertex); - * (CVectorPacked *) currVertex = *posIt + radVect; + * (CVectorPacked *) currVertex = CVector(*posIt) + radVect; currVertex += vSize; CHECK_VERTEX_BUFFER(*vb, currVertex); - * (CVectorPacked *) currVertex = *posIt + innerVect; + * (CVectorPacked *) currVertex = CVector(*posIt) + innerVect; currVertex += vSize; currAngle += angleStep; } diff --git a/code/nel/src/3d/ps_sound.cpp b/code/nel/src/3d/ps_sound.cpp index a5ae6ad26..07aefd4cf 100644 --- a/code/nel/src/3d/ps_sound.cpp +++ b/code/nel/src/3d/ps_sound.cpp @@ -148,8 +148,8 @@ void CPSSound::step(TPSProcessPass pass) CPSAttrib::iterator it = _Sounds.begin(), endIt; - CPSAttrib::const_iterator posIt = _Owner->getPos().begin(); - CPSAttrib::const_iterator speedIt = _Owner->getSpeed().begin(); + CPSAttrib::const_iterator posIt = _Owner->getPos().begin(); + CPSAttrib::const_iterator speedIt = _Owner->getSpeed().begin(); do { diff --git a/code/nel/src/3d/ps_zone.cpp b/code/nel/src/3d/ps_zone.cpp index 813103896..250877e28 100644 --- a/code/nel/src/3d/ps_zone.cpp +++ b/code/nel/src/3d/ps_zone.cpp @@ -194,7 +194,7 @@ void CPSZonePlane::deleteElement(uint32 index) } -void CPSZonePlane::computeCollisions(CPSLocated &target, uint firstInstanceIndex, const NLMISC::CVector *posBefore, const NLMISC::CVector *posAfter) +void CPSZonePlane::computeCollisions(CPSLocated &target, uint firstInstanceIndex, const NLMISC::CVectorPacked *posBefore, const NLMISC::CVectorPacked *posAfter) { NL_PS_FUNC(CPSZonePlane_computeCollisions) 
MINI_TIMER(PSStatsZonePlane) @@ -213,9 +213,9 @@ void CPSZonePlane::computeCollisions(CPSLocated &target, uint firstInstanceIndex NLMISC::CPlane p; p.make(m.mulVector(*normalIt), m * (*planePosIt)); // deals with each particle - const NLMISC::CVector *itPosBefore = posBefore + firstInstanceIndex; - const NLMISC::CVector *itPosBeforeEnd = posBefore + target.getSize(); - const NLMISC::CVector *itPosAfter = posAfter + firstInstanceIndex; + const NLMISC::CVectorPacked *itPosBefore = posBefore + firstInstanceIndex; + const NLMISC::CVectorPacked *itPosBeforeEnd = posBefore + target.getSize(); + const NLMISC::CVectorPacked *itPosAfter = posAfter + firstInstanceIndex; while (itPosBefore != itPosBeforeEnd) { float posSide = p * *itPosBefore; @@ -235,7 +235,7 @@ void CPSZonePlane::computeCollisions(CPSLocated &target, uint firstInstanceIndex ci.Dist = startEnd.norm(); // we translate the particle from an epsilon so that it won't get hooked to the plane ci.NewPos = *itPosBefore + startEnd + PSCollideEpsilon * p.getNormal(); - const CVector &speed = target.getSpeed()[(uint32)(itPosBefore - posBefore)]; + const CVector speed = target.getSpeed()[(uint32)(itPosBefore - posBefore)]; ci.NewSpeed = _BounceFactor * (speed - 2.0f * (speed * p.getNormal()) * p.getNormal()); ci.CollisionZone = this; CPSLocated::_Collisions[itPosBefore - posBefore].update(ci); @@ -290,7 +290,7 @@ void CPSZonePlane::serial(NLMISC::IStream &f) throw(NLMISC::EStream) // sphere implementation // /////////////////////////// -void CPSZoneSphere::computeCollisions(CPSLocated &target, uint firstInstanceIndex, const NLMISC::CVector *posBefore, const NLMISC::CVector *posAfter) +void CPSZoneSphere::computeCollisions(CPSLocated &target, uint firstInstanceIndex, const NLMISC::CVectorPacked *posBefore, const NLMISC::CVectorPacked *posAfter) { NL_PS_FUNC(CPSZoneSphere_computeCollisions) MINI_TIMER(PSStatsZoneSphere) @@ -308,9 +308,9 @@ void CPSZoneSphere::computeCollisions(CPSLocated &target, uint firstInstanceInde 
const CMatrix &m = CPSLocated::getConversionMatrix(&target, this->_Owner); CVector center = m * *spherePosIt; // deals with each particle - const NLMISC::CVector *itPosBefore = posBefore + firstInstanceIndex; - const NLMISC::CVector *itPosBeforeEnd = posBefore + target.getSize(); - const NLMISC::CVector *itPosAfter = posAfter + firstInstanceIndex; + const NLMISC::CVectorPacked *itPosBefore = posBefore + firstInstanceIndex; + const NLMISC::CVectorPacked *itPosBeforeEnd = posBefore + target.getSize(); + const NLMISC::CVectorPacked *itPosAfter = posAfter + firstInstanceIndex; while (itPosBefore != itPosBeforeEnd) { // check whether the located is going through the sphere @@ -346,7 +346,7 @@ void CPSZoneSphere::computeCollisions(CPSLocated &target, uint firstInstanceInde ci.Dist = startEnd.norm(); // we translate the particle from an epsilon so that it won't get hooked to the sphere ci.NewPos = pos + startEnd + PSCollideEpsilon * normal; - const CVector &speed = target.getSpeed()[(uint32)(itPosBefore - posBefore)]; + const CVector speed = target.getSpeed()[(uint32)(itPosBefore - posBefore)]; ci.NewSpeed = _BounceFactor * (speed - 2.0f * (speed * normal) * normal); ci.CollisionZone = this; CPSLocated::_Collisions[itPosBefore - posBefore].update(ci); @@ -450,7 +450,7 @@ void CPSZoneSphere::deleteElement(uint32 index) //////////////////////////////// // CPSZoneDisc implementation // //////////////////////////////// -void CPSZoneDisc::computeCollisions(CPSLocated &target, uint firstInstanceIndex, const NLMISC::CVector *posBefore, const NLMISC::CVector *posAfter) +void CPSZoneDisc::computeCollisions(CPSLocated &target, uint firstInstanceIndex, const NLMISC::CVectorPacked *posBefore, const NLMISC::CVectorPacked *posAfter) { NL_PS_FUNC(CPSZoneDisc_computeCollisions) MINI_TIMER(PSStatsZoneDisc) @@ -477,9 +477,9 @@ void CPSZoneDisc::computeCollisions(CPSLocated &target, uint firstInstanceIndex, const float epsilon = 0.5f * PSCollideEpsilon; // deals with each particle - const 
NLMISC::CVector *itPosBefore = posBefore + firstInstanceIndex; - const NLMISC::CVector *itPosBeforeEnd = posBefore + target.getSize(); - const NLMISC::CVector *itPosAfter = posAfter + firstInstanceIndex; + const NLMISC::CVectorPacked *itPosBefore = posBefore + firstInstanceIndex; + const NLMISC::CVectorPacked *itPosBeforeEnd = posBefore + target.getSize(); + const NLMISC::CVectorPacked *itPosAfter = posAfter + firstInstanceIndex; while (itPosBefore != itPosBeforeEnd) { float posSide = p * *itPosBefore; @@ -503,7 +503,7 @@ void CPSZoneDisc::computeCollisions(CPSLocated &target, uint firstInstanceIndex, hitRadius2 = (ci.NewPos - center) * (ci.NewPos - center); if (hitRadius2 < radiusIt->R2) // check collision against disc { - const CVector &speed = target.getSpeed()[(uint32)(itPosBefore - posBefore)]; + const CVector speed = target.getSpeed()[(uint32)(itPosBefore - posBefore)]; ci.NewSpeed = _BounceFactor * (speed - 2.0f * (speed * p.getNormal()) * p.getNormal()); ci.CollisionZone = this; CPSLocated::_Collisions[itPosBefore - posBefore].update(ci); @@ -847,7 +847,7 @@ void CPSZoneCylinder::performMotion(TAnimationTime ellapsedTime) */ -void CPSZoneCylinder::computeCollisions(CPSLocated &target, uint firstInstanceIndex, const NLMISC::CVector *posBefore, const NLMISC::CVector *posAfter) +void CPSZoneCylinder::computeCollisions(CPSLocated &target, uint firstInstanceIndex, const NLMISC::CVectorPacked *posBefore, const NLMISC::CVectorPacked *posAfter) { NL_PS_FUNC(CPSZoneCylinder_computeCollisions) MINI_TIMER(PSStatsZoneCylinder) @@ -873,9 +873,9 @@ void CPSZoneCylinder::computeCollisions(CPSLocated &target, uint firstInstanceIn CVector destProjectedPos, destTPos; // deals with each particle // deals with each particle - const NLMISC::CVector *itPosBefore = posBefore + firstInstanceIndex; - const NLMISC::CVector *itPosBeforeEnd = posBefore + target.getSize(); - const NLMISC::CVector *itPosAfter = posAfter + firstInstanceIndex; + const NLMISC::CVectorPacked *itPosBefore = 
posBefore + firstInstanceIndex; + const NLMISC::CVectorPacked *itPosBeforeEnd = posBefore + target.getSize(); + const NLMISC::CVectorPacked *itPosAfter = posAfter + firstInstanceIndex; while (itPosBefore != itPosBeforeEnd) { const CVector &pos = *itPosBefore; @@ -1123,7 +1123,7 @@ void CPSZoneCylinder::deleteElement(uint32 index) // implementation of CPSZoneRectangle // ////////////////////////////////////////////// -void CPSZoneRectangle::computeCollisions(CPSLocated &target, uint firstInstanceIndex, const NLMISC::CVector *posBefore, const NLMISC::CVector *posAfter) +void CPSZoneRectangle::computeCollisions(CPSLocated &target, uint firstInstanceIndex, const NLMISC::CVectorPacked *posBefore, const NLMISC::CVectorPacked *posAfter) { NL_PS_FUNC(CPSZoneRectangle_computeCollisions) MINI_TIMER(PSStatsZoneRectangle) @@ -1149,9 +1149,9 @@ void CPSZoneRectangle::computeCollisions(CPSLocated &target, uint firstInstanceI p.make(X ^ Y, center); // deals with each particle const float epsilon = 0.5f * PSCollideEpsilon; - const NLMISC::CVector *itPosBefore = posBefore + firstInstanceIndex; - const NLMISC::CVector *itPosBeforeEnd = posBefore + target.getSize(); - const NLMISC::CVector *itPosAfter = posAfter + firstInstanceIndex; + const NLMISC::CVectorPacked *itPosBefore = posBefore + firstInstanceIndex; + const NLMISC::CVectorPacked *itPosBeforeEnd = posBefore + target.getSize(); + const NLMISC::CVectorPacked *itPosAfter = posAfter + firstInstanceIndex; while (itPosBefore != itPosBeforeEnd) { float posSide = p * *itPosBefore; From 7c7db53c72e97b9d595ef484b9f93389d5d2b16f Mon Sep 17 00:00:00 2001 From: kaetemi Date: Fri, 13 Jun 2014 04:09:14 +0200 Subject: [PATCH 10/21] SSE2: Another workaround for a stupid uint8 alloc --HG-- branch : sse2 --- code/nel/src/3d/ps_face_look_at.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/code/nel/src/3d/ps_face_look_at.cpp b/code/nel/src/3d/ps_face_look_at.cpp index 782d5c70d..f5063f7f2 100644 --- 
a/code/nel/src/3d/ps_face_look_at.cpp +++ b/code/nel/src/3d/ps_face_look_at.cpp @@ -33,8 +33,8 @@ namespace NL3D */ struct CLookAtAlign { - CVector I; - CVector K; + CVectorPacked I; + CVectorPacked K; }; From 1ceaed828ad41644731403107ca4869e91273b86 Mon Sep 17 00:00:00 2001 From: kaetemi Date: Fri, 13 Jun 2014 04:17:38 +0200 Subject: [PATCH 11/21] SSE2: More alignment fixes --HG-- branch : sse2 --- code/nel/include/nel/sound/clustered_sound.h | 2 ++ code/nel/src/3d/ps_mesh.cpp | 26 ++++++++++---------- code/nel/src/sound/clustered_sound.cpp | 3 ++- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/code/nel/include/nel/sound/clustered_sound.h b/code/nel/include/nel/sound/clustered_sound.h index ed674f879..d9d2c47f2 100644 --- a/code/nel/include/nel/sound/clustered_sound.h +++ b/code/nel/include/nel/sound/clustered_sound.h @@ -50,6 +50,7 @@ class CClusteredSound { public: /// This structure contain data about sound status in a cluster + NL_ALIGN_SSE2(16) struct CClusterSoundStatus { /// The relative gain of sound in the cluster @@ -78,6 +79,7 @@ public: typedef std::map TClusterStatusMap; /// This structure is used when we traverse the cluster/portal graph. + NL_ALIGN_SSE2(16) struct CSoundTravContext { /// The current gain. 
diff --git a/code/nel/src/3d/ps_mesh.cpp b/code/nel/src/3d/ps_mesh.cpp index ddbf024fb..a8c057497 100644 --- a/code/nel/src/3d/ps_mesh.cpp +++ b/code/nel/src/3d/ps_mesh.cpp @@ -660,9 +660,9 @@ public: CHECK_VERTEX_BUFFER(outVb, outVertex + outNormalOff); // translate and resize the vertex (relatively to the mesh origin) - *(CVectorPacked *) outVertex = *posIt + sM * *(CVector *) inVertex; + *(CVectorPacked *) outVertex = *posIt + sM * CVector(*(CVectorPacked *) inVertex); // copy the normal - *(CVectorPacked *) (outVertex + outNormalOff) = M * *(CVector *) (inVertex + inNormalOff); + *(CVectorPacked *) (outVertex + outNormalOff) = M * CVector(*(CVectorPacked *) (inVertex + inNormalOff)); inVertex += inVSize; @@ -683,7 +683,7 @@ public: CHECK_VERTEX_BUFFER(outVb, outVertex); // translate and resize the vertex (relatively to the mesh origin) - *(CVectorPacked *) outVertex = *posIt + sM * *(CVector *) inVertex; + *(CVectorPacked *) outVertex = *posIt + sM * CVector(*(CVectorPacked *) inVertex); inVertex += inVSize; outVertex += outVSize; @@ -774,10 +774,10 @@ public: CHECK_VERTEX_BUFFER(outVb, outVertex + outNormalOff); // morph, and transform the vertex - *(CVectorPacked *) outVertex = *posIt + sM * (opLambda * *(CVector *) m0 + lambda * *(CVector *) m1); + *(CVectorPacked *) outVertex = *posIt + sM * (opLambda * CVector(*(CVectorPacked *) m0) + lambda * CVector(*(CVectorPacked *) m1)); // morph, and transform the normal - *(CVectorPacked *) (outVertex + outNormalOff) = M * (opLambda * *(CVector *) (m0 + inNormalOff) - + lambda * *(CVector *) (m1 + inNormalOff)).normed(); + *(CVectorPacked *) (outVertex + outNormalOff) = M * (opLambda * CVector(*(CVectorPacked *) (m0 + inNormalOff)) + + lambda * CVector(*(CVectorPacked *) (m1 + inNormalOff))).normed(); m0 += inVSize; @@ -799,7 +799,7 @@ public: CHECK_VERTEX_BUFFER((*inVB1), m1); CHECK_VERTEX_BUFFER(outVb, outVertex); // morph, and transform the vertex - *(CVectorPacked *) outVertex = *posIt + sM * (opLambda * 
*(CVector *) m0 + opLambda * *(CVector *) m1); + *(CVectorPacked *) outVertex = *posIt + sM * (opLambda * CVector(*(CVectorPacked *) m0) + opLambda * CVector(*(CVectorPacked *) m1)); m0 += inVSize; m1 += inVSize; @@ -948,9 +948,9 @@ public: // translate and resize the vertex (relatively to the mesh origin) - *(CVector *) outVertex = *posIt + *ptCurrSize * *(CVector *) inVertex; + *(CVectorPacked *) outVertex = *posIt + *ptCurrSize * CVector(*(CVectorPacked *) inVertex); // copy the normal - *(CVector *) (outVertex + normalOff ) = *(CVector *) (inVertex + pNormalOff); + *(CVectorPacked *) (outVertex + normalOff ) = *(CVectorPacked *) (inVertex + pNormalOff); inVertex += inVSize; outVertex += outVSize; } @@ -963,7 +963,7 @@ public: // translate and resize the vertex (relatively to the mesh origin) CHECK_VERTEX_BUFFER(outVb, outVertex); CHECK_VERTEX_BUFFER(prerotVb, inVertex); - *(CVector *) outVertex = *posIt + *ptCurrSize * *(CVector *) inVertex; + *(CVectorPacked *) outVertex = *posIt + *ptCurrSize * CVector(*(CVectorPacked *) inVertex); inVertex += inVSize; outVertex += outVSize; } @@ -1684,8 +1684,8 @@ CVertexBuffer &CPSConstraintMesh::makePrerotatedVb(const CVertexBuffer &inVb) CHECK_VERTEX_BUFFER(prerotatedVb, outVertex); CHECK_VERTEX_BUFFER(prerotatedVb, outVertex + pNormalOff); - * (CVectorPacked *) outVertex = mat.mulVector(* (CVector *) inVertex); - * (CVectorPacked *) (outVertex + normalOff) = mat.mulVector(* (CVector *) (inVertex + pNormalOff) ); + * (CVectorPacked *) outVertex = mat.mulVector(* (CVectorPacked *) inVertex); + * (CVectorPacked *) (outVertex + normalOff) = mat.mulVector(* (CVectorPacked *) (inVertex + pNormalOff) ); outVertex += vpSize; inVertex += vSize; @@ -1701,7 +1701,7 @@ CVertexBuffer &CPSConstraintMesh::makePrerotatedVb(const CVertexBuffer &inVb) CHECK_VERTEX_BUFFER(prerotatedVb, outVertex); CHECK_VERTEX_BUFFER(inVb, inVertex); - * (CVectorPacked *) outVertex = mat.mulVector(* (CVector *) inVertex); + * (CVectorPacked *) outVertex = 
mat.mulVector(* (CVectorPacked *) inVertex); outVertex += vpSize; inVertex += vSize; } diff --git a/code/nel/src/sound/clustered_sound.cpp b/code/nel/src/sound/clustered_sound.cpp index 25f0e5b64..0ccfd0d59 100644 --- a/code/nel/src/sound/clustered_sound.cpp +++ b/code/nel/src/sound/clustered_sound.cpp @@ -881,7 +881,8 @@ bool CClusteredSound::addAudibleCluster(CCluster *cluster, CClusterSoundStatus & } else { - _AudibleClusters.insert(make_pair(cluster, soundStatus)); + //_AudibleClusters.insert(make_pair(cluster, soundStatus)); + _AudibleClusters[cluster] = soundStatus; return true; } From 35737498b5c2460da773a3a9c646affe1c4ec3a4 Mon Sep 17 00:00:00 2001 From: kaetemi Date: Fri, 13 Jun 2014 05:03:55 +0200 Subject: [PATCH 12/21] SSE2: Implement CVector --HG-- branch : sse2 --- code/nel/include/nel/misc/vector.h | 10 ++-- code/nel/include/nel/misc/vector_inline.h | 57 +++++++++++++++++++++++ 2 files changed, 64 insertions(+), 3 deletions(-) diff --git a/code/nel/include/nel/misc/vector.h b/code/nel/include/nel/misc/vector.h index d499a5dba..f9667e1c7 100644 --- a/code/nel/include/nel/misc/vector.h +++ b/code/nel/include/nel/misc/vector.h @@ -69,11 +69,15 @@ public: // Methods. /// @name Object. //@{ /// Constructor which does nothing. - CVector() { if (((uintptr_t)(void *)(this) & 0xF) != 0) nlerror("Vector alignment error"); } + CVector() { /*if (((uintptr_t)(void *)(this) & 0xF) != 0) nlerror("Vector alignment error");*/ } /// Constructor . - CVector(float _x, float _y, float _z) : x(_x), y(_y), z(_z) { if (((uintptr_t)(void *)(this) & 0xF) != 0) nlerror("Vector alignment error"); } + CVector(float _x, float _y, float _z) : x(_x), y(_y), z(_z) { /*if (((uintptr_t)(void *)(this) & 0xF) != 0) nlerror("Vector alignment error");*/ } /// Copy Constructor. 
- CVector(const CVector &v) : x(v.x), y(v.y), z(v.z) { if (((uintptr_t)(void *)(this) & 0xF) != 0) nlerror("Vector alignment error"); } +#ifdef USE_SSE2 + CVector(const CVector &v) : mm(v.mm) { /*if (((uintptr_t)(void *)(this) & 0xF) != 0) nlerror("Vector alignment error");*/ } +#else + CVector(const CVector &v) : x(v.x), y(v.y), z(v.z) { } +#endif //@} /// @name Base Maths. diff --git a/code/nel/include/nel/misc/vector_inline.h b/code/nel/include/nel/misc/vector_inline.h index 61f20e367..270608af0 100644 --- a/code/nel/include/nel/misc/vector_inline.h +++ b/code/nel/include/nel/misc/vector_inline.h @@ -125,15 +125,43 @@ inline CVector operator*(float f, const CVector &v) #endif } +#ifdef USE_SSE2 +inline __m128 dotsplat(const __m128 &l, const __m128 &r) +{ + // TODO: _mm_hadd_ps SSE3 + + __m128 mult = _mm_mul_ps(l, r); + __m128 vx = _mm_shuffle_ps(mult, mult, _MM_SHUFFLE(0, 0, 0, 0)); + __m128 vy = _mm_shuffle_ps(mult, mult, _MM_SHUFFLE(1, 1, 1, 1)); + __m128 vz = _mm_shuffle_ps(mult, mult, _MM_SHUFFLE(2, 2, 2, 2)); + __m128 result = _mm_add_ps(_mm_add_ps(vx, vy), vz); + return result; +} +#endif // ============================================================================================ // Advanced Maths. 
inline float CVector::operator*(const CVector &v) const { +#ifdef USE_SSE2 + return _mm_cvtss_f32(dotsplat(mm, v.mm)); +#else return x*v.x + y*v.y + z*v.z; +#endif } inline CVector CVector::operator^(const CVector &v) const { +#ifdef USE_SSE2 + CVector res; + __m128 l = _mm_shuffle_ps(mm, mm, _MM_SHUFFLE(3, 0, 2, 1)); + __m128 r = _mm_shuffle_ps(v.mm, v.mm, _MM_SHUFFLE(3, 1, 0, 2)); + __m128 mul1 = _mm_mul_ps(l, r); + l = _mm_shuffle_ps(mm, mm, _MM_SHUFFLE(3, 1, 0, 2)); + r = _mm_shuffle_ps(v.mm, v.mm, _MM_SHUFFLE(3, 0, 2, 1)); + __m128 mul2 = _mm_mul_ps(l, r); + res.mm = _mm_sub_ps(mul1, mul2); + return res; +#else CVector ret; ret.x= y*v.z - z*v.y; @@ -141,27 +169,48 @@ inline CVector CVector::operator^(const CVector &v) const ret.z= x*v.y - y*v.x; return ret; +#endif } inline float CVector::sqrnorm() const { +#ifdef USE_SSE2 + return _mm_cvtss_f32(dotsplat(mm, mm)); +#else return (float)(x*x + y*y + z*z); +#endif } inline float CVector::norm() const { +#ifdef USE_SSE2 + return sqrt(_mm_cvtss_f32(dotsplat(mm, mm))); +#else return (float)sqrt(x*x + y*y + z*z); +#endif } inline void CVector::normalize() { +#ifdef USE_SSE2 + __m128 normsplat = _mm_sqrt_ps(dotsplat(mm, mm)); + mm = _mm_div_ps(mm, normsplat); +#else float n=norm(); if(n) *this/=n; +#endif } inline CVector CVector::normed() const { +#ifdef USE_SSE2 + CVector res; + __m128 normsplat = _mm_sqrt_ps(dotsplat(mm, mm)); + res.mm = _mm_div_ps(mm, normsplat); + return res; +#else CVector ret; ret= *this; ret.normalize(); return ret; +#endif } @@ -219,15 +268,23 @@ inline void CVector::sphericToCartesian(float r, float theta,float phi) } inline void CVector::minof(const CVector &a, const CVector &b) { +#ifdef USE_SSE2 + mm = _mm_min_ps(a.mm, b.mm); +#else x= std::min(a.x, b.x); y= std::min(a.y, b.y); z= std::min(a.z, b.z); +#endif } inline void CVector::maxof(const CVector &a, const CVector &b) { +#ifdef USE_SSE2 + mm = _mm_max_ps(a.mm, b.mm); +#else x= std::max(a.x, b.x); y= std::max(a.y, b.y); z= 
std::max(a.z, b.z); +#endif } inline void CVector::serial(IStream &f) { From 00b8ad4c914ea0ebe967cf9deeadb02f987910e5 Mon Sep 17 00:00:00 2001 From: kaetemi Date: Fri, 13 Jun 2014 05:21:27 +0200 Subject: [PATCH 13/21] SSE2: More alignment workarounds --HG-- branch : sse2 --- .../nel/3d/ps_attrib_maker_bin_op_inline.h | 2 +- code/nel/include/nel/3d/ps_plane_basis.h | 6 ++-- code/nel/include/nel/misc/vector.h | 28 +++++++++++++++++++ code/nel/src/3d/ps_mesh.cpp | 4 +-- 4 files changed, 34 insertions(+), 6 deletions(-) diff --git a/code/nel/include/nel/3d/ps_attrib_maker_bin_op_inline.h b/code/nel/include/nel/3d/ps_attrib_maker_bin_op_inline.h index 0070ffb38..673925643 100644 --- a/code/nel/include/nel/3d/ps_attrib_maker_bin_op_inline.h +++ b/code/nel/include/nel/3d/ps_attrib_maker_bin_op_inline.h @@ -43,7 +43,7 @@ template <> inline CPlaneBasis PSBinOpModulate(const CPlaneBasis &p1, const CPlaneBasis &p2) { // we compute p1 * p2 - NLMISC::CVector z = p1.X ^ p1.Y; + NLMISC::CVector z = CVector(p1.X) ^ CVector(p1.Y); CPlaneBasis r; r.X.x = p2.X.x * p1.X.x + p2.X.y * p1.Y.x + p2.X.z * z.x; r.X.y = p2.X.x * p1.X.y + p2.X.y * p1.Y.y + p2.X.z * z.y; diff --git a/code/nel/include/nel/3d/ps_plane_basis.h b/code/nel/include/nel/3d/ps_plane_basis.h index 41882e148..2c8fd07c4 100644 --- a/code/nel/include/nel/3d/ps_plane_basis.h +++ b/code/nel/include/nel/3d/ps_plane_basis.h @@ -37,8 +37,8 @@ namespace NL3D { struct CPlaneBasis { - NLMISC::CVector X ; - NLMISC::CVector Y ; + NLMISC::CVectorPacked X ; + NLMISC::CVectorPacked Y ; // default ctor @@ -62,7 +62,7 @@ struct CPlaneBasis /// compute the normal of the plane basis NLMISC::CVector getNormal(void) const { - return X ^ Y ; + return CVector(X) ^ CVector(Y) ; } diff --git a/code/nel/include/nel/misc/vector.h b/code/nel/include/nel/misc/vector.h index f9667e1c7..1d4ef3fe4 100644 --- a/code/nel/include/nel/misc/vector.h +++ b/code/nel/include/nel/misc/vector.h @@ -212,6 +212,34 @@ public: { return CVector(*this) - v; } + + 
bool operator==(const CVectorPacked &v) const + { + return x==v.x && y==v.y && z==v.z; + } + bool operator!=(const CVectorPacked &v) const + { + return !(*this==v); + } + bool operator<(const CVectorPacked &v) const + { + if(x!=v.x) + return xX * CPSUtil::getCos((sint32) *ptCurrAngle) + ptBasis->Y * CPSUtil::getSin((sint32) *ptCurrAngle) - , ptBasis->X * CPSUtil::getCos((sint32) *ptCurrAngle + 64) + ptBasis->Y * CPSUtil::getSin((sint32) *ptCurrAngle + 64) + mat.setRot( CVector(ptBasis->X) * CPSUtil::getCos((sint32) *ptCurrAngle) + CVector(ptBasis->Y) * CPSUtil::getSin((sint32) *ptCurrAngle) + , CVector(ptBasis->X) * CPSUtil::getCos((sint32) *ptCurrAngle + 64) + CVector(ptBasis->Y) * CPSUtil::getSin((sint32) *ptCurrAngle + 64) , ptBasis->X ^ ptBasis->Y ); From d3847e10ccb132ba2738462a58dc2ef1f6610d8d Mon Sep 17 00:00:00 2001 From: kaetemi Date: Fri, 13 Jun 2014 05:36:43 +0200 Subject: [PATCH 14/21] SSE2: Workaround alignment issue related to std::pair --HG-- branch : sse2 --- code/nel/include/nel/sound/clustered_sound.h | 16 +++++++--------- code/nel/src/sound/audio_mixer_user.cpp | 2 +- code/nel/src/sound/clustered_sound.cpp | 9 ++++----- code/nel/src/sound/simple_source.cpp | 2 +- code/nel/src/sound/stream_source.cpp | 2 +- code/ryzom/client/src/sound_manager.cpp | 2 +- 6 files changed, 15 insertions(+), 18 deletions(-) diff --git a/code/nel/include/nel/sound/clustered_sound.h b/code/nel/include/nel/sound/clustered_sound.h index d9d2c47f2..da7cdb12f 100644 --- a/code/nel/include/nel/sound/clustered_sound.h +++ b/code/nel/include/nel/sound/clustered_sound.h @@ -50,7 +50,6 @@ class CClusteredSound { public: /// This structure contain data about sound status in a cluster - NL_ALIGN_SSE2(16) struct CClusterSoundStatus { /// The relative gain of sound in the cluster @@ -60,11 +59,11 @@ public: /// The ratio distance/max earing distance float DistFactor; /// The sound virtual position (in fact Dist * Direction) - NLMISC::CVector Position; + NLMISC::CVectorPacked 
Position; /// The blending factor between real sound pos and virtual pos (1 mean virtual pos, 0 mean real pos). float PosAlpha; /// The direction vector for the virtual sound source. - NLMISC::CVector Direction; + NLMISC::CVectorPacked Direction; /// The occlusion att. sint32 Occlusion; /// The occlusion LF factor (see EAX spec) @@ -79,7 +78,6 @@ public: typedef std::map TClusterStatusMap; /// This structure is used when we traverse the cluster/portal graph. - NL_ALIGN_SSE2(16) struct CSoundTravContext { /// The current gain. @@ -99,18 +97,18 @@ public: /// A blending factor to compute virtual source position. float Alpha; /// The direction vector from listener to the first portal/cluster - NLMISC::CVector Direction1; + NLMISC::CVectorPacked Direction1; /// The direction vector from the first portal/cluster to the second one. - NLMISC::CVector Direction2; + NLMISC::CVectorPacked Direction2; /// The current blended direction used to place vitual source. - NLMISC::CVector Direction; + NLMISC::CVectorPacked Direction; /// The previously traversed cluster. Used to stop back traversal. NL3D::CCluster *PreviousCluster; /// The previous sound propagation vector - NLMISC::CVector PreviousVector; + NLMISC::CVectorPacked PreviousVector; /// The last pseudo listener position - NLMISC::CVector ListenerPos; + NLMISC::CVectorPacked ListenerPos; /// Constructor. Init all default value. CSoundTravContext(const NLMISC::CVector &listenerPos, diff --git a/code/nel/src/sound/audio_mixer_user.cpp b/code/nel/src/sound/audio_mixer_user.cpp index 3a75433f4..4807581f0 100644 --- a/code/nel/src/sound/audio_mixer_user.cpp +++ b/code/nel/src/sound/audio_mixer_user.cpp @@ -1705,7 +1705,7 @@ void CAudioMixerUser::update() { // there is some data here, update the virtual position of the sound. 
float dist = (css->Position - source->getPos()).norm(); - CVector vpos(_ListenPosition + css->Direction * (css->Dist + dist)); + CVector vpos(_ListenPosition + CVector(css->Direction) * (css->Dist + dist)); // _Tracks[i]->DrvSource->setPos(source->getPos() * (1-css->PosAlpha) + css->Position*(css->PosAlpha)); _Tracks[i]->getPhysicalSource()->setPos(source->getPos() * (1-css->PosAlpha) + vpos*(css->PosAlpha)); // update the relative gain diff --git a/code/nel/src/sound/clustered_sound.cpp b/code/nel/src/sound/clustered_sound.cpp index 0ccfd0d59..8dbc12264 100644 --- a/code/nel/src/sound/clustered_sound.cpp +++ b/code/nel/src/sound/clustered_sound.cpp @@ -263,7 +263,7 @@ void CClusteredSound::update(const CVector &listenerPos, const CVector &/* view { // this one is better ! cs.Distance = css.Dist; - cs.Source->setPos(listenerPos + css.Direction * css.Dist + CVector(0,0,2)); + cs.Source->setPos(listenerPos + CVector(css.Direction) * css.Dist + CVector(0,0,2)); if (css.DistFactor < 1.0f) cs.Source->setRelativeGain(css.Gain * (1.0f - (css.DistFactor*css.DistFactor*css.DistFactor*css.DistFactor))); else @@ -289,7 +289,7 @@ void CClusteredSound::update(const CVector &listenerPos, const CVector &/* view cs.Source = CAudioMixerUser::instance()->createSource(soundName, false, NULL, NULL, cluster); if (cs.Source != 0) { - cs.Source->setPos(listenerPos + css.Direction * css.Dist + CVector(0,0,2)); + cs.Source->setPos(listenerPos + CVector(css.Direction) * css.Dist + CVector(0,0,2)); if (css.DistFactor < 1.0f) cs.Source->setRelativeGain(css.Gain * (1.0f - (css.DistFactor*css.DistFactor/**css.DistFactor*css.DistFactor*/))); else @@ -867,7 +867,7 @@ bool CClusteredSound::addAudibleCluster(CCluster *cluster, CClusterSoundStatus & { TClusterStatusMap::iterator it(_AudibleClusters.find(cluster)); nlassert(soundStatus.Dist < _MaxEarDistance); - nlassert(soundStatus.Direction.norm() <= 1.01f); + nlassert(CVector(soundStatus.Direction).norm() <= 1.01f); if (it != 
_AudibleClusters.end()) { @@ -881,8 +881,7 @@ bool CClusteredSound::addAudibleCluster(CCluster *cluster, CClusterSoundStatus & } else { - //_AudibleClusters.insert(make_pair(cluster, soundStatus)); - _AudibleClusters[cluster] = soundStatus; + _AudibleClusters.insert(make_pair(cluster, soundStatus)); return true; } diff --git a/code/nel/src/sound/simple_source.cpp b/code/nel/src/sound/simple_source.cpp index 2c9dda86c..0bf4eba8d 100644 --- a/code/nel/src/sound/simple_source.cpp +++ b/code/nel/src/sound/simple_source.cpp @@ -109,7 +109,7 @@ CVector CSimpleSource::getVirtualPos() const { // there is some data here, update the virtual position of the sound. float dist = (css->Position - getPos()).norm(); - CVector vpos(CAudioMixerUser::instance()->getListenPosVector() + css->Direction * (css->Dist + dist)); + CVector vpos(CAudioMixerUser::instance()->getListenPosVector() + CVector(css->Direction) * (css->Dist + dist)); vpos = _Position * (1-css->PosAlpha) + vpos*(css->PosAlpha); return vpos; } diff --git a/code/nel/src/sound/stream_source.cpp b/code/nel/src/sound/stream_source.cpp index 9bd48ff25..ece500cd0 100644 --- a/code/nel/src/sound/stream_source.cpp +++ b/code/nel/src/sound/stream_source.cpp @@ -134,7 +134,7 @@ CVector CStreamSource::getVirtualPos() const { // there is some data here, update the virtual position of the sound. 
float dist = (css->Position - getPos()).norm(); - CVector vpos(CAudioMixerUser::instance()->getListenPosVector() + css->Direction * (css->Dist + dist)); + CVector vpos(CAudioMixerUser::instance()->getListenPosVector() + CVector(css->Direction) * (css->Dist + dist)); vpos = _Position * (1-css->PosAlpha) + vpos*(css->PosAlpha); return vpos; } diff --git a/code/ryzom/client/src/sound_manager.cpp b/code/ryzom/client/src/sound_manager.cpp index f87c6cdee..7c5589432 100644 --- a/code/ryzom/client/src/sound_manager.cpp +++ b/code/ryzom/client/src/sound_manager.cpp @@ -343,7 +343,7 @@ void CSoundManager::drawSounds(float camHeight) const CClusteredSound::CClusterSoundStatus &css = first->second; if (css.Direction != CVector::Null) { - CVector dest = pos+css.Direction*css.Dist; + CVector dest = pos+CVector(css.Direction)*css.Dist; NL3D::CDRU::drawLine(pos, dest, CRGBA(0,255,255,255), *idriver); NL3D::CDRU::drawLine(dest+CVector(0.5f,0.5f,0), dest+CVector(-0.5f,-0.5f,0), CRGBA(0, 255,255,255), *idriver); From d787c54567d9ec3ee9dce298be8a9879db049a27 Mon Sep 17 00:00:00 2001 From: kaetemi Date: Fri, 13 Jun 2014 14:01:26 +0200 Subject: [PATCH 15/21] SSE2: Implement alignment for arena allocator --HG-- branch : sse2 --- .../nel/include/nel/misc/fixed_size_allocator.h | 1 + code/nel/src/misc/fixed_size_allocator.cpp | 17 ++++++++++++----- code/nel/src/misc/object_arena_allocator.cpp | 14 ++++++++------ 3 files changed, 21 insertions(+), 11 deletions(-) diff --git a/code/nel/include/nel/misc/fixed_size_allocator.h b/code/nel/include/nel/misc/fixed_size_allocator.h index 9eb1d8a10..80b9ed491 100644 --- a/code/nel/include/nel/misc/fixed_size_allocator.h +++ b/code/nel/include/nel/misc/fixed_size_allocator.h @@ -53,6 +53,7 @@ public: uint getNumAllocatedBlocks() const { return _NumAlloc; } private: class CChunk; + NL_ALIGN(NL_DEFAULT_MEMORY_ALIGNMENT) class CNode { public: diff --git a/code/nel/src/misc/fixed_size_allocator.cpp b/code/nel/src/misc/fixed_size_allocator.cpp index 
790275ec6..30693ddfd 100644 --- a/code/nel/src/misc/fixed_size_allocator.cpp +++ b/code/nel/src/misc/fixed_size_allocator.cpp @@ -33,6 +33,9 @@ CFixedSizeAllocator::CFixedSizeAllocator(uint numBytesPerBlock, uint numBlockPer _NumChunks = 0; nlassert(numBytesPerBlock > 1); _NumBytesPerBlock = numBytesPerBlock; + const uint mask = NL_DEFAULT_MEMORY_ALIGNMENT - 1; + _NumBytesPerBlock = (_NumBytesPerBlock + mask) & ~mask; + nlassert(_NumBytesPerBlock >= numBytesPerBlock); _NumBlockPerChunk = std::max(numBlockPerChunk, (uint) 3); _NumAlloc = 0; } @@ -67,12 +70,14 @@ void *CFixedSizeAllocator::alloc() return _FreeSpace->unlink(); } +#define aligned_offsetof(s, m) ((offsetof(s, m) + (NL_DEFAULT_MEMORY_ALIGNMENT - 1)) & ~(NL_DEFAULT_MEMORY_ALIGNMENT - 1)) + // ***************************************************************************************************************** void CFixedSizeAllocator::free(void *block) { if (!block) return; /// get the node from the object - CNode *node = (CNode *) ((uint8 *) block - offsetof(CNode, Next)); + CNode *node = (CNode *) ((uint8 *) block - aligned_offsetof(CNode, Next)); // nlassert(node->Chunk != NULL); nlassert(node->Chunk->Allocator == this); @@ -84,7 +89,9 @@ void CFixedSizeAllocator::free(void *block) // ***************************************************************************************************************** uint CFixedSizeAllocator::CChunk::getBlockSizeWithOverhead() const { - return std::max((uint)(sizeof(CNode) - offsetof(CNode, Next)),(uint)(Allocator->getNumBytesPerBlock())) + offsetof(CNode, Next); + nlctassert((sizeof(CNode) % NL_DEFAULT_MEMORY_ALIGNMENT) == 0); + return std::max((uint)(sizeof(CNode) - aligned_offsetof(CNode, Next)), + (uint)(Allocator->getNumBytesPerBlock())) + aligned_offsetof(CNode, Next); } // ***************************************************************************************************************** @@ -105,7 +112,7 @@ CFixedSizeAllocator::CChunk::~CChunk() nlassert(NumFreeObjs == 0); 
nlassert(Allocator->_NumChunks > 0); -- (Allocator->_NumChunks); - delete[] Mem; + aligned_free(Mem); //delete[] Mem; } // ***************************************************************************************************************** @@ -115,7 +122,7 @@ void CFixedSizeAllocator::CChunk::init(CFixedSizeAllocator *alloc) nlassert(alloc != NULL); Allocator = alloc; // - Mem = new uint8[getBlockSizeWithOverhead() * alloc->getNumBlockPerChunk()]; + Mem = (uint8 *)aligned_malloc(getBlockSizeWithOverhead() * alloc->getNumBlockPerChunk(), NL_DEFAULT_MEMORY_ALIGNMENT); // new uint8[getBlockSizeWithOverhead() * alloc->getNumBlockPerChunk()]; // getNode(0).Chunk = this; getNode(0).Next = &getNode(1); @@ -179,7 +186,7 @@ void *CFixedSizeAllocator::CNode::unlink() *Prev = Next; nlassert(Chunk->NumFreeObjs > 0); Chunk->grab(); // tells the containing chunk that a node has been allocated - return (void *) &Next; + return (void *)((uintptr_t)(this) + aligned_offsetof(CNode, Next)); //(void *) &Next; } // ***************************************************************************************************************** diff --git a/code/nel/src/misc/object_arena_allocator.cpp b/code/nel/src/misc/object_arena_allocator.cpp index 9c73f5059..8084b4ac9 100644 --- a/code/nel/src/misc/object_arena_allocator.cpp +++ b/code/nel/src/misc/object_arena_allocator.cpp @@ -68,21 +68,23 @@ void *CObjectArenaAllocator::alloc(uint size) if (size >= _MaxAllocSize) { // use standard allocator - uint8 *block = new uint8[size + sizeof(uint)]; // an additionnal uint is needed to store size of block + nlctassert(NL_DEFAULT_MEMORY_ALIGNMENT > sizeof(uint)); + uint8 *block = (uint8 *)aligned_malloc(NL_DEFAULT_MEMORY_ALIGNMENT + size, NL_DEFAULT_MEMORY_ALIGNMENT); //new uint8[size + sizeof(uint)]; // an additionnal uint is needed to store size of block if (!block) return NULL; #ifdef NL_DEBUG _MemBlockToAllocID[block] = _AllocID; #endif *(uint *) block = size; - return block + sizeof(uint); + return block 
+ NL_DEFAULT_MEMORY_ALIGNMENT; } uint entry = ((size + (_Granularity - 1)) / _Granularity) ; nlassert(entry < _ObjectSizeToAllocator.size()); if (!_ObjectSizeToAllocator[entry]) { - _ObjectSizeToAllocator[entry] = new CFixedSizeAllocator(entry * _Granularity + sizeof(uint), _MaxAllocSize / size); // an additionnal uint is needed to store size of block + _ObjectSizeToAllocator[entry] = new CFixedSizeAllocator(entry * _Granularity + NL_DEFAULT_MEMORY_ALIGNMENT, _MaxAllocSize / size); // an additionnal uint is needed to store size of block } void *block = _ObjectSizeToAllocator[entry]->alloc(); + nlassert(((uintptr_t)block % NL_DEFAULT_MEMORY_ALIGNMENT) == 0); #ifdef NL_DEBUG if (block) { @@ -91,14 +93,14 @@ void *CObjectArenaAllocator::alloc(uint size) ++_AllocID; #endif *(uint *) block = size; - return (void *) ((uint8 *) block + sizeof(uint)); + return (void *) ((uint8 *) block + NL_DEFAULT_MEMORY_ALIGNMENT); } // ***************************************************************************************************************** void CObjectArenaAllocator::free(void *block) { if (!block) return; - uint8 *realBlock = (uint8 *) block - sizeof(uint); // a uint is used at start of block to give its size + uint8 *realBlock = (uint8 *) block - NL_DEFAULT_MEMORY_ALIGNMENT; // sizeof(uint); // a uint is used at start of block to give its size uint size = *(uint *) realBlock; if (size >= _MaxAllocSize) { @@ -107,7 +109,7 @@ void CObjectArenaAllocator::free(void *block) nlassert(it != _MemBlockToAllocID.end()); _MemBlockToAllocID.erase(it); #endif - delete realBlock; + aligned_free(realBlock); return; } uint entry = ((size + (_Granularity - 1)) / _Granularity); From 7867db46542953b4adee4834b4bcc9f19353a4e6 Mon Sep 17 00:00:00 2001 From: kaetemi Date: Fri, 13 Jun 2014 14:23:51 +0200 Subject: [PATCH 16/21] SSE2: Non-SSE2 copy to avoid some issues with STL pairs --HG-- branch : sse2 --- code/nel/include/nel/misc/vector.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) 
diff --git a/code/nel/include/nel/misc/vector.h b/code/nel/include/nel/misc/vector.h index 1d4ef3fe4..0d3216a18 100644 --- a/code/nel/include/nel/misc/vector.h +++ b/code/nel/include/nel/misc/vector.h @@ -73,10 +73,10 @@ public: // Methods. /// Constructor . CVector(float _x, float _y, float _z) : x(_x), y(_y), z(_z) { /*if (((uintptr_t)(void *)(this) & 0xF) != 0) nlerror("Vector alignment error");*/ } /// Copy Constructor. -#ifdef USE_SSE2 +#if 0 CVector(const CVector &v) : mm(v.mm) { /*if (((uintptr_t)(void *)(this) & 0xF) != 0) nlerror("Vector alignment error");*/ } #else - CVector(const CVector &v) : x(v.x), y(v.y), z(v.z) { } + CVector(const CVector &v) : x(v.x), y(v.y), z(v.z) { if (((uintptr_t)(void *)(this) & 0xF) != 0) nlerror("Vector alignment error"); } #endif //@} From 5ec363a8a921be49fde3e85c354bce3d2b1c88d0 Mon Sep 17 00:00:00 2001 From: kaetemi Date: Fri, 13 Jun 2014 15:05:52 +0200 Subject: [PATCH 17/21] SSE2: Don't divide by zero when normalizing --HG-- branch : sse2 --- code/nel/include/nel/misc/vector_inline.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/code/nel/include/nel/misc/vector_inline.h b/code/nel/include/nel/misc/vector_inline.h index 270608af0..77d827c35 100644 --- a/code/nel/include/nel/misc/vector_inline.h +++ b/code/nel/include/nel/misc/vector_inline.h @@ -182,7 +182,7 @@ inline float CVector::sqrnorm() const inline float CVector::norm() const { #ifdef USE_SSE2 - return sqrt(_mm_cvtss_f32(dotsplat(mm, mm))); + return sqrt(sqrnorm()); #else return (float)sqrt(x*x + y*y + z*z); #endif @@ -191,7 +191,8 @@ inline void CVector::normalize() { #ifdef USE_SSE2 __m128 normsplat = _mm_sqrt_ps(dotsplat(mm, mm)); - mm = _mm_div_ps(mm, normsplat); + if (_mm_cvtss_f32(normsplat)) + mm = _mm_div_ps(mm, normsplat); #else float n=norm(); if(n) @@ -203,7 +204,8 @@ inline CVector CVector::normed() const #ifdef USE_SSE2 CVector res; __m128 normsplat = _mm_sqrt_ps(dotsplat(mm, mm)); - res.mm = _mm_div_ps(mm, normsplat); + if 
(_mm_cvtss_f32(normsplat)) + res.mm = _mm_div_ps(mm, normsplat); return res; #else CVector ret; From d18159616e7d34220be85812d5f500fb53450edc Mon Sep 17 00:00:00 2001 From: kaetemi Date: Fri, 13 Jun 2014 16:22:27 +0200 Subject: [PATCH 18/21] SSE2: Fix hopefully the last few alignment issues --HG-- branch : sse2 --- code/nel/include/nel/sound/audio_mixer_user.h | 2 +- code/nel/include/nel/sound/background_sound_manager.h | 10 +++++----- code/nel/include/nel/sound/u_audio_mixer.h | 8 +++++++- code/nel/src/sound/audio_mixer_user.cpp | 10 +++++----- code/ryzom/client/src/sound_manager.cpp | 4 ++-- 5 files changed, 20 insertions(+), 14 deletions(-) diff --git a/code/nel/include/nel/sound/audio_mixer_user.h b/code/nel/include/nel/sound/audio_mixer_user.h index d8949c3d3..d152cd31b 100644 --- a/code/nel/include/nel/sound/audio_mixer_user.h +++ b/code/nel/include/nel/sound/audio_mixer_user.h @@ -471,7 +471,7 @@ protected: /// Returns nb available tracks (or NULL) void getFreeTracks( uint nb, CTrack **tracks ); /// Fill a vector of position and mute flag for all playing sound source. - virtual void getPlayingSoundsPos(bool virtualPos, std::vector > &pos); + virtual void getPlayingSoundsPos(bool virtualPos, std::vector &pos); typedef CHashMap TUserVarControlsContainer; /// Container for all user controler and currently controled playing source diff --git a/code/nel/include/nel/sound/background_sound_manager.h b/code/nel/include/nel/sound/background_sound_manager.h index 326ece0f3..06f15f80f 100644 --- a/code/nel/include/nel/sound/background_sound_manager.h +++ b/code/nel/include/nel/sound/background_sound_manager.h @@ -258,17 +258,17 @@ private: struct TSoundStatus { /// The data of the sound. - TSoundData &SoundData; + TSoundData &SoundData; /// The position of the source. - NLMISC::CVector Position; + NLMISC::CVectorPacked Position; /** The relative gain of the source. This is used for patatoid competition.when * a smaller patatoid mute bigger one. 
*/ - float Gain; + float Gain; /// The distance beween listener and source. - float Distance; + float Distance; /// flag if inside a sound zone - bool Inside; + bool Inside; /// Constructor. TSoundStatus(TSoundData &sd, const NLMISC::CVector &position, float gain, float distance, bool inside) : SoundData(sd), Position(position), Gain(gain), Distance(distance), Inside(inside) diff --git a/code/nel/include/nel/sound/u_audio_mixer.h b/code/nel/include/nel/sound/u_audio_mixer.h index 0d845f9c3..0b1bcc42a 100644 --- a/code/nel/include/nel/sound/u_audio_mixer.h +++ b/code/nel/include/nel/sound/u_audio_mixer.h @@ -336,7 +336,13 @@ public: virtual uint getMutedPlayingSourcesCount() const = 0; /// Return a string showing the playing sources virtual std::string getSourcesStats() const = 0; - virtual void getPlayingSoundsPos(bool virtualPos, std::vector > &pos) =0; + struct CPlayingSoundPos + { + CPlayingSoundPos(bool first_, const NLMISC::CVector &second_) : first(first_), second(second_) { } + bool first; + NLMISC::CVector second; + }; + virtual void getPlayingSoundsPos(bool virtualPos, std::vector &pos) =0; /** Write profiling information about the mixer to the output stream. 
* \param out The output stream to which to write the information */ diff --git a/code/nel/src/sound/audio_mixer_user.cpp b/code/nel/src/sound/audio_mixer_user.cpp index 4807581f0..1c9c46cdc 100644 --- a/code/nel/src/sound/audio_mixer_user.cpp +++ b/code/nel/src/sound/audio_mixer_user.cpp @@ -1475,7 +1475,7 @@ void CAudioMixerUser::freeTrack(CTrack *track) // ****************************************************************** -void CAudioMixerUser::getPlayingSoundsPos(bool virtualPos, std::vector > &pos) +void CAudioMixerUser::getPlayingSoundsPos(bool virtualPos, std::vector &pos) { int nbplay = 0; int nbmute = 0; @@ -1493,9 +1493,9 @@ void CAudioMixerUser::getPlayingSoundsPos(bool virtualPos, std::vectorisPlaying()) { if (virtualPos) - pos.push_back(make_pair(source->getTrack() == 0, source->getVirtualPos())); + pos.push_back(CPlayingSoundPos(source->getTrack() == 0, source->getVirtualPos())); else - pos.push_back(make_pair(source->getTrack() == 0, + pos.push_back(CPlayingSoundPos(source->getTrack() == 0, source->getSourceRelativeMode() ? source->getPos() + _ListenPosition : source->getPos())); @@ -1517,9 +1517,9 @@ void CAudioMixerUser::getPlayingSoundsPos(bool virtualPos, std::vectorisPlaying()) { if (virtualPos) - pos.push_back(make_pair(source->getTrack() == 0, source->getVirtualPos())); + pos.push_back(CPlayingSoundPos(source->getTrack() == 0, source->getVirtualPos())); else - pos.push_back(make_pair(source->getTrack() == 0, + pos.push_back(CPlayingSoundPos(source->getTrack() == 0, source->getSourceRelativeMode() ? 
source->getPos() + _ListenPosition : source->getPos())); diff --git a/code/ryzom/client/src/sound_manager.cpp b/code/ryzom/client/src/sound_manager.cpp index 7c5589432..eddd4ed4c 100644 --- a/code/ryzom/client/src/sound_manager.cpp +++ b/code/ryzom/client/src/sound_manager.cpp @@ -363,10 +363,10 @@ void CSoundManager::drawSounds(float camHeight) } // draw the sound source position { - std::vector > soundPos; + std::vector soundPos; _AudioMixer->getPlayingSoundsPos(true, soundPos); - std::vector >::iterator first(soundPos.begin()), last(soundPos.end()); + std::vector::iterator first(soundPos.begin()), last(soundPos.end()); for (; first != last; ++first) { NL3D::CDRU::drawLine(first->second + CVector(0.5f,0.5f,0), first->second + CVector(-0.5f,-0.5f,0), CRGBA(255,0,255,255), *idriver); From afa315b1b5d0a7f9aa939281cc500e52f81c1675 Mon Sep 17 00:00:00 2001 From: kaetemi Date: Fri, 13 Jun 2014 16:46:36 +0200 Subject: [PATCH 19/21] SSE2: Implement some more of CVector --HG-- branch : sse2 --- code/nel/include/nel/misc/vector.h | 4 ++-- code/nel/include/nel/misc/vector_inline.h | 27 +++++++++++++++++++++++ 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/code/nel/include/nel/misc/vector.h b/code/nel/include/nel/misc/vector.h index 0d3216a18..3be84894d 100644 --- a/code/nel/include/nel/misc/vector.h +++ b/code/nel/include/nel/misc/vector.h @@ -73,10 +73,10 @@ public: // Methods. /// Constructor . CVector(float _x, float _y, float _z) : x(_x), y(_y), z(_z) { /*if (((uintptr_t)(void *)(this) & 0xF) != 0) nlerror("Vector alignment error");*/ } /// Copy Constructor. 
-#if 0 +#if USE_SSE2 CVector(const CVector &v) : mm(v.mm) { /*if (((uintptr_t)(void *)(this) & 0xF) != 0) nlerror("Vector alignment error");*/ } #else - CVector(const CVector &v) : x(v.x), y(v.y), z(v.z) { if (((uintptr_t)(void *)(this) & 0xF) != 0) nlerror("Vector alignment error"); } + CVector(const CVector &v) : x(v.x), y(v.y), z(v.z) { /*if (((uintptr_t)(void *)(this) & 0xF) != 0) nlerror("Vector alignment error");*/ } #endif //@} diff --git a/code/nel/include/nel/misc/vector_inline.h b/code/nel/include/nel/misc/vector_inline.h index 77d827c35..10351e2c9 100644 --- a/code/nel/include/nel/misc/vector_inline.h +++ b/code/nel/include/nel/misc/vector_inline.h @@ -64,7 +64,12 @@ inline CVector &CVector::operator*=(float f) } inline CVector &CVector::operator/=(float f) { +#ifdef USE_SSE2 + mm = _mm_div_ps(mm, _mm_set1_ps(f)); + return *this; +#else return *this*= (1.0f/f); +#endif } inline CVector CVector::operator+(const CVector &v) const { @@ -101,7 +106,13 @@ inline CVector CVector::operator*(float f) const } inline CVector CVector::operator/(float f) const { +#ifdef USE_SSE2 + CVector res; + res.mm = _mm_div_ps(mm, _mm_set1_ps(f)); + return res; +#else return *this*(1.0f/f); +#endif } inline CVector CVector::operator-() const { @@ -220,19 +231,35 @@ inline CVector CVector::normed() const // Misc. 
inline void CVector::set(float _x, float _y, float _z) { +#ifdef USE_SSE2 + mm = _mm_setr_ps(_x, _y, _z, 0.0f); +#else x=_x; y=_y; z=_z; +#endif } inline bool CVector::operator==(const CVector &v) const { +#ifdef USE_SSE2 + return (_mm_movemask_ps(_mm_cmpeq_ps(mm, v.mm)) & 0x07) == 0x07; +#else return x==v.x && y==v.y && z==v.z; +#endif } inline bool CVector::operator!=(const CVector &v) const { +#ifdef USE_SSE2 + return (_mm_movemask_ps(_mm_cmpneq_ps(mm, v.mm)) & 0x07) != 0; +#else return !(*this==v); +#endif } inline bool CVector::isNull() const { +#ifdef USE_SSE2 + return (_mm_movemask_ps(_mm_cmpeq_ps(mm, _mm_setzero_ps())) & 0x07) == 0x07; +#else return *this==CVector::Null; +#endif } inline bool CVector::operator<(const CVector &v) const { From 60879d87e44a5b8141a6488147ee1dd3883e5b08 Mon Sep 17 00:00:00 2001 From: kaetemi Date: Fri, 13 Jun 2014 16:54:46 +0200 Subject: [PATCH 20/21] SSE2: Vectorize some code --HG-- branch : sse2 --- code/nel/include/nel/3d/track_keyframer.h | 8 ++++---- code/nel/include/nel/misc/vector.h | 1 + code/nel/include/nel/misc/vector_inline.h | 25 +++++++++++++++++++++++ code/nel/src/3d/bone.cpp | 8 ++------ code/nel/src/3d/cloud.cpp | 22 ++++++++++---------- 5 files changed, 43 insertions(+), 21 deletions(-) diff --git a/code/nel/include/nel/3d/track_keyframer.h b/code/nel/include/nel/3d/track_keyframer.h index 6cfb6736a..e50b13354 100644 --- a/code/nel/include/nel/3d/track_keyframer.h +++ b/code/nel/include/nel/3d/track_keyframer.h @@ -403,10 +403,10 @@ template inline void copyToValue(T &value, const TKeyVal inline void copyToValue(NLMISC::CRGBA &col, const CVector &v) { sint i; - - i= (sint)(v.x*255); NLMISC::clamp(i,0,255); col.R= (uint8) i; - i= (sint)(v.y*255); NLMISC::clamp(i,0,255); col.G= (uint8) i; - i= (sint)(v.z*255); NLMISC::clamp(i,0,255); col.B= (uint8) i; + CVector mul255 = v * 255; + i= (sint)(mul255.x); NLMISC::clamp(i,0,255); col.R= (uint8) i; + i= (sint)(mul255.y); NLMISC::clamp(i,0,255); col.G= (uint8) i; + i= 
(sint)(mul255.z); NLMISC::clamp(i,0,255); col.B= (uint8) i; col.A=255; } diff --git a/code/nel/include/nel/misc/vector.h b/code/nel/include/nel/misc/vector.h index 3be84894d..62a73e1dd 100644 --- a/code/nel/include/nel/misc/vector.h +++ b/code/nel/include/nel/misc/vector.h @@ -152,6 +152,7 @@ public: // Methods. // friends. friend CVector operator*(float f, const CVector &v0); + friend CVector operator/(float f, const CVector &v0); }; class CVectorPacked diff --git a/code/nel/include/nel/misc/vector_inline.h b/code/nel/include/nel/misc/vector_inline.h index 10351e2c9..d854d7b45 100644 --- a/code/nel/include/nel/misc/vector_inline.h +++ b/code/nel/include/nel/misc/vector_inline.h @@ -136,6 +136,18 @@ inline CVector operator*(float f, const CVector &v) #endif } +inline CVector operator/(float f, const CVector &v) +{ +#ifdef USE_SSE2 + CVector res; + res.mm = _mm_div_ps(_mm_set1_ps(f), v.mm); + return res; +#else + CVector ret(f/v.x, f/v.y, f/v.z); + return ret; +#endif +} + #ifdef USE_SSE2 inline __m128 dotsplat(const __m128 &l, const __m128 &r) { @@ -150,6 +162,19 @@ inline __m128 dotsplat(const __m128 &l, const __m128 &r) } #endif +inline CVector mul(const CVector &l, const CVector &r) +{ + CVector res; +#ifdef USE_SSE2 + res.mm = _mm_mul_ps(l.mm, r.mm); +#else + res.x = l.x * r.x; + res.y = l.y * r.y; + res.z = l.z * r.z; +#endif + return res; +} + // ============================================================================================ // Advanced Maths. inline float CVector::operator*(const CVector &v) const diff --git a/code/nel/src/3d/bone.cpp b/code/nel/src/3d/bone.cpp index 9adf64fcd..2d6694e3d 100644 --- a/code/nel/src/3d/bone.cpp +++ b/code/nel/src/3d/bone.cpp @@ -189,9 +189,7 @@ void CBone::compute(CBone *parent, const CMatrix &rootMatrix, CSkeletonModel *sk // retrieve scale from our father. parent->getScale(fatherScale); // inverse this scale. 
- fatherScale.x= 1.0f / fatherScale.x; - fatherScale.y= 1.0f / fatherScale.y; - fatherScale.z= 1.0f / fatherScale.z; + fatherScale = 1.0f / fatherScale; // Compute InverseScale compensation: // with UnheritScale, formula per bone should be T*Sf-1*P*R*S*P-1. @@ -199,9 +197,7 @@ void CBone::compute(CBone *parent, const CMatrix &rootMatrix, CSkeletonModel *sk // So we must compute T*Sf-1*T-1, in order to get wanted result. invScaleComp.setScale(fatherScale); // Faster compute of the translation part: just "trans + fatherScale MUL -trans" where MUL is comp mul - trans.x-= fatherScale.x * trans.x; - trans.y-= fatherScale.y * trans.y; - trans.z-= fatherScale.z * trans.z; + trans -= mul(trans, fatherScale); invScaleComp.setPos(trans); diff --git a/code/nel/src/3d/cloud.cpp b/code/nel/src/3d/cloud.cpp index 280ba2f04..0da6da45a 100644 --- a/code/nel/src/3d/cloud.cpp +++ b/code/nel/src/3d/cloud.cpp @@ -487,10 +487,10 @@ void CCloud::dispXYZ (CMaterial *pMat) rVB.lock (vba); pVertices = vba.getVertexCoordPointer (0); - *pVertices = CVector(_Pos.x, _Pos.y, _Pos.z+_Size.z*(_NbW*_NbH-d)*oneOverNbWNbH); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); - *pVertices = CVector(_Pos.x+_Size.x, _Pos.y, _Pos.z+_Size.z*(_NbW*_NbH-d)*oneOverNbWNbH); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); - *pVertices = CVector(_Pos.x+_Size.x, _Pos.y+_Size.y, _Pos.z+_Size.z*(_NbW*_NbH-d)*oneOverNbWNbH); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); - *pVertices = CVector(_Pos.x, _Pos.y+_Size.y, _Pos.z+_Size.z*(_NbW*_NbH-d)*oneOverNbWNbH); + *pVertices = CVectorPacked(_Pos.x, _Pos.y, _Pos.z+_Size.z*(_NbW*_NbH-d)*oneOverNbWNbH); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); + *pVertices = CVectorPacked(_Pos.x+_Size.x, _Pos.y, _Pos.z+_Size.z*(_NbW*_NbH-d)*oneOverNbWNbH); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); + *pVertices = CVectorPacked(_Pos.x+_Size.x, _Pos.y+_Size.y, _Pos.z+_Size.z*(_NbW*_NbH-d)*oneOverNbWNbH); 
pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); + *pVertices = CVectorPacked(_Pos.x, _Pos.y+_Size.y, _Pos.z+_Size.z*(_NbW*_NbH-d)*oneOverNbWNbH); pUV = vba.getTexCoordPointer (0, 0); pUV->U = i*oneOverNbW; pUV->V = j*oneOverNbH; pUV = (CUV*)( ((uint8*)pUV) + nVSize ); @@ -513,10 +513,10 @@ void CCloud::dispXYZ (CMaterial *pMat) CVertexBufferReadWrite vba; rVB.lock (vba); CVectorPacked *pVertices = vba.getVertexCoordPointer (0); - *pVertices = CVector((float)0.25f, 0, (float)0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); - *pVertices = CVector((float)0.75f, 0, (float)0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); - *pVertices = CVector((float)0.75f, 0, (float)0.75f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); - *pVertices = CVector((float)0.25f, 0, (float)0.75f); + *pVertices = CVectorPacked((float)0.25f, 0, (float)0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); + *pVertices = CVectorPacked((float)0.75f, 0, (float)0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); + *pVertices = CVectorPacked((float)0.75f, 0, (float)0.75f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize ); + *pVertices = CVectorPacked((float)0.25f, 0, (float)0.75f); } } @@ -640,7 +640,7 @@ void CCloud::genBill (CCamera *pCam, uint32 nBillSize) //CMatrix CamMat = pCam->getMatrix(); //CVector Viewer = CamMat.getPos(); CVector Viewer = CVector (0,0,0); - CVector Center = CVector (_Pos.x+_Size.x/2, _Pos.y+_Size.y/2, _Pos.z+_Size.z/2); + CVector Center = _Pos + (_Size / 2); //CVector (_Pos.x+_Size.x/2, _Pos.y+_Size.y/2, _Pos.z+_Size.z/2); CVector Size = _Size; CVector I, J, K; float Left, Right, Top, Bottom, Near, Far; @@ -650,7 +650,7 @@ void CCloud::genBill (CCamera *pCam, uint32 nBillSize) CMatrix mat; mat.identity(); mat.setRot(I,J,K, true); - mat.setPos(CVector(Viewer.x, Viewer.y, Viewer.z)); + mat.setPos(Viewer); mat.invert(); // Clear background for cloud creation @@ 
-708,7 +708,7 @@ void CCloud::dispBill (CCamera *pCam) // CMatrix CamMat = pCam->getMatrix(); // CVector Viewer = CamMat.getPos(); CVector Viewer = CVector (0,0,0); - CVector Center = CVector (_Pos.x+_Size.x/2, _Pos.y+_Size.y/2, _Pos.z+_Size.z/2); + CVector Center = _Pos + (_Size / 2); //CVector (_Pos.x+_Size.x/2, _Pos.y+_Size.y/2, _Pos.z+_Size.z/2); CVector Size = _Size; // Prepare vertices.