Merge with default

--HG-- branch : sse2
SSE2: Discard previous branch head
93 changed files with 1362 additions and 828 deletions
--- a/code/nel/include/nel/3d/driver.h
+++ b/code/nel/include/nel/3d/driver.h
@@ -57,6 +57,7 @@ using NLMISC::CRefCount;
 using NLMISC::CSmartPtr;
 using NLMISC::CRGBA;
 using NLMISC::CVector;
+using NLMISC::CVectorPacked;
 using NLMISC::CMatrix;
 using NLMISC::CSynchronized;

--- a/code/nel/include/nel/3d/matrix_3x4.h
+++ b/code/nel/include/nel/3d/matrix_3x4.h
@@ -38,69 +38,146 @@ namespace NL3D
 * \author Nevrax France
 * \date 2002
 */
-class	CMatrix3x4
+class NL_ALIGN_SSE2 CMatrix3x4
 {
 public:
+#ifdef NL_HAS_SSE2
+	union { struct { float a11, a21, a31; }; __m128 mm1; };
+	union { struct { float a12, a22, a32; }; __m128 mm2; };
+	union { struct { float a13, a23, a33; }; __m128 mm3; };
+	union { struct { float a14, a24, a34; }; __m128 mm4; };
+#else
 	// Order them in memory line first, for faster memory access.
 	float	a11, a12, a13, a14;
 	float	a21, a22, a23, a24;
 	float	a31, a32, a33, a34;
+#endif

 	// Copy from a matrix.
 	void	set(const CMatrix &mat)
 	{
-		const float	*m =mat.get();
+		const float	*m = mat.get();
+#ifdef NL_HAS_SSE2
+		mm1 = _mm_load_ps(&m[0]);
+		mm2 = _mm_load_ps(&m[4]);
+		mm3 = _mm_load_ps(&m[8]);
+		mm4 = _mm_load_ps(&m[12]);
+#else
 		a11= m[0]; a12= m[4]; a13= m[8] ; a14= m[12];
 		a21= m[1]; a22= m[5]; a23= m[9] ; a24= m[13];
 		a31= m[2]; a32= m[6]; a33= m[10]; a34= m[14];
+#endif
 	}


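-	// mulSetvector. NB: in should be different as v!! (else don't work).
+	// mulSetVector: out = (3x3 part of this matrix) * in. NB: in and out must be different objects (the non-SSE2 path writes out.x before it has finished reading in).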
 	void	mulSetVector(const CVector &in, CVector &out)
 	{
+#ifdef NL_HAS_SSE2
+		__m128 xxx = _mm_shuffle_ps(in.mm, in.mm, _MM_SHUFFLE(0, 0, 0, 0));
+		__m128 yyy = _mm_shuffle_ps(in.mm, in.mm, _MM_SHUFFLE(1, 1, 1, 1));
+		__m128 zzz = _mm_shuffle_ps(in.mm, in.mm, _MM_SHUFFLE(2, 2, 2, 2));
+		out.mm = _mm_mul_ps(mm1, xxx);
+		out.mm = _mm_add_ps(out.mm, _mm_mul_ps(mm2, yyy));
+		out.mm = _mm_add_ps(out.mm, _mm_mul_ps(mm3, zzz));
+#else
 		out.x= (a11*in.x + a12*in.y + a13*in.z);
 		out.y= (a21*in.x + a22*in.y + a23*in.z);
 		out.z= (a31*in.x + a32*in.y + a33*in.z);
+#endif
 	}
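-	// mulSetpoint. NB: in should be different as v!! (else don't work).
+	// mulSetPoint: out = (3x3 part of this matrix) * in + 4th column (a14, a24, a34). NB: in and out must be different objects (the non-SSE2 path writes out.x before it has finished reading in).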
 	void	mulSetPoint(const CVector &in, CVector &out)
 	{
+#ifdef NL_HAS_SSE2
+		__m128 xxx = _mm_shuffle_ps(in.mm, in.mm, _MM_SHUFFLE(0, 0, 0, 0));
+		__m128 yyy = _mm_shuffle_ps(in.mm, in.mm, _MM_SHUFFLE(1, 1, 1, 1));
+		__m128 zzz = _mm_shuffle_ps(in.mm, in.mm, _MM_SHUFFLE(2, 2, 2, 2));
+		out.mm = _mm_mul_ps(mm1, xxx);
+		out.mm = _mm_add_ps(out.mm, _mm_mul_ps(mm2, yyy));
+		out.mm = _mm_add_ps(out.mm, _mm_mul_ps(mm3, zzz));
+		out.mm = _mm_add_ps(out.mm, mm4);
+#else
 		out.x= (a11*in.x + a12*in.y + a13*in.z + a14);
 		out.y= (a21*in.x + a22*in.y + a23*in.z + a24);
 		out.z= (a31*in.x + a32*in.y + a33*in.z + a34);
+#endif
 	}


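-	// mulSetvector. NB: in should be different as v!! (else don't work).
+	// mulSetVector, scaled: out = ((3x3 part of this matrix) * in) * scale. NB: in and out must be different objects (the non-SSE2 path writes out.x before it has finished reading in).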
 	void	mulSetVector(const CVector &in, float scale, CVector &out)
 	{
+#ifdef NL_HAS_SSE2
+		__m128 xxx = _mm_shuffle_ps(in.mm, in.mm, _MM_SHUFFLE(0, 0, 0, 0));
+		__m128 yyy = _mm_shuffle_ps(in.mm, in.mm, _MM_SHUFFLE(1, 1, 1, 1));
+		__m128 zzz = _mm_shuffle_ps(in.mm, in.mm, _MM_SHUFFLE(2, 2, 2, 2));
+		out.mm = _mm_mul_ps(mm1, xxx);
+		out.mm = _mm_add_ps(out.mm, _mm_mul_ps(mm2, yyy));
+		out.mm = _mm_add_ps(out.mm, _mm_mul_ps(mm3, zzz));
+		out.mm = _mm_mul_ps(out.mm, _mm_set1_ps(scale));
+#else
 		out.x= (a11*in.x + a12*in.y + a13*in.z) * scale;
 		out.y= (a21*in.x + a22*in.y + a23*in.z) * scale;
 		out.z= (a31*in.x + a32*in.y + a33*in.z) * scale;
+#endif
 	}
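-	// mulSetpoint. NB: in should be different as v!! (else don't work).
+	// mulSetPoint, scaled: out = ((3x3 part of this matrix) * in + 4th column (a14, a24, a34)) * scale. NB: in and out must be different objects (the non-SSE2 path writes out.x before it has finished reading in).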
 	void	mulSetPoint(const CVector &in, float scale, CVector &out)
 	{
+#ifdef NL_HAS_SSE2
+		__m128 xxx = _mm_shuffle_ps(in.mm, in.mm, _MM_SHUFFLE(0, 0, 0, 0));
+		__m128 yyy = _mm_shuffle_ps(in.mm, in.mm, _MM_SHUFFLE(1, 1, 1, 1));
+		__m128 zzz = _mm_shuffle_ps(in.mm, in.mm, _MM_SHUFFLE(2, 2, 2, 2));
+		out.mm = _mm_mul_ps(mm1, xxx);
+		out.mm = _mm_add_ps(out.mm, _mm_mul_ps(mm2, yyy));
+		out.mm = _mm_add_ps(out.mm, _mm_mul_ps(mm3, zzz));
+		out.mm = _mm_add_ps(out.mm, mm4);
+		out.mm = _mm_mul_ps(out.mm, _mm_set1_ps(scale));
+#else
 		out.x= (a11*in.x + a12*in.y + a13*in.z + a14) * scale;
 		out.y= (a21*in.x + a22*in.y + a23*in.z + a24) * scale;
 		out.z= (a31*in.x + a32*in.y + a33*in.z + a34) * scale;
+#endif
 	}


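-	// mulAddvector. NB: in should be different as v!! (else don't work).
+	// mulAddVector: out += ((3x3 part of this matrix) * in) * scale. NB: in and out must be different objects (the non-SSE2 path updates out.x before it has finished reading in).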
 	void	mulAddVector(const CVector &in, float scale, CVector &out)
 	{
+#ifdef NL_HAS_SSE2
+		__m128 xxx = _mm_shuffle_ps(in.mm, in.mm, _MM_SHUFFLE(0, 0, 0, 0));
+		__m128 yyy = _mm_shuffle_ps(in.mm, in.mm, _MM_SHUFFLE(1, 1, 1, 1));
+		__m128 zzz = _mm_shuffle_ps(in.mm, in.mm, _MM_SHUFFLE(2, 2, 2, 2));
+		__m128 temp = _mm_mul_ps(mm1, xxx);
+		temp = _mm_add_ps(temp, _mm_mul_ps(mm2, yyy));
+		temp = _mm_add_ps(temp, _mm_mul_ps(mm3, zzz));
+		temp = _mm_mul_ps(temp, _mm_set1_ps(scale));
+		out.mm = _mm_add_ps(out.mm, temp);
+#else
 		out.x+= (a11*in.x + a12*in.y + a13*in.z) * scale;
 		out.y+= (a21*in.x + a22*in.y + a23*in.z) * scale;
 		out.z+= (a31*in.x + a32*in.y + a33*in.z) * scale;
+#endif
 	}
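-	// mulAddpoint. NB: in should be different as v!! (else don't work).
+	// mulAddPoint: out += ((3x3 part of this matrix) * in + 4th column (a14, a24, a34)) * scale. NB: in and out must be different objects (the non-SSE2 path updates out.x before it has finished reading in).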
 	void	mulAddPoint(const CVector &in, float scale, CVector &out)
 	{
+#ifdef NL_HAS_SSE2
+		__m128 xxx = _mm_shuffle_ps(in.mm, in.mm, _MM_SHUFFLE(0, 0, 0, 0));
+		__m128 yyy = _mm_shuffle_ps(in.mm, in.mm, _MM_SHUFFLE(1, 1, 1, 1));
+		__m128 zzz = _mm_shuffle_ps(in.mm, in.mm, _MM_SHUFFLE(2, 2, 2, 2));
+		__m128 temp = _mm_mul_ps(mm1, xxx);
+		temp = _mm_add_ps(temp, _mm_mul_ps(mm2, yyy));
+		temp = _mm_add_ps(temp, _mm_mul_ps(mm3, zzz));
+		temp = _mm_add_ps(temp, mm4);
+		temp = _mm_mul_ps(temp, _mm_set1_ps(scale));
+		out.mm = _mm_add_ps(out.mm, temp);
+#else
 		out.x+= (a11*in.x + a12*in.y + a13*in.z + a14) * scale;
 		out.y+= (a21*in.x + a22*in.y + a23*in.z + a24) * scale;
 		out.z+= (a31*in.x + a32*in.y + a33*in.z + a34) * scale;
+#endif
 	}


--- a/code/nel/include/nel/3d/mesh.h
+++ b/code/nel/include/nel/3d/mesh.h
@@ -41,6 +41,7 @@ namespace NL3D


 using	NLMISC::CVector;
+using	NLMISC::CVectorPacked;
 using	NLMISC::CPlane;
 using	NLMISC::CMatrix;

@@ -842,8 +843,8 @@ private:

 	void	flagSkinVerticesForMatrixBlock(uint8 *skinFlags, CMatrixBlock &mb);
 	void	computeSkinMatrixes(CSkeletonModel *skeleton, CMatrix3x4 *matrixes, CMatrixBlock  *prevBlock, CMatrixBlock &curBlock);
-	void	computeSoftwarePointSkinning(CMatrix3x4 *matrixes, CVector *srcVector, CPaletteSkin *srcPal, float *srcWgt, CVector *dstVector);
-	void	computeSoftwareVectorSkinning(CMatrix3x4 *matrixes, CVector *srcVector, CPaletteSkin *srcPal, float *srcWgt, CVector *dstVector);
+	void	computeSoftwarePointSkinning(CMatrix3x4 *matrixes, CVector *srcVector, CPaletteSkin *srcPal, float *srcWgt, CVectorPacked *dstVector);
+	void	computeSoftwareVectorSkinning(CMatrix3x4 *matrixes, CVector *srcVector, CPaletteSkin *srcPal, float *srcWgt, CVectorPacked *dstVector);

 	// Shadow mapping and CMesh. NB: not serialized, but created at each load
 	CShadowSkin				_ShadowSkin;
--- a/code/nel/include/nel/3d/mesh_mrm_skinned.h
+++ b/code/nel/include/nel/3d/mesh_mrm_skinned.h
@@ -43,6 +43,7 @@ namespace NL3D


 using	NLMISC::CVector;
+using	NLMISC::CVectorPacked;
 using	NLMISC::CPlane;
 using	NLMISC::CMatrix;
 class	CMRMBuilder;
@@ -405,12 +406,24 @@ public:
 			uint8	Weights[NL3D_MESH_MRM_SKINNED_MAX_MATRIX];

 			// Decompact it
+			inline void getPos (CVectorPacked &dest, float factor) const
+			{
+				dest.x = (float)X * factor;
+				dest.y = (float)Y * factor;
+				dest.z = (float)Z * factor;
+			}
 			inline void getPos (CVector &dest, float factor) const
 			{
 				dest.x = (float)X * factor;
 				dest.y = (float)Y * factor;
 				dest.z = (float)Z * factor;
 			}
+			inline void getNormal (CVectorPacked &dest) const
+			{
+				dest.x = (float)Nx * (1.f/NL3D_MESH_MRM_SKINNED_NORMAL_FACTOR);
+				dest.y = (float)Ny * (1.f/NL3D_MESH_MRM_SKINNED_NORMAL_FACTOR);
+				dest.z = (float)Nz * (1.f/NL3D_MESH_MRM_SKINNED_NORMAL_FACTOR);
+			}
 			inline void getNormal (CVector &dest) const
 			{
 				dest.x = (float)Nx * (1.f/NL3D_MESH_MRM_SKINNED_NORMAL_FACTOR);
@@ -480,6 +493,10 @@ public:
 		}

 		// Decompact position
+		inline void getPos (CVectorPacked &dest, const CPackedVertex &src) const
+		{
+			src.getPos (dest, _DecompactScale);
+		}
 		inline void getPos (CVector &dest, const CPackedVertex &src) const
 		{
 			src.getPos (dest, _DecompactScale);
--- a/code/nel/include/nel/3d/packed_zone.h
+++ b/code/nel/include/nel/3d/packed_zone.h
@@ -164,6 +164,7 @@ private:
 	void    addInstance(const CShapeInfo &si, const NLMISC::CMatrix &matrix, TVertexGrid &vertexGrid, TTriListGrid &triListGrid);
 public:
 	// PRIVATE : unpack a packed tri
+	void	unpackTri(const CPackedTri &src, NLMISC::CVectorPacked dest[3]) const;
 	void	unpackTri(const CPackedTri &src, NLMISC::CVector dest[3]) const;
 };

@@ -197,6 +198,7 @@ private:
 	NLMISC::CVector			_PackedLocalToWorld;
 public:
 	// PRIVATE : unpack a packed tri
+	void	unpackTri(const CPackedTri16 &src, NLMISC::CVectorPacked dest[3]) const;
 	void	unpackTri(const CPackedTri16 &src, NLMISC::CVector dest[3]) const;
 };

--- a/code/nel/include/nel/3d/particle_system.h
+++ b/code/nel/include/nel/3d/particle_system.h
@@ -1244,7 +1244,7 @@ public:
 	static std::vector<uint>						   _ParticleToRemove;			// used during the update step, contains the indices of the particles to remove
 	static std::vector<sint>						   _ParticleRemoveListIndex; 	// for each particle, -1 if it hasn't been removed, or else give the insertion number in _ParticleToRemove
 	static std::vector<uint>						   _CollidingParticles; // index of particle that collided
-	static std::vector<NLMISC::CVector>				   _SpawnPos;			// spawn position of newly created particles
+	static std::vector<NLMISC::CVectorPacked>		   _SpawnPos;			// spawn position of newly created particles
 public:
 	// current sim steps infos
 	static TAnimationTime								EllapsedTime;
--- a/code/nel/include/nel/3d/ps_attrib.h
+++ b/code/nel/include/nel/3d/ps_attrib.h
@@ -563,12 +563,12 @@ void CPSAttrib<T>::swap(CPSAttrib<T> &other)

 // here we give some definition for common types

-typedef CPSAttrib<NLMISC::CVector> TPSAttribVector;
-typedef CPSAttrib<NLMISC::CRGBA>   TPSAttribRGBA;
-typedef CPSAttrib<float>		   TPSAttribFloat;
-typedef CPSAttrib<uint32>		   TPSAttribUInt;
-typedef CPSAttrib<uint8>		   TPSAttribUInt8;
-typedef CPSAttrib<TAnimationTime>  TPSAttribTime;
+typedef CPSAttrib<NLMISC::CVectorPacked>	TPSAttribVector;
+typedef CPSAttrib<NLMISC::CRGBA>			TPSAttribRGBA;
+typedef CPSAttrib<float>					TPSAttribFloat;
+typedef CPSAttrib<uint32>					TPSAttribUInt;
+typedef CPSAttrib<uint8>					TPSAttribUInt8;
+typedef CPSAttrib<TAnimationTime>			TPSAttribTime;

 } // NL3D

--- a/code/nel/include/nel/3d/ps_attrib_maker_bin_op_inline.h
+++ b/code/nel/include/nel/3d/ps_attrib_maker_bin_op_inline.h
@@ -33,17 +33,17 @@ namespace NL3D {
  */

 template <class T>
-inline T PSBinOpModulate(T arg1, T arg2) { return arg1 * arg2; }
+inline T PSBinOpModulate(const T &arg1, const T &arg2) { return arg1 * arg2; }
 template <class T>
-inline T PSBinOpAdd(T arg1, T arg2) { return arg1 + arg2; }
+inline T PSBinOpAdd(const T &arg1, const T &arg2) { return arg1 + arg2; }
 template <class T>
-inline T PSBinOpSubtract(T arg1, T arg2) { return arg1 - arg2; }
+inline T PSBinOpSubtract(const T &arg1, const T &arg2) { return arg1 - arg2; }

 template <>
-inline CPlaneBasis PSBinOpModulate(CPlaneBasis p1, CPlaneBasis p2)
+inline CPlaneBasis PSBinOpModulate(const CPlaneBasis &p1, const CPlaneBasis &p2)
 {
 	// we compute p1 * p2
-	NLMISC::CVector z = p1.X ^ p1.Y;
+	NLMISC::CVector z = CVector(p1.X) ^ CVector(p1.Y);
 	CPlaneBasis r;
 	r.X.x = p2.X.x * p1.X.x + p2.X.y * p1.Y.x + p2.X.z * z.x;
 	r.X.y = p2.X.x * p1.X.y + p2.X.y * p1.Y.y + p2.X.z * z.y;
@@ -57,13 +57,13 @@ inline CPlaneBasis PSBinOpModulate(CPlaneBasis p1, CPlaneBasis p2)

 }
 template <>
-inline CPlaneBasis PSBinOpAdd(CPlaneBasis /* p1 */, CPlaneBasis /* p2 */)
+inline CPlaneBasis PSBinOpAdd(const CPlaneBasis &/* p1 */, const CPlaneBasis &/* p2 */)
 {
 	nlassert(0); // not allowed for now
 	return CPlaneBasis(NLMISC::CVector::Null);
 }
 template <>
-inline CPlaneBasis PSBinOpSubtract(CPlaneBasis /* p1 */, CPlaneBasis /* p2 */)
+inline CPlaneBasis PSBinOpSubtract(const CPlaneBasis &/* p1 */, const CPlaneBasis &/* p2 */)
 {
 	nlassert(0); // not allowed for now
 	return CPlaneBasis(NLMISC::CVector::Null);
@@ -71,21 +71,21 @@ inline CPlaneBasis PSBinOpSubtract(CPlaneBasis /* p1 */, CPlaneBasis /* p2 */)


 template <>
-inline uint32 PSBinOpSubtract(uint32 lhs, uint32 rhs)
+inline uint32 PSBinOpSubtract(const uint32 &lhs, const uint32 &rhs)
 {
 	return rhs > lhs ? 0 : lhs - rhs; // avoid overflow
 }


 template <>
-inline NLMISC::CRGBA PSBinOpModulate(NLMISC::CRGBA t1, NLMISC::CRGBA t2)
+inline NLMISC::CRGBA PSBinOpModulate(const NLMISC::CRGBA &t1, const NLMISC::CRGBA &t2)
 {
 	NLMISC::CRGBA result;
 	result.modulateFromColor(t1, t2);
 	return result;
 }
 template <>
-inline NLMISC::CRGBA PSBinOpAdd(NLMISC::CRGBA t1, NLMISC::CRGBA t2)
+inline NLMISC::CRGBA PSBinOpAdd(const NLMISC::CRGBA &t1, const NLMISC::CRGBA &t2)
 {
 	NLMISC::CRGBA r;
 	uint S = t1.R + t2.R; if (S > 255) S = 255; r.R = (uint8) S;
@@ -94,7 +94,7 @@ inline NLMISC::CRGBA PSBinOpAdd(NLMISC::CRGBA t1, NLMISC::CRGBA t2)
 	return r;
 }
 template <>
-inline NLMISC::CRGBA PSBinOpSubtract(NLMISC::CRGBA t1, NLMISC::CRGBA t2)
+inline NLMISC::CRGBA PSBinOpSubtract(const NLMISC::CRGBA &t1, const NLMISC::CRGBA &t2)
 {
 	NLMISC::CRGBA r;
 	sint S = t1.R - t2.R; if (S < 0) S = 0; r.R = (uint8) S;
--- a/code/nel/include/nel/3d/ps_attrib_maker_helper.h
+++ b/code/nel/include/nel/3d/ps_attrib_maker_helper.h
@@ -1190,10 +1190,10 @@ T  CPSAttribMakerT<T, F>::get(CPSLocated *loc, uint32 index)
 			result=  getInternal(loc->getInvMass()[index]);
 		break;
 		case CPSInputType::attrSpeed:
-			result = getInternal(loc->getSpeed()[index].norm());
+			result = getInternal(NLMISC::CVector(loc->getSpeed()[index]).norm());
 		break;
 		case CPSInputType::attrPosition:
-			result = getInternal(loc->getPos()[index].norm());
+			result = getInternal(NLMISC::CVector(loc->getPos()[index]).norm());
 		break;
 		case CPSInputType::attrUniformRandom:
 		{
@@ -1210,7 +1210,7 @@ T  CPSAttribMakerT<T, F>::get(CPSLocated *loc, uint32 index)
 			static NLMISC::CVector lodVect;
 			float lodOffset;
 			loc->getLODVect(lodVect, lodOffset, loc->getMatrixMode());
-			float r = fabsf(loc->getPos()[index] * lodVect + lodOffset);
+			float r = fabsf(NLMISC::CVector(loc->getPos()[index]) * lodVect + lodOffset);
 			r = this->_NbCycles * r > MaxInputValue ? MaxInputValue : r;
 			if (_Clamp)
 			{
@@ -1224,7 +1224,7 @@ T  CPSAttribMakerT<T, F>::get(CPSLocated *loc, uint32 index)
 			static NLMISC::CVector lodVect;
 			float lodOffset;
 			loc->getLODVect(lodVect, lodOffset, loc->getMatrixMode());
-			float r = loc->getPos()[index] * lodVect + lodOffset;
+			float r = NLMISC::CVector(loc->getPos()[index]) * lodVect + lodOffset;
 			r = this->_NbCycles * (r > MaxInputValue ? MaxInputValue : r * r);

 			if (_Clamp)
@@ -1240,7 +1240,7 @@ T  CPSAttribMakerT<T, F>::get(CPSLocated *loc, uint32 index)
 			float lodOffset;
 			loc->getLODVect(lodVect, lodOffset, loc->getMatrixMode());

-			float r = loc->getPos()[index] * lodVect + lodOffset;
+			float r = NLMISC::CVector(loc->getPos()[index]) * lodVect + lodOffset;
 			if (r < 0)
 			{
 				result = _F(MaxInputValue);
@@ -1260,7 +1260,7 @@ T  CPSAttribMakerT<T, F>::get(CPSLocated *loc, uint32 index)
 			float lodOffset;
 			loc->getLODVect(lodVect, lodOffset, loc->getMatrixMode());

-			float r = loc->getPos()[index] * lodVect + lodOffset;
+			float r = NLMISC::CVector(loc->getPos()[index]) * lodVect + lodOffset;
 			if (r < 0)
 			{
 				result = _F(MaxInputValue);
@@ -1432,7 +1432,7 @@ public:
 	  *
 	  */

-    virtual void setDefaultValue(T defaultValue) { _DefaultValue = defaultValue;}
+    virtual void setDefaultValue(const T &defaultValue) { _DefaultValue = defaultValue;}

 	/// get the default value :
 	virtual T getDefaultValue(void) const { return _DefaultValue; }
--- a/code/nel/include/nel/3d/ps_attrib_maker_iterators.h
+++ b/code/nel/include/nel/3d/ps_attrib_maker_iterators.h
@@ -48,7 +48,7 @@ namespace NL3D
 	template <class TBaseIter>
 	struct CVectNormIterator : CPSBaseIterator<TBaseIter>
 	{
-		GET_INLINE float get() const { return this->Iter.get().norm(); }
+		GET_INLINE float get() const { return NLMISC::CVector(this->Iter.get()).norm(); } // convert the iterator value to NLMISC::CVector before calling norm()
 		CVectNormIterator(const TBaseIter &it) : CPSBaseIterator<TBaseIter>(it) {}
 	};

@@ -76,7 +76,7 @@ namespace NL3D
 	template <class TBaseIter>
 	struct CDistIterator : CPSBaseIterator<TBaseIter>
 	{
-		NLMISC::CVector V;
+		NLMISC::CVectorPacked V;
 		float Offset;
 		CDistIterator(const TBaseIter &it) : CPSBaseIterator<TBaseIter>(it) {}
 	};
@@ -89,7 +89,7 @@ namespace NL3D
 		GET_INLINE
 		float get() const
 		{
-			const float r = fabsf(this->Iter.get() * this->V + this->Offset);
+			const float r = fabsf(NLMISC::CVector(this->Iter.get()) * this->V + this->Offset); // convert the iterator value to NLMISC::CVector for operator*
 			return r > MaxInputValue ? MaxInputValue : r;
 		}
 		CFDot3AddIterator(const TBaseIter &it) : CDistIterator<TBaseIter>(it) {}
@@ -101,7 +101,7 @@ namespace NL3D
 	{
 		float get() const
 		{
-			float r = this->Iter.get() * this->V + this->Offset;
+			float r = NLMISC::CVector(this->Iter.get()) * this->V + this->Offset; // convert the iterator value to NLMISC::CVector for operator*
 			r *= r;
 			return r > MaxInputValue ? MaxInputValue : r;
 		}
@@ -115,7 +115,7 @@ namespace NL3D
 		GET_INLINE
 		float get() const
 		{
-			const float r = this->Iter.get() * this->V + this->Offset;
+			const float r = NLMISC::CVector(this->Iter.get()) * this->V + this->Offset; // convert the iterator value to NLMISC::CVector for operator*
 			if (r < 0.f) return MaxInputValue;
 			return r > MaxInputValue ? MaxInputValue : r;
 		}
@@ -130,7 +130,7 @@ namespace NL3D
 		GET_INLINE
 		float get() const
 		{
-			float r = this->Iter.get() * this->V + this->Offset;
+			float r = CVector(this->Iter.get()) * this->V + this->Offset;
 			if (r < 0) return MaxInputValue;
 			r *= r;
 			return r > MaxInputValue ? MaxInputValue : r;
--- a/code/nel/include/nel/3d/ps_attrib_maker_template.h
+++ b/code/nel/include/nel/3d/ps_attrib_maker_template.h
@@ -68,7 +68,7 @@ template <typename T> struct CPSValueBlendFuncBase
 {
 	virtual ~CPSValueBlendFuncBase() {}
 	virtual void getValues(T &startValue, T &endValue) const = 0;
-	virtual void setValues(T startValue, T endValue) = 0;
+	virtual void setValues(const T &startValue, const T &endValue) = 0;
 };


@@ -122,7 +122,7 @@ public:
 		}

 		/// Set the Values between which to blend.
-		virtual void setValues(T startValue, T endValue)
+		virtual void setValues(const T &startValue, const T &endValue)
 		{
 			_StartValue = startValue;
 			_EndValue = endValue;
@@ -210,7 +210,7 @@ public:

 	/// set the Values

-	virtual void setValues(T startValue, T endValue)
+	virtual void setValues(const T &startValue, const T &endValue)
 	{
 		float step = 1.f / n;
 		float alpha = 0.0f;
--- a/code/nel/include/nel/3d/ps_color.h
+++ b/code/nel/include/nel/3d/ps_color.h
@@ -62,7 +62,7 @@ public:
 		endValue = convertVBColor(endValue, _ColorType);

 	}
-	virtual void setValues(NLMISC::CRGBA startValue, NLMISC::CRGBA endValue)
+	virtual void setValues(const NLMISC::CRGBA &startValue, const NLMISC::CRGBA &endValue)
 	{
 		CPSValueBlendFunc<NLMISC::CRGBA>::setValues(convertVBColor(startValue, _ColorType), convertVBColor(endValue, _ColorType));
 	}
@@ -96,7 +96,7 @@ public:
 		endValue = convertVBColor(endValue, _ColorType);

 	}
-	virtual void setValues(NLMISC::CRGBA startValue, NLMISC::CRGBA endValue)
+	virtual void setValues(const NLMISC::CRGBA &startValue, const NLMISC::CRGBA &endValue)
 	{
 		CPSValueBlendSampleFunc<NLMISC::CRGBA, RGBA_BLENDER_NUM_VALUES>::setValues(convertVBColor(startValue, _ColorType), convertVBColor(endValue, _ColorType));
 	}
--- a/code/nel/include/nel/3d/ps_edit.h
+++ b/code/nel/include/nel/3d/ps_edit.h
@@ -82,7 +82,7 @@ struct IPSMover
 	virtual NLMISC::CVector			getNormal(uint32 /* index */) { NL_PS_FUNC(getNormal); return NLMISC::CVector::Null ; }

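-	/// if the object only stores a normal, this set the normal of the object. Otherwise it has no effect
+	/// If the object only stores a normal, this sets the normal of the object. Otherwise it has no effect.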
-	virtual void					setNormal(uint32 /* index */, NLMISC::CVector /* n */) { NL_PS_FUNC(setNormal); }
+	virtual void					setNormal(uint32 /* index */, const NLMISC::CVector &/* n */) { NL_PS_FUNC(setNormal); }

 	// set a new orthogonal matrix for the object
 	virtual void					setMatrix(uint32 index, const NLMISC::CMatrix &m) = 0 ;
--- a/code/nel/include/nel/3d/ps_force.h
+++ b/code/nel/include/nel/3d/ps_force.h
@@ -87,9 +87,9 @@ public:
 	  * 'accumulate' set to false.
 	  * NB : works only with integrable forces
 	  */
-	 virtual void integrate(float /* date */, CPSLocated * /* src */, uint32 /* startIndex */, uint32 /* numObjects */, NLMISC::CVector * /* destPos */ = NULL, NLMISC::CVector * /* destSpeed */ = NULL,
+	 virtual void integrate(float /* date */, CPSLocated * /* src */, uint32 /* startIndex */, uint32 /* numObjects */, NLMISC::CVectorPacked * /* destPos */ = NULL, NLMISC::CVectorPacked * /* destSpeed */ = NULL,
 							bool /* accumulate */ = false,
-							uint /* posStride */ = sizeof(NLMISC::CVector), uint /* speedStride */ = sizeof(NLMISC::CVector)
+							uint /* posStride */ = sizeof(NLMISC::CVectorPacked), uint /* speedStride */ = sizeof(NLMISC::CVectorPacked)
 							) const
 	 {
 		 nlassert(0); // not an integrable force
@@ -102,9 +102,9 @@ public:
 	  */
 	virtual void integrateSingle(float /* startDate */, float /* deltaT */, uint /* numStep */,
 								 const CPSLocated * /* src */, uint32 /* indexInLocated */,
-								 NLMISC::CVector * /* destPos */,
+								 NLMISC::CVectorPacked * /* destPos */,
 								 bool /* accumulate */ = false,
-								 uint /* posStride */ = sizeof(NLMISC::CVector)) const
+								 uint /* posStride */ = sizeof(NLMISC::CVectorPacked)) const
 	{
 		 nlassert(0); // not an integrable force
 	}
@@ -325,7 +325,10 @@ template <class T> void CIsotropicForceT<T>::computeForces(CPSLocated &target)

 		for (; speedIt != endSpeedIt; ++speedIt, ++posIt, ++invMassIt)
 		{
-			_F(*posIt, *speedIt, *invMassIt);
+			const CVector posv = *posIt;
+			CVector speedv = *speedIt;
+			_F(posv, speedv, *invMassIt);
+			*speedIt = speedv;
 		}
 	}
 }
@@ -412,9 +415,9 @@ public:

 	virtual void integrateSingle(float startDate, float deltaT, uint numStep,
 								 const CPSLocated *src, uint32 indexInLocated,
-								 NLMISC::CVector *destPos,
+								 NLMISC::CVectorPacked *destPos,
 								 bool accumulate = false,
-								 uint posStride = sizeof(NLMISC::CVector)) const;
+								 uint posStride = sizeof(NLMISC::CVectorPacked)) const;

 protected:
 	/// inherited from CPSForceIntensityHelper
@@ -583,9 +586,9 @@ public:

 	virtual void integrateSingle(float startDate, float deltaT, uint numStep,
 								 const CPSLocated *src, uint32 indexInLocated,
-								 NLMISC::CVector *destPos,
+								 NLMISC::CVectorPacked *destPos,
 								 bool accumulate = false,
-								 uint posStride = sizeof(NLMISC::CVector)) const;
+								 uint posStride = sizeof(NLMISC::CVectorPacked)) const;

 	/// perform initialisations
 	static void initPrecalc();
@@ -741,7 +744,7 @@ public:
 	virtual NLMISC::CVector getScale(uint32 k) const { return NLMISC::CVector(_Radius[k], _Radius[k], _Radius[k]); }
 	virtual bool onlyStoreNormal(void) const { return true; }
 	virtual NLMISC::CVector getNormal(uint32 index) { return _Normal[index]; }
-	virtual void setNormal(uint32 index, NLMISC::CVector n) { _Normal[index] = n; }
+	virtual void setNormal(uint32 index, const NLMISC::CVector &n) { _Normal[index] = n; }

 	virtual void setMatrix(uint32 index, const NLMISC::CMatrix &m);
 	virtual NLMISC::CMatrix getMatrix(uint32 index) const;
@@ -770,7 +773,7 @@ protected:
 	virtual CPSLocated *getForceIntensityOwner(void) { return _Owner; }

 	// the normal of the vortex
-	CPSAttrib<NLMISC::CVector> _Normal;
+	CPSAttrib<NLMISC::CVectorPacked> _Normal;
 	// radius of the vortex
 	TPSAttribFloat _Radius;

--- a/code/nel/include/nel/3d/ps_iterator.h
+++ b/code/nel/include/nel/3d/ps_iterator.h
@@ -134,10 +134,10 @@ namespace NL3D
 	/// Some typedefs
 	typedef CAdvance1Iterator<TPSAttribFloat::const_iterator, float> TIteratorFloatStep1;
-	typedef CAdvance1Iterator<TPSAttribFloat::const_iterator, TAnimationTime> TIteratorFloatStep1;
-	typedef CAdvance1Iterator<TPSAttribVector::const_iterator, NLMISC::CVector> TIteratorVectStep1;
+	typedef CAdvance1Iterator<TPSAttribFloat::const_iterator, TAnimationTime> TIteratorTimeStep1; // was a second TIteratorFloatStep1; named to match TIteratorTimeStep1616 below
+	typedef CAdvance1Iterator<TPSAttribVector::const_iterator, NLMISC::CVectorPacked> TIteratorVectStep1;
 	typedef CAdvance1616Iterator<TPSAttribFloat::const_iterator, float> TIteratorFloatStep1616;
 	typedef CAdvance1616Iterator<TPSAttribFloat::const_iterator, TAnimationTime> TIteratorTimeStep1616;
-	typedef CAdvance1616Iterator<TPSAttribVector::const_iterator, NLMISC::CVector> TIteratorVectStep1616;
+	typedef CAdvance1616Iterator<TPSAttribVector::const_iterator, NLMISC::CVectorPacked> TIteratorVectStep1616;

 } // NL3D

--- a/code/nel/include/nel/3d/ps_located.h
+++ b/code/nel/include/nel/3d/ps_located.h
@@ -220,6 +220,7 @@ public:
 	CScene *getScene(void);

 	/// shortcut to the same method of the owning particle system
+	void getLODVect(NLMISC::CVectorPacked &v, float &offset, TPSMatrixMode matrixMode);
 	void getLODVect(NLMISC::CVector &v, float &offset, TPSMatrixMode matrixMode);


@@ -411,7 +412,7 @@ public:
 	void computeForces();

 	// compute collisions
-	void computeCollisions(uint firstInstanceIndex, const NLMISC::CVector *posBefore, const NLMISC::CVector *posAfter);
+	void computeCollisions(uint firstInstanceIndex, const NLMISC::CVectorPacked *posBefore, const NLMISC::CVectorPacked *posAfter);

 	// get a conversion matrix between 2 matrix modes
 	static const NLMISC::CMatrix &getConversionMatrix(const CParticleSystem &ps, TPSMatrixMode to, TPSMatrixMode from);
@@ -508,11 +509,12 @@ public:
 	  */
 	void integrateSingle(float startDate, float deltaT, uint numStep,
 						 uint32 indexInLocated,
-						 NLMISC::CVector *destPos,
-						 uint posStride = sizeof(NLMISC::CVector)) const;
+						 NLMISC::CVectorPacked *destPos,
+						 uint posStride = sizeof(NLMISC::CVectorPacked)) const;

 	// compute position for a single element at the given date
 	// NB : only works with object that have parametric trajectories
+	inline void computeParametricPos(float date, uint indexInLocated, NLMISC::CVectorPacked &dest) const;
 	inline void computeParametricPos(float date, uint indexInLocated, NLMISC::CVector &dest) const;


@@ -613,7 +615,7 @@ public:
 	struct CParametricInfo
 	{
 		CParametricInfo() {}
-		CParametricInfo(NLMISC::CVector pos, NLMISC::CVector speed, float date)
+		CParametricInfo(const NLMISC::CVector &pos, const NLMISC::CVector &speed, float date)
 			: Pos(pos), Speed(speed), Date(date)
 		{
 		}
@@ -1052,6 +1054,12 @@ inline TAnimationTime	CPSLocated::getAgeInSeconds(uint elementIndex) const

 // *****************************************************************************************************
 inline void	CPSLocated::computeParametricPos(float date, uint indexInLocated, NLMISC::CVector &dest) const
+{
+	NLMISC::CVectorPacked temp;
+	integrateSingle(date, 1.f, 1, indexInLocated, &temp);
+	dest = temp;
+}
+inline void	CPSLocated::computeParametricPos(float date, uint indexInLocated, NLMISC::CVectorPacked &dest) const
 {
 	integrateSingle(date, 1.f, 1, indexInLocated, &dest);
 }
--- a/code/nel/include/nel/3d/ps_misc.h
+++ b/code/nel/include/nel/3d/ps_misc.h
@@ -51,12 +51,12 @@ inline uint ScaleFloatGE(float f, float deltaT, float clampValue, uint numStep)
  * \param destPos		The destination, that will be filled with the given value
  * \param stride		Number of byte between each value to be copied
  */
-inline NLMISC::CVector *FillBufUsingSubdiv(const	NLMISC::CVector &value,
+inline NLMISC::CVectorPacked *FillBufUsingSubdiv(const	NLMISC::CVector &value,
 									  float					clampValue,
 									  float					&startValue,
 									  float					deltaT,
 									  uint					&maxNumStep,
-									  NLMISC::CVector		*destPos,
+									  NLMISC::CVectorPacked	*destPos,
 									  uint32				stride
 									  )
 {
@@ -68,7 +68,7 @@ inline NLMISC::CVector *FillBufUsingSubdiv(const	NLMISC::CVector &value,
 	while (numToFill--)
 	{
 		*destPos = value;
-		destPos = (NLMISC::CVector *) ( (uint8 *) destPos + stride);
+		destPos = (NLMISC::CVectorPacked *) ( (uint8 *) destPos + stride);
 	}

 	return destPos;
--- a/code/nel/include/nel/3d/ps_plane_basis.h
+++ b/code/nel/include/nel/3d/ps_plane_basis.h
@@ -37,8 +37,8 @@ namespace NL3D {

 struct CPlaneBasis
 {
-	NLMISC::CVector X ;
-	NLMISC::CVector Y ;
+	NLMISC::CVectorPacked X ;
+	NLMISC::CVectorPacked Y ;


 	// default ctor
@@ -62,7 +62,7 @@ struct CPlaneBasis
 	/// compute the normal of the plane basis
 	NLMISC::CVector getNormal(void) const
 	{
-		return X ^ Y ;
+		return NLMISC::CVector(X) ^ NLMISC::CVector(Y) ; // X and Y are CVectorPacked; convert to NLMISC::CVector to use operator^
 	}


--- a/code/nel/include/nel/3d/ps_ribbon.h
+++ b/code/nel/include/nel/3d/ps_ribbon.h
@@ -247,7 +247,7 @@ private:
 	//@}

 	CSmartPtr<ITexture>				_Tex;
-	CPSVector<NLMISC::CVector>::V	_Shape;
+	CPSVector<NLMISC::CVectorPacked>::V	_Shape;
 	float							_UFactor, _VFactor;
 	TOrientation					_Orientation;

--- a/code/nel/include/nel/3d/ps_ribbon_base.h
+++ b/code/nel/include/nel/3d/ps_ribbon_base.h
@@ -120,8 +120,8 @@ protected:
 	  * The dest tab must have at least nbSegs + 1 entries.
 	  */
 	void							computeRibbon( uint index,
-												   NLMISC::CVector *dest,
-												   uint stride = sizeof(NLMISC::CVector)
+												   NLMISC::CVectorPacked *dest,
+												   uint stride = sizeof(NLMISC::CVectorPacked)
 												  );

 	/// Called each time the time of the system change in order to update the ribbons positions
@@ -168,26 +168,26 @@ private:

 	/// Compute the ribbon points using linear interpolation between each sampling point.
 	void					computeLinearRibbon( uint index,
-											     NLMISC::CVector *dest,
-										         uint stride = sizeof(NLMISC::CVector)
+											     NLMISC::CVectorPacked *dest,
+										         uint stride = sizeof(NLMISC::CVectorPacked)
 										       );
 	/// The same as compute linear ribbon but try to make its length constant
 	void					computeLinearCstSizeRibbon( uint index,
-											     NLMISC::CVector *dest,
-										         uint stride = sizeof(NLMISC::CVector)
+											     NLMISC::CVectorPacked *dest,
+										         uint stride = sizeof(NLMISC::CVectorPacked)
 										       );
 	/// Compute the ribbon points using hermitte splines between each sampling point.
 	void					computeHermitteRibbon( uint index,
-											     NLMISC::CVector *dest,
-										         uint stride = sizeof(NLMISC::CVector)
+											     NLMISC::CVectorPacked *dest,
+										         uint stride = sizeof(NLMISC::CVectorPacked)
 										       );

 	/** Compute the ribbon points using hermitte splines between each sampling point,
 	  * and make a rough approximation to get a constant length
 	  */
 	void					computeHermitteCstSizeRibbon( uint index,
-											     NLMISC::CVector *dest,
-										         uint stride = sizeof(NLMISC::CVector)
+											     NLMISC::CVectorPacked *dest,
+										         uint stride = sizeof(NLMISC::CVectorPacked)
 										       );
 	// called by the system when its date has been manually changed
 	virtual void			systemDateChanged();
--- a/code/nel/include/nel/3d/ps_util.h
+++ b/code/nel/include/nel/3d/ps_util.h
@@ -28,6 +28,7 @@ namespace NLMISC
 {
 	class CMatrix;
 	class CVector;
+	class CVectorPacked;
 };

 namespace NL3D
--- a/code/nel/include/nel/3d/ps_zone.h
+++ b/code/nel/include/nel/3d/ps_zone.h
@@ -106,7 +106,7 @@ public:
 	/** Compute collisions for the given target. This will update the collisions infos.
 	  * The caller must provide pointer to arrays positions before and after time step.
 	  */
-	virtual	void computeCollisions(CPSLocated &target, uint firstInstanceIndex, const NLMISC::CVector *posBefore, const NLMISC::CVector *posAfter) = 0;
+	virtual	void computeCollisions(CPSLocated &target, uint firstInstanceIndex, const NLMISC::CVectorPacked *posBefore, const NLMISC::CVectorPacked *posAfter) = 0;

 protected:

@ -141,7 +141,7 @@ protected:
 class CPSZonePlane : public CPSZone, public IPSMover
 {
 	public:
-		virtual	void computeCollisions(CPSLocated &target, uint firstInstanceIndex, const NLMISC::CVector *posBefore, const NLMISC::CVector *posAfter);
+		virtual	void computeCollisions(CPSLocated &target, uint firstInstanceIndex, const NLMISC::CVectorPacked *posBefore, const NLMISC::CVectorPacked *posAfter);
 		virtual void show();


@ -153,7 +153,7 @@ class CPSZonePlane : public CPSZone, public IPSMover
 		virtual NLMISC::CMatrix getMatrix(uint32 index) const;
 		virtual bool onlyStoreNormal(void) const { return true; }
 		virtual NLMISC::CVector getNormal(uint32 index);
-		virtual void setNormal(uint32 index, NLMISC::CVector n);
+		virtual void setNormal(uint32 index, const NLMISC::CVector &n);

 		virtual void serial(NLMISC::IStream &f) throw(NLMISC::EStream);

@ -192,7 +192,7 @@ typedef CPSAttrib<CRadiusPair> TPSAttribRadiusPair;
 class CPSZoneSphere : public CPSZone, public IPSMover
 {
 	public:
-		virtual	void computeCollisions(CPSLocated &target, uint firstInstanceIndex, const NLMISC::CVector *posBefore, const NLMISC::CVector *posAfter);
+		virtual	void computeCollisions(CPSLocated &target, uint firstInstanceIndex, const NLMISC::CVectorPacked *posBefore, const NLMISC::CVectorPacked *posAfter);
 		virtual void show();


@ -236,7 +236,7 @@ class CPSZoneSphere : public CPSZone, public IPSMover
 class CPSZoneDisc : public CPSZone, public IPSMover
 {
 	public:
-		virtual	void computeCollisions(CPSLocated &target, uint firstInstanceIndex, const NLMISC::CVector *posBefore, const NLMISC::CVector *posAfter);
+		virtual	void computeCollisions(CPSLocated &target, uint firstInstanceIndex, const NLMISC::CVectorPacked *posBefore, const NLMISC::CVectorPacked *posAfter);
 		virtual void show();

 		CPSZoneDisc()
@ -255,7 +255,7 @@ class CPSZoneDisc : public CPSZone, public IPSMover
 		virtual NLMISC::CVector getScale(uint32 k) const;
 		virtual bool onlyStoreNormal(void) const { return true; }
 		virtual NLMISC::CVector getNormal(uint32 index);
-		virtual void setNormal(uint32 index, NLMISC::CVector n);
+		virtual void setNormal(uint32 index, const NLMISC::CVector &n);

 		virtual void serial(NLMISC::IStream &f) throw(NLMISC::EStream);

@ -283,7 +283,7 @@ class CPSZoneDisc : public CPSZone, public IPSMover
 class CPSZoneCylinder : public CPSZone, public IPSMover
 {
 	public:
-		virtual	void computeCollisions(CPSLocated &target, uint firstInstanceIndex, const NLMISC::CVector *posBefore, const NLMISC::CVector *posAfter);
+		virtual	void computeCollisions(CPSLocated &target, uint firstInstanceIndex, const NLMISC::CVectorPacked *posBefore, const NLMISC::CVectorPacked *posAfter);
 		virtual void show();

 		CPSZoneCylinder()
@ -335,7 +335,7 @@ class CPSZoneCylinder : public CPSZone, public IPSMover
 class CPSZoneRectangle : public CPSZone, public IPSMover
 {
 	public:
-		virtual	void computeCollisions(CPSLocated &target, uint firstInstanceIndex, const NLMISC::CVector *posBefore, const NLMISC::CVector *posAfter);
+		virtual	void computeCollisions(CPSLocated &target, uint firstInstanceIndex, const NLMISC::CVectorPacked *posBefore, const NLMISC::CVectorPacked *posAfter);
 		virtual void show();

 		CPSZoneRectangle()
--- a/code/nel/include/nel/3d/quad_grid.h
+++ b/code/nel/include/nel/3d/quad_grid.h
@ -314,11 +314,11 @@ private:// Methods.
 	}

 	// return the coordinates on the grid of what include the bbox.
-	void		selectQuads(CVector bmin, CVector bmax, sint &x0, sint &x1, sint &y0, sint &y1)
+	void		selectQuads(const CVector &bminp, const CVector &bmaxp, sint &x0, sint &x1, sint &y0, sint &y1)
 	{
-		CVector		bminp, bmaxp;
-		bminp= bmin;
-		bmaxp= bmax;
+		CVector		bmin, bmax;
+		bmin= bminp;
+		bmax= bmaxp;
 		bmin.minof(bminp, bmaxp);
 		bmax.maxof(bminp, bmaxp);
 		bmin/= _EltSize;
--- a/code/nel/include/nel/3d/raw_skin.h
+++ b/code/nel/include/nel/3d/raw_skin.h
@ -30,15 +30,21 @@ namespace NL3D


 using	NLMISC::CVector;
+using	NLMISC::CVectorPacked;
 using	NLMISC::CUV;

 /// A simple Vertex Pos/Normal/Uv
 class	CRawSkinVertex
 {
 public:
-	CVector		Pos;
-	CVector		Normal;
-	CUV			UV;
+#ifdef NL_HAS_SSE2	// tested with #ifdef, like the other SSE2 switches in this patch
+	CVectorPacked	Pos;	// packed x,y,z floats rather than the __m128-backed CVector
+	CVectorPacked	Normal;
+#else
+	CVector			Pos;
+	CVector			Normal;
+#endif // NL_HAS_SSE2
+	CUV				UV;
 };

 /// Vertices influenced by 1 matrix only.
--- a/code/nel/include/nel/3d/shadow_poly_receiver.h
+++ b/code/nel/include/nel/3d/shadow_poly_receiver.h
@ -24,6 +24,7 @@
 #include "nel/3d/vertex_buffer.h"
 #include "nel/3d/index_buffer.h"

+using NLMISC::CVectorPacked;

 namespace NL3D {

@ -85,10 +86,26 @@ public:
 	// a vertex
 	struct CRGBAVertex
 	{
+#ifdef NL_HAS_SSE2	// tested with #ifdef, like the other SSE2 switches in this patch
+		CVectorPacked V;	// packed x,y,z floats; Color follows directly after
+#else
 		CVector V;
+#endif // NL_HAS_SSE2
 		CRGBA Color;
 		CRGBAVertex() {}
 		CRGBAVertex(const CVector &v, CRGBA c) : V(v), Color(c) {}
+#ifdef NL_HAS_SSE2
+		const CVector &asVector() const	// views this whole vertex as a CVector
+		{
+			nlctassert(sizeof(CVector) == sizeof(CRGBAVertex));	// the cast below is only meaningful if both types have the same size
+			return *reinterpret_cast<const CVector *>(this);	// NOTE(review): presumably requires 'this' to be 16-byte aligned for SSE loads - confirm at call sites
+		}
+#else
+		const CVector &asVector() const	// no SSE2: V already is a CVector
+		{
+			return V;
+		}
+#endif // NL_HAS_SSE2
 	};

 	/** Compute list of clipped tri under the shadow mat
--- a/code/nel/include/nel/3d/shadow_skin.h
+++ b/code/nel/include/nel/3d/shadow_skin.h
@ -74,6 +74,7 @@ public:
 public:

 	// skinning
+	void		applySkin(NLMISC::CVectorPacked *dst, std::vector<CMatrix3x4> &boneMat3x4);
 	void		applySkin(NLMISC::CVector *dst, std::vector<CMatrix3x4> &boneMat3x4);

 	/** return ray intersection.
--- a/code/nel/include/nel/3d/static_quad_grid.h
+++ b/code/nel/include/nel/3d/static_quad_grid.h
@ -102,8 +102,10 @@ private:// Atttributes.


 	// return the coordinates on the grid of what include the bbox.
-	void		selectPoint(CVector point, sint &x0, sint &y0)
+	void		selectPoint(const CVector &pointp, sint &x0, sint &y0)
 	{
+		CVector point = pointp;
+
 		point/= _EltSize;
 		x0= (sint)(floor(point.x));
 		y0= (sint)(floor(point.y));
--- a/code/nel/include/nel/3d/u_driver.h
+++ b/code/nel/include/nel/3d/u_driver.h
@ -47,6 +47,7 @@ namespace NL3D


 using NLMISC::CVector;
+using NLMISC::CVectorPacked;
 using NLMISC::CMatrix;
 using NLMISC::CRGBA;
 using NLMISC::CBitmap;
--- a/code/nel/include/nel/3d/vertex_buffer.h
+++ b/code/nel/include/nel/3d/vertex_buffer.h
@ -790,8 +790,8 @@ public:
 	 *	A call to IDriver::activeVertexBuffer() will change this format to the format returned by IDriver::getVertexColorFormat().
 	 *	So, before each write of vertex color in the vertex buffer, the vertex color format must be checked with CVertexBuffer::getVertexColorFormat().
 	 */
-	NLMISC::CVector*		getVertexCoordPointer(uint idx=0);
-	NLMISC::CVector*		getNormalCoordPointer(uint idx=0);
+	NLMISC::CVectorPacked*		getVertexCoordPointer(uint idx=0);
+	NLMISC::CVectorPacked*		getNormalCoordPointer(uint idx=0);
 	NLMISC::CUV*			getTexCoordPointer(uint idx=0, uint8 stage=0);
 	void*					getColorPointer(uint idx=0);
 	void*					getSpecularPointer(uint idx=0);
@ -854,8 +854,8 @@ public:
 	 *	A call to IDriver::activeVertexBuffer() will change this format to the format returned by IDriver::getVertexColorFormat().
 	 *	So, before each write of vertex color in the vertex buffer, the vertex color format must be checked with CVertexBuffer::getVertexColorFormat().
 	 */
-	const NLMISC::CVector*	getVertexCoordPointer(uint idx=0) const;
-	const NLMISC::CVector*	getNormalCoordPointer(uint idx=0) const;
+	const NLMISC::CVectorPacked*	getVertexCoordPointer(uint idx=0) const;
+	const NLMISC::CVectorPacked*	getNormalCoordPointer(uint idx=0) const;
 	const NLMISC::CUV*		getTexCoordPointer(uint idx=0, uint8 stage=0) const;
 	const void*				getColorPointer(uint idx=0) const;
 	const void*				getSpecularPointer(uint idx=0) const;
--- a/code/nel/include/nel/3d/zone_lighter.h
+++ b/code/nel/include/nel/3d/zone_lighter.h
@ -421,7 +421,7 @@ private:
 	  * The vector of water shapes is released then
 	  * \param bbox the bbox of the zone containing the water shapes
 	  */
-	void makeQuadGridFromWaterShapes(NLMISC::CAABBox zoneBBox);
+	void makeQuadGridFromWaterShapes(const NLMISC::CAABBox &zoneBBox);


 	/** For each tile of the current zone, check whether it below or above water.
--- a/code/nel/include/nel/ligo/primitive.h
+++ b/code/nel/include/nel/ligo/primitive.h
@ -523,7 +523,7 @@ public:

 	std::vector<CPrimVector>	VPoints;

-	static float getSegmentDist(const NLMISC::CVector v, const NLMISC::CVector &p1, const NLMISC::CVector &p2, NLMISC::CVector &nearPos);
+	static float getSegmentDist(const NLMISC::CVector &v, const NLMISC::CVector &p1, const NLMISC::CVector &p2, NLMISC::CVector &nearPos);

 public:

--- a/code/nel/include/nel/misc/vector.h
+++ b/code/nel/include/nel/misc/vector.h
@ -28,6 +28,7 @@ namespace	NLMISC
 {

 class IStream;
+class CVectorPacked;

 // ======================================================================================================
 /**
@ -36,10 +37,14 @@ class IStream;
 * \author Nevrax France
 * \date 2000
 */
-class CVector
+class NL_ALIGN_SSE2 CVector
 {
 public:		// Attributes.
+#ifdef NL_HAS_SSE2
+	union { struct { float x, y, z; }; __m128 mm; };
+#else
 	float	x,y,z;
+#endif

 public:		// const.
 	/// Null vector (0,0,0).
@ -55,11 +60,15 @@ public:		// Methods.
 	/// @name Object.
 	//@{
 	/// Constructor which does nothing.
-	CVector() {}
+	CVector() { /* if (((uintptr_t)(void *)(this) & 0xF) != 0) nlerror("Vector alignment error"); */ }
 	/// Constructor .
-	CVector(float	_x, float _y, float _z) : x(_x), y(_y), z(_z) {}
+	CVector(float	_x, float _y, float _z) : x(_x), y(_y), z(_z) { /* if (((uintptr_t)(void *)(this) & 0xF) != 0) nlerror("Vector alignment error"); */ }
 	/// Copy Constructor.
-	CVector(const CVector &v) : x(v.x), y(v.y), z(v.z) {}
+#ifdef NL_HAS_SSE2
+	CVector(const CVector &v) : mm(v.mm) { /* if (((uintptr_t)(void *)(this) & 0xF) != 0) nlerror("Vector alignment error"); */ }
+#else
+	CVector(const CVector &v) : x(v.x), y(v.y), z(v.z) { /* if (((uintptr_t)(void *)(this) & 0xF) != 0) nlerror("Vector alignment error"); */ }
+#endif
 	//@}

 	/// @name Base Maths.
@ -134,6 +143,99 @@ public:		// Methods.

 	// friends.
 	friend	CVector	operator*(float f, const CVector &v0);
+
+	operator const CVectorPacked &() const
+	{
+		return reinterpret_cast<const CVectorPacked &>(*this);
+	}
+};
+
+class CVectorPacked
+{
+public: // Attributes.
+	float	x,y,z;
+
+public:
+	/// @name Object.
+	//@{
+	/// Constructor which does nothing.
+	CVectorPacked() { }
+	/// Constructor .
+	CVectorPacked(float _x, float _y, float _z) : x(_x), y(_y), z(_z) {}
+	/// Copy Constructor.
+	CVectorPacked(const CVectorPacked &v) : x(v.x), y(v.y), z(v.z) {}
+	//@}
+
+	void set(float _x, float _y, float _z)
+	{
+		x = _x;
+		y = _y;
+		z = _z;
+	}
+
+	CVectorPacked &operator += (const CVectorPacked &v)
+	{
+		x += v.x;
+		y += v.y;
+		z += v.z;
+		return *this;
+	}
+
+	CVectorPacked &operator -= (const CVectorPacked &v)
+	{
+		x -= v.x;
+		y -= v.y;
+		z -= v.z;
+		return *this;
+	}
+
+	operator CVector () const
+	{
+		return CVector(x, y, z);
+	}
+
+	CVector	operator+(const CVector &v) const
+	{
+		return CVector(*this) + v;
+	}
+
+	CVector	operator-(const CVector &v) const
+	{
+		return CVector(*this) - v;
+	}
+
+	bool operator==(const CVectorPacked &v) const
+	{
+		return x==v.x && y==v.y && z==v.z;
+	}
+
+	bool operator!=(const CVectorPacked &v) const
+	{
+		return !(*this==v);
+	}
+
+	bool operator<(const CVectorPacked &v) const
+	{
+		if(x!=v.x)
+			return x<v.x;
+		if(y!=v.y)
+			return y<v.y;
+		return z<v.z;
+	}
+	
+	CVector	operator^(const CVector &v) const
+	{
+		CVector	ret;
+		ret.x= y*v.z - z*v.y;
+		ret.y= z*v.x - x*v.z;
+		ret.z= x*v.y - y*v.x;
+		return ret;
+	}
+
+	void serial(IStream &f)
+	{
+		f.serial(x, y, z);
+	}
 };

 // blend (faster version than the generic version found in algo.h)
@ -145,13 +247,25 @@ inline CVector blend(const CVector &v0, const CVector &v1, float lambda)
 				   invLambda * v0.z + lambda * v1.z);
 }

-
 }

+namespace std {
+	inline void swap(NLMISC::CVectorPacked &v1, NLMISC::CVector &v2)
+	{
+		NLMISC::CVectorPacked temp = v2;
+		v2 = NLMISC::CVector(v1);
+		v1 = temp;
+	}
+	inline void swap(NLMISC::CVector &v1,  NLMISC::CVectorPacked &v2)
+	{
+		NLMISC::CVectorPacked temp = v1;
+		v1 = NLMISC::CVector(v2);
+		v2 = temp;
+	}
+}

 #include "vector_inline.h"

-
 #endif // NL_VECTOR_H

 /* End of vector.h */
--- a/code/nel/include/nel/misc/vector_inline.h
+++ b/code/nel/include/nel/misc/vector_inline.h
@ -31,95 +31,196 @@ namespace	NLMISC
 // Base Maths.
 inline	CVector	&CVector::operator+=(const CVector &v)
 {
+#ifdef NL_HAS_SSE2
+	mm = _mm_add_ps(mm, v.mm);
+#else
 	x+=v.x;
 	y+=v.y;
 	z+=v.z;
+#endif
 	return *this;
 }
 inline	CVector	&CVector::operator-=(const CVector &v)
 {
+#ifdef NL_HAS_SSE2
+	mm = _mm_sub_ps(mm, v.mm);
+#else
 	x-=v.x;
 	y-=v.y;
 	z-=v.z;
+#endif
 	return *this;
 }
 inline	CVector	&CVector::operator*=(float f)
 {
+#ifdef NL_HAS_SSE2
+	mm = _mm_mul_ps(mm, _mm_set1_ps(f));
+#else
 	x*=f;
 	y*=f;
 	z*=f;
+#endif
 	return *this;
 }
 inline	CVector	&CVector::operator/=(float f)
 {
+#ifdef NL_HAS_SSE2
+	mm = _mm_mul_ps(mm, _mm_set1_ps(1.0f / f));
+	return *this;
+#else
 	return *this*= (1.0f/f);
+#endif
 }
 inline	CVector	CVector::operator+(const CVector &v) const
 {
+#ifdef NL_HAS_SSE2
+	CVector ret;
+	ret.mm = _mm_add_ps(mm, v.mm);
+	return ret;
+#else
 	CVector	ret(x+v.x, y+v.y, z+v.z);
 	return ret;
+#endif
 }
 inline	CVector	CVector::operator-(const CVector &v) const
 {
+#ifdef NL_HAS_SSE2
+	CVector ret;
+	ret.mm = _mm_sub_ps(mm, v.mm);
+	return ret;
+#else
 	CVector	ret(x-v.x, y-v.y, z-v.z);
 	return ret;
+#endif
 }
 inline	CVector	CVector::operator*(float f) const
 {
+#ifdef NL_HAS_SSE2
+	CVector ret;
+	ret.mm = _mm_mul_ps(mm, _mm_set1_ps(f));
+	return ret;
+#else
 	CVector	ret(x*f, y*f, z*f);
 	return ret;
+#endif
 }
 inline	CVector	CVector::operator/(float f) const
 {
+#ifdef NL_HAS_SSE2
+	CVector ret;
+	ret.mm = _mm_mul_ps(mm, _mm_set1_ps(1.0f / f));
+	return ret;
+#else
 	return *this*(1.0f/f);
+#endif
 }
 inline	CVector	CVector::operator-() const
 {
+#ifdef NL_HAS_SSE2
+	CVector ret;
+	ret.mm = _mm_mul_ps(mm, _mm_set1_ps(-1.0f));
+	return ret;
+#else
 	return CVector(-x,-y,-z);
+#endif
 }
 inline CVector	operator*(float f, const CVector &v)
 {
+#ifdef NL_HAS_SSE2
+	CVector ret;
+	ret.mm = _mm_mul_ps(_mm_set1_ps(f), v.mm);
+	return ret;
+#else
 	CVector	ret(v.x*f, v.y*f, v.z*f);
 	return ret;
+#endif
 }

+#ifdef NL_HAS_SSE2
+inline __m128 dotsplat(const __m128 &l, const __m128 &r)
+{
+	// TODO: _mm_hadd_ps SSE3
+	__m128 mult = _mm_mul_ps(l, r);
+	__m128 vx = _mm_shuffle_ps(mult, mult, _MM_SHUFFLE(0, 0, 0, 0));
+	__m128 vy = _mm_shuffle_ps(mult, mult, _MM_SHUFFLE(1, 1, 1, 1));
+	__m128 vz = _mm_shuffle_ps(mult, mult, _MM_SHUFFLE(2, 2, 2, 2));
+	__m128 result = _mm_add_ps(_mm_add_ps(vx, vy), vz);
+	return result;
+}
+#endif

 // ============================================================================================
 // Advanced Maths.
 inline	float	CVector::operator*(const CVector &v) const
 {
+#ifdef NL_HAS_SSE2
+	return _mm_cvtss_f32(dotsplat(mm, v.mm));
+#else
 	return x*v.x + y*v.y + z*v.z;
+#endif
 }
 inline	CVector	CVector::operator^(const CVector &v) const
 {
+#ifdef NL_HAS_SSE2
+	CVector ret;
+	__m128 l = _mm_shuffle_ps(mm, mm, _MM_SHUFFLE(3, 0, 2, 1));
+	__m128 r = _mm_shuffle_ps(v.mm, v.mm, _MM_SHUFFLE(3, 1, 0, 2));
+	__m128 mul1 = _mm_mul_ps(l, r);
+	l = _mm_shuffle_ps(mm, mm, _MM_SHUFFLE(3, 1, 0, 2));
+	r = _mm_shuffle_ps(v.mm, v.mm, _MM_SHUFFLE(3, 0, 2, 1));
+	__m128 mul2 = _mm_mul_ps(l, r);
+	ret.mm = _mm_sub_ps(mul1, mul2);
+	return ret;
+#else
 	CVector	ret;
-
 	ret.x= y*v.z - z*v.y;
 	ret.y= z*v.x - x*v.z;
 	ret.z= x*v.y - y*v.x;
-
 	return ret;
+#endif
 }
 inline	float	CVector::sqrnorm() const
 {
+#ifdef NL_HAS_SSE2
+	return _mm_cvtss_f32(dotsplat(mm, mm));
+#else
 	return (float)(x*x + y*y + z*z);
+#endif
 }
 inline	float	CVector::norm() const
 {
+#ifdef NL_HAS_SSE2
+	return sqrt(_mm_cvtss_f32(dotsplat(mm, mm)));
+#else
 	return (float)sqrt(x*x + y*y + z*z);
+#endif
 }
 inline	void	CVector::normalize()
 {
+#ifdef NL_HAS_SSE2
+	__m128 normsplat = _mm_sqrt_ps(dotsplat(mm, mm));
+	if (_mm_cvtss_f32(normsplat))
+		mm = _mm_div_ps(mm, normsplat);
+#else
 	float	n=norm();
 	if(n)
 		*this/=n;
+#endif
 }
 inline	CVector	CVector::normed() const
 {
+#ifdef NL_HAS_SSE2
+	CVector res(*this);	// start from a copy so a zero-length vector is returned unchanged, as in the scalar path
+	__m128 normsplat = _mm_sqrt_ps(dotsplat(mm, mm));	// vector length replicated in every lane
+	if (_mm_cvtss_f32(normsplat))	// skip the division when the length is zero
+		res.mm = _mm_div_ps(mm, normsplat);
+	return res;
+#else
 	CVector	ret;
 	ret= *this;
 	ret.normalize();
 	return ret;
+#endif // NL_HAS_SSE2
 }


@ -127,19 +228,35 @@ inline	CVector	CVector::normed() const
 // Misc.
 inline	void	CVector::set(float _x, float _y, float _z)
 {
+#ifdef NL_HAS_SSE2
+	mm = _mm_setr_ps(_x, _y, _z, 0.0f);
+#else
 	x=_x; y=_y; z=_z;
+#endif
 }
 inline	bool	CVector::operator==(const CVector &v) const
 {
+#ifdef NL_HAS_SSE2
+	return (_mm_movemask_ps(_mm_cmpeq_ps(mm, v.mm)) & 0x07) == 0x07;
+#else
 	return x==v.x && y==v.y && z==v.z;
+#endif
 }
 inline	bool	CVector::operator!=(const CVector &v) const
 {
+#ifdef NL_HAS_SSE2
+	return (_mm_movemask_ps(_mm_cmpneq_ps(mm, v.mm)) & 0x07) != 0;
+#else
 	return !(*this==v);
+#endif
 }
 inline	bool	CVector::isNull() const
 {
+#ifdef NL_HAS_SSE2
+	return (_mm_movemask_ps(_mm_cmpeq_ps(mm, _mm_setzero_ps())) & 0x07) == 0x07;
+#else
 	return *this==CVector::Null;
+#endif
 }
 inline	bool	CVector::operator<(const CVector &v) const
 {
@ -177,15 +294,23 @@ inline	void	CVector::sphericToCartesian(float r, float theta,float phi)
 }
 inline	void	CVector::minof(const CVector &a, const CVector &b)
 {
+#ifdef NL_HAS_SSE2
+	mm = _mm_min_ps(a.mm, b.mm);
+#else
 	x= std::min(a.x, b.x);
 	y= std::min(a.y, b.y);
 	z= std::min(a.z, b.z);
+#endif
 }
 inline	void	CVector::maxof(const CVector &a, const CVector &b)
 {
+#ifdef NL_HAS_SSE2
+	mm = _mm_max_ps(a.mm, b.mm);
+#else
 	x= std::max(a.x, b.x);
 	y= std::max(a.y, b.y);
 	z= std::max(a.z, b.z);
+#endif
 }
 inline	void	CVector::serial(IStream &f)
 {
--- a/code/nel/include/nel/pacs/chain_quad.h
+++ b/code/nel/include/nel/pacs/chain_quad.h
@ -81,7 +81,7 @@ public:
 	 * \param cst the array of CEdgeChainEntry to fill. contain also OChainLUT, an array for internal use. In: must be filled with 0xFFFF. Out: still filled with 0xFFFF.
 	 * \return number of edgechain found. stored in cst.EdgeChainEntries (array cleared first).
 	 */
-	sint			selectEdges(CVector start, CVector end, CCollisionSurfaceTemp &cst) const;
+	sint			selectEdges(const CVector &start, const CVector &end, CCollisionSurfaceTemp &cst) const;


 	/// serial.
--- a/code/nel/include/nel/pacs/edge_quad.h
+++ b/code/nel/include/nel/pacs/edge_quad.h
@ -92,7 +92,7 @@ public:
 	 * \param cst the array of CExteriorEdgeEntry to fill. contain also OChainLUT, an array for internal use. In: must be filled with 0xFFFF. Out: still filled with 0xFFFF.
 	 * \return number of exterioredge found. stored in cst.ExteriorEdgeEntries (array cleared first).
 	 */
-	sint			selectEdges(CVector start, CVector end, CCollisionSurfaceTemp &cst) const;
+	sint			selectEdges(const CVector &start, const CVector &end, CCollisionSurfaceTemp &cst) const;


 	/// Get the whole set of edge entries
--- a/code/nel/include/nel/pacs/local_retriever.h
+++ b/code/nel/include/nel/pacs/local_retriever.h
@ -548,12 +548,12 @@ public:
 	/**
 	 * Check all surfaces integrity
 	 */
-	bool								checkSurfacesIntegrity(NLMISC::CVector translation = NLMISC::CVector::Null, bool verbose = false) const;
+	bool								checkSurfacesIntegrity(const NLMISC::CVector &translation = NLMISC::CVector::Null, bool verbose = false) const;

 	/**
 	 * Check surface integrity
 	 */
-	bool								checkSurfaceIntegrity(uint surf, NLMISC::CVector translation = NLMISC::CVector::Null, bool verbose = false) const;
+	bool								checkSurfaceIntegrity(uint surf, const NLMISC::CVector &translation = NLMISC::CVector::Null, bool verbose = false) const;

 	// @}

@ -565,7 +565,7 @@ protected:
 	bool								insurePosition(ULocalPosition &local) const;

 	/// Retrieves a position inside the retriever (from the local position), returns true if the position is close to a border
-	void								retrievePosition(NLMISC::CVector estimated, CCollisionSurfaceTemp &cst) const;
+	void								retrievePosition(const NLMISC::CVector &estimated, CCollisionSurfaceTemp &cst) const;

 	/// Retrieves a position inside the retriever (from the local position), returns true if the position is close to a border
 	void								retrieveAccuratePosition(CVector2s estimated, CCollisionSurfaceTemp &cst, bool &onBorder) const;
--- a/code/nel/include/nel/pacs/quad_grid.h
+++ b/code/nel/include/nel/pacs/quad_grid.h
@ -187,11 +187,11 @@ private:// Atttributes.
 private:// Methods.

 	// return the coordinates on the grid of what include the bbox.
-	void		selectQuads(CVector bmin, CVector bmax, sint &x0, sint &x1, sint &y0, sint &y1)
+	void		selectQuads(const CVector &bminp, const CVector &bmaxp, sint &x0, sint &x1, sint &y0, sint &y1)
 	{
-		CVector		bminp, bmaxp;
-		bminp= bmin;
-		bmaxp= bmax;
+		CVector		bmin, bmax;
+		bmin= bminp;
+		bmax= bmaxp;
 		bmin.minof(bminp, bmaxp);
 		bmax.maxof(bminp, bmaxp);
 		bmin/= _EltSize;
--- a/code/nel/include/nel/sound/audio_mixer_user.h
+++ b/code/nel/include/nel/sound/audio_mixer_user.h
@ -471,7 +471,7 @@ protected:
 	/// Returns nb available tracks (or NULL)
 	void						getFreeTracks( uint nb, CTrack **tracks );
 	/// Fill a vector of position and mute flag for all playing sound source.
-	virtual void				getPlayingSoundsPos(bool virtualPos, std::vector<std::pair<bool, NLMISC::CVector> > &pos);
+	virtual void				getPlayingSoundsPos(bool virtualPos, std::vector<CPlayingSoundPos> &pos);

 	typedef CHashMap<NLMISC::TStringId, CControledSources, NLMISC::CStringIdHashMapTraits>	TUserVarControlsContainer;
 	/// Container for all user controler and currently controled playing source
--- a/code/nel/include/nel/sound/background_sound_manager.h
+++ b/code/nel/include/nel/sound/background_sound_manager.h
@ -258,19 +258,19 @@ private:
 	struct TSoundStatus
 	{
 		/// The data of the sound.
-		TSoundData			&SoundData;
+		TSoundData				&SoundData;
 		/// The position of the source.
-		NLMISC::CVector		Position;
+		NLMISC::CVectorPacked	Position;
 		/** The relative gain of the source. This is used for patatoid competition.when
 		  * a smaller patatoid mute bigger one.
 		  */
-		float				Gain;
+		float					Gain;
 		/// The distance beween listener and source.
-		float				Distance;
+		float					Distance;
 		/// flag if inside a sound zone
-		bool				Inside;
+		bool					Inside;
 		/// Constructor.
-		TSoundStatus(TSoundData &sd, NLMISC::CVector position, float gain, float distance, bool inside)
+		TSoundStatus(TSoundData &sd, const NLMISC::CVector &position, float gain, float distance, bool inside)
 			: SoundData(sd), Position(position), Gain(gain), Distance(distance), Inside(inside)
 		{}
 	};
--- a/code/nel/include/nel/sound/clustered_sound.h
+++ b/code/nel/include/nel/sound/clustered_sound.h
@ -59,11 +59,11 @@ public:
 		/// The ratio distance/max earing distance
 		float			DistFactor;
 		/// The sound virtual position (in fact Dist * Direction)
-		NLMISC::CVector	Position;
+		NLMISC::CVectorPacked	Position;
 		/// The blending factor between real sound pos and virtual pos (1 mean virtual pos, 0 mean real pos).
 		float			PosAlpha;
 		/// The direction vector for the virtual sound source.
-		NLMISC::CVector	Direction;
+		NLMISC::CVectorPacked	Direction;
 		/// The occlusion att.
 		sint32			Occlusion;
 		/// The occlusion LF factor (see EAX spec)
@ -97,18 +97,18 @@ public:
 		/// A blending factor to compute virtual source position.
 		float			Alpha;
 		/// The direction vector from listener to the first portal/cluster
-		NLMISC::CVector	Direction1;
+		NLMISC::CVectorPacked	Direction1;
 		/// The direction vector from the first portal/cluster to the second one.
-		NLMISC::CVector	Direction2;
+		NLMISC::CVectorPacked	Direction2;
 		/// The current blended direction used to place vitual source.
-		NLMISC::CVector	Direction;
+		NLMISC::CVectorPacked	Direction;
 		/// The previously traversed cluster. Used to stop back traversal.
 		NL3D::CCluster	*PreviousCluster;
 		/// The previous sound propagation vector
-		NLMISC::CVector	PreviousVector;
+		NLMISC::CVectorPacked	PreviousVector;

 		/// The last pseudo listener position
-		NLMISC::CVector	ListenerPos;
+		NLMISC::CVectorPacked	ListenerPos;

 		/// Constructor. Init all default value.
 		CSoundTravContext(const NLMISC::CVector &listenerPos,
@ -188,7 +188,7 @@ public:

 	const TClusterStatusMap &getAudibleClusters() {return _AudibleClusters;}

-	const std::vector<std::pair<NLMISC::CVector, NLMISC::CVector> >	&getAudioPath() { return _AudioPath;}
+	const std::vector<std::pair<NLMISC::CVectorPacked, NLMISC::CVectorPacked> >	&getAudioPath() { return _AudioPath;}



@ -248,7 +248,7 @@ private:
 	/// The last set environment size.
 	float					_LastEnvSize;
 	/// The segment of all the audio path.
-	std::vector<std::pair<NLMISC::CVector, NLMISC::CVector> >	_AudioPath;
+	std::vector<std::pair<NLMISC::CVectorPacked, NLMISC::CVectorPacked> >	_AudioPath;

 	typedef CHashMap<NLMISC::TStringId, CClusterSound, NLMISC::CStringIdHashMapTraits>	TClusterSoundCont;
 	/// The current cluster playing source indexed with sound group id
--- a/code/nel/include/nel/sound/u_audio_mixer.h
+++ b/code/nel/include/nel/sound/u_audio_mixer.h
@ -336,7 +336,13 @@ public:
 	virtual uint		getMutedPlayingSourcesCount() const = 0;
 	/// Return a string showing the playing sources
 	virtual std::string	getSourcesStats() const = 0;
-	virtual void		getPlayingSoundsPos(bool virtualPos, std::vector<std::pair<bool, NLMISC::CVector> > &pos) =0;
+	struct CPlayingSoundPos
+	{
+		CPlayingSoundPos(bool first_, const NLMISC::CVector &second_) : first(first_), second(second_) { }
+		bool first;
+		NLMISC::CVector second;
+	};
+	virtual void		getPlayingSoundsPos(bool virtualPos, std::vector<CPlayingSoundPos> &pos) =0;
 	/** Write profiling information about the mixer to the output stream.
 	 *  \param out The output stream to which to write the information
 	 */
--- a/code/nel/src/3d/cloud.cpp
+++ b/code/nel/src/3d/cloud.cpp
@ -120,10 +120,10 @@ void CCloud::generate (CNoise3d &noise)
 	{
 		CVertexBufferReadWrite vba;
 		rVB.lock (vba);
-		CVector *pVertices = vba.getVertexCoordPointer (0);
-		*pVertices = CVector(0.0f,				0.0f,				0.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize );
-		*pVertices = CVector((float)_NbW*_Width,0.0f,				0.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize );
-		*pVertices = CVector((float)_NbW*_Width,(float)_NbH*_Height,0.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize );
+		CVectorPacked *pVertices = vba.getVertexCoordPointer (0);
+		*pVertices = CVector(0.0f,				0.0f,				0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
+		*pVertices = CVector((float)_NbW*_Width,0.0f,				0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
+		*pVertices = CVector((float)_NbW*_Width,(float)_NbH*_Height,0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
 		*pVertices = CVector(0.0f,				(float)_NbH*_Height,0.0f);
 		_CloudScape->_MatClear.setColor (CRGBA(0,0,0,0));
 	}
@ -197,10 +197,10 @@ void CCloud::light ()
 	{
 		CVertexBufferReadWrite vba;
 		rVB.lock (vba);
-		CVector *pVertices = vba.getVertexCoordPointer (0);
-		*pVertices = CVector((float)0.0f,	(float)0.0f,	0.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize );
-		*pVertices = CVector((float)1.f,	(float)0.0f,	0.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize );
-		*pVertices = CVector((float)1.f,	(float)1.f,		0.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize );
+		CVectorPacked *pVertices = vba.getVertexCoordPointer (0);
+		*pVertices = CVector((float)0.0f,	(float)0.0f,	0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
+		*pVertices = CVector((float)1.f,	(float)0.0f,	0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
+		*pVertices = CVector((float)1.f,	(float)1.f,		0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
 		*pVertices = CVector((float)0.0f,	(float)1.f,		0.0f);
 	}

@ -340,10 +340,10 @@ void CCloud::reset (NL3D::CCamera *pViewer)
 		CVertexBufferReadWrite vba;
 		rVB.lock (vba);
 		uint32 nVSize = rVB.getVertexSize ();
-		CVector *pVertices = vba.getVertexCoordPointer (0);
-		*pVertices = CVector(0.0f, 0.0f, 0.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize );
-		*pVertices = CVector(5.0f, 0.0f, 0.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize );
-		*pVertices = CVector(5.0f, 5.0f, 0.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize );
+		CVectorPacked *pVertices = vba.getVertexCoordPointer (0);
+		*pVertices = CVector(0.0f, 0.0f, 0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
+		*pVertices = CVector(5.0f, 0.0f, 0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
+		*pVertices = CVector(5.0f, 5.0f, 0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
 		*pVertices = CVector(0.0f, 5.0f, 0.0f);
 		_CloudScape->_MatClear.setColor (CRGBA(0,0,0,0));
 	}
@ -469,7 +469,7 @@ void CCloud::dispXYZ (CMaterial *pMat)
 	float oneOverNbWNbH = 1.0f / (_NbW*_NbH);
 	CVertexBuffer &rVB = _CloudScape->_VertexBuffer;
 	uint32 nVSize = rVB.getVertexSize ();
-	CVector *pVertices;
+	CVectorPacked *pVertices;
 	CUV *pUV;
 	_Driver->activeVertexBuffer (rVB);

@ -487,9 +487,9 @@ void CCloud::dispXYZ (CMaterial *pMat)
 				rVB.lock (vba);

 				pVertices = vba.getVertexCoordPointer (0);
-				*pVertices = CVector(_Pos.x,			_Pos.y,			_Pos.z+_Size.z*(_NbW*_NbH-d)*oneOverNbWNbH); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize );
-				*pVertices = CVector(_Pos.x+_Size.x,	_Pos.y,			_Pos.z+_Size.z*(_NbW*_NbH-d)*oneOverNbWNbH); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize );
-				*pVertices = CVector(_Pos.x+_Size.x,	_Pos.y+_Size.y,	_Pos.z+_Size.z*(_NbW*_NbH-d)*oneOverNbWNbH); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize );
+				*pVertices = CVector(_Pos.x,			_Pos.y,			_Pos.z+_Size.z*(_NbW*_NbH-d)*oneOverNbWNbH); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
+				*pVertices = CVector(_Pos.x+_Size.x,	_Pos.y,			_Pos.z+_Size.z*(_NbW*_NbH-d)*oneOverNbWNbH); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
+				*pVertices = CVector(_Pos.x+_Size.x,	_Pos.y+_Size.y,	_Pos.z+_Size.z*(_NbW*_NbH-d)*oneOverNbWNbH); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
 				*pVertices = CVector(_Pos.x,			_Pos.y+_Size.y,	_Pos.z+_Size.z*(_NbW*_NbH-d)*oneOverNbWNbH);

 				pUV = vba.getTexCoordPointer (0, 0);
@ -512,10 +512,10 @@ void CCloud::dispXYZ (CMaterial *pMat)
 	{
 		CVertexBufferReadWrite vba;
 		rVB.lock (vba);
-		CVector *pVertices = vba.getVertexCoordPointer (0);
-		*pVertices = CVector((float)0.25f,	0, (float)0.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize );
-		*pVertices = CVector((float)0.75f,	0, (float)0.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize );
-		*pVertices = CVector((float)0.75f,	0, (float)0.75f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize );
+		CVectorPacked *pVertices = vba.getVertexCoordPointer (0);
+		*pVertices = CVector((float)0.25f,	0, (float)0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
+		*pVertices = CVector((float)0.75f,	0, (float)0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
+		*pVertices = CVector((float)0.75f,	0, (float)0.75f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
 		*pVertices = CVector((float)0.25f,	0, (float)0.75f);
 	}
 }
@ -664,10 +664,10 @@ void CCloud::genBill (CCamera *pCam, uint32 nBillSize)
 		CVertexBufferReadWrite vba;
 		rVB.lock (vba);
 		{
-			CVector *pVertices = vba.getVertexCoordPointer (0);
-			*pVertices = CVector(0.0f,	0.0f,	0.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize );
-			*pVertices = CVector(1.0f,	0.0f,	0.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize );
-			*pVertices = CVector(1.0f,	0.0f,	1.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize );
+			CVectorPacked *pVertices = vba.getVertexCoordPointer (0);
+			*pVertices = CVector(0.0f,	0.0f,	0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
+			*pVertices = CVector(1.0f,	0.0f,	0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
+			*pVertices = CVector(1.0f,	0.0f,	1.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
 			*pVertices = CVector(0.0f,	0.0f,	1.0f);
 		}
 	}
@ -782,10 +782,10 @@ void CCloud::dispBill (CCamera *pCam)
 		rVB.lock (vba);

 		uint32 nVSize = rVB.getVertexSize ();
-		CVector *pVertices = vba.getVertexCoordPointer (0);
-		*pVertices = qc.V0; pVertices = (CVector*)( ((uint8*)pVertices) + nVSize );
-		*pVertices = qc.V1; pVertices = (CVector*)( ((uint8*)pVertices) + nVSize );
-		*pVertices = qc.V2; pVertices = (CVector*)( ((uint8*)pVertices) + nVSize );
+		CVectorPacked *pVertices = vba.getVertexCoordPointer (0);
+		*pVertices = qc.V0; pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
+		*pVertices = qc.V1; pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
+		*pVertices = qc.V2; pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
 		*pVertices = qc.V3;

 		CUV *pUV = vba.getTexCoordPointer (0, 0);
--- a/code/nel/src/3d/computed_string.cpp
+++ b/code/nel/src/3d/computed_string.cpp
@ -30,6 +30,7 @@
 #include "nel/misc/fast_mem.h"

 using namespace std;
+using NLMISC::CVectorPacked;

 namespace NL3D {

@ -270,9 +271,9 @@ void CComputedString::render2DClip (IDriver& driver, CRenderStringBuffer &rdrBuf
 			// copy and translate pos
 			CHECK_VBA_RANGE(srcvba, srcPtr, Vertices.getVertexSize());
 			CHECK_VBA_RANGE(dstvba, dstPtr, rdrBuffer.Vertices.getVertexSize())
-			((CVector*)dstPtr)->x= x + ((CVector*)srcPtr)->x;
-			((CVector*)dstPtr)->y= ((CVector*)srcPtr)->y;
-			((CVector*)dstPtr)->z= z + ((CVector*)srcPtr)->z;
+			((CVectorPacked*)dstPtr)->x= x + ((CVectorPacked*)srcPtr)->x;
+			((CVectorPacked*)dstPtr)->y= ((CVectorPacked*)srcPtr)->y;
+			((CVectorPacked*)dstPtr)->z= z + ((CVectorPacked*)srcPtr)->z;
 			// uv
 			*((CUV*)(dstPtr+ofsDstUV))= *((CUV*)(srcPtr+ofsSrcUV));
 			// color
@ -298,12 +299,12 @@ void CComputedString::render2DClip (IDriver& driver, CRenderStringBuffer &rdrBuf
 		uint	numVerts= nNumQuadSrc*4;

 		// clip into VerticesClipped
-		CVector *pIniPos0 = (CVector*)srcPtr;
-		CVector *pIniPos2 = (CVector*)(((uint8*)pIniPos0) + srcSize*2);
-		CVector *pClipPos0 = (CVector*)dstPtr;
-		CVector *pClipPos1 = (CVector*)(((uint8*)pClipPos0) + dstSize);
-		CVector *pClipPos2 = (CVector*)(((uint8*)pClipPos1) + dstSize);
-		CVector *pClipPos3 = (CVector*)(((uint8*)pClipPos2) + dstSize);
+		CVectorPacked *pIniPos0 = (CVectorPacked*)srcPtr;
+		CVectorPacked *pIniPos2 = (CVectorPacked*)(((uint8*)pIniPos0) + srcSize*2);
+		CVectorPacked *pClipPos0 = (CVectorPacked*)dstPtr;
+		CVectorPacked *pClipPos1 = (CVectorPacked*)(((uint8*)pClipPos0) + dstSize);
+		CVectorPacked *pClipPos2 = (CVectorPacked*)(((uint8*)pClipPos1) + dstSize);
+		CVectorPacked *pClipPos3 = (CVectorPacked*)(((uint8*)pClipPos2) + dstSize);
 		CUV *pClipUV0 = (CUV*)(dstPtr + ofsDstUV );
 		CUV *pClipUV1 = (CUV*)(((uint8*)pClipUV0) + dstSize);
 		CUV *pClipUV2 = (CUV*)(((uint8*)pClipUV1) + dstSize);
@ -336,28 +337,28 @@ void CComputedString::render2DClip (IDriver& driver, CRenderStringBuffer &rdrBuf

 				// copy with no clip
 				// v0
-				*((CVector*) (dstPtr + dstSize*0))= *((CVector*) (srcPtr + srcSize*0));
+				*((CVectorPacked*) (dstPtr + dstSize*0))= *((CVectorPacked*) (srcPtr + srcSize*0));
 				*((CUV*)	 (dstPtr + dstSize*0 + ofsDstUV))= *((CUV*)(srcPtr + srcSize*0 + ofsSrcUV));
 				if (vtype == CVertexBuffer::TRGBA)
 					*((CRGBA*)	 (dstPtr + dstSize*0 + ofsDstColor))= mCol;
 				else
 					*((CBGRA*)	 (dstPtr + dstSize*0 + ofsDstColor))= mCol;
 				// v1
-				*((CVector*) (dstPtr + dstSize*1))= *((CVector*) (srcPtr + srcSize*1));
+				*((CVectorPacked*) (dstPtr + dstSize*1))= *((CVectorPacked*) (srcPtr + srcSize*1));
 				*((CUV*)	 (dstPtr + dstSize*1 + ofsDstUV))= *((CUV*)(srcPtr + srcSize*1 + ofsSrcUV));
 				if (vtype == CVertexBuffer::TRGBA)
 					*((CRGBA*)	 (dstPtr + dstSize*1 + ofsDstColor))= mCol;
 				else
 					*((CBGRA*)	 (dstPtr + dstSize*1 + ofsDstColor))= mCol;
 				// v2
-				*((CVector*) (dstPtr + dstSize*2))= *((CVector*) (srcPtr + srcSize*2));
+				*((CVectorPacked*) (dstPtr + dstSize*2))= *((CVectorPacked*) (srcPtr + srcSize*2));
 				*((CUV*)	 (dstPtr + dstSize*2 + ofsDstUV))= *((CUV*)(srcPtr + srcSize*2 + ofsSrcUV));
 				if (vtype == CVertexBuffer::TRGBA)
 					*((CRGBA*)	 (dstPtr + dstSize*2 + ofsDstColor))= mCol;
 				else
 					*((CBGRA*)	 (dstPtr + dstSize*2 + ofsDstColor))= mCol;
 				// v3
-				*((CVector*) (dstPtr + dstSize*3))= *((CVector*) (srcPtr + srcSize*3));
+				*((CVectorPacked*) (dstPtr + dstSize*3))= *((CVectorPacked*) (srcPtr + srcSize*3));
 				*((CUV*)	 (dstPtr + dstSize*3 + ofsDstUV))= *((CUV*)(srcPtr + srcSize*3 + ofsSrcUV));
 				if (vtype == CVertexBuffer::TRGBA)
 					*((CRGBA*)	 (dstPtr + dstSize*3 + ofsDstColor))= mCol;
@ -410,10 +411,10 @@ void CComputedString::render2DClip (IDriver& driver, CRenderStringBuffer &rdrBuf

 				// next quad out
 				++nNumQuadClipped;
-				pClipPos0 = (CVector*)(((uint8*)pClipPos0) + dstSize*4);
-				pClipPos1 = (CVector*)(((uint8*)pClipPos0) + dstSize);
-				pClipPos2 = (CVector*)(((uint8*)pClipPos1) + dstSize);
-				pClipPos3 = (CVector*)(((uint8*)pClipPos2) + dstSize);
+				pClipPos0 = (CVectorPacked*)(((uint8*)pClipPos0) + dstSize*4);
+				pClipPos1 = (CVectorPacked*)(((uint8*)pClipPos0) + dstSize);
+				pClipPos2 = (CVectorPacked*)(((uint8*)pClipPos1) + dstSize);
+				pClipPos3 = (CVectorPacked*)(((uint8*)pClipPos2) + dstSize);
 				pClipUV0 = (CUV*)( ((uint8*)pClipUV0) + dstSize*4 );
 				pClipUV1 = (CUV*)(((uint8*)pClipUV0) + dstSize);
 				pClipUV2 = (CUV*)(((uint8*)pClipUV1) + dstSize);
@ -421,8 +422,8 @@ void CComputedString::render2DClip (IDriver& driver, CRenderStringBuffer &rdrBuf
 				dstPtr+=  4*dstSize;
 			}
 			// next quad in
-			pIniPos0 = (CVector*)(((uint8*)pIniPos0) + srcSize*4);
-			pIniPos2 = (CVector*)(((uint8*)pIniPos0) + srcSize*2);
+			pIniPos0 = (CVectorPacked*)(((uint8*)pIniPos0) + srcSize*4);
+			pIniPos2 = (CVectorPacked*)(((uint8*)pIniPos0) + srcSize*2);
 			srcPtr+=  4*srcSize;
 		}

@ -506,8 +507,8 @@ void CComputedString::render2DUnProjected (IDriver& driver, CRenderStringBuffer
 			// copy and translate pos
 			CHECK_VBA_RANGE(dstvba, dstPtr, Vertices.getVertexSize());
 			CHECK_VBA_RANGE(srcvba, srcPtr, rdrBuffer.Vertices.getVertexSize());
-			((CVector*)dstPtr)->x= x + ((CVector*)srcPtr)->x;
-			((CVector*)dstPtr)->z= z + ((CVector*)srcPtr)->z;
+			((CVectorPacked*)dstPtr)->x= x + ((CVectorPacked*)srcPtr)->x;
+			((CVectorPacked*)dstPtr)->z= z + ((CVectorPacked*)srcPtr)->z;

 			// uv
 			*((CUV*)(dstPtr+ofsDstUV))= *((CUV*)(srcPtr+ofsSrcUV));
@ -533,12 +534,12 @@ void CComputedString::render2DUnProjected (IDriver& driver, CRenderStringBuffer
 		uint	numVerts= nNumQuadSrc*4;

 		// clip into VerticesClipped
-		CVector *pIniPos0 = (CVector*)srcPtr;
-		CVector *pIniPos2 = (CVector*)(((uint8*)pIniPos0) + srcSize*2);
-		CVector *pClipPos0 = (CVector*)dstPtr;
-		CVector *pClipPos1 = (CVector*)(((uint8*)pClipPos0) + dstSize);
-		CVector *pClipPos2 = (CVector*)(((uint8*)pClipPos1) + dstSize);
-		CVector *pClipPos3 = (CVector*)(((uint8*)pClipPos2) + dstSize);
+		CVectorPacked *pIniPos0 = (CVectorPacked*)srcPtr;
+		CVectorPacked *pIniPos2 = (CVectorPacked*)(((uint8*)pIniPos0) + srcSize*2);
+		CVectorPacked *pClipPos0 = (CVectorPacked*)dstPtr;
+		CVectorPacked *pClipPos1 = (CVectorPacked*)(((uint8*)pClipPos0) + dstSize);
+		CVectorPacked *pClipPos2 = (CVectorPacked*)(((uint8*)pClipPos1) + dstSize);
+		CVectorPacked *pClipPos3 = (CVectorPacked*)(((uint8*)pClipPos2) + dstSize);
 		CUV *pClipUV0 = (CUV*)(dstPtr + ofsDstUV );
 		CUV *pClipUV1 = (CUV*)(((uint8*)pClipUV0) + dstSize);
 		CUV *pClipUV2 = (CUV*)(((uint8*)pClipUV1) + dstSize);
@ -555,28 +556,28 @@ void CComputedString::render2DUnProjected (IDriver& driver, CRenderStringBuffer
 			{
 				// copy with no clip
 				// v0
-				*((CVector*) (dstPtr + dstSize*0))= *((CVector*) (srcPtr + srcSize*0));
+				*((CVectorPacked*) (dstPtr + dstSize*0))= *((CVectorPacked*) (srcPtr + srcSize*0));
 				*((CUV*)	 (dstPtr + dstSize*0 + ofsDstUV))= *((CUV*)(srcPtr + srcSize*0 + ofsSrcUV));
 				if (vtype == CVertexBuffer::TRGBA)
 					*((CRGBA*)	 (dstPtr + dstSize*0 + ofsDstColor))= Color;
 				else
 					*((CBGRA*)	 (dstPtr + dstSize*0 + ofsDstColor))= Color;
 				// v1
-				*((CVector*) (dstPtr + dstSize*1))= *((CVector*) (srcPtr + srcSize*1));
+				*((CVectorPacked*) (dstPtr + dstSize*1))= *((CVectorPacked*) (srcPtr + srcSize*1));
 				*((CUV*)	 (dstPtr + dstSize*1 + ofsDstUV))= *((CUV*)(srcPtr + srcSize*1 + ofsSrcUV));
 				if (vtype == CVertexBuffer::TRGBA)
 					*((CRGBA*)	 (dstPtr + dstSize*1 + ofsDstColor))= Color;
 				else
 					*((CBGRA*)	 (dstPtr + dstSize*1 + ofsDstColor))= Color;
 				// v2
-				*((CVector*) (dstPtr + dstSize*2))= *((CVector*) (srcPtr + srcSize*2));
+				*((CVectorPacked*) (dstPtr + dstSize*2))= *((CVectorPacked*) (srcPtr + srcSize*2));
 				*((CUV*)	 (dstPtr + dstSize*2 + ofsDstUV))= *((CUV*)(srcPtr + srcSize*2 + ofsSrcUV));
 				if (vtype == CVertexBuffer::TRGBA)
 					*((CRGBA*)	 (dstPtr + dstSize*2 + ofsDstColor))= Color;
 				else
 					*((CBGRA*)	 (dstPtr + dstSize*2 + ofsDstColor))= Color;
 				// v3
-				*((CVector*) (dstPtr + dstSize*3))= *((CVector*) (srcPtr + srcSize*3));
+				*((CVectorPacked*) (dstPtr + dstSize*3))= *((CVectorPacked*) (srcPtr + srcSize*3));
 				*((CUV*)	 (dstPtr + dstSize*3 + ofsDstUV))= *((CUV*)(srcPtr + srcSize*3 + ofsSrcUV));
 				if (vtype == CVertexBuffer::TRGBA)
 					*((CRGBA*)	 (dstPtr + dstSize*3 + ofsDstColor))= Color;
@ -630,10 +631,10 @@ void CComputedString::render2DUnProjected (IDriver& driver, CRenderStringBuffer

 				// next quad out
 				++nNumQuadClipped;
-				pClipPos0 = (CVector*)(((uint8*)pClipPos0) + dstSize*4);
-				pClipPos1 = (CVector*)(((uint8*)pClipPos0) + dstSize);
-				pClipPos2 = (CVector*)(((uint8*)pClipPos1) + dstSize);
-				pClipPos3 = (CVector*)(((uint8*)pClipPos2) + dstSize);
+				pClipPos0 = (CVectorPacked*)(((uint8*)pClipPos0) + dstSize*4);
+				pClipPos1 = (CVectorPacked*)(((uint8*)pClipPos0) + dstSize);
+				pClipPos2 = (CVectorPacked*)(((uint8*)pClipPos1) + dstSize);
+				pClipPos3 = (CVectorPacked*)(((uint8*)pClipPos2) + dstSize);
 				pClipUV0 = (CUV*)( ((uint8*)pClipUV0) + dstSize*4 );
 				pClipUV1 = (CUV*)(((uint8*)pClipUV0) + dstSize);
 				pClipUV2 = (CUV*)(((uint8*)pClipUV1) + dstSize);
@ -641,8 +642,8 @@ void CComputedString::render2DUnProjected (IDriver& driver, CRenderStringBuffer
 				dstPtr+=  4*dstSize;
 			}
 			// next quad in
-			pIniPos0 = (CVector*)(((uint8*)pIniPos0) + srcSize*4);
-			pIniPos2 = (CVector*)(((uint8*)pIniPos0) + srcSize*2);
+			pIniPos0 = (CVectorPacked*)(((uint8*)pIniPos0) + srcSize*4);
+			pIniPos2 = (CVectorPacked*)(((uint8*)pIniPos0) + srcSize*2);
 			srcPtr+=  4*srcSize;
 		}

@ -657,13 +658,13 @@ void CComputedString::render2DUnProjected (IDriver& driver, CRenderStringBuffer
 	{
 		// preset unprojection
 		CVector tmp;
-		tmp.x = ((CVector*)dstPtrBackup)->x * OOW;
-		tmp.y = ((CVector*)dstPtrBackup)->z * OOH;
+		tmp.x = ((CVectorPacked*)dstPtrBackup)->x * OOW;
+		tmp.y = ((CVectorPacked*)dstPtrBackup)->z * OOH;
 		tmp.z = depth;
 		// mul by user scale matrix
 		tmp= scaleMatrix * tmp;
 		// Unproject it
-		*((CVector*)dstPtrBackup) = frustum.unProjectZ(tmp);
+		*((CVectorPacked*)dstPtrBackup) = frustum.unProjectZ(tmp);
 		dstPtrBackup += dstSize;
 	}

--- a/code/nel/src/3d/driver_user.cpp
+++ b/code/nel/src/3d/driver_user.cpp
@ -920,29 +920,29 @@ void			CDriverUser::drawQuads(const NLMISC::CQuadColorUV *quads, uint32 nbQuads,
 			for (uint32 i = 0; i < nbQuads; ++i)
 			{
 				const NLMISC::CQuadColorUV &qcuv = quads[i];
-				CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVector))
-				*(CVector*)(dstPtr+0)= qcuv.V0;
+				CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVectorPacked))
+				*(CVectorPacked*)(dstPtr+0)= qcuv.V0;
 				CHECK_VBA_RANGE(vba, dstPtr+uvOfs, sizeof(CUV))
 				*(CUV*)(dstPtr+uvOfs)= qcuv.Uv0;
 				CHECK_VBA_RANGE(vba, dstPtr+colorOfs, sizeof(CRGBA))
 				*(CRGBA*)(dstPtr+colorOfs)= qcuv.Color0;
 				dstPtr+= vSize;
-				CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVector))
-				*(CVector*)(dstPtr+0)= qcuv.V1;
+				CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVectorPacked))
+				*(CVectorPacked*)(dstPtr+0)= qcuv.V1;
 				CHECK_VBA_RANGE(vba, dstPtr+uvOfs, sizeof(CUV))
 				*(CUV*)(dstPtr+uvOfs)= qcuv.Uv1;
 				CHECK_VBA_RANGE(vba, dstPtr+colorOfs, sizeof(CRGBA))
 				*(CRGBA*)(dstPtr+colorOfs)= qcuv.Color1;
 				dstPtr+= vSize;
-				CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVector))
-				*(CVector*)(dstPtr+0)= qcuv.V2;
+				CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVectorPacked))
+				*(CVectorPacked*)(dstPtr+0)= qcuv.V2;
 				CHECK_VBA_RANGE(vba, dstPtr+uvOfs, sizeof(CUV))
 				*(CUV*)(dstPtr+uvOfs)= qcuv.Uv2;
 				CHECK_VBA_RANGE(vba, dstPtr+colorOfs, sizeof(CRGBA))
 				*(CRGBA*)(dstPtr+colorOfs)= qcuv.Color2;
 				dstPtr+= vSize;
-				CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVector))
-				*(CVector*)(dstPtr+0)= qcuv.V3;
+				CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVectorPacked))
+				*(CVectorPacked*)(dstPtr+0)= qcuv.V3;
 				CHECK_VBA_RANGE(vba, dstPtr+uvOfs, sizeof(CUV))
 				*(CUV*)(dstPtr+uvOfs)= qcuv.Uv3;
 				CHECK_VBA_RANGE(vba, dstPtr+colorOfs, sizeof(CRGBA))
@ -955,29 +955,29 @@ void			CDriverUser::drawQuads(const NLMISC::CQuadColorUV *quads, uint32 nbQuads,
 			for (uint32 i = 0; i < nbQuads; ++i)
 			{
 				const NLMISC::CQuadColorUV &qcuv = quads[i];
-				CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVector))
-				*(CVector*)(dstPtr+0)= qcuv.V0;
+				CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVectorPacked))
+				*(CVectorPacked*)(dstPtr+0)= qcuv.V0;
 				CHECK_VBA_RANGE(vba, dstPtr+uvOfs, sizeof(CUV))
 				*(CUV*)(dstPtr+uvOfs)= qcuv.Uv0;
 				CHECK_VBA_RANGE(vba, dstPtr+colorOfs, sizeof(CRGBA))
 				*(CBGRA*)(dstPtr+colorOfs)= qcuv.Color0;
 				dstPtr+= vSize;
-				CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVector))
-				*(CVector*)(dstPtr+0)= qcuv.V1;
+				CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVectorPacked))
+				*(CVectorPacked*)(dstPtr+0)= qcuv.V1;
 				CHECK_VBA_RANGE(vba, dstPtr+uvOfs, sizeof(CUV))
 				*(CUV*)(dstPtr+uvOfs)= qcuv.Uv1;
 				CHECK_VBA_RANGE(vba, dstPtr+colorOfs, sizeof(CRGBA))
 				*(CBGRA*)(dstPtr+colorOfs)= qcuv.Color1;
 				dstPtr+= vSize;
-				CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVector))
-				*(CVector*)(dstPtr+0)= qcuv.V2;
+				CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVectorPacked))
+				*(CVectorPacked*)(dstPtr+0)= qcuv.V2;
 				CHECK_VBA_RANGE(vba, dstPtr+uvOfs, sizeof(CUV))
 				*(CUV*)(dstPtr+uvOfs)= qcuv.Uv2;
 				CHECK_VBA_RANGE(vba, dstPtr+colorOfs, sizeof(CBGRA))
 				*(CBGRA*)(dstPtr+colorOfs)= qcuv.Color2;
 				dstPtr+= vSize;
-				CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVector))
-				*(CVector*)(dstPtr+0)= qcuv.V3;
+				CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVectorPacked))
+				*(CVectorPacked*)(dstPtr+0)= qcuv.V3;
 				CHECK_VBA_RANGE(vba, dstPtr+uvOfs, sizeof(CUV))
 				*(CUV*)(dstPtr+uvOfs)= qcuv.Uv3;
 				CHECK_VBA_RANGE(vba, dstPtr+colorOfs, sizeof(CRGBA))
@ -1014,8 +1014,8 @@ void			CDriverUser::drawQuads(const NLMISC::CQuadColorUV2 *quads, uint32 nbQuads
 			for (uint32 i = 0; i < nbQuads; ++i)
 			{
 				const NLMISC::CQuadColorUV2 &qcuv = quads[i];
-				CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVector))
-				*(CVector*)(dstPtr+0)= qcuv.V0;
+				CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVectorPacked))
+				*(CVectorPacked*)(dstPtr+0)= qcuv.V0;
 				CHECK_VBA_RANGE(vba, dstPtr+uvOfs0, sizeof(CUV))
 				*(CUV*)(dstPtr+uvOfs0)= qcuv.Uv0;
 				CHECK_VBA_RANGE(vba, dstPtr+uvOfs1, sizeof(CUV))
@ -1023,8 +1023,8 @@ void			CDriverUser::drawQuads(const NLMISC::CQuadColorUV2 *quads, uint32 nbQuads
 				CHECK_VBA_RANGE(vba, dstPtr+colorOfs, sizeof(CRGBA))
 				*(CRGBA*)(dstPtr+colorOfs)= qcuv.Color0;
 				dstPtr+= vSize;
-				CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVector))
-				*(CVector*)(dstPtr+0)= qcuv.V1;
+				CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVectorPacked))
+				*(CVectorPacked*)(dstPtr+0)= qcuv.V1;
 				CHECK_VBA_RANGE(vba, dstPtr+uvOfs0, sizeof(CUV))
 				*(CUV*)(dstPtr+uvOfs0)= qcuv.Uv1;
 				CHECK_VBA_RANGE(vba, dstPtr+uvOfs1, sizeof(CUV))
@ -1032,8 +1032,8 @@ void			CDriverUser::drawQuads(const NLMISC::CQuadColorUV2 *quads, uint32 nbQuads
 				CHECK_VBA_RANGE(vba, dstPtr+colorOfs, sizeof(CRGBA))
 				*(CRGBA*)(dstPtr+colorOfs)= qcuv.Color1;
 				dstPtr+= vSize;
-				CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVector))
-				*(CVector*)(dstPtr+0)= qcuv.V2;
+				CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVectorPacked))
+				*(CVectorPacked*)(dstPtr+0)= qcuv.V2;
 				CHECK_VBA_RANGE(vba, dstPtr+uvOfs0, sizeof(CUV))
 				*(CUV*)(dstPtr+uvOfs0)= qcuv.Uv2;
 				CHECK_VBA_RANGE(vba, dstPtr+uvOfs1, sizeof(CUV))
@ -1041,8 +1041,8 @@ void			CDriverUser::drawQuads(const NLMISC::CQuadColorUV2 *quads, uint32 nbQuads
 				CHECK_VBA_RANGE(vba, dstPtr+colorOfs, sizeof(CRGBA))
 				*(CRGBA*)(dstPtr+colorOfs)= qcuv.Color2;
 				dstPtr+= vSize;
-				CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVector))
-				*(CVector*)(dstPtr+0)= qcuv.V3;
+				CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVectorPacked))
+				*(CVectorPacked*)(dstPtr+0)= qcuv.V3;
 				CHECK_VBA_RANGE(vba, dstPtr+uvOfs0, sizeof(CUV))
 				*(CUV*)(dstPtr+uvOfs0)= qcuv.Uv3;
 				CHECK_VBA_RANGE(vba, dstPtr+uvOfs1, sizeof(CUV))
@ -1057,8 +1057,8 @@ void			CDriverUser::drawQuads(const NLMISC::CQuadColorUV2 *quads, uint32 nbQuads
 			for (uint32 i = 0; i < nbQuads; ++i)
 			{
 				const NLMISC::CQuadColorUV2 &qcuv = quads[i];
-				CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVector))
-				*(CVector*)(dstPtr+0)= qcuv.V0;
+				CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVectorPacked))
+				*(CVectorPacked*)(dstPtr+0)= qcuv.V0;
 				CHECK_VBA_RANGE(vba, dstPtr+uvOfs0, sizeof(CUV))
 				*(CUV*)(dstPtr+uvOfs0)= qcuv.Uv0;
 				CHECK_VBA_RANGE(vba, dstPtr+uvOfs1, sizeof(CUV))
@ -1066,8 +1066,8 @@ void			CDriverUser::drawQuads(const NLMISC::CQuadColorUV2 *quads, uint32 nbQuads
 				CHECK_VBA_RANGE(vba, dstPtr+colorOfs, sizeof(CRGBA))
 				*(CBGRA*)(dstPtr+colorOfs)= qcuv.Color0;
 				dstPtr+= vSize;
-				CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVector))
-				*(CVector*)(dstPtr+0)= qcuv.V1;
+				CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVectorPacked))
+				*(CVectorPacked*)(dstPtr+0)= qcuv.V1;
 				CHECK_VBA_RANGE(vba, dstPtr+uvOfs0, sizeof(CUV))
 				*(CUV*)(dstPtr+uvOfs0)= qcuv.Uv1;
 				CHECK_VBA_RANGE(vba, dstPtr+uvOfs1, sizeof(CUV))
@ -1075,8 +1075,8 @@ void			CDriverUser::drawQuads(const NLMISC::CQuadColorUV2 *quads, uint32 nbQuads
 				CHECK_VBA_RANGE(vba, dstPtr+colorOfs, sizeof(CRGBA))
 				*(CBGRA*)(dstPtr+colorOfs)= qcuv.Color1;
 				dstPtr+= vSize;
-				CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVector))
-				*(CVector*)(dstPtr+0)= qcuv.V2;
+				CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVectorPacked))
+				*(CVectorPacked*)(dstPtr+0)= qcuv.V2;
 				CHECK_VBA_RANGE(vba, dstPtr+uvOfs0, sizeof(CUV))
 				*(CUV*)(dstPtr+uvOfs0)= qcuv.Uv2;
 				CHECK_VBA_RANGE(vba, dstPtr+uvOfs1, sizeof(CUV))
@ -1084,8 +1084,8 @@ void			CDriverUser::drawQuads(const NLMISC::CQuadColorUV2 *quads, uint32 nbQuads
 				CHECK_VBA_RANGE(vba, dstPtr+colorOfs, sizeof(CRGBA))
 				*(CBGRA*)(dstPtr+colorOfs)= qcuv.Color2;
 				dstPtr+= vSize;
-				CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVector))
-				*(CVector*)(dstPtr+0)= qcuv.V3;
+				CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVectorPacked))
+				*(CVectorPacked*)(dstPtr+0)= qcuv.V3;
 				CHECK_VBA_RANGE(vba, dstPtr+uvOfs0, sizeof(CUV))
 				*(CUV*)(dstPtr+uvOfs0)= qcuv.Uv3;
 				CHECK_VBA_RANGE(vba, dstPtr+uvOfs1, sizeof(CUV))
@ -1127,24 +1127,24 @@ void CDriverUser::drawTriangles(const NLMISC::CTriangleColorUV *tris, uint32 nbT
 			do
 			{
 				//
-				CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVector))
-				*(CVector*)(dstPtr+0)= tris->V0;
+				CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVectorPacked))
+				*(CVectorPacked*)(dstPtr+0)= tris->V0;
 				CHECK_VBA_RANGE(vba, dstPtr+uvOfs0, sizeof(CUV))
 				*(CUV*)(dstPtr+uvOfs0)= tris->Uv0;
 				CHECK_VBA_RANGE(vba, dstPtr+colorOfs, sizeof(CRGBA))
 				*(CRGBA*)(dstPtr+colorOfs)= tris->Color0;
 				dstPtr+= vSize;
 				//
-				CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVector))
-				*(CVector*)(dstPtr+0)= tris->V1;
+				CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVectorPacked))
+				*(CVectorPacked*)(dstPtr+0)= tris->V1;
 				CHECK_VBA_RANGE(vba, dstPtr+uvOfs0, sizeof(CUV))
 				*(CUV*)(dstPtr+uvOfs0)= tris->Uv1;
 				CHECK_VBA_RANGE(vba, dstPtr+colorOfs, sizeof(CRGBA))
 				*(CRGBA*)(dstPtr+colorOfs)= tris->Color1;
 				dstPtr+= vSize;
 				//
-				CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVector))
-				*(CVector*)(dstPtr+0)= tris->V2;
+				CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVectorPacked))
+				*(CVectorPacked*)(dstPtr+0)= tris->V2;
 				CHECK_VBA_RANGE(vba, dstPtr+uvOfs0, sizeof(CUV))
 				*(CUV*)(dstPtr+uvOfs0)= tris->Uv2;
 				CHECK_VBA_RANGE(vba, dstPtr+colorOfs, sizeof(CRGBA))
@ -1159,24 +1159,24 @@ void CDriverUser::drawTriangles(const NLMISC::CTriangleColorUV *tris, uint32 nbT
 			do
 			{
 				//
-				CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVector))
-				*(CVector*)(dstPtr+0)= tris->V0;
+				CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVectorPacked))
+				*(CVectorPacked*)(dstPtr+0)= tris->V0;
 				CHECK_VBA_RANGE(vba, dstPtr+uvOfs0, sizeof(CUV))
 				*(CUV*)(dstPtr+uvOfs0)= tris->Uv0;
 				CHECK_VBA_RANGE(vba, dstPtr+colorOfs, sizeof(CRGBA))
 				*(CBGRA*)(dstPtr+colorOfs)= tris->Color0;
 				dstPtr+= vSize;
 				//
-				CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVector))
-				*(CVector*)(dstPtr+0)= tris->V1;
+				CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVectorPacked))
+				*(CVectorPacked*)(dstPtr+0)= tris->V1;
 				CHECK_VBA_RANGE(vba, dstPtr+uvOfs0, sizeof(CUV))
 				*(CUV*)(dstPtr+uvOfs0)= tris->Uv1;
 				CHECK_VBA_RANGE(vba, dstPtr+colorOfs, sizeof(CRGBA))
 				*(CBGRA*)(dstPtr+colorOfs)= tris->Color1;
 				dstPtr+= vSize;
 				//
-				CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVector))
-				*(CVector*)(dstPtr+0)= tris->V2;
+				CHECK_VBA_RANGE(vba, dstPtr+0, sizeof(CVectorPacked))
+				*(CVectorPacked*)(dstPtr+0)= tris->V2;
 				CHECK_VBA_RANGE(vba, dstPtr+uvOfs0, sizeof(CUV))
 				*(CUV*)(dstPtr+uvOfs0)= tris->Uv2;
 				CHECK_VBA_RANGE(vba, dstPtr+colorOfs, sizeof(CRGBA))
--- a/code/nel/src/3d/lod_character_manager.cpp
+++ b/code/nel/src/3d/lod_character_manager.cpp
@ -676,7 +676,7 @@ bool			CLodCharacterManager::addRenderCharacterKey(CLodCharacterInstance &instan
 		{
 			// NB: order is important for AGP filling optimisation
 			// transform vertex, and store.
-			CVector		*dstVector= (CVector*)dstPtr;
+			CVectorPacked		*dstVector= (CVectorPacked*)dstPtr;
 			fVect.x= vertPtr->x; fVect.y= vertPtr->y; fVect.z= vertPtr->z;
 			++vertPtr;
 			dstVector->x= a00 * fVect.x + a01 * fVect.y + a02 * fVect.z + matPos.x;
--- a/code/nel/src/3d/mesh.cpp
+++ b/code/nel/src/3d/mesh.cpp
@ -1049,7 +1049,7 @@ bool	CMeshGeom::retrieveVertices(std::vector<NLMISC::CVector> &vertices) const
 		uint		vSize= vb.getVertexSize();
 		for(i=0;i<vertices.size();i++)
 		{
-			vertices[i]= *(const CVector*)pVert;
+			vertices[i]= *(const CVectorPacked*)pVert;
 			pVert+= vSize;
 		}
 	}
@ -1718,7 +1718,7 @@ void	CMeshGeom::bkupOriginalSkinVertices()
 		_OriginalTGSpace.resize(numVertices);
 		for(uint i=0; i<numVertices;i++)
 		{
-			_OriginalTGSpace[i]= *(CVector*)vba.getTexCoordPointer(i, tgSpaceStage);
+			_OriginalTGSpace[i]= *(CVectorPacked*)vba.getTexCoordPointer(i, tgSpaceStage);
 		}
 	}
 }
@ -1760,7 +1760,7 @@ void	CMeshGeom::restoreOriginalSkinVertices()
 		// copy tangent space vectors
 		for(uint i = 0; i < numVertices; ++i)
 		{
-			*(CVector*)vba.getTexCoordPointer(i, numTexCoords - 1)= _OriginalTGSpace[i];
+			*(CVectorPacked*)vba.getTexCoordPointer(i, numTexCoords - 1)= _OriginalTGSpace[i];
 		}
 	}

@ -1870,15 +1870,15 @@ void	CMeshGeom::applySkin(CSkeletonModel *skeleton)
 				nlassert(psPal->MatrixId[3]<IDriver::MaxModelMatrix);

 				// compute vertex part.
-				computeSoftwarePointSkinning(matrixes, srcVector, psPal, (float*)srcWgt, (CVector*)dstVector);
+				computeSoftwarePointSkinning(matrixes, srcVector, psPal, (float*)srcWgt, (CVectorPacked*)dstVector);

 				// compute normal part.
 				if(skinType>=SkinWithNormal)
-					computeSoftwareVectorSkinning(matrixes, srcNormal, psPal, (float*)srcWgt, (CVector*)dstNormal);
+					computeSoftwareVectorSkinning(matrixes, srcNormal, psPal, (float*)srcWgt, (CVectorPacked*)dstNormal);

 				// compute tg part.
 				if(skinType>=SkinWithTgSpace)
-					computeSoftwareVectorSkinning(matrixes, srcTgSpace, psPal, (float*)srcWgt, (CVector*)dstTgSpace);
+					computeSoftwareVectorSkinning(matrixes, srcTgSpace, psPal, (float*)srcWgt, (CVectorPacked*)dstTgSpace);
 			}

 			// inc flags.
@ -1938,42 +1938,48 @@ void	CMeshGeom::flagSkinVerticesForMatrixBlock(uint8 *skinFlags, CMatrixBlock &m


 // ***************************************************************************
-void	CMeshGeom::computeSoftwarePointSkinning(CMatrix3x4 *matrixes, CVector *srcVec, CPaletteSkin *srcPal, float *srcWgt, CVector *pDst)
+void	CMeshGeom::computeSoftwarePointSkinning(CMatrix3x4 *matrixes, CVector *srcVec, CPaletteSkin *srcPal, float *srcWgt, CVectorPacked *pDst)
 {
 	CMatrix3x4		*pMat;
+	CVector			temp;

 	// 0th matrix influence.
 	pMat= matrixes + srcPal->MatrixId[0];
-	pMat->mulSetPoint(*srcVec, srcWgt[0], *pDst);
+	pMat->mulSetPoint(*srcVec, srcWgt[0], temp);
 	// 1th matrix influence.
 	pMat= matrixes + srcPal->MatrixId[1];
-	pMat->mulAddPoint(*srcVec, srcWgt[1], *pDst);
+	pMat->mulAddPoint(*srcVec, srcWgt[1], temp);
 	// 2th matrix influence.
 	pMat= matrixes + srcPal->MatrixId[2];
-	pMat->mulAddPoint(*srcVec, srcWgt[2], *pDst);
+	pMat->mulAddPoint(*srcVec, srcWgt[2], temp);
 	// 3th matrix influence.
 	pMat= matrixes + srcPal->MatrixId[3];
-	pMat->mulAddPoint(*srcVec, srcWgt[3], *pDst);
+	pMat->mulAddPoint(*srcVec, srcWgt[3], temp);
+
+	*pDst = temp;
 }


 // ***************************************************************************
-void	CMeshGeom::computeSoftwareVectorSkinning(CMatrix3x4 *matrixes, CVector *srcVec, CPaletteSkin *srcPal, float *srcWgt, CVector *pDst)
+void	CMeshGeom::computeSoftwareVectorSkinning(CMatrix3x4 *matrixes, CVector *srcVec, CPaletteSkin *srcPal, float *srcWgt, CVectorPacked *pDst)
 {
 	CMatrix3x4		*pMat;
+	CVector			temp;

 	// 0th matrix influence.
 	pMat= matrixes + srcPal->MatrixId[0];
-	pMat->mulSetVector(*srcVec, srcWgt[0], *pDst);
+	pMat->mulSetVector(*srcVec, srcWgt[0], temp);
 	// 1th matrix influence.
 	pMat= matrixes + srcPal->MatrixId[1];
-	pMat->mulAddVector(*srcVec, srcWgt[1], *pDst);
+	pMat->mulAddVector(*srcVec, srcWgt[1], temp);
 	// 2th matrix influence.
 	pMat= matrixes + srcPal->MatrixId[2];
-	pMat->mulAddVector(*srcVec, srcWgt[2], *pDst);
+	pMat->mulAddVector(*srcVec, srcWgt[2], temp);
 	// 3th matrix influence.
 	pMat= matrixes + srcPal->MatrixId[3];
-	pMat->mulAddVector(*srcVec, srcWgt[3], *pDst);
+	pMat->mulAddVector(*srcVec, srcWgt[3], temp);
+
+	*pDst = temp;
 }


@ -2111,7 +2117,7 @@ void	CMeshGeom::buildShadowSkin()
 		for(uint i=0; i<numVertices;i++)
 		{
 			// Copy Vertex
-			_ShadowSkin.Vertices[i].Vertex= *((CVector*)srcVert);
+			_ShadowSkin.Vertices[i].Vertex= *((CVectorPacked*)srcVert);
 			// Suppose the 0 matrix inf is the highest (we are at least sure it is not 0)
 			// And SkinWeight Export show the 0th is the highest one...
 			_ShadowSkin.Vertices[i].MatrixId= ((CPaletteSkin*)srcPal)->MatrixId[0];
--- a/code/nel/src/3d/mesh_morpher.cpp
+++ b/code/nel/src/3d/mesh_morpher.cpp
@ -163,21 +163,21 @@ void CMeshMorpher::update (std::vector<CAnimatedMorph> *pBSFactor)
 			if (_VBDst->getVertexFormat() & CVertexBuffer::PositionFlag)
 			if (rBS.deltaPos.size() > 0)
 			{
-				CVector *pV = dstvba.getVertexCoordPointer (vp);
+				CVectorPacked *pV = dstvba.getVertexCoordPointer (vp);
 				*pV += rBS.deltaPos[j] * rFactor;
 			}

 			if (_VBDst->getVertexFormat() & CVertexBuffer::NormalFlag)
 			if (rBS.deltaNorm.size() > 0)
 			{
-				CVector *pV = dstvba.getNormalCoordPointer (vp);
+				CVectorPacked *pV = dstvba.getNormalCoordPointer (vp);
 				*pV += rBS.deltaNorm[j] * rFactor;
 			}

 			if (_UseTgSpace)
 			if (rBS.deltaTgSpace.size() > 0)
 			{
-				CVector *pV = (CVector*)dstvba.getTexCoordPointer (vp, tgSpaceStage);
+				CVectorPacked *pV = (CVectorPacked*)dstvba.getTexCoordPointer (vp, tgSpaceStage);
 				*pV += rBS.deltaTgSpace[j] * rFactor;
 			}

@ -264,13 +264,13 @@ void CMeshMorpher::updateSkinned (std::vector<CAnimatedMorph> *pBSFactor)
 			pDst[j+i*VBVertexSize] = pOri[j+i*VBVertexSize];

 		if (_Vertices != NULL)
-			_Vertices->operator[](i) = ((CVector*)(pOri+i*VBVertexSize))[0];
+			_Vertices->operator[](i) = ((CVectorPacked*)(pOri+i*VBVertexSize))[0];

 		if (_Normals != NULL)
-			_Normals->operator[](i) = ((CVector*)(pOri+i*VBVertexSize))[1];
+			_Normals->operator[](i) = ((CVectorPacked*)(pOri+i*VBVertexSize))[1];

 		if (_TgSpace != NULL)
-			(*_TgSpace)[i] = * (CVector*)(pOri + i * VBVertexSize + tgSpaceOff);
+			(*_TgSpace)[i] = * (CVectorPacked*)(pOri + i * VBVertexSize + tgSpaceOff);

 		_Flags[i] = OriginalVBDst;
 	}
@ -388,8 +388,8 @@ void CMeshMorpher::updateRawSkin (CVertexBuffer *vbOri,
 	{
 		if(*vRemap)
 		{
-			(*vRemap)->Pos= *(CVector*)(pOri);
-			(*vRemap)->Normal= *(CVector*)(pOri + NL3D_RAWSKIN_NORMAL_OFF);
+			(*vRemap)->Pos= *(CVectorPacked*)(pOri);
+			(*vRemap)->Normal= *(CVectorPacked*)(pOri + NL3D_RAWSKIN_NORMAL_OFF);
 			(*vRemap)->UV= *(CUV*)(pOri + NL3D_RAWSKIN_UV_OFF);
 		}
 		pOri+= NL3D_RAWSKIN_VERTEX_SIZE;
@ -420,9 +420,9 @@ void CMeshMorpher::updateRawSkin (CVertexBuffer *vbOri,
 				// If exist in this Lod RawSkin, apply
 				if(rsVert)
 				{
-					if(hasPos)
+					if(hasPos) // FIXME_SSE2: +=
 						rsVert->Pos+= rBS.deltaPos[j] * rFactor;
-					if(hasNorm)
+					if(hasNorm) // FIXME_SSE2: +=
 						rsVert->Normal+= rBS.deltaNorm[j] * rFactor;
 					if(hasUV)
 						rsVert->UV+= rBS.deltaUV[j] * rFactor;
--- a/code/nel/src/3d/mesh_mrm.cpp
+++ b/code/nel/src/3d/mesh_mrm.cpp
@ -2066,7 +2066,7 @@ void	CMeshMRMGeom::bkupOriginalSkinVerticesSubset(uint wedgeStart, uint wedgeEnd
 		_OriginalTGSpace.resize(_VBufferFinal.getNumVertices());
 		for(uint i=wedgeStart; i<wedgeEnd;i++)
 		{
-			_OriginalTGSpace[i]= *(CVector*)vba.getTexCoordPointer(i, tgSpaceStage);
+			_OriginalTGSpace[i]= *(CVectorPacked*)vba.getTexCoordPointer(i, tgSpaceStage);
 		}
 	}
 }
@ -2105,7 +2105,7 @@ void	CMeshMRMGeom::restoreOriginalSkinVertices()
 		// copy tangent space vectors
 		for(uint i = 0; i < _VBufferFinal.getNumVertices(); ++i)
 		{
-			*(CVector*)vba.getTexCoordPointer(i, numTexCoords - 1)= _OriginalTGSpace[i];
+			*(CVectorPacked*)vba.getTexCoordPointer(i, numTexCoords - 1)= _OriginalTGSpace[i];
 		}
 	}
 }
@ -2164,8 +2164,8 @@ void	CMeshMRMGeom::restoreOriginalSkinPart(CLod &lod)
 			CVector				*srcVertex= srcVertexPtr + index;
 			CVector				*srcNormal= srcNormalPtr + index;
 			uint8				*dstVertexVB= destVertexPtr + index * vertexSize;
-			CVector				*dstVertex= (CVector*)(dstVertexVB);
-			CVector				*dstNormal= (CVector*)(dstVertexVB + normalOff);
+			CVectorPacked		*dstVertex= (CVectorPacked*)(dstVertexVB);
+			CVectorPacked		*dstNormal= (CVectorPacked*)(dstVertexVB + normalOff);


 			// Vertex.
@ -2621,7 +2621,7 @@ bool	CMeshMRMGeom::buildGeometryForLod(uint lodId, std::vector<CVector> &vertice
 				// Final remaping of vertex to final index
 				vertexRemap[i]= dstIndex;
 				// copy to dest
-				*pDstVert= *(CVector*)pSrcVert;
+				*pDstVert= *(CVectorPacked*)pSrcVert;

 				// next dest
 				pDstVert++;
@ -3467,7 +3467,7 @@ sint			CMeshMRMGeom::renderShadowSkinGeom(CMeshMRMInstance	*mi, uint remainingVe
 	CLod	&lod= _Lods[_Lods.size()-1];
 	computeBoneMatrixes3x4(boneMat3x4, lod.MatrixInfluences, skeleton);

-	_ShadowSkin.applySkin((CVector*)vbDest, boneMat3x4);
+	_ShadowSkin.applySkin((CVectorPacked*)vbDest, boneMat3x4);


 	// How many vertices are added to the VBuffer ???
--- a/code/nel/src/3d/mesh_mrm_skin.cpp
+++ b/code/nel/src/3d/mesh_mrm_skin.cpp
@ -104,11 +104,13 @@ void	CMeshMRMGeom::applySkin(CLod &lod, const CSkeletonModel *skeleton)
 				CMesh::CSkinWeight	*srcSkin= srcSkinPtr + index;
 				CVector				*srcVertex= srcVertexPtr + index;
 				uint8				*dstVertexVB= destVertexPtr + index * vertexSize;
-				CVector				*dstVertex= (CVector*)(dstVertexVB);
+				CVectorPacked		*dstVertex= (CVectorPacked*)(dstVertexVB);


 				// Vertex.
-				boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, *dstVertex);
+				CVector temp;
+				boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, temp);
+				*dstVertex = temp;
 			}
 			break;

@ -121,12 +123,14 @@ void	CMeshMRMGeom::applySkin(CLod &lod, const CSkeletonModel *skeleton)
 				CMesh::CSkinWeight	*srcSkin= srcSkinPtr + index;
 				CVector				*srcVertex= srcVertexPtr + index;
 				uint8				*dstVertexVB= destVertexPtr + index * vertexSize;
-				CVector				*dstVertex= (CVector*)(dstVertexVB);
+				CVectorPacked		*dstVertex= (CVectorPacked*)(dstVertexVB);


 				// Vertex.
-				boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], *dstVertex);
-				boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], *dstVertex);
+				CVector temp;
+				boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], temp);
+				boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], temp);
+				*dstVertex = temp;
 			}
 			break;

@ -139,13 +143,15 @@ void	CMeshMRMGeom::applySkin(CLod &lod, const CSkeletonModel *skeleton)
 				CMesh::CSkinWeight	*srcSkin= srcSkinPtr + index;
 				CVector				*srcVertex= srcVertexPtr + index;
 				uint8				*dstVertexVB= destVertexPtr + index * vertexSize;
-				CVector				*dstVertex= (CVector*)(dstVertexVB);
+				CVectorPacked		*dstVertex= (CVectorPacked*)(dstVertexVB);


 				// Vertex.
-				boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], *dstVertex);
-				boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], *dstVertex);
-				boneMat3x4[ srcSkin->MatrixId[2] ].mulAddPoint( *srcVertex, srcSkin->Weights[2], *dstVertex);
+				CVector temp;
+				boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], temp);
+				boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], temp);
+				boneMat3x4[ srcSkin->MatrixId[2] ].mulAddPoint( *srcVertex, srcSkin->Weights[2], temp);
+				*dstVertex = temp;
 			}
 			break;

@ -158,14 +164,16 @@ void	CMeshMRMGeom::applySkin(CLod &lod, const CSkeletonModel *skeleton)
 				CMesh::CSkinWeight	*srcSkin= srcSkinPtr + index;
 				CVector				*srcVertex= srcVertexPtr + index;
 				uint8				*dstVertexVB= destVertexPtr + index * vertexSize;
-				CVector				*dstVertex= (CVector*)(dstVertexVB);
+				CVectorPacked		*dstVertex= (CVectorPacked*)(dstVertexVB);


 				// Vertex.
-				boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], *dstVertex);
-				boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], *dstVertex);
-				boneMat3x4[ srcSkin->MatrixId[2] ].mulAddPoint( *srcVertex, srcSkin->Weights[2], *dstVertex);
-				boneMat3x4[ srcSkin->MatrixId[3] ].mulAddPoint( *srcVertex, srcSkin->Weights[3], *dstVertex);
+				CVector temp;
+				boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], temp);
+				boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], temp);
+				boneMat3x4[ srcSkin->MatrixId[2] ].mulAddPoint( *srcVertex, srcSkin->Weights[2], temp);
+				boneMat3x4[ srcSkin->MatrixId[3] ].mulAddPoint( *srcVertex, srcSkin->Weights[3], temp);
+				*dstVertex = temp;
 			}
 			break;

--- a/code/nel/src/3d/mesh_mrm_skin_template.cpp
+++ b/code/nel/src/3d/mesh_mrm_skin_template.cpp
@ -94,14 +94,18 @@ static void	applyArraySkinNormalT(uint numMatrixes, uint32 *infPtr, CMesh::CSkin
 			CVector				*srcVertex= srcVertexPtr + index;
 			CVector				*srcNormal= srcNormalPtr + index;
 			uint8				*dstVertexVB= destVertexPtr + index * vertexSize;
-			CVector				*dstVertex= (CVector*)(dstVertexVB);
-			CVector				*dstNormal= (CVector*)(dstVertexVB + normalOff);
+			CVectorPacked		*dstVertex= (CVectorPacked*)(dstVertexVB);
+			CVectorPacked		*dstNormal= (CVectorPacked*)(dstVertexVB + normalOff);
+			CVector				tempVertex;
+			CVector				tempNormal;


 			// Vertex.
-			boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, *dstVertex);
+			boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, tempVertex);
+			*dstVertex = tempVertex;
 			// Normal.
-			boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcNormal, *dstNormal);
+			boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcNormal, tempNormal);
+			*dstNormal = tempNormal;
 		}
 		break;

@ -115,16 +119,20 @@ static void	applyArraySkinNormalT(uint numMatrixes, uint32 *infPtr, CMesh::CSkin
 			CVector				*srcVertex= srcVertexPtr + index;
 			CVector				*srcNormal= srcNormalPtr + index;
 			uint8				*dstVertexVB= destVertexPtr + index * vertexSize;
-			CVector				*dstVertex= (CVector*)(dstVertexVB);
-			CVector				*dstNormal= (CVector*)(dstVertexVB + normalOff);
+			CVectorPacked		*dstVertex= (CVectorPacked*)(dstVertexVB);
+			CVectorPacked		*dstNormal= (CVectorPacked*)(dstVertexVB + normalOff);
+			CVector				tempVertex;
+			CVector				tempNormal;


 			// Vertex.
-			boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], *dstVertex);
-			boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], *dstVertex);
+			boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], tempVertex);
+			boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], tempVertex);
+			*dstVertex = tempVertex;
 			// Normal.
-			boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcNormal, srcSkin->Weights[0], *dstNormal);
-			boneMat3x4[ srcSkin->MatrixId[1] ].mulAddVector( *srcNormal, srcSkin->Weights[1], *dstNormal);
+			boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcNormal, srcSkin->Weights[0], tempNormal);
+			boneMat3x4[ srcSkin->MatrixId[1] ].mulAddVector( *srcNormal, srcSkin->Weights[1], tempNormal);
+			*dstNormal = tempNormal;
 		}
 		break;

@ -138,18 +146,22 @@ static void	applyArraySkinNormalT(uint numMatrixes, uint32 *infPtr, CMesh::CSkin
 			CVector				*srcVertex= srcVertexPtr + index;
 			CVector				*srcNormal= srcNormalPtr + index;
 			uint8				*dstVertexVB= destVertexPtr + index * vertexSize;
-			CVector				*dstVertex= (CVector*)(dstVertexVB);
-			CVector				*dstNormal= (CVector*)(dstVertexVB + normalOff);
+			CVectorPacked		*dstVertex= (CVectorPacked*)(dstVertexVB);
+			CVectorPacked		*dstNormal= (CVectorPacked*)(dstVertexVB + normalOff);
+			CVector				tempVertex;
+			CVector				tempNormal;


 			// Vertex.
-			boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], *dstVertex);
-			boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], *dstVertex);
-			boneMat3x4[ srcSkin->MatrixId[2] ].mulAddPoint( *srcVertex, srcSkin->Weights[2], *dstVertex);
+			boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], tempVertex);
+			boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], tempVertex);
+			boneMat3x4[ srcSkin->MatrixId[2] ].mulAddPoint( *srcVertex, srcSkin->Weights[2], tempVertex);
+			*dstVertex = tempVertex;
 			// Normal.
-			boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcNormal, srcSkin->Weights[0], *dstNormal);
-			boneMat3x4[ srcSkin->MatrixId[1] ].mulAddVector( *srcNormal, srcSkin->Weights[1], *dstNormal);
-			boneMat3x4[ srcSkin->MatrixId[2] ].mulAddVector( *srcNormal, srcSkin->Weights[2], *dstNormal);
+			boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcNormal, srcSkin->Weights[0], tempNormal);
+			boneMat3x4[ srcSkin->MatrixId[1] ].mulAddVector( *srcNormal, srcSkin->Weights[1], tempNormal);
+			boneMat3x4[ srcSkin->MatrixId[2] ].mulAddVector( *srcNormal, srcSkin->Weights[2], tempNormal);
+			*dstNormal = tempNormal;
 		}
 		break;

@ -163,20 +175,24 @@ static void	applyArraySkinNormalT(uint numMatrixes, uint32 *infPtr, CMesh::CSkin
 			CVector				*srcVertex= srcVertexPtr + index;
 			CVector				*srcNormal= srcNormalPtr + index;
 			uint8				*dstVertexVB= destVertexPtr + index * vertexSize;
-			CVector				*dstVertex= (CVector*)(dstVertexVB);
-			CVector				*dstNormal= (CVector*)(dstVertexVB + normalOff);
+			CVectorPacked		*dstVertex= (CVectorPacked*)(dstVertexVB);
+			CVectorPacked		*dstNormal= (CVectorPacked*)(dstVertexVB + normalOff);
+			CVector				tempVertex;
+			CVector				tempNormal;


 			// Vertex.
-			boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], *dstVertex);
-			boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], *dstVertex);
-			boneMat3x4[ srcSkin->MatrixId[2] ].mulAddPoint( *srcVertex, srcSkin->Weights[2], *dstVertex);
-			boneMat3x4[ srcSkin->MatrixId[3] ].mulAddPoint( *srcVertex, srcSkin->Weights[3], *dstVertex);
+			boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], tempVertex);
+			boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], tempVertex);
+			boneMat3x4[ srcSkin->MatrixId[2] ].mulAddPoint( *srcVertex, srcSkin->Weights[2], tempVertex);
+			boneMat3x4[ srcSkin->MatrixId[3] ].mulAddPoint( *srcVertex, srcSkin->Weights[3], tempVertex);
+			*dstVertex = tempVertex;
 			// Normal.
-			boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcNormal, srcSkin->Weights[0], *dstNormal);
-			boneMat3x4[ srcSkin->MatrixId[1] ].mulAddVector( *srcNormal, srcSkin->Weights[1], *dstNormal);
-			boneMat3x4[ srcSkin->MatrixId[2] ].mulAddVector( *srcNormal, srcSkin->Weights[2], *dstNormal);
-			boneMat3x4[ srcSkin->MatrixId[3] ].mulAddVector( *srcNormal, srcSkin->Weights[3], *dstNormal);
+			boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcNormal, srcSkin->Weights[0], tempNormal);
+			boneMat3x4[ srcSkin->MatrixId[1] ].mulAddVector( *srcNormal, srcSkin->Weights[1], tempNormal);
+			boneMat3x4[ srcSkin->MatrixId[2] ].mulAddVector( *srcNormal, srcSkin->Weights[2], tempNormal);
+			boneMat3x4[ srcSkin->MatrixId[3] ].mulAddVector( *srcNormal, srcSkin->Weights[3], tempNormal);
+			*dstNormal = tempNormal;
 		}
 		break;

@ -254,18 +270,24 @@ static void	applyArraySkinTangentSpaceT(uint numMatrixes, uint32 *infPtr, CMesh:
 			CVector				*srcTgSpace= tgSpacePtr + index;
 			//
 			uint8				*dstVertexVB= destVertexPtr + index * vertexSize;
-			CVector				*dstVertex= (CVector*)(dstVertexVB);
-			CVector				*dstNormal= (CVector*)(dstVertexVB + normalOff);
-			CVector				*dstTgSpace= (CVector*)(dstVertexVB + tgSpaceOff);
+			CVectorPacked		*dstVertex= (CVectorPacked*)(dstVertexVB);
+			CVectorPacked		*dstNormal= (CVectorPacked*)(dstVertexVB + normalOff);
+			CVectorPacked		*dstTgSpace= (CVectorPacked*)(dstVertexVB + tgSpaceOff);
+			CVector				tempVertex;
+			CVector				tempNormal;
+			CVector				tempTgSpace;



 			// Vertex.
-			boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, *dstVertex);
+			boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, tempVertex);
+			*dstVertex = tempVertex;
 			// Normal.
-			boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcNormal, *dstNormal);
+			boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcNormal, tempNormal);
+			*dstNormal = tempNormal;
 			// Tg space
-			boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcTgSpace, *dstTgSpace);
+			boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcTgSpace, tempTgSpace);
+			*dstTgSpace = tempTgSpace;

 		}
 		break;
@ -282,19 +304,25 @@ static void	applyArraySkinTangentSpaceT(uint numMatrixes, uint32 *infPtr, CMesh:
 			CVector				*srcTgSpace= tgSpacePtr + index;
 			//
 			uint8				*dstVertexVB= destVertexPtr + index * vertexSize;
-			CVector				*dstVertex= (CVector*)(dstVertexVB);
-			CVector				*dstNormal= (CVector*)(dstVertexVB + normalOff);
-			CVector				*dstTgSpace= (CVector*)(dstVertexVB + tgSpaceOff);
+			CVectorPacked		*dstVertex= (CVectorPacked*)(dstVertexVB);
+			CVectorPacked		*dstNormal= (CVectorPacked*)(dstVertexVB + normalOff);
+			CVectorPacked		*dstTgSpace= (CVectorPacked*)(dstVertexVB + tgSpaceOff);
+			CVector				tempVertex;
+			CVector				tempNormal;
+			CVector				tempTgSpace;

 			// Vertex.
-			boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], *dstVertex);
-			boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], *dstVertex);
+			boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], tempVertex);
+			boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], tempVertex);
+			*dstVertex = tempVertex;
 			// Normal.
-			boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcNormal, srcSkin->Weights[0], *dstNormal);
-			boneMat3x4[ srcSkin->MatrixId[1] ].mulAddVector( *srcNormal, srcSkin->Weights[1], *dstNormal);
+			boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcNormal, srcSkin->Weights[0], tempNormal);
+			boneMat3x4[ srcSkin->MatrixId[1] ].mulAddVector( *srcNormal, srcSkin->Weights[1], tempNormal);
+			*dstNormal = tempNormal;
 			// Tg space
-			boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcTgSpace, srcSkin->Weights[0], *dstTgSpace);
-			boneMat3x4[ srcSkin->MatrixId[1] ].mulAddVector( *srcTgSpace, srcSkin->Weights[1], *dstTgSpace);
+			boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcTgSpace, srcSkin->Weights[0], tempTgSpace);
+			boneMat3x4[ srcSkin->MatrixId[1] ].mulAddVector( *srcTgSpace, srcSkin->Weights[1], tempTgSpace);
+			*dstTgSpace = tempTgSpace;
 		}
 		break;

@ -310,22 +338,28 @@ static void	applyArraySkinTangentSpaceT(uint numMatrixes, uint32 *infPtr, CMesh:
 			CVector				*srcTgSpace= tgSpacePtr + index;
 			//
 			uint8				*dstVertexVB= destVertexPtr + index * vertexSize;
-			CVector				*dstVertex= (CVector*)(dstVertexVB);
-			CVector				*dstNormal= (CVector*)(dstVertexVB + normalOff);
-			CVector				*dstTgSpace= (CVector*)(dstVertexVB + tgSpaceOff);
+			CVectorPacked		*dstVertex= (CVectorPacked*)(dstVertexVB);
+			CVectorPacked		*dstNormal= (CVectorPacked*)(dstVertexVB + normalOff);
+			CVectorPacked		*dstTgSpace= (CVectorPacked*)(dstVertexVB + tgSpaceOff);
+			CVector				tempVertex;
+			CVector				tempNormal;
+			CVector				tempTgSpace;

 			// Vertex.
-			boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], *dstVertex);
-			boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], *dstVertex);
-			boneMat3x4[ srcSkin->MatrixId[2] ].mulAddPoint( *srcVertex, srcSkin->Weights[2], *dstVertex);
+			boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], tempVertex);
+			boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], tempVertex);
+			boneMat3x4[ srcSkin->MatrixId[2] ].mulAddPoint( *srcVertex, srcSkin->Weights[2], tempVertex);
+			*dstVertex = tempVertex;
 			// Normal.
-			boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcNormal, srcSkin->Weights[0], *dstNormal);
-			boneMat3x4[ srcSkin->MatrixId[1] ].mulAddVector( *srcNormal, srcSkin->Weights[1], *dstNormal);
-			boneMat3x4[ srcSkin->MatrixId[2] ].mulAddVector( *srcNormal, srcSkin->Weights[2], *dstNormal);
+			boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcNormal, srcSkin->Weights[0], tempNormal);
+			boneMat3x4[ srcSkin->MatrixId[1] ].mulAddVector( *srcNormal, srcSkin->Weights[1], tempNormal);
+			boneMat3x4[ srcSkin->MatrixId[2] ].mulAddVector( *srcNormal, srcSkin->Weights[2], tempNormal);
+			*dstNormal = tempNormal;
 			// Tg space
-			boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcTgSpace, srcSkin->Weights[0], *dstTgSpace);
-			boneMat3x4[ srcSkin->MatrixId[1] ].mulAddVector( *srcTgSpace, srcSkin->Weights[1], *dstTgSpace);
-			boneMat3x4[ srcSkin->MatrixId[2] ].mulAddVector( *srcTgSpace, srcSkin->Weights[2], *dstTgSpace);
+			boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcTgSpace, srcSkin->Weights[0], tempTgSpace);
+			boneMat3x4[ srcSkin->MatrixId[1] ].mulAddVector( *srcTgSpace, srcSkin->Weights[1], tempTgSpace);
+			boneMat3x4[ srcSkin->MatrixId[2] ].mulAddVector( *srcTgSpace, srcSkin->Weights[2], tempTgSpace);
+			*dstTgSpace = tempTgSpace;
 		}
 		break;

@ -341,25 +375,33 @@ static void	applyArraySkinTangentSpaceT(uint numMatrixes, uint32 *infPtr, CMesh:
 			CVector				*srcTgSpace= tgSpacePtr + index;
 			//
 			uint8				*dstVertexVB= destVertexPtr + index * vertexSize;
-			CVector				*dstVertex= (CVector*)(dstVertexVB);
-			CVector				*dstNormal= (CVector*)(dstVertexVB + normalOff);
-			CVector				*dstTgSpace= (CVector*)(dstVertexVB + tgSpaceOff);
+			CVectorPacked		*dstVertex= (CVectorPacked*)(dstVertexVB);
+			CVectorPacked		*dstNormal= (CVectorPacked*)(dstVertexVB + normalOff);
+			CVectorPacked		*dstTgSpace= (CVectorPacked*)(dstVertexVB + tgSpaceOff);
+
+			CVector				tempVertex;
+			CVector				tempNormal;
+			CVector				tempTgSpace;

 			// Vertex.
-			boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], *dstVertex);
-			boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], *dstVertex);
-			boneMat3x4[ srcSkin->MatrixId[2] ].mulAddPoint( *srcVertex, srcSkin->Weights[2], *dstVertex);
-			boneMat3x4[ srcSkin->MatrixId[3] ].mulAddPoint( *srcVertex, srcSkin->Weights[3], *dstVertex);
+			boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], tempVertex);
+			boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], tempVertex);
+			boneMat3x4[ srcSkin->MatrixId[2] ].mulAddPoint( *srcVertex, srcSkin->Weights[2], tempVertex);
+			boneMat3x4[ srcSkin->MatrixId[3] ].mulAddPoint( *srcVertex, srcSkin->Weights[3], tempVertex);
 			// Normal.
-			boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcNormal, srcSkin->Weights[0], *dstNormal);
-			boneMat3x4[ srcSkin->MatrixId[1] ].mulAddVector( *srcNormal, srcSkin->Weights[1], *dstNormal);
-			boneMat3x4[ srcSkin->MatrixId[2] ].mulAddVector( *srcNormal, srcSkin->Weights[2], *dstNormal);
-			boneMat3x4[ srcSkin->MatrixId[3] ].mulAddVector( *srcNormal, srcSkin->Weights[3], *dstNormal);
+			boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcNormal, srcSkin->Weights[0], tempNormal);
+			boneMat3x4[ srcSkin->MatrixId[1] ].mulAddVector( *srcNormal, srcSkin->Weights[1], tempNormal);
+			boneMat3x4[ srcSkin->MatrixId[2] ].mulAddVector( *srcNormal, srcSkin->Weights[2], tempNormal);
+			boneMat3x4[ srcSkin->MatrixId[3] ].mulAddVector( *srcNormal, srcSkin->Weights[3], tempNormal);
 			// Tg space
-			boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcTgSpace, srcSkin->Weights[0], *dstTgSpace);
-			boneMat3x4[ srcSkin->MatrixId[1] ].mulAddVector( *srcTgSpace, srcSkin->Weights[1], *dstTgSpace);
-			boneMat3x4[ srcSkin->MatrixId[2] ].mulAddVector( *srcTgSpace, srcSkin->Weights[2], *dstTgSpace);
-			boneMat3x4[ srcSkin->MatrixId[3] ].mulAddVector( *srcTgSpace, srcSkin->Weights[3], *dstTgSpace);
+			boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcTgSpace, srcSkin->Weights[0], tempTgSpace);
+			boneMat3x4[ srcSkin->MatrixId[1] ].mulAddVector( *srcTgSpace, srcSkin->Weights[1], tempTgSpace);
+			boneMat3x4[ srcSkin->MatrixId[2] ].mulAddVector( *srcTgSpace, srcSkin->Weights[2], tempTgSpace);
+			boneMat3x4[ srcSkin->MatrixId[3] ].mulAddVector( *srcTgSpace, srcSkin->Weights[3], tempTgSpace);
+
+			*dstVertex = tempVertex;
+			*dstNormal = tempNormal;
+			*dstTgSpace = tempTgSpace;
 		}
 		break;

@ -528,7 +570,7 @@ void	CMeshMRMGeom::applySkinWithTangentSpace(CLod &lod, const CSkeletonModel *sk
 	On a P4-2.4Ghz, for 40000 vertices skinned, both no precaching and asm
 	saves 27% of execution time in the applyRawSkinNormal*() loop (ie 1 ms)
 */
-#if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM)
+#if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM) && !defined(NL_HAS_SSE2)
 //#define	NL3D_RAWSKIN_PRECACHE
 #define	NL3D_RAWSKIN_ASM
 #endif
@ -564,16 +606,19 @@ void		CMeshMRMGeom::applyArrayRawSkinNormal1(CRawVertexNormalSkin1 *src, uint8 *

 #ifndef NL3D_RAWSKIN_ASM
 		//  for all InfluencedVertices only.
+		CVector tmp;
 		for(;nBlockInf>0;nBlockInf--, src++, destVertexPtr+=NL3D_RAWSKIN_VERTEX_SIZE)
 		{
-			CVector				*dstVertex= (CVector*)(destVertexPtr);
-			CVector				*dstNormal= (CVector*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF);
+			CVectorPacked				*dstVertex= (CVectorPacked*)(destVertexPtr);
+			CVectorPacked				*dstNormal= (CVectorPacked*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF);

 			// For 1 matrix, can write directly to AGP (if destVertexPtr is AGP...)
 			// Vertex.
-			boneMat3x4[ src->MatrixId[0] ].mulSetPoint( src->Vertex.Pos, *(CVector*)(destVertexPtr) );
+			boneMat3x4[ src->MatrixId[0] ].mulSetPoint( src->Vertex.Pos, tmp );
+			*(CVectorPacked*)(destVertexPtr) = tmp;
 			// Normal.
-			boneMat3x4[ src->MatrixId[0] ].mulSetVector( src->Vertex.Normal, *(CVector*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF) );
+			boneMat3x4[ src->MatrixId[0] ].mulSetVector( src->Vertex.Normal, tmp );
+			*(CVectorPacked*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF) = tmp;
 			// UV copy.
 			*(CUV*)(destVertexPtr + NL3D_RAWSKIN_UV_OFF)= src->Vertex.UV;
 		}
@ -751,11 +796,11 @@ void		CMeshMRMGeom::applyArrayRawSkinNormal2(CRawVertexNormalSkin2 *src, uint8 *
 			// Vertex.
 			boneMat3x4[ src->MatrixId[0] ].mulSetPoint( src->Vertex.Pos, src->Weights[0], tmpVert);
 			boneMat3x4[ src->MatrixId[1] ].mulAddPoint( src->Vertex.Pos, src->Weights[1], tmpVert);
-			*(CVector*)(destVertexPtr)= tmpVert;
+			*(CVectorPacked*)(destVertexPtr)= tmpVert;
 			// Normal.
 			boneMat3x4[ src->MatrixId[0] ].mulSetVector( src->Vertex.Normal, src->Weights[0], tmpVert);
 			boneMat3x4[ src->MatrixId[1] ].mulAddVector( src->Vertex.Normal, src->Weights[1], tmpVert);
-			*(CVector*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF)= tmpVert;
+			*(CVectorPacked*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF)= tmpVert;
 			// UV copy.
 			*(CUV*)(destVertexPtr + NL3D_RAWSKIN_UV_OFF)= src->Vertex.UV;
 		}
@ -1055,12 +1100,12 @@ void		CMeshMRMGeom::applyArrayRawSkinNormal3(CRawVertexNormalSkin3 *src, uint8 *
 			boneMat3x4[ src->MatrixId[0] ].mulSetPoint( src->Vertex.Pos, src->Weights[0], tmpVert);
 			boneMat3x4[ src->MatrixId[1] ].mulAddPoint( src->Vertex.Pos, src->Weights[1], tmpVert);
 			boneMat3x4[ src->MatrixId[2] ].mulAddPoint( src->Vertex.Pos, src->Weights[2], tmpVert);
-			*(CVector*)(destVertexPtr)= tmpVert;
+			*(CVectorPacked*)(destVertexPtr)= tmpVert;
 			// Normal.
 			boneMat3x4[ src->MatrixId[0] ].mulSetVector( src->Vertex.Normal, src->Weights[0], tmpVert);
 			boneMat3x4[ src->MatrixId[1] ].mulAddVector( src->Vertex.Normal, src->Weights[1], tmpVert);
 			boneMat3x4[ src->MatrixId[2] ].mulAddVector( src->Vertex.Normal, src->Weights[2], tmpVert);
-			*(CVector*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF)= tmpVert;
+			*(CVectorPacked*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF)= tmpVert;
 			// UV copy.
 			*(CUV*)(destVertexPtr + NL3D_RAWSKIN_UV_OFF)= src->Vertex.UV;
 		}
@ -1448,13 +1493,13 @@ void		CMeshMRMGeom::applyArrayRawSkinNormal4(CRawVertexNormalSkin4 *src, uint8 *
 			boneMat3x4[ src->MatrixId[1] ].mulAddPoint( src->Vertex.Pos, src->Weights[1], tmpVert);
 			boneMat3x4[ src->MatrixId[2] ].mulAddPoint( src->Vertex.Pos, src->Weights[2], tmpVert);
 			boneMat3x4[ src->MatrixId[3] ].mulAddPoint( src->Vertex.Pos, src->Weights[3], tmpVert);
-			*(CVector*)(destVertexPtr)= tmpVert;
+			*(CVectorPacked*)(destVertexPtr)= tmpVert;
 			// Normal.
 			boneMat3x4[ src->MatrixId[0] ].mulSetVector( src->Vertex.Normal, src->Weights[0], tmpVert);
 			boneMat3x4[ src->MatrixId[1] ].mulAddVector( src->Vertex.Normal, src->Weights[1], tmpVert);
 			boneMat3x4[ src->MatrixId[2] ].mulAddVector( src->Vertex.Normal, src->Weights[2], tmpVert);
 			boneMat3x4[ src->MatrixId[3] ].mulAddVector( src->Vertex.Normal, src->Weights[3], tmpVert);
-			*(CVector*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF)= tmpVert;
+			*(CVectorPacked*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF)= tmpVert;
 			// UV copy.
 			*(CUV*)(destVertexPtr + NL3D_RAWSKIN_UV_OFF)= src->Vertex.UV;
 		}
--- a/code/nel/src/3d/mesh_mrm_skinned.cpp
+++ b/code/nel/src/3d/mesh_mrm_skinned.cpp
@ -1962,7 +1962,7 @@ sint			CMeshMRMSkinnedGeom::renderShadowSkinGeom(CMeshMRMSkinnedInstance	*mi, ui
 	CLod	&lod= _Lods[_Lods.size()-1];
 	computeBoneMatrixes3x4(boneMat3x4, lod.MatrixInfluences, skeleton);

-	_ShadowSkin.applySkin((CVector*)vbDest, boneMat3x4);
+	_ShadowSkin.applySkin((CVectorPacked*)vbDest, boneMat3x4);


 	// How many vertices are added to the VBuffer ???
--- a/code/nel/src/3d/mesh_mrm_skinned_template.cpp
+++ b/code/nel/src/3d/mesh_mrm_skinned_template.cpp
@ -43,7 +43,7 @@
 	On a P4-2.4Ghz, for 40000 vertices skinned, both no precaching and asm
 	saves 27% of execution time in the applyRawSkinNormal*() loop (ie 1 ms)
 */
-#if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM)
+#if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM) && !defined(NL_HAS_SSE2)
 //#define	NL3D_RAWSKIN_PRECACHE
 #define	NL3D_RAWSKIN_ASM
 #endif
@ -79,16 +79,19 @@ void		CMeshMRMSkinnedGeom::applyArrayRawSkinNormal1(CRawVertexNormalSkinned1 *sr

 #ifndef NL3D_RAWSKIN_ASM
 		//  for all InfluencedVertices only.
+		CVector tmp;
 		for(;nBlockInf>0;nBlockInf--, src++, destVertexPtr+=NL3D_RAWSKIN_VERTEX_SIZE)
 		{
-			CVector				*dstVertex= (CVector*)(destVertexPtr);
-			CVector				*dstNormal= (CVector*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF);
+			CVectorPacked				*dstVertex= (CVectorPacked*)(destVertexPtr);
+			CVectorPacked				*dstNormal= (CVectorPacked*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF);

 			// For 1 matrix, can write directly to AGP (if destVertexPtr is AGP...)
 			// Vertex.
-			boneMat3x4[ src->MatrixId[0] ].mulSetPoint( src->Vertex, *(CVector*)(destVertexPtr) );
+			boneMat3x4[ src->MatrixId[0] ].mulSetPoint( src->Vertex, tmp );
+			*(CVectorPacked*)(destVertexPtr) = tmp;
 			// Normal.
-			boneMat3x4[ src->MatrixId[0] ].mulSetVector( src->Normal, *(CVector*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF) );
+			boneMat3x4[ src->MatrixId[0] ].mulSetVector( src->Normal, tmp );
+			*(CVectorPacked*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF) = tmp;
 			// UV copy.
 			*(CUV*)(destVertexPtr + NL3D_RAWSKIN_UV_OFF)= src->UV;
 		}
@ -266,11 +269,11 @@ void		CMeshMRMSkinnedGeom::applyArrayRawSkinNormal2(CRawVertexNormalSkinned2 *sr
 			// Vertex.
 			boneMat3x4[ src->MatrixId[0] ].mulSetPoint( src->Vertex, src->Weights[0], tmpVert);
 			boneMat3x4[ src->MatrixId[1] ].mulAddPoint( src->Vertex, src->Weights[1], tmpVert);
-			*(CVector*)(destVertexPtr)= tmpVert;
+			*(CVectorPacked*)(destVertexPtr)= tmpVert;
 			// Normal.
 			boneMat3x4[ src->MatrixId[0] ].mulSetVector( src->Normal, src->Weights[0], tmpVert);
 			boneMat3x4[ src->MatrixId[1] ].mulAddVector( src->Normal, src->Weights[1], tmpVert);
-			*(CVector*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF)= tmpVert;
+			*(CVectorPacked*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF)= tmpVert;
 			// UV copy.
 			*(CUV*)(destVertexPtr + NL3D_RAWSKIN_UV_OFF)= src->UV;
 		}
@ -570,12 +573,12 @@ void		CMeshMRMSkinnedGeom::applyArrayRawSkinNormal3(CRawVertexNormalSkinned3 *sr
 			boneMat3x4[ src->MatrixId[0] ].mulSetPoint( src->Vertex, src->Weights[0], tmpVert);
 			boneMat3x4[ src->MatrixId[1] ].mulAddPoint( src->Vertex, src->Weights[1], tmpVert);
 			boneMat3x4[ src->MatrixId[2] ].mulAddPoint( src->Vertex, src->Weights[2], tmpVert);
-			*(CVector*)(destVertexPtr)= tmpVert;
+			*(CVectorPacked*)(destVertexPtr)= tmpVert;
 			// Normal.
 			boneMat3x4[ src->MatrixId[0] ].mulSetVector( src->Normal, src->Weights[0], tmpVert);
 			boneMat3x4[ src->MatrixId[1] ].mulAddVector( src->Normal, src->Weights[1], tmpVert);
 			boneMat3x4[ src->MatrixId[2] ].mulAddVector( src->Normal, src->Weights[2], tmpVert);
-			*(CVector*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF)= tmpVert;
+			*(CVectorPacked*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF)= tmpVert;
 			// UV copy.
 			*(CUV*)(destVertexPtr + NL3D_RAWSKIN_UV_OFF)= src->UV;
 		}
@ -963,13 +966,13 @@ void		CMeshMRMSkinnedGeom::applyArrayRawSkinNormal4(CRawVertexNormalSkinned4 *sr
 			boneMat3x4[ src->MatrixId[1] ].mulAddPoint( src->Vertex, src->Weights[1], tmpVert);
 			boneMat3x4[ src->MatrixId[2] ].mulAddPoint( src->Vertex, src->Weights[2], tmpVert);
 			boneMat3x4[ src->MatrixId[3] ].mulAddPoint( src->Vertex, src->Weights[3], tmpVert);
-			*(CVector*)(destVertexPtr)= tmpVert;
+			*(CVectorPacked*)(destVertexPtr)= tmpVert;
 			// Normal.
 			boneMat3x4[ src->MatrixId[0] ].mulSetVector( src->Normal, src->Weights[0], tmpVert);
 			boneMat3x4[ src->MatrixId[1] ].mulAddVector( src->Normal, src->Weights[1], tmpVert);
 			boneMat3x4[ src->MatrixId[2] ].mulAddVector( src->Normal, src->Weights[2], tmpVert);
 			boneMat3x4[ src->MatrixId[3] ].mulAddVector( src->Normal, src->Weights[3], tmpVert);
-			*(CVector*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF)= tmpVert;
+			*(CVectorPacked*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF)= tmpVert;
 			// UV copy.
 			*(CUV*)(destVertexPtr + NL3D_RAWSKIN_UV_OFF)= src->UV;
 		}
--- a/code/nel/src/3d/mesh_multi_lod_instance.cpp
+++ b/code/nel/src/3d/mesh_multi_lod_instance.cpp
@ -302,7 +302,7 @@ void		CMeshMultiLodInstance::setPosCoarseMesh( CMeshGeom &geom, const CMatrix &m
 	for (uint i=0; i<_LastCoarseMeshNumVertices; i++)
 	{
 		// Transform position
-		*(CVector*)vDest = matrix.mulPoint (*(const CVector*)vSrc);
+		*(CVectorPacked*)vDest = matrix.mulPoint (CVector(*(const CVectorPacked*)vSrc));

 		// Next point
 		vSrc+=vtSrcSize;
--- a/code/nel/src/3d/noise_3d.cpp
+++ b/code/nel/src/3d/noise_3d.cpp
@ -151,14 +151,14 @@ void CNoise3d::render2passes (CQuadUV &qc, float wpos, float alpha)
 	_VertexBuffer.lock (vba);

 	uint32 nVSize = _VertexBuffer.getVertexSize ();
-	CVector *pVertices = vba.getVertexCoordPointer(_NbVertices);
-	*pVertices = qc.V0; pVertices = (CVector*)( ((uint8*)pVertices) + nVSize );
-	*pVertices = qc.V1; pVertices = (CVector*)( ((uint8*)pVertices) + nVSize );
-	*pVertices = qc.V2; pVertices = (CVector*)( ((uint8*)pVertices) + nVSize );
-	*pVertices = qc.V3; pVertices = (CVector*)( ((uint8*)pVertices) + nVSize );
-	*pVertices = qc.V0; pVertices = (CVector*)( ((uint8*)pVertices) + nVSize );
-	*pVertices = qc.V1; pVertices = (CVector*)( ((uint8*)pVertices) + nVSize );
-	*pVertices = qc.V2; pVertices = (CVector*)( ((uint8*)pVertices) + nVSize );
+	CVectorPacked *pVertices = vba.getVertexCoordPointer(_NbVertices);
+	*pVertices = qc.V0; pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
+	*pVertices = qc.V1; pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
+	*pVertices = qc.V2; pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
+	*pVertices = qc.V3; pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
+	*pVertices = qc.V0; pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
+	*pVertices = qc.V1; pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
+	*pVertices = qc.V2; pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
 	*pVertices = qc.V3;

 	CUV *pUV = vba.getTexCoordPointer (_NbVertices, 0);
@ -232,10 +232,10 @@ void CNoise3d::render (CQuadUV &qc, float wpos, float intensity)
 	CVertexBufferReadWrite vba;
 	_VertexBuffer.lock (vba);

-	CVector *pVertices = vba.getVertexCoordPointer(_NbVertices);
-	*pVertices = qc.V0; pVertices = (CVector*)( ((uint8*)pVertices) + nVSize );
-	*pVertices = qc.V1; pVertices = (CVector*)( ((uint8*)pVertices) + nVSize );
-	*pVertices = qc.V2; pVertices = (CVector*)( ((uint8*)pVertices) + nVSize );
+	CVectorPacked *pVertices = vba.getVertexCoordPointer(_NbVertices);
+	*pVertices = qc.V0; pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
+	*pVertices = qc.V1; pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
+	*pVertices = qc.V2; pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
 	*pVertices = qc.V3;

 	CUV *pUV = vba.getTexCoordPointer (_NbVertices, 0);
@ -281,7 +281,7 @@ void CNoise3d::renderGrid (uint32 nbw, uint32 nbh, uint32 w, uint32 h,

 	uint32 i, j, nSlice1, nSlice2;
 	float wpos, oneOverNbWNbH = 1.0f / (nbw*nbh);
-	CVector *pVertices;
+	CVectorPacked *pVertices;
 	CUV *pUV0, *pUV1;
 	uint8 *pColA, nAlphaPos;
 	uint32 nVSize = _VertexBuffer.getVertexSize ();
@ -319,10 +319,10 @@ void CNoise3d::renderGrid (uint32 nbw, uint32 nbh, uint32 w, uint32 h,
 			// If wpos is just on slice1 alpha must be one
 			nAlphaPos = (uint8)( 255*(1.0f - _Depth*(wpos - (((float)nSlice1) / _Depth))) );

-			*pVertices = CVector((float)i*w,	 (float)j*h,	 0.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize );
-			*pVertices = CVector((float)(i+1)*w, (float)j*h,	 0.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize );
-			*pVertices = CVector((float)(i+1)*w, (float)(j+1)*h, 0.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize );
-			*pVertices = CVector((float)i*w,	 (float)(j+1)*h, 0.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize );
+			*pVertices = CVector((float)i*w,	 (float)j*h,	 0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
+			*pVertices = CVector((float)(i+1)*w, (float)j*h,	 0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
+			*pVertices = CVector((float)(i+1)*w, (float)(j+1)*h, 0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
+			*pVertices = CVector((float)i*w,	 (float)(j+1)*h, 0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );

 			pUV0->U = UStart+_OffS[nSlice1].U;	pUV0->V = VStart+_OffS[nSlice1].V;	pUV0 = (CUV*)( ((uint8*)pUV0) + nVSize );
 			pUV0->U = dU+_OffS[nSlice1].U;		pUV0->V = VStart+_OffS[nSlice1].V;	pUV0 = (CUV*)( ((uint8*)pUV0) + nVSize );
@ -350,7 +350,7 @@ void CNoise3d::renderGrid2passes (uint32 nbw, uint32 nbh, uint32 w, uint32 h,
 {
 	uint32 i, j, nSlice1, nSlice2;
 	float wpos, oneOverNbWNbH = 1.0f / (nbw*nbh);
-	CVector *pVertices;
+	CVectorPacked *pVertices;
 	CUV *pUV0;
 	uint8 *pColA, nFinalAlpha;
 	uint32 nVSize = _VertexBuffer.getVertexSize ();
@ -387,14 +387,14 @@ void CNoise3d::renderGrid2passes (uint32 nbw, uint32 nbh, uint32 w, uint32 h,
 			// If wpos is just on slice1 alpha must be one
 			float alphaPos = 1.0f - _Depth*(wpos - (((float)nSlice1) / _Depth));

-			*pVertices = CVector((float)i*w,	 (float)j*h,	 0.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize );
-			*pVertices = CVector((float)(i+1)*w, (float)j*h,	 0.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize );
-			*pVertices = CVector((float)(i+1)*w, (float)(j+1)*h, 0.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize );
-			*pVertices = CVector((float)i*w,	 (float)(j+1)*h, 0.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize );
-			*pVertices = CVector((float)i*w,	 (float)j*h,	 0.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize );
-			*pVertices = CVector((float)(i+1)*w, (float)j*h,	 0.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize );
-			*pVertices = CVector((float)(i+1)*w, (float)(j+1)*h, 0.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize );
-			*pVertices = CVector((float)i*w,	 (float)(j+1)*h, 0.0f); pVertices = (CVector*)( ((uint8*)pVertices) + nVSize );
+			*pVertices = CVector((float)i*w,	 (float)j*h,	 0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
+			*pVertices = CVector((float)(i+1)*w, (float)j*h,	 0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
+			*pVertices = CVector((float)(i+1)*w, (float)(j+1)*h, 0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
+			*pVertices = CVector((float)i*w,	 (float)(j+1)*h, 0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
+			*pVertices = CVector((float)i*w,	 (float)j*h,	 0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
+			*pVertices = CVector((float)(i+1)*w, (float)j*h,	 0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
+			*pVertices = CVector((float)(i+1)*w, (float)(j+1)*h, 0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );
+			*pVertices = CVector((float)i*w,	 (float)(j+1)*h, 0.0f); pVertices = (CVectorPacked*)( ((uint8*)pVertices) + nVSize );

 			pUV0->U = UStart+_OffS[nSlice1].U;	pUV0->V = VStart+_OffS[nSlice1].V;	pUV0 = (CUV*)( ((uint8*)pUV0) + nVSize );
 			pUV0->U = dU+_OffS[nSlice1].U;		pUV0->V = VStart+_OffS[nSlice1].V;	pUV0 = (CUV*)( ((uint8*)pUV0) + nVSize );
--- a/code/nel/src/3d/packed_zone.cpp
+++ b/code/nel/src/3d/packed_zone.cpp
@ -418,7 +418,7 @@ void serialPackedVector12(std::vector<uint16> &v, NLMISC::IStream &f)
 }

 // some function to ease writing of some primitives into a vertex buffer
-static inline void pushVBLine2D(NLMISC::CVector *&dest, const NLMISC::CVector &v0, const NLMISC::CVector &v1)
+static inline void pushVBLine2D(NLMISC::CVectorPacked *&dest, const NLMISC::CVector &v0, const NLMISC::CVector &v1)
 {
 	dest->x = v0.x;
 	dest->y = v0.y;
@ -434,7 +434,7 @@ static inline void pushVBLine2D(NLMISC::CVector *&dest, const NLMISC::CVector &v
 	++ dest;
 }

-static inline void pushVBTri2D(NLMISC::CVector *&dest, const NLMISC::CTriangle &tri)
+static inline void pushVBTri2D(NLMISC::CVectorPacked *&dest, const NLMISC::CTriangle &tri)
 {
 	dest->x = tri.V0.x;
 	dest->y = tri.V0.y;
@ -451,7 +451,7 @@ static inline void pushVBTri2D(NLMISC::CVector *&dest, const NLMISC::CTriangle &
 }


-static inline void pushVBQuad2D(NLMISC::CVector *&dest, const NLMISC::CQuad &quad)
+static inline void pushVBQuad2D(NLMISC::CVectorPacked *&dest, const NLMISC::CQuad &quad)
 {
 	dest->x = quad.V0.x;
 	dest->y = quad.V0.y;
@ -471,7 +471,7 @@ static inline void pushVBQuad2D(NLMISC::CVector *&dest, const NLMISC::CQuad &qua
 	++ dest;
 }

-static inline void pushVBQuad(NLMISC::CVector *&dest, const NLMISC::CQuad &quad)
+static inline void pushVBQuad(NLMISC::CVectorPacked *&dest, const NLMISC::CQuad &quad)
 {
 	*dest++ = quad.V0;
 	*dest++ = quad.V1;
@ -576,6 +576,22 @@ void CPackedZone32::unpackTri(const CPackedTri &src, CVector dest[3]) const

 }

+// ***************************************************************************************
+void CPackedZone32::unpackTri(const CPackedTri &src, CVectorPacked dest[3]) const
+{
+	// TODO: add 'multiply-add' operator
+	dest[0].set(Verts[src.V0].X * _PackedLocalToWorld.x + _Origin.x,
+		        Verts[src.V0].Y * _PackedLocalToWorld.y + _Origin.y,
+				Verts[src.V0].Z * _PackedLocalToWorld.z + _Origin.z);
+	dest[1].set(Verts[src.V1].X * _PackedLocalToWorld.x + _Origin.x,
+		        Verts[src.V1].Y * _PackedLocalToWorld.y + _Origin.y,
+				Verts[src.V1].Z * _PackedLocalToWorld.z + _Origin.z);
+	dest[2].set(Verts[src.V2].X * _PackedLocalToWorld.x + _Origin.x,
+		        Verts[src.V2].Y * _PackedLocalToWorld.y + _Origin.y,
+				Verts[src.V2].Z * _PackedLocalToWorld.z + _Origin.z);
+
+}
+
 uint32 CPackedZone32::UndefIndex = 0xffffffff;

 // ***************************************************************************************
@ -973,8 +989,8 @@ void CPackedZone32::render(CVertexBuffer &vb, IDriver &drv, CMaterial &material,
 		CVertexBufferReadWrite vba;
 		vb.setNumVertices(batchSize * 3);
 		vb.lock(vba);
-		CVector *dest = vba.getVertexCoordPointer(0);
-		const CVector *endDest = dest + batchSize * 3;
+		CVectorPacked *dest = vba.getVertexCoordPointer(0);
+		const CVectorPacked *endDest = dest + batchSize * 3;
 		for(sint y = 0; y < (sint) silhouette.size(); ++y)
 		{
 			sint gridY = y + minY;
@ -1196,8 +1212,8 @@ void CPackedZone16::render(CVertexBuffer &vb, IDriver &drv, CMaterial &material,
 		CVertexBufferReadWrite vba;
 		vb.setNumVertices(batchSize * 3);
 		vb.lock(vba);
-		CVector *dest = vba.getVertexCoordPointer(0);
-		const CVector *endDest = dest + batchSize * 3;
+		CVectorPacked *dest = vba.getVertexCoordPointer(0);
+		const CVectorPacked *endDest = dest + batchSize * 3;
 		for(sint y = 0; y < (sint) silhouette.size(); ++y)
 		{
 			sint gridY = y + minY;
@ -1254,6 +1270,23 @@ void CPackedZone16::render(CVertexBuffer &vb, IDriver &drv, CMaterial &material,



+// ***************************************************************************************
+void CPackedZone16::unpackTri(const CPackedTri16 &src, CVectorPacked dest[3]) const
+{
+	// Yes, this is an ugly duplication of CPackedZone32::unpackTri, but this code is temporary anyway...
+	// TODO: add 'multiply-add' operator
+	dest[0].set(Verts[src.V0].X * _PackedLocalToWorld.x + _Origin.x,
+		        Verts[src.V0].Y * _PackedLocalToWorld.y + _Origin.y,
+				Verts[src.V0].Z * _PackedLocalToWorld.z + _Origin.z);
+	dest[1].set(Verts[src.V1].X * _PackedLocalToWorld.x + _Origin.x,
+		        Verts[src.V1].Y * _PackedLocalToWorld.y + _Origin.y,
+				Verts[src.V1].Z * _PackedLocalToWorld.z + _Origin.z);
+	dest[2].set(Verts[src.V2].X * _PackedLocalToWorld.x + _Origin.x,
+		        Verts[src.V2].Y * _PackedLocalToWorld.y + _Origin.y,
+				Verts[src.V2].Z * _PackedLocalToWorld.z + _Origin.z);
+
+}
+
 // ***************************************************************************************
 void CPackedZone16::unpackTri(const CPackedTri16 &src, CVector dest[3]) const
 {
--- a/code/nel/src/3d/particle_system.cpp
+++ b/code/nel/src/3d/particle_system.cpp
@ -64,7 +64,7 @@ float CParticleSystem::RealEllapsedTimeRatio = 1.f;
 bool CParticleSystem::InsideSimLoop = false;
 bool CParticleSystem::InsideRemoveLoop = false;
 bool CParticleSystem::InsideNewElementsLoop = false;;
-std::vector<NLMISC::CVector> CParticleSystem::_SpawnPos;
+std::vector<NLMISC::CVectorPacked> CParticleSystem::_SpawnPos;



--- a/code/nel/src/3d/patch_render.cpp
+++ b/code/nel/src/3d/patch_render.cpp
@ -1026,8 +1026,8 @@ inline void		CPatch::fillFar0VertexVB(CTessFarVertex *pVert)
 	if( !CLandscapeGlobals::VertexProgramEnabled )
 	{
 		// Set Pos. Set it local to the current center of landscape
-		CHECK_VBA_RANGE(CLandscapeGlobals::CurrentFar0VBInfo.Accessor, CurVBPtr, sizeof(CVector));
-		*(CVector*)CurVBPtr= pVert->Src->Pos - CLandscapeGlobals::PZBModelPosition;
+		CHECK_VBA_RANGE(CLandscapeGlobals::CurrentFar0VBInfo.Accessor, CurVBPtr, sizeof(CVectorPacked));
+		*(CVectorPacked*)CurVBPtr= pVert->Src->Pos - CLandscapeGlobals::PZBModelPosition;
 		// Set Uvs.
 		CHECK_VBA_RANGE(CLandscapeGlobals::CurrentFar0VBInfo.Accessor, CurVBPtr + CLandscapeGlobals::CurrentFar0VBInfo.TexCoordOff0, sizeof(CUV));
 		*(CUV*)(CurVBPtr + CLandscapeGlobals::CurrentFar0VBInfo.TexCoordOff0)= uv;
@ -1038,8 +1038,8 @@ inline void		CPatch::fillFar0VertexVB(CTessFarVertex *pVert)
 	{
 		// Else must setup Vertex program inputs
 		// v[0]== StartPos.
-		CHECK_VBA_RANGE(CLandscapeGlobals::CurrentFar0VBInfo.Accessor, CurVBPtr, sizeof(CVector));
-		*(CVector*)CurVBPtr= pVert->Src->StartPos;
+		CHECK_VBA_RANGE(CLandscapeGlobals::CurrentFar0VBInfo.Accessor, CurVBPtr, sizeof(CVectorPacked));
+		*(CVectorPacked*)CurVBPtr= pVert->Src->StartPos;
 		// v[8]== Tex0
 		CHECK_VBA_RANGE(CLandscapeGlobals::CurrentFar0VBInfo.Accessor, CurVBPtr + CLandscapeGlobals::CurrentFar0VBInfo.TexCoordOff0, sizeof(CUV));
 		*(CUV*)(CurVBPtr + CLandscapeGlobals::CurrentFar0VBInfo.TexCoordOff0)= uv;
@ -1056,7 +1056,7 @@ inline void		CPatch::fillFar0VertexVB(CTessFarVertex *pVert)

 		// v[11]== EndPos - StartPos
 		CHECK_VBA_RANGE(CLandscapeGlobals::CurrentFar0VBInfo.Accessor, CurVBPtr + CLandscapeGlobals::CurrentFar0VBInfo.DeltaPosOff, sizeof(CVector))
-		*(CVector*)(CurVBPtr + CLandscapeGlobals::CurrentFar0VBInfo.DeltaPosOff)=
+		*(CVectorPacked*)(CurVBPtr + CLandscapeGlobals::CurrentFar0VBInfo.DeltaPosOff)=
 			pVert->Src->EndPos - pVert->Src->StartPos;
 	}
 }
@ -1110,8 +1110,8 @@ inline void		CPatch::fillFar1VertexVB(CTessFarVertex *pVert)
 	if( !CLandscapeGlobals::VertexProgramEnabled )
 	{
 		// Set Pos. Set it local to the current center of landscape
-		CHECK_VBA_RANGE(CLandscapeGlobals::CurrentFar1VBInfo.Accessor, CurVBPtr, sizeof(CVector));
-		*(CVector*)CurVBPtr= pVert->Src->Pos - CLandscapeGlobals::PZBModelPosition;
+		CHECK_VBA_RANGE(CLandscapeGlobals::CurrentFar1VBInfo.Accessor, CurVBPtr, sizeof(CVectorPacked));
+		*(CVectorPacked*)CurVBPtr= pVert->Src->Pos - CLandscapeGlobals::PZBModelPosition;
 		// Set Uvs.
 		CHECK_VBA_RANGE(CLandscapeGlobals::CurrentFar1VBInfo.Accessor, CurVBPtr + CLandscapeGlobals::CurrentFar1VBInfo.TexCoordOff0, sizeof(CUV));
 		*(CUV*)(CurVBPtr + CLandscapeGlobals::CurrentFar1VBInfo.TexCoordOff0)= uv;
@ -1126,8 +1126,8 @@ inline void		CPatch::fillFar1VertexVB(CTessFarVertex *pVert)
 	{
 		// Else must setup Vertex program inputs
 		// v[0]== StartPos.
-		CHECK_VBA_RANGE(CLandscapeGlobals::CurrentFar1VBInfo.Accessor, CurVBPtr, sizeof(CVector));
-		*(CVector*)CurVBPtr= pVert->Src->StartPos;
+		CHECK_VBA_RANGE(CLandscapeGlobals::CurrentFar1VBInfo.Accessor, CurVBPtr, sizeof(CVectorPacked));
+		*(CVectorPacked*)CurVBPtr= pVert->Src->StartPos;
 		// v[8]== Tex0
 		CHECK_VBA_RANGE(CLandscapeGlobals::CurrentFar1VBInfo.Accessor, CurVBPtr + CLandscapeGlobals::CurrentFar1VBInfo.TexCoordOff0, sizeof(CUV));
 		*(CUV*)(CurVBPtr + CLandscapeGlobals::CurrentFar1VBInfo.TexCoordOff0)= uv;
@ -1144,7 +1144,7 @@ inline void		CPatch::fillFar1VertexVB(CTessFarVertex *pVert)

 		// v[11]== EndPos - StartPos
 		CHECK_VBA_RANGE(CLandscapeGlobals::CurrentFar1VBInfo.Accessor, CurVBPtr + CLandscapeGlobals::CurrentFar1VBInfo.DeltaPosOff, sizeof(CVector))
-		*(CVector*)(CurVBPtr + CLandscapeGlobals::CurrentFar1VBInfo.DeltaPosOff)=
+		*(CVectorPacked*)(CurVBPtr + CLandscapeGlobals::CurrentFar1VBInfo.DeltaPosOff)=
 			pVert->Src->EndPos - pVert->Src->StartPos;

 		// v[12]== Alpha information
@ -1179,8 +1179,8 @@ inline void		CPatch::fillTileVertexVB(CTessNearVertex *pVert)
 	if( !CLandscapeGlobals::VertexProgramEnabled )
 	{
 		// Set Pos. Set it local to the current center of landscape
-		CHECK_VBA_RANGE(CLandscapeGlobals::CurrentTileVBInfo.Accessor, CurVBPtr, sizeof(CVector))
-		*(CVector*)CurVBPtr= pVert->Src->Pos - CLandscapeGlobals::PZBModelPosition;
+		CHECK_VBA_RANGE(CLandscapeGlobals::CurrentTileVBInfo.Accessor, CurVBPtr, sizeof(CVectorPacked))
+		*(CVectorPacked*)CurVBPtr= pVert->Src->Pos - CLandscapeGlobals::PZBModelPosition;
 		// Set Uvs.
 		CHECK_VBA_RANGE(CLandscapeGlobals::CurrentTileVBInfo.Accessor, CurVBPtr + CLandscapeGlobals::CurrentTileVBInfo.TexCoordOff0, sizeof(CUV))
 		*(CUV*)(CurVBPtr + CLandscapeGlobals::CurrentTileVBInfo.TexCoordOff0)= pVert->PUv0;
@ -1193,8 +1193,8 @@ inline void		CPatch::fillTileVertexVB(CTessNearVertex *pVert)
 	{
 		// Else must setup Vertex program inputs
 		// v[0]== StartPos.
-		CHECK_VBA_RANGE(CLandscapeGlobals::CurrentTileVBInfo.Accessor, CurVBPtr, sizeof(CVector))
-		*(CVector*)CurVBPtr= pVert->Src->StartPos;
+		CHECK_VBA_RANGE(CLandscapeGlobals::CurrentTileVBInfo.Accessor, CurVBPtr, sizeof(CVectorPacked))
+		*(CVectorPacked*)CurVBPtr= pVert->Src->StartPos;
 		// v[8]== Tex0
 		CHECK_VBA_RANGE(CLandscapeGlobals::CurrentTileVBInfo.Accessor, CurVBPtr + CLandscapeGlobals::CurrentTileVBInfo.TexCoordOff0, sizeof(CUV))
 		*(CUV*)(CurVBPtr + CLandscapeGlobals::CurrentTileVBInfo.TexCoordOff0)= pVert->PUv0;
@ -1214,7 +1214,7 @@ inline void		CPatch::fillTileVertexVB(CTessNearVertex *pVert)

 		// v[11]== EndPos - StartPos
 		CHECK_VBA_RANGE(CLandscapeGlobals::CurrentTileVBInfo.Accessor, CurVBPtr + CLandscapeGlobals::CurrentTileVBInfo.DeltaPosOff, sizeof(CVector))
-		*(CVector*)(CurVBPtr + CLandscapeGlobals::CurrentTileVBInfo.DeltaPosOff)=
+		*(CVectorPacked*)(CurVBPtr + CLandscapeGlobals::CurrentTileVBInfo.DeltaPosOff)=
 			pVert->Src->EndPos - pVert->Src->StartPos;
 	}
 }
@ -1383,8 +1383,8 @@ void		CPatch::computeGeomorphFar0VertexListVB(CTessList<CTessFarVertex>  &vertLi
 		CurVBPtr+= pVert->Index0 * CLandscapeGlobals::CurrentFar0VBInfo.VertexSize;

 		// Set Geomorphed Position. Set it local to the current center of landscape
-		CHECK_VBA_RANGE(CLandscapeGlobals::CurrentFar0VBInfo.Accessor, CurVBPtr, sizeof(CVector))
-		*(CVector*)CurVBPtr= pVert->Src->Pos - CLandscapeGlobals::PZBModelPosition;
+		CHECK_VBA_RANGE(CLandscapeGlobals::CurrentFar0VBInfo.Accessor, CurVBPtr, sizeof(CVectorPacked))
+		*(CVectorPacked*)CurVBPtr= pVert->Src->Pos - CLandscapeGlobals::PZBModelPosition;
 	}
 }

@ -1404,8 +1404,8 @@ void		CPatch::computeGeomorphAlphaFar1VertexListVB(CTessList<CTessFarVertex>  &v
 		// NB: the filling order of data is important, for AGP write combiners.

 		// Set Geomorphed Position. Set it local to the current center of landscape
-		CHECK_VBA_RANGE(CLandscapeGlobals::CurrentFar1VBInfo.Accessor, CurVBPtr, sizeof(CVector))
-		*(CVector*)CurVBPtr= pVert->Src->Pos - CLandscapeGlobals::PZBModelPosition;
+		CHECK_VBA_RANGE(CLandscapeGlobals::CurrentFar1VBInfo.Accessor, CurVBPtr, sizeof(CVectorPacked))
+		*(CVectorPacked*)CurVBPtr= pVert->Src->Pos - CLandscapeGlobals::PZBModelPosition;

 		// Set Alpha color.
 		static CRGBA	col(255,255,255,255);
@ -1434,8 +1434,8 @@ void		CPatch::computeGeomorphTileVertexListVB(CTessList<CTessNearVertex>  &vertL
 		CurVBPtr+= pVert->Index * CLandscapeGlobals::CurrentTileVBInfo.VertexSize;

 		// Set Geomorphed Position. Set it local to the current center of landscape
-		CHECK_VBA_RANGE(CLandscapeGlobals::CurrentTileVBInfo.Accessor, CurVBPtr, sizeof(CVector))
-		*(CVector*)CurVBPtr= pVert->Src->Pos - CLandscapeGlobals::PZBModelPosition;
+		CHECK_VBA_RANGE(CLandscapeGlobals::CurrentTileVBInfo.Accessor, CurVBPtr, sizeof(CVectorPacked))
+		*(CVectorPacked*)CurVBPtr= pVert->Src->Pos - CLandscapeGlobals::PZBModelPosition;
 	}
 }

--- a/code/nel/src/3d/ps_dot.cpp
+++ b/code/nel/src/3d/ps_dot.cpp
@ -23,6 +23,8 @@
 #include "nel/3d/particle_system.h"
 #include "nel/misc/fast_mem.h"

+using NLMISC::CVectorPacked;
+
 namespace NL3D
 {

@ -84,7 +86,7 @@ inline void DrawDot(T it,
 				do
 				{
 					CHECK_VERTEX_BUFFER(vb, currPos);
-					*((CVector *) currPos) =  *it;
+					*((CVectorPacked *) currPos) =  *it;
 					++it ;
 					currPos += stride;
 				}
@ -93,7 +95,7 @@ inline void DrawDot(T it,
 			else if (srcStep == (1 << 16)) // make sure we haven't got auto-lod and that the step is 1.0
 			{
 				// there's no color information in the buffer, so we can copy it directly
-				NLMISC::CFastMem::memcpy(vba.getVertexCoordPointer(), &(*it), sizeof(NLMISC::CVector) * toProcess);
+				NLMISC::CFastMem::memcpy(vba.getVertexCoordPointer(), &(*it), sizeof(NLMISC::CVectorPacked) * toProcess);
 				it += toProcess;
 			}
 			else
@ -103,7 +105,7 @@ inline void DrawDot(T it,
 				do
 				{
 					CHECK_VERTEX_BUFFER(vb, currPos);
-					*((CVector *) currPos) =  *it;
+					*((CVectorPacked *) currPos) =  *it;
 					++it ;
 					currPos += sizeof(float[3]);
 				}
--- a/code/nel/src/3d/ps_emitter.cpp
+++ b/code/nel/src/3d/ps_emitter.cpp
@ -914,7 +914,7 @@ uint GenEmitterPositions(CPSLocated *emitter,
 									   uint numStep,
 									   TAnimationTime deltaT, /* fraction of time needed to reach the first emission */
 									   TAnimationTime step,
-									   std::vector<NLMISC::CVector> &dest
+									   std::vector<NLMISC::CVectorPacked> &dest
 									  )
 {
 	NL_PS_FUNC(GenEmitterPositions)
@ -930,8 +930,8 @@ uint GenEmitterPositions(CPSLocated *emitter,
 		}
 		else
 		{
-			std::vector<NLMISC::CVector>::iterator outIt = dest.end();
-			std::vector<NLMISC::CVector>::iterator endIt = dest.begin();
+			std::vector<NLMISC::CVectorPacked>::iterator outIt = dest.end();
+			std::vector<NLMISC::CVectorPacked>::iterator endIt = dest.begin();
 			NLMISC::CVector pos = emitter->getPos()[emitterIndex] - deltaT * emitter->getSpeed()[emitterIndex];
 			NLMISC::CVector speed = step * emitter->getSpeed()[emitterIndex];
 			do
@ -966,7 +966,7 @@ static inline uint GenEmitterPositionsWithLOD(CPSLocated *emitter,
 									   TAnimationTime deltaT, /* fraction of time needed to reach the first emission */
 									   TAnimationTime step,
 									   float invLODRatio,
-									   std::vector<NLMISC::CVector> &dest
+									   std::vector<NLMISC::CVectorPacked> &dest
 									  )
 {
 	NL_PS_FUNC(GenEmitterPositionsWithLOD)
@ -982,8 +982,8 @@ static inline uint GenEmitterPositionsWithLOD(CPSLocated *emitter,
 		}
 		else
 		{
-			std::vector<NLMISC::CVector>::iterator outIt = dest.end();
-			std::vector<NLMISC::CVector>::iterator endIt = dest.begin();
+			std::vector<NLMISC::CVectorPacked>::iterator outIt = dest.end();
+			std::vector<NLMISC::CVectorPacked>::iterator endIt = dest.begin();
 			NLMISC::CVector pos = emitter->getPos()[emitterIndex] - deltaT * emitter->getSpeed()[emitterIndex];
 			NLMISC::CVector speed = step * invLODRatio * emitter->getSpeed()[emitterIndex];
 			do
@ -1021,7 +1021,7 @@ void CPSEmitter::processRegularEmissionConsistent(uint firstInstanceIndex, float
 	//


-	static std::vector<NLMISC::CVector> emitterPositions;
+	static std::vector<NLMISC::CVectorPacked> emitterPositions;
 	// Positions for the emitter. They are computed by using a parametric trajectory or by using integration

 	const uint size = _Owner->getSize();
@ -1454,7 +1454,7 @@ void CPSEmitter::processRegularEmissionConsistentWithNoLOD(uint firstInstanceInd
 	//


-	static std::vector<NLMISC::CVector> emitterPositions;
+	static std::vector<NLMISC::CVectorPacked> emitterPositions;
 	// Positions for the emitter. They are computed by using a parametric trajectory or by using integration

 	const uint size = _Owner->getSize();
@ -2790,7 +2790,7 @@ void CPSEmitter::doEmitOnce(uint firstInstanceIndex)
 				CVector startPos;
 				if (!_Owner->isParametricMotionEnabled())
 				{
-					startPos = _Owner->getPos()[k] - _Owner->getSpeed()[k] * CParticleSystem::EllapsedTime;
+					startPos = CVector(_Owner->getPos()[k]) - CVector(_Owner->getSpeed()[k]) * CParticleSystem::EllapsedTime;
 				}
 				else
 				{
@ -2823,7 +2823,7 @@ void CPSEmitter::doEmitOnce(uint firstInstanceIndex)
 			CVector startPos;
 			if (!_Owner->isParametricMotionEnabled())
 			{
-				startPos = _Owner->getPos()[k] - _Owner->getSpeed()[k] * CParticleSystem::EllapsedTime;
+				startPos = CVector(_Owner->getPos()[k]) - CVector(_Owner->getSpeed()[k]) * CParticleSystem::EllapsedTime;
 			}
 			else
 			{
--- a/code/nel/src/3d/ps_face.cpp
+++ b/code/nel/src/3d/ps_face.cpp
@ -23,7 +23,7 @@
 #include "nel/3d/particle_system.h"
 #include "nel/misc/quat.h"

-
+using NLMISC::CVectorPacked;

 namespace NL3D
 {
@ -96,27 +96,27 @@ public:
 					{
 						const CPlaneBasis &currBasis = f._PrecompBasis[*indexIt].Basis;
 						CHECK_VERTEX_BUFFER(vb, currVertex);
-						((CVector *) currVertex)->x = (*posIt).x  + *ptSize * currBasis.X.x;
-						((CVector *) currVertex)->y = (*posIt).y  + *ptSize * currBasis.X.y;
-						((CVector *) currVertex)->z = (*posIt).z  + *ptSize * currBasis.X.z;
+						((CVectorPacked *) currVertex)->x = (*posIt).x  + *ptSize * currBasis.X.x;
+						((CVectorPacked *) currVertex)->y = (*posIt).y  + *ptSize * currBasis.X.y;
+						((CVectorPacked *) currVertex)->z = (*posIt).z  + *ptSize * currBasis.X.z;
 						currVertex += stride;

 						CHECK_VERTEX_BUFFER(vb, currVertex);
-						((CVector *) currVertex)->x = (*posIt).x  + *ptSize * currBasis.Y.x;
-						((CVector *) currVertex)->y = (*posIt).y  + *ptSize * currBasis.Y.y;
-						((CVector *) currVertex)->z = (*posIt).z  + *ptSize * currBasis.Y.z;
+						((CVectorPacked *) currVertex)->x = (*posIt).x  + *ptSize * currBasis.Y.x;
+						((CVectorPacked *) currVertex)->y = (*posIt).y  + *ptSize * currBasis.Y.y;
+						((CVectorPacked *) currVertex)->z = (*posIt).z  + *ptSize * currBasis.Y.z;
 						currVertex += stride;

 						CHECK_VERTEX_BUFFER(vb, currVertex);
-						((CVector *) currVertex)->x = (*posIt).x  - *ptSize * currBasis.X.x;
-						((CVector *) currVertex)->y = (*posIt).y  - *ptSize * currBasis.X.y;
-						((CVector *) currVertex)->z = (*posIt).z  - *ptSize * currBasis.X.z;
+						((CVectorPacked *) currVertex)->x = (*posIt).x  - *ptSize * currBasis.X.x;
+						((CVectorPacked *) currVertex)->y = (*posIt).y  - *ptSize * currBasis.X.y;
+						((CVectorPacked *) currVertex)->z = (*posIt).z  - *ptSize * currBasis.X.z;
 						currVertex += stride;

 						CHECK_VERTEX_BUFFER(vb, currVertex);
-						((CVector *) currVertex)->x = (*posIt).x  - *ptSize * currBasis.Y.x;
-						((CVector *) currVertex)->y = (*posIt).y  - *ptSize * currBasis.Y.y;
-						((CVector *) currVertex)->z = (*posIt).z  - *ptSize * currBasis.Y.z;
+						((CVectorPacked *) currVertex)->x = (*posIt).x  - *ptSize * currBasis.Y.x;
+						((CVectorPacked *) currVertex)->y = (*posIt).y  - *ptSize * currBasis.Y.y;
+						((CVectorPacked *) currVertex)->z = (*posIt).z  - *ptSize * currBasis.Y.z;
 						currVertex += stride;
 						ptSize += ptSizeIncrement;
 						++indexIt;
@ -168,27 +168,27 @@ public:
 					{
 						// we use this instead of the + operator, because we avoid 4 constructor calls this way
 						CHECK_VERTEX_BUFFER(vb, currVertex);
-						((CVector *) currVertex)->x = (*posIt).x  + *ptSize * currBasis->X.x;
-						((CVector *) currVertex)->y = (*posIt).y  + *ptSize * currBasis->X.y;
-						((CVector *) currVertex)->z = (*posIt).z  + *ptSize * currBasis->X.z;
+						((CVectorPacked *) currVertex)->x = (*posIt).x  + *ptSize * currBasis->X.x;
+						((CVectorPacked *) currVertex)->y = (*posIt).y  + *ptSize * currBasis->X.y;
+						((CVectorPacked *) currVertex)->z = (*posIt).z  + *ptSize * currBasis->X.z;
 						currVertex += vSize;

 						CHECK_VERTEX_BUFFER(vb, currVertex);
-						((CVector *) currVertex)->x = (*posIt).x  + *ptSize * currBasis->Y.x;
-						((CVector *) currVertex)->y = (*posIt).y  + *ptSize * currBasis->Y.y;
-						((CVector *) currVertex)->z = (*posIt).z  + *ptSize * currBasis->Y.z;
+						((CVectorPacked *) currVertex)->x = (*posIt).x  + *ptSize * currBasis->Y.x;
+						((CVectorPacked *) currVertex)->y = (*posIt).y  + *ptSize * currBasis->Y.y;
+						((CVectorPacked *) currVertex)->z = (*posIt).z  + *ptSize * currBasis->Y.z;
 						currVertex += vSize;

 						CHECK_VERTEX_BUFFER(vb, currVertex);
-						((CVector *) currVertex)->x = (*posIt).x  - *ptSize * currBasis->X.x;
-						((CVector *) currVertex)->y = (*posIt).y  - *ptSize * currBasis->X.y;
-						((CVector *) currVertex)->z = (*posIt).z  - *ptSize * currBasis->X.z;
+						((CVectorPacked *) currVertex)->x = (*posIt).x  - *ptSize * currBasis->X.x;
+						((CVectorPacked *) currVertex)->y = (*posIt).y  - *ptSize * currBasis->X.y;
+						((CVectorPacked *) currVertex)->z = (*posIt).z  - *ptSize * currBasis->X.z;
 						currVertex += vSize;

 						CHECK_VERTEX_BUFFER(vb, currVertex);
-						((CVector *) currVertex)->x = (*posIt).x  - *ptSize * currBasis->Y.x;
-						((CVector *) currVertex)->y = (*posIt).y  - *ptSize * currBasis->Y.y;
-						((CVector *) currVertex)->z = (*posIt).z  - *ptSize * currBasis->Y.z;
+						((CVectorPacked *) currVertex)->x = (*posIt).x  - *ptSize * currBasis->Y.x;
+						((CVectorPacked *) currVertex)->y = (*posIt).y  - *ptSize * currBasis->Y.y;
+						((CVectorPacked *) currVertex)->z = (*posIt).z  - *ptSize * currBasis->Y.z;
 						currVertex += vSize;
 						ptSize += ptSizeIncrement;
 						++posIt;
--- a/code/nel/src/3d/ps_face_look_at.cpp
+++ b/code/nel/src/3d/ps_face_look_at.cpp
@ -23,6 +23,7 @@
 #include "nel/3d/particle_system.h"
 #include "nel/misc/fast_floor.h"

+using NLMISC::CVectorPacked;

 namespace NL3D
 {
@ -32,8 +33,8 @@ namespace NL3D
  */
 struct CLookAtAlign
 {
-	CVector I;
-	CVector K;
+	CVectorPacked I;
+	CVectorPacked K;
 };


@ -64,7 +65,7 @@ public:
 		do
 		{
 			// tmp unoptimized slow version
-			CVector normedSpeed = (*speedIt).normed();
+			CVector normedSpeed = CVector(*speedIt).normed();
 			float iProj = normedSpeed * I;
 			float kProj = normedSpeed * K;
 			dest->I = iProj * I + kProj * K;
@ -147,27 +148,27 @@ public:
 						v1 = rotTable[tabIndex] * currAlign->I + rotTable[tabIndex + 1] * currAlign->K;
 						v2 = rotTable[tabIndex + 2] * currAlign->I + rotTable[tabIndex + 3] * currAlign->K;
 						CHECK_VERTEX_BUFFER(vb, ptPos);
-						((CVector *) ptPos)->x = (*it).x  + *currentSize * v1.x;
-						((CVector *) ptPos)->y = (*it).y  + *currentSize * v1.y;
-						((CVector *) ptPos)->z = (*it).z  + *currentSize * v1.z;
+						((CVectorPacked *) ptPos)->x = (*it).x  + *currentSize * v1.x;
+						((CVectorPacked *) ptPos)->y = (*it).y  + *currentSize * v1.y;
+						((CVectorPacked *) ptPos)->z = (*it).z  + *currentSize * v1.z;
 						ptPos += stride;

 						CHECK_VERTEX_BUFFER(vb, ptPos);
-						((CVector *) ptPos)->x = (*it).x  + *currentSize * v2.x;
-						((CVector *) ptPos)->y = (*it).y  + *currentSize * v2.y;
-						((CVector *) ptPos)->z = (*it).z  + *currentSize * v2.z;
+						((CVectorPacked *) ptPos)->x = (*it).x  + *currentSize * v2.x;
+						((CVectorPacked *) ptPos)->y = (*it).y  + *currentSize * v2.y;
+						((CVectorPacked *) ptPos)->z = (*it).z  + *currentSize * v2.z;
 						ptPos += stride;

 						CHECK_VERTEX_BUFFER(vb, ptPos);
-						((CVector *) ptPos)->x = (*it).x  - *currentSize * v1.x;
-						((CVector *) ptPos)->y = (*it).y  - *currentSize * v1.y;
-						((CVector *) ptPos)->z = (*it).z  - *currentSize * v1.z;
+						((CVectorPacked *) ptPos)->x = (*it).x  - *currentSize * v1.x;
+						((CVectorPacked *) ptPos)->y = (*it).y  - *currentSize * v1.y;
+						((CVectorPacked *) ptPos)->z = (*it).z  - *currentSize * v1.z;
 						ptPos += stride;

 						CHECK_VERTEX_BUFFER(vb, ptPos);
-						((CVector *) ptPos)->x = (*it).x  - *currentSize * v2.x;
-						((CVector *) ptPos)->y = (*it).y  - *currentSize * v2.y;
-						((CVector *) ptPos)->z = (*it).z  - *currentSize * v2.z;
+						((CVectorPacked *) ptPos)->x = (*it).x  - *currentSize * v2.x;
+						((CVectorPacked *) ptPos)->y = (*it).y  - *currentSize * v2.y;
+						((CVectorPacked *) ptPos)->z = (*it).z  - *currentSize * v2.z;
 						ptPos += stride;

 						++it;
@ -199,27 +200,27 @@ public:
 						v1 = CPSUtil::getCos((sint32) la._Angle2D) * currAlign->I  + CPSUtil::getSin((sint32) la._Angle2D) * currAlign->K;
 						v2 = - CPSUtil::getSin((sint32) la._Angle2D) * currAlign->I + CPSUtil::getCos((sint32) la._Angle2D) * currAlign->K;
 						CHECK_VERTEX_BUFFER(vb, ptPos);
-						((CVector *) ptPos)->x = (*it).x  - *currentSize * v1.x + *currentSize2 * v2.x;
-						((CVector *) ptPos)->y = (*it).y  - *currentSize * v1.y + *currentSize2 * v2.y;
-						((CVector *) ptPos)->z = (*it).z  - *currentSize * v1.z + *currentSize2 * v2.z;
+						((CVectorPacked *) ptPos)->x = (*it).x  - *currentSize * v1.x + *currentSize2 * v2.x;
+						((CVectorPacked *) ptPos)->y = (*it).y  - *currentSize * v1.y + *currentSize2 * v2.y;
+						((CVectorPacked *) ptPos)->z = (*it).z  - *currentSize * v1.z + *currentSize2 * v2.z;
 						ptPos += stride;

 						CHECK_VERTEX_BUFFER(vb, ptPos);
-						((CVector *) ptPos)->x = (*it).x  + *currentSize * v1.x + *currentSize2 * v2.x;
-						((CVector *) ptPos)->y = (*it).y  + *currentSize * v1.y + *currentSize2 * v2.y;
-						((CVector *) ptPos)->z = (*it).z  + *currentSize * v1.z + *currentSize2 * v2.z;
+						((CVectorPacked *) ptPos)->x = (*it).x  + *currentSize * v1.x + *currentSize2 * v2.x;
+						((CVectorPacked *) ptPos)->y = (*it).y  + *currentSize * v1.y + *currentSize2 * v2.y;
+						((CVectorPacked *) ptPos)->z = (*it).z  + *currentSize * v1.z + *currentSize2 * v2.z;
 						ptPos += stride;

 						CHECK_VERTEX_BUFFER(vb, ptPos);
-						((CVector *) ptPos)->x = (*it).x  + *currentSize * v1.x - *currentSize2 * v2.x;
-						((CVector *) ptPos)->y = (*it).y  + *currentSize * v1.y - *currentSize2 * v2.y;
-						((CVector *) ptPos)->z = (*it).z  + *currentSize * v1.z - *currentSize2 * v2.z;
+						((CVectorPacked *) ptPos)->x = (*it).x  + *currentSize * v1.x - *currentSize2 * v2.x;
+						((CVectorPacked *) ptPos)->y = (*it).y  + *currentSize * v1.y - *currentSize2 * v2.y;
+						((CVectorPacked *) ptPos)->z = (*it).z  + *currentSize * v1.z - *currentSize2 * v2.z;
 						ptPos += stride;

 						CHECK_VERTEX_BUFFER(vb, ptPos);
-						((CVector *) ptPos)->x = (*it).x  - *currentSize * v1.x - *currentSize2 * v2.x;
-						((CVector *) ptPos)->y = (*it).y  - *currentSize * v1.y - *currentSize2 * v2.y;
-						((CVector *) ptPos)->z = (*it).z  - *currentSize * v1.z - *currentSize2 * v2.z;
+						((CVectorPacked *) ptPos)->x = (*it).x  - *currentSize * v1.x - *currentSize2 * v2.x;
+						((CVectorPacked *) ptPos)->y = (*it).y  - *currentSize * v1.y - *currentSize2 * v2.y;
+						((CVectorPacked *) ptPos)->z = (*it).z  - *currentSize * v1.z - *currentSize2 * v2.z;
 						ptPos += stride;
 						++it;
 						++currAlign;
@ -283,24 +284,24 @@ public:
 						CHECK_VERTEX_BUFFER(vb, ptPos + stride2);
 						CHECK_VERTEX_BUFFER(vb, ptPos + stride3);

-						((CVector *) ptPos)->x  = (*it).x  + v1.x;
-						((CVector *) ptPos)->y  = (*it).y  + v1.y;
-						((CVector *) ptPos)->z = (*it).z  + v1.z;
+						((CVectorPacked *) ptPos)->x  = (*it).x  + v1.x;
+						((CVectorPacked *) ptPos)->y  = (*it).y  + v1.y;
+						((CVectorPacked *) ptPos)->z = (*it).z  + v1.z;
 						ptPos += stride;

-						((CVector *) ptPos)->x  = (*it).x  + v2.x;
-						((CVector *) ptPos)->y  = (*it).y  + v2.y;
-						((CVector *) ptPos)->z = (*it).z  + v2.z;
+						((CVectorPacked *) ptPos)->x  = (*it).x  + v2.x;
+						((CVectorPacked *) ptPos)->y  = (*it).y  + v2.y;
+						((CVectorPacked *) ptPos)->z = (*it).z  + v2.z;
 						ptPos += stride;

-						((CVector *) ptPos)->x  = (*it).x  - v1.x;
-						((CVector *) ptPos)->y  = (*it).y  - v1.y;
-						((CVector *) ptPos)->z = (*it).z  - v1.z;
+						((CVectorPacked *) ptPos)->x  = (*it).x  - v1.x;
+						((CVectorPacked *) ptPos)->y  = (*it).y  - v1.y;
+						((CVectorPacked *) ptPos)->z = (*it).z  - v1.z;
 						ptPos += stride;

-						((CVector *) ptPos)->x  = (*it).x  - v2.x;
-						((CVector *) ptPos)->y  = (*it).y  - v2.y;
-						((CVector *) ptPos)->z = (*it).z  - v2.z;
+						((CVectorPacked *) ptPos)->x  = (*it).x  - v2.x;
+						((CVectorPacked *) ptPos)->y  = (*it).y  - v2.y;
+						((CVectorPacked *) ptPos)->z = (*it).z  - v2.z;
 						ptPos += stride;

 						++it;
@ -336,27 +337,27 @@ public:
 						v2 = - sinAngle * currAlign->I + cosAngle * currAlign->K;

 						CHECK_VERTEX_BUFFER(vb, ptPos);
-						((CVector *) ptPos)->x = (*it).x  - *currentSize * v1.x + *currentSize2 * v2.x;
-						((CVector *) ptPos)->y = (*it).y  - *currentSize * v1.y + *currentSize2 * v2.y;
-						((CVector *) ptPos)->z = (*it).z  - *currentSize * v1.z + *currentSize2 * v2.z;
+						((CVectorPacked *) ptPos)->x = (*it).x  - *currentSize * v1.x + *currentSize2 * v2.x;
+						((CVectorPacked *) ptPos)->y = (*it).y  - *currentSize * v1.y + *currentSize2 * v2.y;
+						((CVectorPacked *) ptPos)->z = (*it).z  - *currentSize * v1.z + *currentSize2 * v2.z;
 						ptPos += stride;

 						CHECK_VERTEX_BUFFER(vb, ptPos);
-						((CVector *) ptPos)->x = (*it).x  + *currentSize * v1.x + *currentSize2 * v2.x;
-						((CVector *) ptPos)->y = (*it).y  + *currentSize * v1.y + *currentSize2 * v2.y;
-						((CVector *) ptPos)->z = (*it).z  + *currentSize * v1.z + *currentSize2 * v2.z;
+						((CVectorPacked *) ptPos)->x = (*it).x  + *currentSize * v1.x + *currentSize2 * v2.x;
+						((CVectorPacked *) ptPos)->y = (*it).y  + *currentSize * v1.y + *currentSize2 * v2.y;
+						((CVectorPacked *) ptPos)->z = (*it).z  + *currentSize * v1.z + *currentSize2 * v2.z;
 						ptPos += stride;

 						CHECK_VERTEX_BUFFER(vb, ptPos);
-						((CVector *) ptPos)->x = (*it).x  + *currentSize * v1.x - *currentSize2 * v2.x;
-						((CVector *) ptPos)->y = (*it).y  + *currentSize * v1.y - *currentSize2 * v2.y;
-						((CVector *) ptPos)->z = (*it).z  + *currentSize * v1.z - *currentSize2 * v2.z;
+						((CVectorPacked *) ptPos)->x = (*it).x  + *currentSize * v1.x - *currentSize2 * v2.x;
+						((CVectorPacked *) ptPos)->y = (*it).y  + *currentSize * v1.y - *currentSize2 * v2.y;
+						((CVectorPacked *) ptPos)->z = (*it).z  + *currentSize * v1.z - *currentSize2 * v2.z;
 						ptPos += stride;

 						CHECK_VERTEX_BUFFER(vb, ptPos);
-						((CVector *) ptPos)->x = (*it).x  - *currentSize * v1.x - *currentSize2 * v2.x;
-						((CVector *) ptPos)->y = (*it).y  - *currentSize * v1.y - *currentSize2 * v2.y;
-						((CVector *) ptPos)->z = (*it).z  - *currentSize * v1.z - *currentSize2 * v2.z;
+						((CVectorPacked *) ptPos)->x = (*it).x  - *currentSize * v1.x - *currentSize2 * v2.x;
+						((CVectorPacked *) ptPos)->y = (*it).y  - *currentSize * v1.y - *currentSize2 * v2.y;
+						((CVectorPacked *) ptPos)->z = (*it).z  - *currentSize * v1.z - *currentSize2 * v2.z;
 						ptPos += stride;
 						++it;
 						++currentAngle;
@ -462,27 +463,27 @@ public:
 							while (it != endIt)
 							{
 								CHECK_VERTEX_BUFFER(vb, ptPos);
-								((CVector *) ptPos)->x = (*it).x  + *currentSize * v1.x;
-								((CVector *) ptPos)->y = (*it).y  + *currentSize * v1.y;
-								((CVector *) ptPos)->z = (*it).z  + *currentSize * v1.z;
+								((CVectorPacked *) ptPos)->x = (*it).x  + *currentSize * v1.x;
+								((CVectorPacked *) ptPos)->y = (*it).y  + *currentSize * v1.y;
+								((CVectorPacked *) ptPos)->z = (*it).z  + *currentSize * v1.z;
 								ptPos += stride;

 								CHECK_VERTEX_BUFFER(vb, ptPos);
-								((CVector *) ptPos)->x = (*it).x  + *currentSize * v2.x;
-								((CVector *) ptPos)->y = (*it).y  + *currentSize * v2.y;
-								((CVector *) ptPos)->z = (*it).z  + *currentSize * v2.z;
+								((CVectorPacked *) ptPos)->x = (*it).x  + *currentSize * v2.x;
+								((CVectorPacked *) ptPos)->y = (*it).y  + *currentSize * v2.y;
+								((CVectorPacked *) ptPos)->z = (*it).z  + *currentSize * v2.z;
 								ptPos += stride;

 								CHECK_VERTEX_BUFFER(vb, ptPos);
-								((CVector *) ptPos)->x = (*it).x  - *currentSize * v1.x;
-								((CVector *) ptPos)->y = (*it).y  - *currentSize * v1.y;
-								((CVector *) ptPos)->z = (*it).z  - *currentSize * v1.z;
+								((CVectorPacked *) ptPos)->x = (*it).x  - *currentSize * v1.x;
+								((CVectorPacked *) ptPos)->y = (*it).y  - *currentSize * v1.y;
+								((CVectorPacked *) ptPos)->z = (*it).z  - *currentSize * v1.z;
 								ptPos += stride;

 								CHECK_VERTEX_BUFFER(vb, ptPos);
-								((CVector *) ptPos)->x = (*it).x  - *currentSize * v2.x;
-								((CVector *) ptPos)->y = (*it).y  - *currentSize * v2.y;
-								((CVector *) ptPos)->z = (*it).z  - *currentSize * v2.z;
+								((CVectorPacked *) ptPos)->x = (*it).x  - *currentSize * v2.x;
+								((CVectorPacked *) ptPos)->y = (*it).y  - *currentSize * v2.y;
+								((CVectorPacked *) ptPos)->z = (*it).z  - *currentSize * v2.z;
 								ptPos += stride;

 								++it;
@ -498,27 +499,27 @@ public:
 							while (it != endIt)
 							{
 								CHECK_VERTEX_BUFFER(vb, ptPos);
-								((CVector *) ptPos)->x = (*it).x  + myV1.x;
-								((CVector *) ptPos)->y = (*it).y  + myV1.y;
-								((CVector *) ptPos)->z = (*it).z  + myV1.z;
+								((CVectorPacked *) ptPos)->x = (*it).x  + myV1.x;
+								((CVectorPacked *) ptPos)->y = (*it).y  + myV1.y;
+								((CVectorPacked *) ptPos)->z = (*it).z  + myV1.z;
 								ptPos += stride;

 								CHECK_VERTEX_BUFFER(vb, ptPos);
-								((CVector *) ptPos)->x = (*it).x  + myV2.x;
-								((CVector *) ptPos)->y = (*it).y  + myV2.y;
-								((CVector *) ptPos)->z = (*it).z  + myV2.z;
+								((CVectorPacked *) ptPos)->x = (*it).x  + myV2.x;
+								((CVectorPacked *) ptPos)->y = (*it).y  + myV2.y;
+								((CVectorPacked *) ptPos)->z = (*it).z  + myV2.z;
 								ptPos += stride;

 								CHECK_VERTEX_BUFFER(vb, ptPos);
-								((CVector *) ptPos)->x = (*it).x  - myV1.x;
-								((CVector *) ptPos)->y = (*it).y  - myV1.y;
-								((CVector *) ptPos)->z = (*it).z  - myV1.z;
+								((CVectorPacked *) ptPos)->x = (*it).x  - myV1.x;
+								((CVectorPacked *) ptPos)->y = (*it).y  - myV1.y;
+								((CVectorPacked *) ptPos)->z = (*it).z  - myV1.z;
 								ptPos += stride;

 								CHECK_VERTEX_BUFFER(vb, ptPos);
-								((CVector *) ptPos)->x = (*it).x  - myV2.x;
-								((CVector *) ptPos)->y = (*it).y  - myV2.y;
-								((CVector *) ptPos)->z = (*it).z  - myV2.z;
+								((CVectorPacked *) ptPos)->x = (*it).x  - myV2.x;
+								((CVectorPacked *) ptPos)->y = (*it).y  - myV2.y;
+								((CVectorPacked *) ptPos)->z = (*it).z  - myV2.z;
 								ptPos += stride;
 								++it;
 							}
@ -548,27 +549,27 @@ public:
 						while (it != endIt)
 						{
 							CHECK_VERTEX_BUFFER(vb, ptPos);
-							((CVector *) ptPos)->x = (*it).x  - *currentSize * v1.x + *currentSize2 * v2.x;
-							((CVector *) ptPos)->y = (*it).y  - *currentSize * v1.y + *currentSize2 * v2.y;
-							((CVector *) ptPos)->z = (*it).z  - *currentSize * v1.z + *currentSize2 * v2.z;
+							((CVectorPacked *) ptPos)->x = (*it).x  - *currentSize * v1.x + *currentSize2 * v2.x;
+							((CVectorPacked *) ptPos)->y = (*it).y  - *currentSize * v1.y + *currentSize2 * v2.y;
+							((CVectorPacked *) ptPos)->z = (*it).z  - *currentSize * v1.z + *currentSize2 * v2.z;
 							ptPos += stride;

 							CHECK_VERTEX_BUFFER(vb, ptPos);
-							((CVector *) ptPos)->x = (*it).x  + *currentSize * v1.x + *currentSize2 * v2.x;
-							((CVector *) ptPos)->y = (*it).y  + *currentSize * v1.y + *currentSize2 * v2.y;
-							((CVector *) ptPos)->z = (*it).z  + *currentSize * v1.z + *currentSize2 * v2.z;
+							((CVectorPacked *) ptPos)->x = (*it).x  + *currentSize * v1.x + *currentSize2 * v2.x;
+							((CVectorPacked *) ptPos)->y = (*it).y  + *currentSize * v1.y + *currentSize2 * v2.y;
+							((CVectorPacked *) ptPos)->z = (*it).z  + *currentSize * v1.z + *currentSize2 * v2.z;
 							ptPos += stride;

 							CHECK_VERTEX_BUFFER(vb, ptPos);
-							((CVector *) ptPos)->x = (*it).x  + *currentSize * v1.x - *currentSize2 * v2.x;
-							((CVector *) ptPos)->y = (*it).y  + *currentSize * v1.y - *currentSize2 * v2.y;
-							((CVector *) ptPos)->z = (*it).z  + *currentSize * v1.z - *currentSize2 * v2.z;
+							((CVectorPacked *) ptPos)->x = (*it).x  + *currentSize * v1.x - *currentSize2 * v2.x;
+							((CVectorPacked *) ptPos)->y = (*it).y  + *currentSize * v1.y - *currentSize2 * v2.y;
+							((CVectorPacked *) ptPos)->z = (*it).z  + *currentSize * v1.z - *currentSize2 * v2.z;
 							ptPos += stride;

 							CHECK_VERTEX_BUFFER(vb, ptPos);
-							((CVector *) ptPos)->x = (*it).x  - *currentSize * v1.x - *currentSize2 * v2.x;
-							((CVector *) ptPos)->y = (*it).y  - *currentSize * v1.y - *currentSize2 * v2.y;
-							((CVector *) ptPos)->z = (*it).z  - *currentSize * v1.z - *currentSize2 * v2.z;
+							((CVectorPacked *) ptPos)->x = (*it).x  - *currentSize * v1.x - *currentSize2 * v2.x;
+							((CVectorPacked *) ptPos)->y = (*it).y  - *currentSize * v1.y - *currentSize2 * v2.y;
+							((CVectorPacked *) ptPos)->z = (*it).z  - *currentSize * v1.z - *currentSize2 * v2.z;
 							ptPos += stride;
 							++it;
 							currentSize += currentSizeStep;
@ -637,81 +638,81 @@ public:
 								mbv12 = -*currentSize * mbv1n;
 								mbv1 *= *currentSize * (1 + la._MotionBlurCoeff * n * n) / n;

-								*(CVector *) ptPos = *it - mbv2;
-								*(CVector *) (ptPos + stride) = *it  + mbv1;
-								*(CVector *) (ptPos + stride2) = *it + mbv2;
-								*(CVector *) (ptPos + stride3) = *it + mbv12;
+								*(CVectorPacked *) ptPos = *it - mbv2;
+								*(CVectorPacked *) (ptPos + stride) = *it  + mbv1;
+								*(CVectorPacked *) (ptPos + stride2) = *it + mbv2;
+								*(CVectorPacked *) (ptPos + stride3) = *it + mbv12;


 								CHECK_VERTEX_BUFFER(vb, ptPos);
-								((CVector *) ptPos)->x = (*it).x  - mbv2.x;
-								((CVector *) ptPos)->y = (*it).y  - mbv2.y;
-								((CVector *) ptPos)->z = (*it).z  - mbv2.z;
+								((CVectorPacked *) ptPos)->x = (*it).x  - mbv2.x;
+								((CVectorPacked *) ptPos)->y = (*it).y  - mbv2.y;
+								((CVectorPacked *) ptPos)->z = (*it).z  - mbv2.z;

 								CHECK_VERTEX_BUFFER(vb, ptPos + stride);
-								((CVector *) (ptPos + stride))->x = (*it).x  + mbv1.x;
-								((CVector *) (ptPos + stride))->y = (*it).y  + mbv1.y;
-								((CVector *) (ptPos + stride))->z = (*it).z  + mbv1.z;
+								((CVectorPacked *) (ptPos + stride))->x = (*it).x  + mbv1.x;
+								((CVectorPacked *) (ptPos + stride))->y = (*it).y  + mbv1.y;
+								((CVectorPacked *) (ptPos + stride))->z = (*it).z  + mbv1.z;

 								CHECK_VERTEX_BUFFER(vb, ptPos + stride2);
-								((CVector *) (ptPos + stride2))->x = (*it).x  + mbv2.x;
-								((CVector *) (ptPos + stride2))->y = (*it).y  + mbv2.y;
-								((CVector *) (ptPos + stride2))->z = (*it).z  + mbv2.z;
+								((CVectorPacked *) (ptPos + stride2))->x = (*it).x  + mbv2.x;
+								((CVectorPacked *) (ptPos + stride2))->y = (*it).y  + mbv2.y;
+								((CVectorPacked *) (ptPos + stride2))->z = (*it).z  + mbv2.z;


 								CHECK_VERTEX_BUFFER(vb, ptPos + stride3);
-								((CVector *) (ptPos + stride3))->x = (*it).x  + mbv12.x;
-								((CVector *) (ptPos + stride3))->y = (*it).y  + mbv12.y;
-								((CVector *) (ptPos + stride3))->z = (*it).z  + mbv12.z;
+								((CVectorPacked *) (ptPos + stride3))->x = (*it).x  + mbv12.x;
+								((CVectorPacked *) (ptPos + stride3))->y = (*it).y  + mbv12.y;
+								((CVectorPacked *) (ptPos + stride3))->z = (*it).z  + mbv12.z;

 							}
 							else // speed too small, we must avoid imprecision
 							{
 								CHECK_VERTEX_BUFFER(vb, ptPos);
-								((CVector *) ptPos)->x = (*it).x  - *currentSize * v2.x;
-								((CVector *) ptPos)->y = (*it).y  - *currentSize * v2.y;
-								((CVector *) ptPos)->z = (*it).z  - *currentSize * v2.z;
+								((CVectorPacked *) ptPos)->x = (*it).x  - *currentSize * v2.x;
+								((CVectorPacked *) ptPos)->y = (*it).y  - *currentSize * v2.y;
+								((CVectorPacked *) ptPos)->z = (*it).z  - *currentSize * v2.z;

 								CHECK_VERTEX_BUFFER(vb, ptPos + stride);
-								((CVector *) (ptPos + stride))->x = (*it).x  + *currentSize * v1.x;
-								((CVector *) (ptPos + stride))->y = (*it).y  + *currentSize * v1.y;
-								((CVector *) (ptPos + stride))->z = (*it).z  + *currentSize * v1.z;
+								((CVectorPacked *) (ptPos + stride))->x = (*it).x  + *currentSize * v1.x;
+								((CVectorPacked *) (ptPos + stride))->y = (*it).y  + *currentSize * v1.y;
+								((CVectorPacked *) (ptPos + stride))->z = (*it).z  + *currentSize * v1.z;

 								CHECK_VERTEX_BUFFER(vb, ptPos + stride2);
-								((CVector *) (ptPos + stride2))->x = (*it).x  + *currentSize * v2.x;
-								((CVector *) (ptPos + stride2))->y = (*it).y  + *currentSize * v2.y;
-								((CVector *) (ptPos + stride2))->z = (*it).z  + *currentSize * v2.z;
+								((CVectorPacked *) (ptPos + stride2))->x = (*it).x  + *currentSize * v2.x;
+								((CVectorPacked *) (ptPos + stride2))->y = (*it).y  + *currentSize * v2.y;
+								((CVectorPacked *) (ptPos + stride2))->z = (*it).z  + *currentSize * v2.z;


 								CHECK_VERTEX_BUFFER(vb, ptPos + stride3);
-								((CVector *) (ptPos + stride3))->x = (*it).x  - *currentSize * v1.x;
-								((CVector *) (ptPos + stride3))->y = (*it).y  - *currentSize * v1.y;
-								((CVector *) (ptPos + stride3))->z = (*it).z  - *currentSize * v1.z;
+								((CVectorPacked *) (ptPos + stride3))->x = (*it).x  - *currentSize * v1.x;
+								((CVectorPacked *) (ptPos + stride3))->y = (*it).y  - *currentSize * v1.y;
+								((CVectorPacked *) (ptPos + stride3))->z = (*it).z  - *currentSize * v1.z;
 							}
 						}
 						else
 						{

 							CHECK_VERTEX_BUFFER(vb, ptPos);
-							((CVector *) ptPos)->x = (*it).x  - *currentSize * v2.x;
-							((CVector *) ptPos)->y = (*it).y  - *currentSize * v2.y;
-							((CVector *) ptPos)->z = (*it).z  - *currentSize * v2.z;
+							((CVectorPacked *) ptPos)->x = (*it).x  - *currentSize * v2.x;
+							((CVectorPacked *) ptPos)->y = (*it).y  - *currentSize * v2.y;
+							((CVectorPacked *) ptPos)->z = (*it).z  - *currentSize * v2.z;

 							CHECK_VERTEX_BUFFER(vb, ptPos + stride);
-							((CVector *) (ptPos + stride))->x = (*it).x  + *currentSize * v1.x;
-							((CVector *) (ptPos + stride))->y = (*it).y  + *currentSize * v1.y;
-							((CVector *) (ptPos + stride))->z = (*it).z  + *currentSize * v1.z;
+							((CVectorPacked *) (ptPos + stride))->x = (*it).x  + *currentSize * v1.x;
+							((CVectorPacked *) (ptPos + stride))->y = (*it).y  + *currentSize * v1.y;
+							((CVectorPacked *) (ptPos + stride))->z = (*it).z  + *currentSize * v1.z;

 							CHECK_VERTEX_BUFFER(vb, ptPos + stride2);
-							((CVector *) (ptPos + stride2))->x = (*it).x  + *currentSize * v2.x;
-							((CVector *) (ptPos + stride2))->y = (*it).y  + *currentSize * v2.y;
-							((CVector *) (ptPos + stride2))->z = (*it).z  + *currentSize * v2.z;
+							((CVectorPacked *) (ptPos + stride2))->x = (*it).x  + *currentSize * v2.x;
+							((CVectorPacked *) (ptPos + stride2))->y = (*it).y  + *currentSize * v2.y;
+							((CVectorPacked *) (ptPos + stride2))->z = (*it).z  + *currentSize * v2.z;


 							CHECK_VERTEX_BUFFER(vb, ptPos + stride3);
-							((CVector *) (ptPos + stride3))->x = (*it).x  - *currentSize * v1.x;
-							((CVector *) (ptPos + stride3))->y = (*it).y  - *currentSize * v1.y;
-							((CVector *) (ptPos + stride3))->z = (*it).z  - *currentSize * v1.z;
+							((CVectorPacked *) (ptPos + stride3))->x = (*it).x  - *currentSize * v1.x;
+							((CVectorPacked *) (ptPos + stride3))->y = (*it).y  - *currentSize * v1.y;
+							((CVectorPacked *) (ptPos + stride3))->z = (*it).z  - *currentSize * v1.z;
 						}

 						ptPos += stride4;
@ -793,30 +794,30 @@ public:
 						CHECK_VERTEX_BUFFER(vb, ptPos + stride2);
 						CHECK_VERTEX_BUFFER(vb, ptPos + stride3);

-						((CVector *) ptPos)->x  = (*it).x  + v1.x;
-						((CVector *) ptPos)->y  = (*it).y  + v1.y;
-						((CVector *) ptPos)->z = (*it).z  + v1.z;
-						//nlinfo("** %f, %f, %f", ((CVector *) ptPos)->x, ((CVector *) ptPos)->y, ((CVector *) ptPos)->z);
+						((CVectorPacked *) ptPos)->x  = (*it).x  + v1.x;
+						((CVectorPacked *) ptPos)->y  = (*it).y  + v1.y;
+						((CVectorPacked *) ptPos)->z = (*it).z  + v1.z;
+						//nlinfo("** %f, %f, %f", ((CVectorPacked *) ptPos)->x, ((CVectorPacked *) ptPos)->y, ((CVectorPacked *) ptPos)->z);
 						ptPos += stride;



-						((CVector *) ptPos)->x  = (*it).x  + v2.x;
-						((CVector *) ptPos)->y  = (*it).y  + v2.y;
-						((CVector *) ptPos)->z = (*it).z  + v2.z;
-						//nlinfo("%f, %f, %f", ((CVector *) ptPos)->x, ((CVector *) ptPos)->y, ((CVector *) ptPos)->z);
+						((CVectorPacked *) ptPos)->x  = (*it).x  + v2.x;
+						((CVectorPacked *) ptPos)->y  = (*it).y  + v2.y;
+						((CVectorPacked *) ptPos)->z = (*it).z  + v2.z;
+						//nlinfo("%f, %f, %f", ((CVectorPacked *) ptPos)->x, ((CVectorPacked *) ptPos)->y, ((CVectorPacked *) ptPos)->z);
 						ptPos += stride;

-						((CVector *) ptPos)->x  = (*it).x  - v1.x;
-						((CVector *) ptPos)->y  = (*it).y  - v1.y;
-						((CVector *) ptPos)->z = (*it).z  - v1.z;
-						//nlinfo("%f, %f, %f", ((CVector *) ptPos)->x, ((CVector *) ptPos)->y, ((CVector *) ptPos)->z);
+						((CVectorPacked *) ptPos)->x  = (*it).x  - v1.x;
+						((CVectorPacked *) ptPos)->y  = (*it).y  - v1.y;
+						((CVectorPacked *) ptPos)->z = (*it).z  - v1.z;
+						//nlinfo("%f, %f, %f", ((CVectorPacked *) ptPos)->x, ((CVectorPacked *) ptPos)->y, ((CVectorPacked *) ptPos)->z);
 						ptPos += stride;

-						((CVector *) ptPos)->x  = (*it).x  - v2.x;
-						((CVector *) ptPos)->y  = (*it).y  - v2.y;
-						((CVector *) ptPos)->z = (*it).z  - v2.z;
-						//nlinfo("%f, %f, %f", ((CVector *) ptPos)->x, ((CVector *) ptPos)->y, ((CVector *) ptPos)->z);
+						((CVectorPacked *) ptPos)->x  = (*it).x  - v2.x;
+						((CVectorPacked *) ptPos)->y  = (*it).y  - v2.y;
+						((CVectorPacked *) ptPos)->z = (*it).z  - v2.z;
+						//nlinfo("%f, %f, %f", ((CVectorPacked *) ptPos)->x, ((CVectorPacked *) ptPos)->y, ((CVectorPacked *) ptPos)->z);
 						ptPos += stride;

 						++it;
@ -851,27 +852,27 @@ public:
 						v2 = - sinAngle * I + cosAngle * K;

 						CHECK_VERTEX_BUFFER(vb, ptPos);
-						((CVector *) ptPos)->x = (*it).x  - *currentSize * v1.x + *currentSize2 * v2.x;
-						((CVector *) ptPos)->y = (*it).y  - *currentSize * v1.y + *currentSize2 * v2.y;
-						((CVector *) ptPos)->z = (*it).z  - *currentSize * v1.z + *currentSize2 * v2.z;
+						((CVectorPacked *) ptPos)->x = (*it).x  - *currentSize * v1.x + *currentSize2 * v2.x;
+						((CVectorPacked *) ptPos)->y = (*it).y  - *currentSize * v1.y + *currentSize2 * v2.y;
+						((CVectorPacked *) ptPos)->z = (*it).z  - *currentSize * v1.z + *currentSize2 * v2.z;
 						ptPos += stride;

 						CHECK_VERTEX_BUFFER(vb, ptPos);
-						((CVector *) ptPos)->x = (*it).x  + *currentSize * v1.x + *currentSize2 * v2.x;
-						((CVector *) ptPos)->y = (*it).y  + *currentSize * v1.y + *currentSize2 * v2.y;
-						((CVector *) ptPos)->z = (*it).z  + *currentSize * v1.z + *currentSize2 * v2.z;
+						((CVectorPacked *) ptPos)->x = (*it).x  + *currentSize * v1.x + *currentSize2 * v2.x;
+						((CVectorPacked *) ptPos)->y = (*it).y  + *currentSize * v1.y + *currentSize2 * v2.y;
+						((CVectorPacked *) ptPos)->z = (*it).z  + *currentSize * v1.z + *currentSize2 * v2.z;
 						ptPos += stride;

 						CHECK_VERTEX_BUFFER(vb, ptPos);
-						((CVector *) ptPos)->x = (*it).x  + *currentSize * v1.x - *currentSize2 * v2.x;
-						((CVector *) ptPos)->y = (*it).y  + *currentSize * v1.y - *currentSize2 * v2.y;
-						((CVector *) ptPos)->z = (*it).z  + *currentSize * v1.z - *currentSize2 * v2.z;
+						((CVectorPacked *) ptPos)->x = (*it).x  + *currentSize * v1.x - *currentSize2 * v2.x;
+						((CVectorPacked *) ptPos)->y = (*it).y  + *currentSize * v1.y - *currentSize2 * v2.y;
+						((CVectorPacked *) ptPos)->z = (*it).z  + *currentSize * v1.z - *currentSize2 * v2.z;
 						ptPos += stride;

 						CHECK_VERTEX_BUFFER(vb, ptPos);
-						((CVector *) ptPos)->x = (*it).x  - *currentSize * v1.x - *currentSize2 * v2.x;
-						((CVector *) ptPos)->y = (*it).y  - *currentSize * v1.y - *currentSize2 * v2.y;
-						((CVector *) ptPos)->z = (*it).z  - *currentSize * v1.z - *currentSize2 * v2.z;
+						((CVectorPacked *) ptPos)->x = (*it).x  - *currentSize * v1.x - *currentSize2 * v2.x;
+						((CVectorPacked *) ptPos)->y = (*it).y  - *currentSize * v1.y - *currentSize2 * v2.y;
+						((CVectorPacked *) ptPos)->z = (*it).z  - *currentSize * v1.z - *currentSize2 * v2.z;
 						ptPos += stride;
 						++it;
 						++currentAngle;
--- a/code/nel/src/3d/ps_fan_light.cpp
+++ b/code/nel/src/3d/ps_fan_light.cpp
@ -23,6 +23,7 @@
 #include "nel/3d/particle_system.h"
 #include "nel/3d/driver.h"

+using NLMISC::CVectorPacked;


 namespace NL3D
@ -154,7 +155,7 @@ public:
 				{

 					CHECK_VERTEX_BUFFER(*vb, ptVect);
-					*(CVector *) ptVect = *posIt;
+					*(CVectorPacked *) ptVect = *posIt;
 					// the start angle
 					currentAngle = *currentAnglePt;
 					const uint8 phaseAdd = (uint8) (f._PhaseSpeed * (*timeIt));
@ -163,7 +164,7 @@ public:
 					const float moveIntensity = f._MoveIntensity * fanSize;
 					// compute radius & vect for first fan
 					firstSize  = fanSize + (moveIntensity * CPSUtil::getCos(randomPhaseTab[0] + phaseAdd));
-					*(CVector *) ptVect = (*posIt) + I * firstSize * (CPSUtil::getCos((sint32) currentAngle))
+					*(CVectorPacked *) ptVect = (*posIt) + I * firstSize * (CPSUtil::getCos((sint32) currentAngle))
 										  + K * firstSize * (CPSUtil::getSin((sint32) currentAngle));
 					currentAngle += angleStep;
 					ptVect += stride;
@ -173,7 +174,7 @@ public:
 					for (k = 1; k <= upperBound; ++k)
 					{
 						fSize  = fanSize + (moveIntensity * CPSUtil::getCos(randomPhaseTab[k] + phaseAdd));
-						*(CVector *) ptVect = (*posIt) + I * fSize * (CPSUtil::getCos((sint32) currentAngle))
+						*(CVectorPacked *) ptVect = (*posIt) + I * fSize * (CPSUtil::getCos((sint32) currentAngle))
 											  + K * fSize * (CPSUtil::getSin((sint32) currentAngle));
 						currentAngle += angleStep;
 						ptVect += stride;
@ -183,14 +184,14 @@ public:
 					sizeStep = sizeStepBase * (firstSize - fSize);
 					for (; k <= (sint32) (f._NbFans - 1); ++k)
 					{
-						*(CVector *) ptVect = (*posIt) + I * fSize * (CPSUtil::getCos((sint32) currentAngle))
+						*(CVectorPacked *) ptVect = (*posIt) + I * fSize * (CPSUtil::getCos((sint32) currentAngle))
 											  + K * fSize * (CPSUtil::getSin((sint32) currentAngle));
 						currentAngle += angleStep;
 						ptVect += stride;
 						fSize  += sizeStep;
 					}
 					// last fan
-					*(CVector *) ptVect = (*posIt) + I * firstSize * (CPSUtil::getCos((sint32) *currentAnglePt))
+					*(CVectorPacked *) ptVect = (*posIt) + I * firstSize * (CPSUtil::getCos((sint32) *currentAnglePt))
 											  + K * firstSize * (CPSUtil::getSin((sint32) *currentAnglePt));
 					ptVect += stride;
 					currentSizePt += currentSizePtIncrement;
--- a/code/nel/src/3d/ps_force.cpp
+++ b/code/nel/src/3d/ps_force.cpp
@ -600,9 +600,9 @@ void CPSGravity::integrate(float date, CPSLocated *src, uint32 startIndex, uint3

 void CPSGravity::integrateSingle(float startDate, float deltaT, uint numStep,
 								 const CPSLocated *src, uint32 indexInLocated,
-								 NLMISC::CVector *destPos,
+								 NLMISC::CVectorPacked *destPos,
 								 bool accumulate /*= false*/,
-								 uint stride/* = sizeof(NLMISC::CVector)*/) const
+								 uint stride/* = sizeof(NLMISC::CVectorPacked)*/) const
 {
 	NL_PS_FUNC(CPSGravity_CVector )
 	nlassert(src->isParametricMotionEnabled());
@ -633,7 +633,7 @@ void CPSGravity::integrateSingle(float startDate, float deltaT, uint numStep,
 					destPos->y = startPos.y + currDate * startSpeed.y;
 					destPos->z = startPos.z + currDate * startSpeed.z - _K * halfTimeSquare;
 					currDate += deltaT;
-					destPos = (NLMISC::CVector *) ( (uint8 *) destPos + stride);
+					destPos = (NLMISC::CVectorPacked *) ( (uint8 *) destPos + stride);
 				}
 				while (--numStep);
 			}
@ -653,7 +653,7 @@ void CPSGravity::integrateSingle(float startDate, float deltaT, uint numStep,
 					float halfTimeSquare  = 0.5f * currDate * currDate;
 					destPos->z -=  _K * halfTimeSquare;
 					currDate += deltaT;
-					destPos = (NLMISC::CVector *) ( (uint8 *) destPos + stride);
+					destPos = (NLMISC::CVectorPacked *) ( (uint8 *) destPos + stride);
 				}
 				while (--numStep);
 			}
@ -860,8 +860,8 @@ void CPSCylindricVortex::computeForces(CPSLocated &target)
 					p *= 1.f / d;
 					// compute the speed vect that we should have (normalized)
 					realTangentialSpeed = n ^ p;
-					tangentialSpeed = (*speedIt * realTangentialSpeed) * realTangentialSpeed;
-					radialSpeed =  (p * *speedIt) * p;
+					tangentialSpeed = (CVector(*speedIt) * realTangentialSpeed) * realTangentialSpeed;
+					radialSpeed =  (p * CVector(*speedIt)) * p;
 					// update radial speed;
 					*speedIt -= _RadialViscosity * CParticleSystem::EllapsedTime * radialSpeed;
 					// update tangential speed
@ -979,7 +979,7 @@ void CPSMagneticForce::computeForces(CPSLocated &target)
 			TPSAttribFloat::const_iterator invMassIt = target.getInvMass().begin();
 			for (; it != itend; ++it, ++invMassIt)
 			{
-				(*it) += intensity * *invMassIt * (*it ^ toAdd);
+				(*it) += intensity * *invMassIt * (CVector(*it) ^ toAdd);
 			}
 		}
 		else
@ -987,7 +987,7 @@ void CPSMagneticForce::computeForces(CPSLocated &target)
 			float i = intensity / target.getInitialMass();
 			for (; it != itend; ++it)
 			{
-				(*it) += i * (*it ^ toAdd);
+				(*it) += i * (CVector(*it) ^ toAdd);
 			}
 		}
 	}
@ -1144,7 +1144,7 @@ void CPSBrownianForce::integrate(float date, CPSLocated *src,
 ///==========================================================
 void CPSBrownianForce::integrateSingle(float startDate, float deltaT, uint numStep,
 								 const CPSLocated *src, uint32 indexInLocated,
-								 NLMISC::CVector *destPos,
+								 NLMISC::CVectorPacked *destPos,
 								 bool accumulate,
 								 uint stride) const
 {
@ -1179,7 +1179,7 @@ void CPSBrownianForce::integrateSingle(float startDate, float deltaT, uint numSt
 					destPos->y = startPos.y + currDate * startSpeed.y + _K * PrecomputedPos[index].y;
 					destPos->z = startPos.z + currDate * startSpeed.z + _K * PrecomputedPos[index].z;
 					currDate += deltaT;
-					destPos = (NLMISC::CVector *) ( (uint8 *) destPos + stride);
+					destPos = (NLMISC::CVectorPacked *) ( (uint8 *) destPos + stride);
 				}
 				while (--numStep);
 			}
@ -1201,7 +1201,7 @@ void CPSBrownianForce::integrateSingle(float startDate, float deltaT, uint numSt
 					destPos->y += _K * PrecomputedPos[index].y;
 					destPos->z += _K * PrecomputedPos[index].z;
 					currDate += deltaT;
-					destPos = (NLMISC::CVector *) ( (uint8 *) destPos + stride);
+					destPos = (NLMISC::CVectorPacked *) ( (uint8 *) destPos + stride);
 				}
 				while (--numStep);
 			}
--- a/code/nel/src/3d/ps_located.cpp
+++ b/code/nel/src/3d/ps_located.cpp
@ -257,7 +257,7 @@ void CPSLocated::notifyMotionTypeChanged(void)
 /// ***************************************************************************************
 void CPSLocated::integrateSingle(float startDate, float deltaT, uint numStep,
 								uint32 indexInLocated,
-								NLMISC::CVector *destPos,
+								NLMISC::CVectorPacked *destPos,
 								uint stride) const
 {
 	NL_PS_FUNC(CPSLocated_integrateSingle)
@ -293,7 +293,7 @@ void CPSLocated::integrateSingle(float startDate, float deltaT, uint numStep,
 					destPos->y = pi.Pos.y + currDate * pi.Speed.y;
 					destPos->z = pi.Pos.z + currDate * pi.Speed.z;
 					currDate += deltaT;
-					destPos = (NLMISC::CVector *) ( (uint8 *) destPos + stride);
+					destPos = (NLMISC::CVectorPacked *) ( (uint8 *) destPos + stride);
 				}
 				while (--numStep);
 			}
@ -502,6 +502,18 @@ bool CPSLocated::hasEmitters(void) const
 	return false;
 }

+/// ***************************************************************************************
+void CPSLocated::getLODVect(NLMISC::CVectorPacked &v, float &offset, TPSMatrixMode matrixMode) // CVectorPacked overload: forwards to _Owner->getLODVect() through a temporary CVector
+{
+	NL_PS_FUNC(CPSLocated_getLODVect)
+	nlassert(_Owner);
+	CHECK_PS_INTEGRITY
+	CVector temp; // scratch CVector handed to the owner's getLODVect() below
+	_Owner->getLODVect(temp, offset, matrixMode); // forwards offset and matrixMode unchanged
+	v = temp; // store the owner's result into the caller's packed vector
+	CHECK_PS_INTEGRITY
+}
+
 /// ***************************************************************************************
 void CPSLocated::getLODVect(NLMISC::CVector &v, float &offset, TPSMatrixMode matrixMode)
 {
@ -1866,7 +1878,7 @@ void CPSLocated::updateCollisions()
 			if (_Time[currCollision->Index] >= 1.f)
 			{
 				// check whether particles died before the collision. If so, just continue (particle has already been inserted in the remove list), and cancel the collision
-				float timeToCollision = currCollision->Dist / _Speed[currCollision->Index].norm();
+				float timeToCollision = currCollision->Dist / CVector(_Speed[currCollision->Index]).norm();
 				if (_Time[currCollision->Index] / _TimeIncrement[currCollision->Index] - timeToCollision * CParticleSystem::RealEllapsedTimeRatio >= 1.f)
 				{
 					// says that collision did not occurs
@ -2196,12 +2208,12 @@ void CPSLocated::removeOldParticles()

 					if (_LifeScheme)
 					{
-						_Pos[*it] -= _Speed[*it] * ((_Time[*it] - 1.f) / _TimeIncrement[*it]) * ellapsedTimeRatio;
+						_Pos[*it] -= CVector(_Speed[*it]) * ((_Time[*it] - 1.f) / _TimeIncrement[*it]) * ellapsedTimeRatio;
 						timeUntilNextSimStep = (_Time[*it] - 1.f) / _TimeIncrement[*it];
 					}
 					else
 					{
-						_Pos[*it] -= _Speed[*it] * ((_Time[*it] - 1.f) * _InitialLife) * ellapsedTimeRatio;
+						_Pos[*it] -= CVector(_Speed[*it]) * ((_Time[*it] - 1.f) * _InitialLife) * ellapsedTimeRatio;
 						timeUntilNextSimStep = (_Time[*it] - 1.f) * _InitialLife;
 					}
 					_Time[*it] = 0.9999f;
@ -2255,7 +2267,7 @@ void CPSLocated::removeOldParticles()
 					{
 						// move position backward (compute its position at death)
 						timeUntilNextSimStep = ((_Time[*it] - 1.f) / _TimeIncrement[*it]) * ellapsedTimeRatio;
-						_Pos[*it] -= _Speed[*it] * timeUntilNextSimStep;
+						_Pos[*it] -= CVector(_Speed[*it]) * timeUntilNextSimStep;

 						// force time to 1 because emitter 'on death' may rely on the date of emitter to compute its attributes
 						_Time[*it] = 0.9999f;
@ -2283,7 +2295,7 @@ void CPSLocated::removeOldParticles()
 					{
 						// move position backward
 						timeUntilNextSimStep = (_Time[*it] - 1.f) * _InitialLife * ellapsedTimeRatio;
-						_Pos[*it] -= _Speed[*it] * timeUntilNextSimStep;
+						_Pos[*it] -= CVector(_Speed[*it]) * timeUntilNextSimStep;
 						// force time to 1 because emitter 'on death' may rely on the date of emitter to compute its attributes
 						_Time[*it] = 0.9999f;
 					}
@ -3038,7 +3050,7 @@ void CPSLocated::setZBias(float value)
 }

 /// ***************************************************************************************
-void CPSLocated::computeCollisions(uint firstInstanceIndex, const NLMISC::CVector *posBefore, const NLMISC::CVector *posAfter)
+void CPSLocated::computeCollisions(uint firstInstanceIndex, const NLMISC::CVectorPacked *posBefore, const NLMISC::CVectorPacked *posAfter)
 {
 	NL_PS_FUNC(CPSLocated_computeCollisions)
 	for(TDtorObserversVect::iterator it = _DtorObserversVect.begin(); it != _DtorObserversVect.end(); ++it)
--- a/code/nel/src/3d/ps_mesh.cpp
+++ b/code/nel/src/3d/ps_mesh.cpp
@ -404,8 +404,8 @@ void CPSMesh::updatePos()



-			mat.setRot( ptBasis->X * CPSUtil::getCos((sint32) *ptCurrAngle) + ptBasis->Y * CPSUtil::getSin((sint32) *ptCurrAngle)
-						, ptBasis->X * CPSUtil::getCos((sint32) *ptCurrAngle + 64) + ptBasis->Y * CPSUtil::getSin((sint32) *ptCurrAngle + 64)
+			mat.setRot( CVector(ptBasis->X) * CPSUtil::getCos((sint32) *ptCurrAngle) + CVector(ptBasis->Y) * CPSUtil::getSin((sint32) *ptCurrAngle)
+						, CVector(ptBasis->X) * CPSUtil::getCos((sint32) *ptCurrAngle + 64) + CVector(ptBasis->Y) * CPSUtil::getSin((sint32) *ptCurrAngle + 64)
 						, ptBasis->X ^ ptBasis->Y
 					  );

@ -660,9 +660,9 @@ public:
 								CHECK_VERTEX_BUFFER(outVb,	  outVertex + outNormalOff);

 								// translate and resize the vertex (relatively to the mesh origin)
-								*(CVector *) outVertex = *posIt + sM * *(CVector *) inVertex;
+								*(CVectorPacked *) outVertex = *posIt + sM * CVector(*(CVectorPacked *) inVertex);
 								// copy the normal
-								*(CVector *) (outVertex + outNormalOff) = M * *(CVector *) (inVertex + inNormalOff);
+								*(CVectorPacked *) (outVertex + outNormalOff) = M * CVector(*(CVectorPacked *) (inVertex + inNormalOff));


 								inVertex  += inVSize;
@ -683,7 +683,7 @@ public:
 								CHECK_VERTEX_BUFFER(outVb, outVertex);

 								// translate and resize the vertex (relatively to the mesh origin)
-								*(CVector *) outVertex = *posIt + sM * *(CVector *) inVertex;
+								*(CVectorPacked *) outVertex = *posIt + sM * CVector(*(CVectorPacked *) inVertex);

 								inVertex  += inVSize;
 								outVertex += outVSize;
@ -774,10 +774,10 @@ public:
 								CHECK_VERTEX_BUFFER(outVb,	  outVertex + outNormalOff);

 								// morph, and transform the vertex
-								*(CVector *) outVertex = *posIt + sM * (opLambda * *(CVector *) m0 + lambda * *(CVector *) m1);
+								*(CVectorPacked *) outVertex = *posIt + sM * (opLambda * CVector(*(CVectorPacked *) m0) + lambda * CVector(*(CVectorPacked *) m1));
 								// morph, and transform the normal
-								*(CVector *) (outVertex + outNormalOff) = M * (opLambda * *(CVector *) (m0 + inNormalOff)
-																			  + lambda * *(CVector *) (m1 + inNormalOff)).normed();
+								*(CVectorPacked *) (outVertex + outNormalOff) = M * (opLambda * CVector(*(CVectorPacked *) (m0 + inNormalOff))
+																			  + lambda * CVector(*(CVectorPacked *) (m1 + inNormalOff))).normed();


 								m0  += inVSize;
@ -799,7 +799,7 @@ public:
 								CHECK_VERTEX_BUFFER((*inVB1),	  m1);
 								CHECK_VERTEX_BUFFER(outVb, outVertex);
 								// morph, and transform the vertex
-								*(CVector *) outVertex = *posIt + sM * (opLambda * *(CVector *) m0 + opLambda * *(CVector *) m1);
+								*(CVectorPacked *) outVertex = *posIt + sM * (opLambda * CVector(*(CVectorPacked *) m0) + opLambda * CVector(*(CVectorPacked *) m1));

 								m0  += inVSize;
 								m1  += inVSize;
@ -948,9 +948,9 @@ public:


 							// translate and resize the vertex (relatively to the mesh origin)
-							*(CVector *)  outVertex						 = *posIt + *ptCurrSize * *(CVector *) inVertex;
+							*(CVectorPacked *)  outVertex						 = *posIt + *ptCurrSize * CVector(*(CVectorPacked *) inVertex);
 							// copy the normal
-							*(CVector *)  (outVertex + normalOff ) = *(CVector *) (inVertex + pNormalOff);
+							*(CVectorPacked *)  (outVertex + normalOff ) = *(CVectorPacked *) (inVertex + pNormalOff);
 							inVertex  += inVSize;
 							outVertex += outVSize;
 						}
@ -963,7 +963,7 @@ public:
 							// translate and resize the vertex (relatively to the mesh origin)
 							CHECK_VERTEX_BUFFER(outVb, outVertex);
 							CHECK_VERTEX_BUFFER(prerotVb, inVertex);
-							*(CVector *)  outVertex = *posIt + *ptCurrSize * *(CVector *) inVertex;
+							*(CVectorPacked *)  outVertex = *posIt + *ptCurrSize * CVector(*(CVectorPacked *) inVertex);
 							inVertex  += inVSize;
 							outVertex += outVSize;
 						}
@ -1684,8 +1684,8 @@ CVertexBuffer &CPSConstraintMesh::makePrerotatedVb(const CVertexBuffer &inVb)
 				CHECK_VERTEX_BUFFER(prerotatedVb, outVertex);
 				CHECK_VERTEX_BUFFER(prerotatedVb, outVertex + pNormalOff);

-				* (CVector *) outVertex =  mat.mulVector(* (CVector *) inVertex);
-				* (CVector *) (outVertex + normalOff) =  mat.mulVector(* (CVector *) (inVertex + pNormalOff) );
+				* (CVectorPacked *) outVertex =  mat.mulVector(* (CVectorPacked *) inVertex);
+				* (CVectorPacked *) (outVertex + normalOff) =  mat.mulVector(* (CVectorPacked *) (inVertex + pNormalOff) );
 				outVertex += vpSize;
 				inVertex  += vSize;

@ -1701,7 +1701,7 @@ CVertexBuffer &CPSConstraintMesh::makePrerotatedVb(const CVertexBuffer &inVb)
 				CHECK_VERTEX_BUFFER(prerotatedVb, outVertex);
 				CHECK_VERTEX_BUFFER(inVb, inVertex);

-				* (CVector *) outVertex =  mat.mulVector(* (CVector *) inVertex);
+				* (CVectorPacked *) outVertex =  mat.mulVector(* (CVectorPacked *) inVertex);
 				outVertex += vpSize;
 				inVertex += vSize;
 			}
--- a/code/nel/src/3d/ps_plane_basis_maker.cpp
+++ b/code/nel/src/3d/ps_plane_basis_maker.cpp
@ -130,7 +130,7 @@ void *CPSPlaneBasisFollowSpeed::make(CPSLocated *loc,
 			case XY:
 				while (numAttrib --)
 				{
-					const CVector *speedVect = &(*(speedIt + (fpIndex >> 16)));
+					const NLMISC::CVectorPacked *speedVect = &(*(speedIt + (fpIndex >> 16)));
 					float norm = sqrtf(speedVect->x * speedVect->x + speedVect->y * speedVect->y);
 					float invNorm = (norm != 0.f) ? 1.f / norm : 0.f;
 					CPlaneBasis &pb = *(CPlaneBasis *) ptDat;
@ -143,7 +143,7 @@ void *CPSPlaneBasisFollowSpeed::make(CPSLocated *loc,
 			case XZ:
 				while (numAttrib --)
 				{
-					const CVector *speedVect = &(*(speedIt + (fpIndex >> 16)));
+					const NLMISC::CVectorPacked *speedVect = &(*(speedIt + (fpIndex >> 16)));
 					float norm = sqrtf(speedVect->x * speedVect->x + speedVect->z * speedVect->z);
 					float invNorm = (norm != 0.f) ? 1.f / norm : 0.f;
 					CPlaneBasis &pb = *(CPlaneBasis *) ptDat;
@ -156,7 +156,7 @@ void *CPSPlaneBasisFollowSpeed::make(CPSLocated *loc,
 			case YZ:
 				while (numAttrib --)
 				{
-					const CVector *speedVect = &(*(speedIt + (fpIndex >> 16)));
+					const NLMISC::CVectorPacked *speedVect = &(*(speedIt + (fpIndex >> 16)));
 					float norm = sqrtf(speedVect->y * speedVect->y + speedVect->z * speedVect->z);
 					float invNorm = (norm != 0.f) ? 1.f / norm : 0.f;
 					CPlaneBasis &pb = *(CPlaneBasis *) ptDat;
--- a/code/nel/src/3d/ps_ribbon.cpp
+++ b/code/nel/src/3d/ps_ribbon.cpp
@ -397,7 +397,7 @@ static inline uint8 *BuildRibbonFirstSlice(const NLMISC::CVector &pos,
 	NL_PS_FUNC(BuildRibbonFirstSlice)
 	do
 	{
-		* (NLMISC::CVector *) dest = pos;
+		* (NLMISC::CVectorPacked *) dest = pos;
 		dest += vertexSize;
 	}
 	while (--numVerts);
@ -409,7 +409,7 @@ static inline uint8 *BuildRibbonFirstSlice(const NLMISC::CVector &pos,
 // This compute one slice of a ribbon, and return the next vertex to be filled
 static inline uint8 *ComputeRibbonSliceFollowPath(const NLMISC::CVector &prev,
 									    const NLMISC::CVector &next,
-									    const NLMISC::CVector *shape,
+									    const NLMISC::CVectorPacked *shape,
 									    uint  numVerts,
 									    uint8 *dest,
 									    uint  vertexSize,
@ -430,10 +430,10 @@ static inline uint8 *ComputeRibbonSliceFollowPath(const NLMISC::CVector &prev,
 	}
 	basis.setPos(next);

-	const NLMISC::CVector *shapeEnd = shape + numVerts;
+	const NLMISC::CVectorPacked *shapeEnd = shape + numVerts;
 	do
 	{
-		*(NLMISC::CVector *) dest = basis * (size * (*shape));
+		*(NLMISC::CVectorPacked *) dest = basis * (size * CVector(*shape));
 		++shape;
 		dest += vertexSize;
 	}
@ -445,7 +445,7 @@ static inline uint8 *ComputeRibbonSliceFollowPath(const NLMISC::CVector &prev,
 // This compute one slice of a ribbon, and return the next vertex to be filled
 static inline uint8 *ComputeRibbonSliceIdentity(const NLMISC::CVector &prev,
 											   const NLMISC::CVector &next,
-											   const NLMISC::CVector *shape,
+											   const NLMISC::CVectorPacked *shape,
 											   uint  numVerts,
 											   uint8 *dest,
 											   uint  vertexSize,
@ -453,10 +453,10 @@ static inline uint8 *ComputeRibbonSliceIdentity(const NLMISC::CVector &prev,
 											  )
 {
 	NL_PS_FUNC(ComputeRibbonSliceIdentity)
-	const NLMISC::CVector *shapeEnd = shape + numVerts;
+	const NLMISC::CVectorPacked *shapeEnd = shape + numVerts;
 	do
 	{
-		((NLMISC::CVector *) dest)->set(size * shape->x + next.x,
+		((NLMISC::CVectorPacked *) dest)->set(size * shape->x + next.x,
 			                            size * shape->y + next.y,
 										size * shape->z + next.z);
 		++shape;
@ -469,7 +469,7 @@ static inline uint8 *ComputeRibbonSliceIdentity(const NLMISC::CVector &prev,
 ///=========================================================================
 static inline uint8 *ComputeRibbonSliceFollowPathXY(const NLMISC::CVector &prev,
 												  const NLMISC::CVector &next,
-												  const NLMISC::CVector *shape,
+												  const NLMISC::CVectorPacked *shape,
 												  uint  numVerts,
 												  uint8 *dest,
 												  uint  vertexSize,
@ -492,10 +492,10 @@ static inline uint8 *ComputeRibbonSliceFollowPathXY(const NLMISC::CVector &prev,
 		basis.setRot(I, CVector::K, J, true);
 	}
 	basis.setPos(next);
-	const NLMISC::CVector *shapeEnd = shape + numVerts;
+	const NLMISC::CVectorPacked *shapeEnd = shape + numVerts;
 	do
 	{
-		*(NLMISC::CVector *) dest = basis * (size * (*shape));
+		*(NLMISC::CVectorPacked *) dest = basis * (size * CVector(*shape));
 		++shape;
 		dest += vertexSize;
 	}
@ -511,8 +511,8 @@ static inline uint8 *ComputeRibbonSliceFollowPathXY(const NLMISC::CVector &prev,
 // This is for untextured versions (no need to duplicate the last vertex of each slice)
 static inline uint8 *ComputeUntexturedRibbonMesh(uint8 *destVb,
 											     uint  vertexSize,
-											     const NLMISC::CVector *curve,
-											     const NLMISC::CVector *shape,
+											     const NLMISC::CVectorPacked *curve,
+											     const NLMISC::CVectorPacked *shape,
 											     uint  numSegs,
 												 uint  numVerticesInShape,
 												 float sizeIncrement,
@ -585,8 +585,8 @@ static inline uint8 *ComputeUntexturedRibbonMesh(uint8 *destVb,
 // (Textured Version)
 static inline uint8 *ComputeTexturedRibbonMesh(uint8 *destVb,
 											   uint  vertexSize,
-											   const NLMISC::CVector *curve,
-											   const NLMISC::CVector *shape,
+											   const NLMISC::CVectorPacked *curve,
+											   const NLMISC::CVectorPacked *shape,
 											   uint  numSegs,
 											   uint  numVerticesInShape,
 											   float sizeIncrement,
@ -612,7 +612,7 @@ static inline uint8 *ComputeTexturedRibbonMesh(uint8 *destVb,
 													   basis
 													  );
 				// duplicate last vertex ( equal first)
-				* (NLMISC::CVector *) nextDestVb = * (NLMISC::CVector *) destVb;
+				* (NLMISC::CVectorPacked *) nextDestVb = * (NLMISC::CVectorPacked *) destVb;
 				destVb = nextDestVb + vertexSize;
 				//
 				++ curve;
@ -633,7 +633,7 @@ static inline uint8 *ComputeTexturedRibbonMesh(uint8 *destVb,
 					basis
 					);
 				// duplicate last vertex ( equal first)
-				* (NLMISC::CVector *) nextDestVb = * (NLMISC::CVector *) destVb;
+				* (NLMISC::CVectorPacked *) nextDestVb = * (NLMISC::CVectorPacked *) destVb;
 				destVb = nextDestVb + vertexSize;
 				//
 				++ curve;
@ -653,7 +653,7 @@ static inline uint8 *ComputeTexturedRibbonMesh(uint8 *destVb,
 					size
 					);
 				// duplicate last vertex ( equal first)
-				* (NLMISC::CVector *) nextDestVb = * (NLMISC::CVector *) destVb;
+				* (NLMISC::CVectorPacked *) nextDestVb = * (NLMISC::CVectorPacked *) destVb;
 				destVb = nextDestVb + vertexSize;
 				//
 				++ curve;
@ -727,7 +727,7 @@ void CPSRibbon::displayRibbons(uint32 nbRibbons, uint32 srcStep)
 		const uint numVerticesInShape = (uint)_Shape.size();
 		//
 		static std::vector<float> sizes;
-		static std::vector<NLMISC::CVector> ribbonPos;  // this is where the position of each ribbon slice center i stored
+		static std::vector<NLMISC::CVectorPacked> ribbonPos;  // this is where the position of each ribbon slice center is stored
 		ribbonPos.resize(_UsedNbSegs + 1); // make sure we have enough room
 		sizes.resize(numRibbonBatch);

@ -782,7 +782,7 @@ void CPSRibbon::displayRibbons(uint32 nbRibbons, uint32 srcStep)
 							const float ribbonSizeIncrement = *ptCurrSize / (float) _UsedNbSegs;
 							ptCurrSize += ptCurrSizeIncrement;
 							// the parent class has a method to get the ribbons positions
-							computeRibbon((uint) (fpRibbonIndex >> 16), &ribbonPos[0], sizeof(NLMISC::CVector));
+							computeRibbon((uint) (fpRibbonIndex >> 16), &ribbonPos[0], sizeof(NLMISC::CVectorPacked));
 							currVert = ComputeTexturedRibbonMesh(currVert,
 																 vertexSize,
 																 &ribbonPos[0],
@ -804,7 +804,7 @@ void CPSRibbon::displayRibbons(uint32 nbRibbons, uint32 srcStep)
 							const float ribbonSizeIncrement = *ptCurrSize / (float) _UsedNbSegs;
 							ptCurrSize += ptCurrSizeIncrement;
 							// the parent class has a method to get the ribbons positions
-							computeRibbon((uint) (fpRibbonIndex >> 16), &ribbonPos[0], sizeof(NLMISC::CVector));
+							computeRibbon((uint) (fpRibbonIndex >> 16), &ribbonPos[0], sizeof(NLMISC::CVectorPacked));
 							currVert = ComputeUntexturedRibbonMesh(currVert,
 																   vertexSize,
 																   &ribbonPos[0],
--- a/code/nel/src/3d/ps_ribbon_base.cpp
+++ b/code/nel/src/3d/ps_ribbon_base.cpp
@ -33,7 +33,7 @@ static inline void BuildHermiteVector(const NLMISC::CVector &P0,
 							   const NLMISC::CVector &P1,
 							   const NLMISC::CVector &T0,
 							   const NLMISC::CVector &T1,
-									 NLMISC::CVector &dest,
+									 NLMISC::CVectorPacked &dest,
 							   float lambda
 							   )
 {
@ -54,7 +54,7 @@ static inline void BuildHermiteVector(const NLMISC::CVector &P0,
 /// for test
 static inline void BuildLinearVector(const NLMISC::CVector &P0,
 									 const NLMISC::CVector &P1,
-									 NLMISC::CVector &dest,
+									 NLMISC::CVectorPacked &dest,
 									 float lambda,
 									 float oneMinusLambda
 							        )
@ -204,7 +204,7 @@ void	CPSRibbonBase::updateGlobals()


 //=======================================================
-void	CPSRibbonBase::computeHermitteRibbon(uint index, NLMISC::CVector *dest, uint stride /* = sizeof(NLMISC::CVector)*/)
+void	CPSRibbonBase::computeHermitteRibbon(uint index, NLMISC::CVectorPacked *dest, uint stride /* = sizeof(NLMISC::CVectorPacked)*/)
 {
 	NL_PS_FUNC(CPSRibbonBase_CVector )
 	nlassert(!_Parametric);
@ -242,7 +242,7 @@ void	CPSRibbonBase::computeHermitteRibbon(uint index, NLMISC::CVector *dest, uin
 					nlassert(NLMISC::isValidDouble(dest->y));
 					nlassert(NLMISC::isValidDouble(dest->z));
 				#endif
-				dest  = (NLMISC::CVector *) ((uint8 *) dest + stride);
+				dest  = (NLMISC::CVectorPacked *) ((uint8 *) dest + stride);
 			}
 			while (--leftToDo);
 			return;
@ -262,7 +262,7 @@ void	CPSRibbonBase::computeHermitteRibbon(uint index, NLMISC::CVector *dest, uin
 				nlassert(NLMISC::isValidDouble(dest->y));
 				nlassert(NLMISC::isValidDouble(dest->z));
 			#endif
-			dest  = (NLMISC::CVector *) ((uint8 *) dest + stride);
+			dest  = (NLMISC::CVectorPacked *) ((uint8 *) dest + stride);
 			-- leftToDo;
 			if (!leftToDo) return;
 			lambda += lambdaStep;
@ -289,7 +289,7 @@ void	CPSRibbonBase::computeHermitteRibbon(uint index, NLMISC::CVector *dest, uin
 }

 //=======================================================
-void CPSRibbonBase::computeLinearRibbon(uint index, NLMISC::CVector *dest, uint stride)
+void CPSRibbonBase::computeLinearRibbon(uint index, NLMISC::CVectorPacked *dest, uint stride)
 {
 	NL_PS_FUNC(CPSRibbonBase_computeLinearRibbon)
 	nlassert(!_Parametric);
@ -321,7 +321,7 @@ void CPSRibbonBase::computeLinearRibbon(uint index, NLMISC::CVector *dest, uint
 					nlassert(NLMISC::isValidDouble(dest->y));
 					nlassert(NLMISC::isValidDouble(dest->z));
 				#endif
-				dest  = (NLMISC::CVector *) ((uint8 *) dest + stride);
+				dest  = (NLMISC::CVectorPacked *) ((uint8 *) dest + stride);

 			}
 			while (--leftToDo);
@ -345,7 +345,7 @@ void CPSRibbonBase::computeLinearRibbon(uint index, NLMISC::CVector *dest, uint
 				nlassert(NLMISC::isValidDouble(dest->y));
 				nlassert(NLMISC::isValidDouble(dest->z));
 			#endif
-			dest  = (NLMISC::CVector *) ((uint8 *) dest + stride);
+			dest  = (NLMISC::CVectorPacked *) ((uint8 *) dest + stride);
 			-- leftToDo;
 			if (!leftToDo) return;
 			lambda += lambdaStep;
@ -387,14 +387,14 @@ void CPSRibbonBase::computeLinearRibbon(uint index, NLMISC::CVector *dest, uint
 		do
 		{
 			*dest = *currIt;
-			dest  = (NLMISC::CVector *) ((uint8 *) dest + stride);
+			dest  = (NLMISC::CVectorPacked *) ((uint8 *) dest + stride);
 		}
 		while (--leftToDo);
 		return;
 	}
 	float lambdaStep = _UsedSegDuration / dt;
 	BuildLinearVector(*currIt, *nextIt, *dest, 0.f, 1.f);
-	dest  = (NLMISC::CVector *) ((uint8 *) dest + stride);
+	dest  = (NLMISC::CVectorPacked *) ((uint8 *) dest + stride);
 	-- leftToDo;
 	// snap lambda to nearest time step
 	lambda = lambdaStep * fmodf(date[0], _UsedSegDuration) / _UsedSegDuration;
@ -406,7 +406,7 @@ void CPSRibbonBase::computeLinearRibbon(uint index, NLMISC::CVector *dest, uint
 			if (lambda >= 1.f) break;
 			/// compute a location
 			BuildLinearVector(*currIt, *nextIt, *dest, lambda, oneMinusLambda);
-			dest  = (NLMISC::CVector *) ((uint8 *) dest + stride);
+			dest  = (NLMISC::CVectorPacked *) ((uint8 *) dest + stride);
 			-- leftToDo;
 			if (!leftToDo) return;
 			lambda += lambdaStep;
@ -426,7 +426,7 @@ void CPSRibbonBase::computeLinearRibbon(uint index, NLMISC::CVector *dest, uint
 			do
 			{
 				*dest = *currIt;
-				dest  = (NLMISC::CVector *) ((uint8 *) dest + stride);
+				dest  = (NLMISC::CVectorPacked *) ((uint8 *) dest + stride);
 			}
 			while (--leftToDo);
 			return;
@ -442,7 +442,7 @@ void CPSRibbonBase::computeLinearRibbon(uint index, NLMISC::CVector *dest, uint


 //=======================================================
-void CPSRibbonBase::computeLinearCstSizeRibbon(uint index, NLMISC::CVector *dest, uint stride /* = sizeof(NLMISC::CVector)*/)
+void CPSRibbonBase::computeLinearCstSizeRibbon(uint index, NLMISC::CVectorPacked *dest, uint stride /* = sizeof(NLMISC::CVectorPacked)*/)
 {
 	NL_PS_FUNC(CPSRibbonBase_CVector )
 	nlassert(!_Parametric);
@ -485,7 +485,7 @@ void CPSRibbonBase::computeLinearCstSizeRibbon(uint index, NLMISC::CVector *dest
 					nlassert(NLMISC::isValidDouble(dest->y));
 					nlassert(NLMISC::isValidDouble(dest->z));
 				#endif
-				dest  = (NLMISC::CVector *) ((uint8 *) dest + stride);
+				dest  = (NLMISC::CVectorPacked *) ((uint8 *) dest + stride);
 				-- leftToDo;
 				if (!leftToDo) return;
 				lambda += lambdaStep;
@ -512,7 +512,7 @@ void CPSRibbonBase::computeLinearCstSizeRibbon(uint index, NLMISC::CVector *dest
 					nlassert(NLMISC::isValidDouble(dest->y));
 					nlassert(NLMISC::isValidDouble(dest->z));
 				#endif
-				dest  = (NLMISC::CVector *) ((uint8 *) dest + stride);
+				dest  = (NLMISC::CVectorPacked *) ((uint8 *) dest + stride);
 			}
 			return;
 		}
@ -520,7 +520,7 @@ void CPSRibbonBase::computeLinearCstSizeRibbon(uint index, NLMISC::CVector *dest
 }

 //=======================================================
-void CPSRibbonBase::computeHermitteCstSizeRibbon(uint index, NLMISC::CVector *dest, uint stride /* = sizeof(NLMISC::CVector)*/)
+void CPSRibbonBase::computeHermitteCstSizeRibbon(uint index, NLMISC::CVectorPacked *dest, uint stride /* = sizeof(NLMISC::CVectorPacked)*/)
 {
 	NL_PS_FUNC(CPSRibbonBase_CVector )
 	nlassert(!_Parametric);
@ -567,7 +567,7 @@ void CPSRibbonBase::computeHermitteCstSizeRibbon(uint index, NLMISC::CVector *de
 					nlassert(NLMISC::isValidDouble(dest->z));
 				#endif

-				dest  = (NLMISC::CVector *) ((uint8 *) dest + stride);
+				dest  = (NLMISC::CVectorPacked *) ((uint8 *) dest + stride);
 				-- leftToDo;
 				if (!leftToDo) return;
 				lambda += lambdaStep;
@ -593,7 +593,7 @@ void CPSRibbonBase::computeHermitteCstSizeRibbon(uint index, NLMISC::CVector *de
 					nlassert(NLMISC::isValidDouble(dest->y));
 					nlassert(NLMISC::isValidDouble(dest->z));
 				#endif
-				dest  = (NLMISC::CVector *) ((uint8 *) dest + stride);
+				dest  = (NLMISC::CVectorPacked *) ((uint8 *) dest + stride);
 			}
 			return;
 		}
@ -605,7 +605,7 @@ void CPSRibbonBase::computeHermitteCstSizeRibbon(uint index, NLMISC::CVector *de


 //=======================================================
-void CPSRibbonBase::computeRibbon(uint index, NLMISC::CVector *dest, uint stride /* = sizeof(NLMISC::CVector)*/)
+void CPSRibbonBase::computeRibbon(uint index, NLMISC::CVectorPacked *dest, uint stride /* = sizeof(NLMISC::CVectorPacked)*/)
 {
 	NL_PS_FUNC(CPSRibbonBase_CVector )
 	switch (_InterpolationMode)
--- a/code/nel/src/3d/ps_ribbon_look_at.cpp
+++ b/code/nel/src/3d/ps_ribbon_look_at.cpp
@ -34,7 +34,7 @@ const float NormEpsilon = 10E-8f;

 struct CVectInfo
 {
-	NLMISC::CVector Interp;
+	NLMISC::CVectorPacked Interp;
 	NLMISC::CVector Proj;
 };
 typedef std::vector<CVectInfo> TRibbonVect; // a vector used for intermediate computations
@ -247,8 +247,8 @@ static inline void BuildSlice(const NLMISC::CMatrix &mat, CVertexBuffer &vb, uin
 			invTgNorm = 1.f;
 		}
 		// build orthogonals vectors to tangent
-		*(NLMISC::CVector *) currVert = pos->Interp + ribSize * invTgNorm * (tangent.x * K - tangent.z * I);
-		*(NLMISC::CVector *) (currVert + vertexSize) = pos->Interp + ribSize * invTgNorm * (- tangent.x * K + tangent.z * I);
+		*(NLMISC::CVectorPacked *) currVert = NLMISC::CVector(pos->Interp) + ribSize * invTgNorm * (tangent.x * K - tangent.z * I);
+		*(NLMISC::CVectorPacked *) (currVert + vertexSize) = NLMISC::CVector(pos->Interp) + ribSize * invTgNorm * (- tangent.x * K + tangent.z * I);
 	}
 	else if (prev->Proj.y > ZEpsilon) // second point cross the near plane
 	{
@ -263,8 +263,8 @@ static inline void BuildSlice(const NLMISC::CMatrix &mat, CVertexBuffer &vb, uin
 		}
 		else //
 		{
-			*(NLMISC::CVector *) currVert = pos->Interp;
-			*(NLMISC::CVector *) (currVert + vertexSize) = pos->Interp;
+			*(NLMISC::CVectorPacked *) currVert = pos->Interp;
+			*(NLMISC::CVectorPacked *) (currVert + vertexSize) = pos->Interp;
 			return;
 		}

@ -282,8 +282,8 @@ static inline void BuildSlice(const NLMISC::CMatrix &mat, CVertexBuffer &vb, uin
 		}
 		// build orthogonals vectors to tangent

-		*(NLMISC::CVector *) currVert = inter + ribSize *  invTgNorm * (tangent.x * K - tangent.z * I);
-		*(NLMISC::CVector *) (currVert + vertexSize) = inter + ribSize * invTgNorm * (- tangent.x * K + tangent.z * I);
+		*(NLMISC::CVectorPacked *) currVert = inter + ribSize *  invTgNorm * (tangent.x * K - tangent.z * I);
+		*(NLMISC::CVectorPacked *) (currVert + vertexSize) = inter + ribSize * invTgNorm * (- tangent.x * K + tangent.z * I);
 	}
 	else if (next->Proj.y > ZEpsilon) // first point cross the near plane
 	{
@ -298,8 +298,8 @@ static inline void BuildSlice(const NLMISC::CMatrix &mat, CVertexBuffer &vb, uin
 		}
 		else //
 		{
-			*(NLMISC::CVector *) currVert = pos->Interp;
-			*(NLMISC::CVector *) (currVert + vertexSize) = pos->Interp;
+			*(NLMISC::CVectorPacked *) currVert = pos->Interp;
+			*(NLMISC::CVectorPacked *) (currVert + vertexSize) = pos->Interp;
 			return;
 		}

@ -316,14 +316,14 @@ static inline void BuildSlice(const NLMISC::CMatrix &mat, CVertexBuffer &vb, uin
 		}
 		// build orthogonals vectors to tangent

-		*(NLMISC::CVector *) currVert = inter + ribSize * invTgNorm * (tangent.x * K - tangent.z * I);
-		*(NLMISC::CVector *) (currVert + vertexSize) = inter + ribSize * invTgNorm * (- tangent.x * K + tangent.z * I);
+		*(NLMISC::CVectorPacked *) currVert = inter + ribSize * invTgNorm * (tangent.x * K - tangent.z * I);
+		*(NLMISC::CVectorPacked *) (currVert + vertexSize) = inter + ribSize * invTgNorm * (- tangent.x * K + tangent.z * I);

 	}
 	else // two points are not visible
 	{
-		*(NLMISC::CVector *) currVert = pos->Interp;
-		*(NLMISC::CVector *) (currVert + vertexSize) = pos->Interp;
+		*(NLMISC::CVectorPacked *) currVert = pos->Interp;
+		*(NLMISC::CVectorPacked *) (currVert + vertexSize) = pos->Interp;
 	}

 }
--- a/code/nel/src/3d/ps_shockwave.cpp
+++ b/code/nel/src/3d/ps_shockwave.cpp
@ -23,6 +23,7 @@
 #include "nel/3d/ps_iterator.h"
 #include "nel/3d/particle_system.h"

+using NLMISC::CVectorPacked;

 namespace NL3D
 {
@ -159,10 +160,10 @@ public:
 						radVect = *ptCurrSize * (CPSUtil::getCos((sint32) currAngle) * ptCurrBasis->X + CPSUtil::getSin((sint32) currAngle) * ptCurrBasis->Y);
 						innerVect = radiusRatio * radVect;
 						CHECK_VERTEX_BUFFER(*vb, currVertex);
-						* (CVector *) currVertex = *posIt + radVect;
+						* (CVectorPacked *) currVertex = CVector(*posIt) + radVect;
 						currVertex += vSize;
 						CHECK_VERTEX_BUFFER(*vb, currVertex);
-						* (CVector *) currVertex = *posIt + innerVect;
+						* (CVectorPacked *) currVertex = CVector(*posIt) + innerVect;
 						currVertex += vSize;
 						currAngle += angleStep;
 					}
--- a/code/nel/src/3d/ps_sound.cpp
+++ b/code/nel/src/3d/ps_sound.cpp
@ -148,8 +148,8 @@ void			CPSSound::step(TPSProcessPass pass)

 	CPSAttrib<UPSSoundInstance *>::iterator it = _Sounds.begin(),
 												 endIt;
-	CPSAttrib<NLMISC::CVector>::const_iterator posIt = _Owner->getPos().begin();
-	CPSAttrib<NLMISC::CVector>::const_iterator speedIt = _Owner->getSpeed().begin();
+	CPSAttrib<NLMISC::CVectorPacked>::const_iterator posIt = _Owner->getPos().begin();
+	CPSAttrib<NLMISC::CVectorPacked>::const_iterator speedIt = _Owner->getSpeed().begin();

 	do
 	{
--- a/code/nel/src/3d/ps_tail_dot.cpp
+++ b/code/nel/src/3d/ps_tail_dot.cpp
@ -25,6 +25,8 @@

 #include <memory>

+using NLMISC::CVectorPacked;
+
 namespace NL3D
 {
 static NLMISC::CRGBA GradientB2W[] = {NLMISC::CRGBA(0, 0, 0, 0), NLMISC::CRGBA(255, 255, 255, 255) };
@ -330,7 +332,7 @@ void CPSTailDot::displayRibbons(uint32 nbRibbons, uint32 srcStep)
 				do
 				{
 					// the parent class has a method to get the ribbons positions
-					computeRibbon((uint) (fpRibbonIndex >> 16), (CVector *) currVert, vertexSize);
+					computeRibbon((uint) (fpRibbonIndex >> 16), (CVectorPacked *) currVert, vertexSize);
 					currVert += vertexSize * (_UsedNbSegs + 1);
 					fpRibbonIndex += srcStep;
 				}
@ -345,7 +347,7 @@ void CPSTailDot::displayRibbons(uint32 nbRibbons, uint32 srcStep)
 				{
 					// we compute each pos thanks to the parametric curve
 					_Owner->integrateSingle(date - _UsedSegDuration * (_UsedNbSegs + 1), _UsedSegDuration, _UsedNbSegs + 1, (uint) (fpRibbonIndex >> 16),
-											(NLMISC::CVector *) currVert, vertexSize);
+											(NLMISC::CVectorPacked *) currVert, vertexSize);
 					currVert += vertexSize * (_UsedNbSegs + 1);
 					fpRibbonIndex += srcStep;
 				}
--- a/code/nel/src/3d/ps_util.cpp
+++ b/code/nel/src/3d/ps_util.cpp
@ -44,6 +44,7 @@ namespace NL3D {


 using NLMISC::CVector;
+using NLMISC::CVectorPacked;


 //#ifdef NL_DEBUG
--- a/code/nel/src/3d/ps_zone.cpp
+++ b/code/nel/src/3d/ps_zone.cpp
@ -194,7 +194,7 @@ void CPSZonePlane::deleteElement(uint32 index)
 }


-void CPSZonePlane::computeCollisions(CPSLocated &target, uint firstInstanceIndex, const NLMISC::CVector *posBefore, const NLMISC::CVector *posAfter)
+void CPSZonePlane::computeCollisions(CPSLocated &target, uint firstInstanceIndex, const NLMISC::CVectorPacked *posBefore, const NLMISC::CVectorPacked *posAfter)
 {
 	NL_PS_FUNC(CPSZonePlane_computeCollisions)
 	MINI_TIMER(PSStatsZonePlane)
@ -213,9 +213,9 @@ void CPSZonePlane::computeCollisions(CPSLocated &target, uint firstInstanceIndex
 		NLMISC::CPlane p;
 		p.make(m.mulVector(*normalIt), m * (*planePosIt));
 		// deals with each particle
-		const NLMISC::CVector *itPosBefore = posBefore + firstInstanceIndex;
-		const NLMISC::CVector *itPosBeforeEnd = posBefore + target.getSize();
-		const NLMISC::CVector *itPosAfter = posAfter + firstInstanceIndex;
+		const NLMISC::CVectorPacked *itPosBefore = posBefore + firstInstanceIndex;
+		const NLMISC::CVectorPacked *itPosBeforeEnd = posBefore + target.getSize();
+		const NLMISC::CVectorPacked *itPosAfter = posAfter + firstInstanceIndex;
 		while (itPosBefore != itPosBeforeEnd)
 		{
 			float posSide = p * *itPosBefore;
@ -235,7 +235,7 @@ void CPSZonePlane::computeCollisions(CPSLocated &target, uint firstInstanceIndex
 				ci.Dist = startEnd.norm();
 				// we translate the particle from an epsilon so that it won't get hooked to the plane
 				ci.NewPos = *itPosBefore  + startEnd + PSCollideEpsilon * p.getNormal();
-				const CVector &speed = target.getSpeed()[(uint32)(itPosBefore - posBefore)];
+				const CVector speed = target.getSpeed()[(uint32)(itPosBefore - posBefore)];
 				ci.NewSpeed = _BounceFactor * (speed - 2.0f * (speed * p.getNormal()) * p.getNormal());
 				ci.CollisionZone = this;
 				CPSLocated::_Collisions[itPosBefore - posBefore].update(ci);
@ -267,7 +267,7 @@ CVector CPSZonePlane::getNormal(uint32 index)
 	NL_PS_FUNC(CPSZonePlane_getNormal)
 	return _Normal[index];
 }
-void CPSZonePlane::setNormal(uint32 index, CVector n)
+void CPSZonePlane::setNormal(uint32 index, const CVector &n)
 {
 	NL_PS_FUNC(CPSZonePlane_setNormal)
 	_Normal[index] = n;
@ -290,7 +290,7 @@ void CPSZonePlane::serial(NLMISC::IStream &f) throw(NLMISC::EStream)
 // sphere implementation //
 ///////////////////////////

-void CPSZoneSphere::computeCollisions(CPSLocated &target, uint firstInstanceIndex, const NLMISC::CVector *posBefore, const NLMISC::CVector *posAfter)
+void CPSZoneSphere::computeCollisions(CPSLocated &target, uint firstInstanceIndex, const NLMISC::CVectorPacked *posBefore, const NLMISC::CVectorPacked *posAfter)
 {
 	NL_PS_FUNC(CPSZoneSphere_computeCollisions)
 	MINI_TIMER(PSStatsZoneSphere)
@ -308,9 +308,9 @@ void CPSZoneSphere::computeCollisions(CPSLocated &target, uint firstInstanceInde
 		const CMatrix &m = CPSLocated::getConversionMatrix(&target, this->_Owner);
 		CVector center = m * *spherePosIt;
 		// deals with each particle
-		const NLMISC::CVector *itPosBefore = posBefore + firstInstanceIndex;
-		const NLMISC::CVector *itPosBeforeEnd = posBefore + target.getSize();
-		const NLMISC::CVector *itPosAfter = posAfter + firstInstanceIndex;
+		const NLMISC::CVectorPacked *itPosBefore = posBefore + firstInstanceIndex;
+		const NLMISC::CVectorPacked *itPosBeforeEnd = posBefore + target.getSize();
+		const NLMISC::CVectorPacked *itPosAfter = posAfter + firstInstanceIndex;
 		while (itPosBefore != itPosBeforeEnd)
 		{
 			// check whether the located is going through the sphere
@ -346,7 +346,7 @@ void CPSZoneSphere::computeCollisions(CPSLocated &target, uint firstInstanceInde
 						ci.Dist = startEnd.norm();
 						// we translate the particle from an epsilon so that it won't get hooked to the sphere
 						ci.NewPos = pos  + startEnd + PSCollideEpsilon * normal;
-						const CVector &speed = target.getSpeed()[(uint32)(itPosBefore - posBefore)];
+						const CVector speed = target.getSpeed()[(uint32)(itPosBefore - posBefore)];
 						ci.NewSpeed = _BounceFactor * (speed - 2.0f * (speed * normal) * normal);
 						ci.CollisionZone = this;
 						CPSLocated::_Collisions[itPosBefore - posBefore].update(ci);
@ -450,7 +450,7 @@ void CPSZoneSphere::deleteElement(uint32 index)
 ////////////////////////////////
 // CPSZoneDisc implementation //
 ////////////////////////////////
-void CPSZoneDisc::computeCollisions(CPSLocated &target, uint firstInstanceIndex, const NLMISC::CVector *posBefore, const NLMISC::CVector *posAfter)
+void CPSZoneDisc::computeCollisions(CPSLocated &target, uint firstInstanceIndex, const NLMISC::CVectorPacked *posBefore, const NLMISC::CVectorPacked *posAfter)
 {
 	NL_PS_FUNC(CPSZoneDisc_computeCollisions)
 	MINI_TIMER(PSStatsZoneDisc)
@ -477,9 +477,9 @@ void CPSZoneDisc::computeCollisions(CPSLocated &target, uint firstInstanceIndex,
 		const float epsilon = 0.5f * PSCollideEpsilon;

 		// deals with each particle
-		const NLMISC::CVector *itPosBefore = posBefore + firstInstanceIndex;
-		const NLMISC::CVector *itPosBeforeEnd = posBefore + target.getSize();
-		const NLMISC::CVector *itPosAfter = posAfter + firstInstanceIndex;
+		const NLMISC::CVectorPacked *itPosBefore = posBefore + firstInstanceIndex;
+		const NLMISC::CVectorPacked *itPosBeforeEnd = posBefore + target.getSize();
+		const NLMISC::CVectorPacked *itPosAfter = posAfter + firstInstanceIndex;
 		while (itPosBefore != itPosBeforeEnd)
 		{
 			float posSide = p * *itPosBefore;
@ -503,7 +503,7 @@ void CPSZoneDisc::computeCollisions(CPSLocated &target, uint firstInstanceIndex,
 				hitRadius2 = (ci.NewPos - center) * (ci.NewPos - center);
 				if (hitRadius2 < radiusIt->R2) // check collision against disc
 				{
-					const CVector &speed = target.getSpeed()[(uint32)(itPosBefore - posBefore)];
+					const CVector speed = target.getSpeed()[(uint32)(itPosBefore - posBefore)];
 					ci.NewSpeed = _BounceFactor * (speed - 2.0f * (speed * p.getNormal()) * p.getNormal());
 					ci.CollisionZone = this;
 					CPSLocated::_Collisions[itPosBefore - posBefore].update(ci);
@ -576,7 +576,7 @@ CVector CPSZoneDisc::getNormal(uint32 index)
 	NL_PS_FUNC(CPSZoneDisc_getNormal)
 	return _Normal[index];
 }
-void CPSZoneDisc::setNormal(uint32 index, CVector n)
+void CPSZoneDisc::setNormal(uint32 index, const CVector &n)
 {
 	NL_PS_FUNC(CPSZoneDisc_setNormal)
 	_Normal[index] = n;
@ -847,7 +847,7 @@ void CPSZoneCylinder::performMotion(TAnimationTime ellapsedTime)
 */


-void CPSZoneCylinder::computeCollisions(CPSLocated &target, uint firstInstanceIndex, const NLMISC::CVector *posBefore, const NLMISC::CVector *posAfter)
+void CPSZoneCylinder::computeCollisions(CPSLocated &target, uint firstInstanceIndex, const NLMISC::CVectorPacked *posBefore, const NLMISC::CVectorPacked *posAfter)
 {
 	NL_PS_FUNC(CPSZoneCylinder_computeCollisions)
 	MINI_TIMER(PSStatsZoneCylinder)
@ -873,9 +873,9 @@ void CPSZoneCylinder::computeCollisions(CPSLocated &target, uint firstInstanceIn
 		CVector destProjectedPos, destTPos;
 		// deals with each particle
 		// deals with each particle
-		const NLMISC::CVector *itPosBefore = posBefore + firstInstanceIndex;
-		const NLMISC::CVector *itPosBeforeEnd = posBefore + target.getSize();
-		const NLMISC::CVector *itPosAfter = posAfter + firstInstanceIndex;
+		const NLMISC::CVectorPacked *itPosBefore = posBefore + firstInstanceIndex;
+		const NLMISC::CVectorPacked *itPosBeforeEnd = posBefore + target.getSize();
+		const NLMISC::CVectorPacked *itPosAfter = posAfter + firstInstanceIndex;
 		while (itPosBefore != itPosBeforeEnd)
 		{
 			const CVector &pos = *itPosBefore;
@ -1123,7 +1123,7 @@ void CPSZoneCylinder::deleteElement(uint32 index)
 //	implementation of CPSZoneRectangle      //
 //////////////////////////////////////////////

-void CPSZoneRectangle::computeCollisions(CPSLocated &target, uint firstInstanceIndex, const NLMISC::CVector *posBefore, const NLMISC::CVector *posAfter)
+void CPSZoneRectangle::computeCollisions(CPSLocated &target, uint firstInstanceIndex, const NLMISC::CVectorPacked *posBefore, const NLMISC::CVectorPacked *posAfter)
 {
 	NL_PS_FUNC(CPSZoneRectangle_computeCollisions)
 	MINI_TIMER(PSStatsZoneRectangle)
@ -1149,9 +1149,9 @@ void CPSZoneRectangle::computeCollisions(CPSLocated &target, uint firstInstanceI
 		p.make(X ^ Y, center);
 		// deals with each particle
 		const float epsilon = 0.5f * PSCollideEpsilon;
-		const NLMISC::CVector *itPosBefore = posBefore + firstInstanceIndex;
-		const NLMISC::CVector *itPosBeforeEnd = posBefore + target.getSize();
-		const NLMISC::CVector *itPosAfter = posAfter + firstInstanceIndex;
+		const NLMISC::CVectorPacked *itPosBefore = posBefore + firstInstanceIndex;
+		const NLMISC::CVectorPacked *itPosBeforeEnd = posBefore + target.getSize();
+		const NLMISC::CVectorPacked *itPosAfter = posAfter + firstInstanceIndex;
 		while (itPosBefore != itPosBeforeEnd)
 		{
 			float posSide = p * *itPosBefore;
--- a/code/nel/src/3d/seg_remanence.cpp
+++ b/code/nel/src/3d/seg_remanence.cpp
@ -27,6 +27,7 @@
 #include "nel/3d/dru.h"


+using NLMISC::CVectorPacked;



@ -162,8 +163,8 @@ void CSegRemanence::registerBasic()
 // helper functions to fill vb
 static inline void vbPush(uint8 *&dest, const CVector &v)
 {
-	*(CVector *) dest = v;
-	dest +=sizeof(CVector);
+	*(CVectorPacked *) dest = v;	// write v at the byte cursor as a CVectorPacked (converted by the assignment)
+	dest +=sizeof(CVectorPacked);	// advance the cursor by the size of what was just written, not by sizeof(CVector)
 }

 static inline void vbPush(uint8 *&dest, float f)
--- a/code/nel/src/3d/shadow_skin.cpp
+++ b/code/nel/src/3d/shadow_skin.cpp
@ -38,6 +38,33 @@ uint	CShadowSkin::NumCacheVertexShadow= NL_BlockByteL1 / sizeof(CShadowVertex);


 // ***************************************************************************
+void		CShadowSkin::applySkin(CVectorPacked *dst, std::vector<CMatrix3x4> &boneMat3x4)	// Skins every entry of Vertices into dst, one CVectorPacked per vertex.
+{
+	if(Vertices.empty())
+		return;	// nothing to skin; also avoids taking &Vertices[0] on an empty vector below
+	uint	numVerts= (uint)Vertices.size();
+	CShadowVertex	*src= &Vertices[0];
+	// dst is advanced once per source vertex, so it must have room for Vertices.size() elements.
+	// Skin the vertices block by block, at most NumCacheVertexShadow vertices per block.
+	for(;numVerts>0;)
+	{
+		// number of vertices to process for this block.
+		uint	nBlockInf= min(NumCacheVertexShadow, numVerts);
+		// what remains after this block is left for the next iterations.
+		numVerts-= nBlockInf;
+
+		// precache this block's source vertices before transforming them.
+		CFastMem::precache(src, nBlockInf * sizeof(CShadowVertex));
+		// mulSetPoint() writes into the CVector 'temp'; the result is then assigned to the packed dst element (presumably because dst need not meet CVector's alignment -- TODO confirm).
+		CVector temp;
+		// for each vertex of the block: transform it by the bone matrix selected by its MatrixId.
+		for(;nBlockInf>0;nBlockInf--, src++, dst++)
+		{
+			boneMat3x4[ src->MatrixId ].mulSetPoint( src->Vertex, temp );
+			*dst = temp;
+		}
+	}
+}
 void		CShadowSkin::applySkin(CVector *dst, std::vector<CMatrix3x4> &boneMat3x4)
 {
 	if(Vertices.empty())
@ -56,6 +83,7 @@ void		CShadowSkin::applySkin(CVector *dst, std::vector<CMatrix3x4> &boneMat3x4)
 		// cache the data in L1 cache.
 		CFastMem::precache(src, nBlockInf * sizeof(CShadowVertex));

+		CVector temp;
 		//  for all InfluencedVertices only.
 		for(;nBlockInf>0;nBlockInf--, src++, dst++)
 		{
--- a/code/nel/src/3d/vegetable_manager.cpp
+++ b/code/nel/src/3d/vegetable_manager.cpp
@ -1379,21 +1379,21 @@ void			CVegetableManager::addInstance(CVegetableInstanceGroup *ig,
 		// Pos.
 		//-------
 		// Separate Center and relative pos.
-		CVector	relPos= mat.mulVector(*(CVector*)srcPtr);	// mulVector, because translation in v[center]
+		CVector	relPos= mat.mulVector(*(CVectorPacked*)srcPtr);	// mulVector, because translation in v[center]
 		// compute bendCenterPos
 		CVector	bendCenterPos;
 		if(shape->BendCenterMode == CVegetableShapeBuild::BendCenterNull)
 			bendCenterPos= CVector::Null;
 		else
 		{
-			CVector	v= *(CVector*)srcPtr;
+			CVector	v= *(CVectorPacked*)srcPtr;
 			v.z= 0;
 			bendCenterPos= mat.mulVector(v);				// mulVector, because translation in v[center]
 		}
 		// copy
 		deltaPos= relPos-bendCenterPos;
-		*(CVector*)dstPtr= deltaPos;
-		*(CVector*)(dstPtr + dstCenterOff)= instancePos + bendCenterPos;
+		*(CVectorPacked*)dstPtr= deltaPos;
+		*(CVectorPacked*)(dstPtr + dstCenterOff)= instancePos + bendCenterPos;
 		// if !destLighted, then VP is different
 		if(!destLighted)
 		{
@ -1426,7 +1426,7 @@ void			CVegetableManager::addInstance(CVegetableInstanceGroup *ig,
 			if(destLighted)
 			{
 				// normal
-				*(CVector*)(dstPtr + dstNormalOff)= normalMat.mulVector( *(CVector*)(srcPtr + srcNormalOff) );
+				*(CVectorPacked*)(dstPtr + dstNormalOff)= normalMat.mulVector( *(CVectorPacked*)(srcPtr + srcNormalOff) );
 			}
 			// If destLighted, secondaryRGBA is the ambient
 			// else secondaryRGBA is used only for Alpha (DLM uv.v).
@ -1437,7 +1437,7 @@ void			CVegetableManager::addInstance(CVegetableInstanceGroup *ig,
 			nlassert(!destLighted);

 			// compute normal.
-			CVector		rotNormal= normalMat.mulVector( *(CVector*)(srcPtr + srcNormalOff) );
+			CVector		rotNormal= normalMat.mulVector( *(CVectorPacked*)(srcPtr + srcNormalOff) );
 			// must normalize() because scale is possible.
 			rotNormal.normalize();

@ -1466,7 +1466,7 @@ void			CVegetableManager::addInstance(CVegetableInstanceGroup *ig,

 		// Bend.
 		//-------
-		CVector		*dstBendPtr= (CVector*)(dstPtr + dstBendOff);
+		CVectorPacked		*dstBendPtr= (CVectorPacked*)(dstPtr + dstBendOff);
 		// setup bend Phase.
 		dstBendPtr->y= bendPhase;
 		// setup bend Weight.
@ -2704,7 +2704,7 @@ uint		CVegetableManager::updateInstanceLighting(CVegetableInstanceGroup *ig, uin
 			nlassert(!destLighted);

 			// compute normal.
-			CVector		rotNormal= normalMat.mulVector( *(CVector*)(srcPtr + srcNormalOff) );
+			CVector		rotNormal= normalMat.mulVector( *(CVectorPacked*)(srcPtr + srcNormalOff) );
 			// must normalize() because scale is possible.
 			rotNormal.normalize();

--- a/code/nel/src/3d/vegetable_shape.cpp
+++ b/code/nel/src/3d/vegetable_shape.cpp
@ -128,15 +128,15 @@ void		CVegetableShape::build(CVegetableShapeBuild &vbuild)
 	for(i=0;i<nbVerts;i++)
 	{
 		// Position.
-		const CVector		*srcPos= vba.getVertexCoordPointer(i);
-		CVector		*dstPos= vbaOut.getVertexCoordPointer(i);
+		const CVectorPacked		*srcPos= vba.getVertexCoordPointer(i);
+		CVectorPacked		*dstPos= vbaOut.getVertexCoordPointer(i);
 		*dstPos= *srcPos;

 		// Normal
 		if(Lighted)
 		{
-			const CVector *srcNormal= vba.getNormalCoordPointer(i);
-			CVector		*dstNormal= vbaOut.getNormalCoordPointer(i);
+			const CVectorPacked *srcNormal= vba.getNormalCoordPointer(i);
+			CVectorPacked		*dstNormal= vbaOut.getNormalCoordPointer(i);
 			*dstNormal= *srcNormal;
 		}

--- a/code/nel/src/3d/vertex_buffer.cpp
+++ b/code/nel/src/3d/vertex_buffer.cpp
@ -674,13 +674,13 @@ void		CVertexBuffer::serialOldV1Minus(NLMISC::IStream &f, sint ver)
 		// XYZ.
 		if(_Flags & PositionFlag)
 		{
-			CVector		&vert= *(CVector*)(pointer + stridedId + _Offset[Position]);
+			CVectorPacked		&vert= *(CVectorPacked*)(pointer + stridedId + _Offset[Position]);
 			f.serial(vert);
 		}
 		// Normal
 		if(_Flags & NormalFlag)
 		{
-			CVector		&norm= *(CVector*)(pointer + stridedId + _Offset[Normal]);
+			CVectorPacked		&norm= *(CVectorPacked*)(pointer + stridedId + _Offset[Normal]);
 			f.serial(norm);
 		}
 		// Uvs.
@ -1155,19 +1155,19 @@ IVBDrvInfos::~IVBDrvInfos()
 // CVertexBufferReadWrite
 // --------------------------------------------------

-NLMISC::CVector* CVertexBufferReadWrite::getVertexCoordPointer(uint idx)
+NLMISC::CVectorPacked* CVertexBufferReadWrite::getVertexCoordPointer(uint idx)	// returns the address of vertex idx inside the locked buffer, typed as CVectorPacked
 {
 	nlassert (_Parent->checkLockedBuffer());
 	uint8*	ptr;

 	ptr=_Parent->_LockedBuffer;
 	ptr+=(idx*_Parent->_VertexSize);
-	return((NLMISC::CVector*)ptr);
+	return((NLMISC::CVectorPacked*)ptr);	// points at byte idx*_VertexSize of the locked buffer; no per-attribute offset is added here
 }

 // --------------------------------------------------

-NLMISC::CVector* CVertexBufferReadWrite::getNormalCoordPointer(uint idx)
+NLMISC::CVectorPacked* CVertexBufferReadWrite::getNormalCoordPointer(uint idx)
 {
 	nlassert (_Parent->checkLockedBuffer());
 	uint8*	ptr;
@ -1179,7 +1179,7 @@ NLMISC::CVector* CVertexBufferReadWrite::getNormalCoordPointer(uint idx)
 	ptr=_Parent->_LockedBuffer;
 	ptr+=_Parent->_Offset[CVertexBuffer::Normal];
 	ptr+=idx*_Parent->_VertexSize;
-	return((NLMISC::CVector*)ptr);
+	return((NLMISC::CVectorPacked*)ptr);
 }

 // --------------------------------------------------
@ -1280,19 +1280,19 @@ void CVertexBufferReadWrite::touchVertices (uint first, uint last)
 // CVertexBufferRead
 // --------------------------------------------------

-const NLMISC::CVector* CVertexBufferRead::getVertexCoordPointer(uint idx) const
+const NLMISC::CVectorPacked* CVertexBufferRead::getVertexCoordPointer(uint idx) const	// read-only address of vertex idx inside the locked buffer, typed as CVectorPacked
 {
 	nlassert (_Parent->checkLockedBuffer());
 	const uint8*	ptr;

 	ptr=_Parent->_LockedBuffer;
 	ptr+=(idx*_Parent->_VertexSize);
-	return((const NLMISC::CVector*)ptr);
+	return((const NLMISC::CVectorPacked*)ptr);	// points at byte idx*_VertexSize of the locked buffer; no per-attribute offset is added here
 }

 // --------------------------------------------------

-const NLMISC::CVector* CVertexBufferRead::getNormalCoordPointer(uint idx) const
+const NLMISC::CVectorPacked* CVertexBufferRead::getNormalCoordPointer(uint idx) const
 {
 	nlassert (_Parent->checkLockedBuffer());
 	const uint8*	ptr;
@ -1304,7 +1304,7 @@ const NLMISC::CVector* CVertexBufferRead::getNormalCoordPointer(uint idx) const
 	ptr=_Parent->_LockedBuffer;
 	ptr+=_Parent->_Offset[CVertexBuffer::Normal];
 	ptr+=idx*_Parent->_VertexSize;
-	return((const NLMISC::CVector*)ptr);
+	return((const NLMISC::CVectorPacked*)ptr);
 }

 // --------------------------------------------------
--- a/code/nel/src/3d/water_model.cpp
+++ b/code/nel/src/3d/water_model.cpp
@ -33,6 +33,7 @@
 #include "nel/3d/texture_bump.h"
 #include "nel/3d/water_env_map.h"

+using NLMISC::CVectorPacked;

 using NLMISC::CVector2f;

@ -1450,15 +1451,15 @@ uint CWaterModel::fillVBSoft(void *datas, uint startTri)
 			}
 			for(uint l = 0; l < numVerts - 2; ++l)
 			{
-				*(CVector *) dest = unprojectedTriSoft[0];
+				*(CVectorPacked *) dest = unprojectedTriSoft[0];
 				dest += sizeof(float[3]);
 				*(CVector2f *) dest = envMap[0];
 				dest += sizeof(float[2]);
-				*(CVector *) dest = unprojectedTriSoft[l + 1];
+				*(CVectorPacked *) dest = unprojectedTriSoft[l + 1];
 				dest += sizeof(float[3]);
 				*(CVector2f *) dest = envMap[l + 1];
 				dest += sizeof(float[2]);
-				*(CVector *) dest = unprojectedTriSoft[l + 2];
+				*(CVectorPacked *) dest = unprojectedTriSoft[l + 2];
 				dest += sizeof(float[3]);
 				*(CVector2f *) dest = envMap[l + 2];
 				dest += sizeof(float[2]);
@ -1481,27 +1482,27 @@ uint CWaterModel::fillVBSoft(void *datas, uint startTri)
 				computeWaterVertexSoft((float) (x + 1), (float) (y + 1), proj[2], envMap[2], camI, camJ, camK, denom, date, camMat.getPos());
 				computeWaterVertexSoft((float) x, (float) (y + 1), proj[3], envMap[3], camI, camJ, camK, denom, date, camMat.getPos());
 				//
-				*(CVector *) dest = proj[0];
+				*(CVectorPacked *) dest = proj[0];
 				dest += sizeof(float[3]);
 				*(CVector2f *) dest = envMap[0];
 				dest += sizeof(float[2]);
-				*(CVector *) dest = proj[2];
+				*(CVectorPacked *) dest = proj[2];
 				dest += sizeof(float[3]);
 				*(CVector2f *) dest = envMap[2];
 				dest += sizeof(float[2]);
-				*(CVector *) dest = proj[1];
+				*(CVectorPacked *) dest = proj[1];
 				dest += sizeof(float[3]);
 				*(CVector2f *) dest = envMap[1];
 				dest += sizeof(float[2]);
-				*(CVector *) dest = proj[0];
+				*(CVectorPacked *) dest = proj[0];
 				dest += sizeof(float[3]);
 				*(CVector2f *) dest = envMap[0];
 				dest += sizeof(float[2]);
-				*(CVector *) dest = proj[3];
+				*(CVectorPacked *) dest = proj[3];
 				dest += sizeof(float[3]);
 				*(CVector2f *) dest = envMap[3];
 				dest += sizeof(float[2]);
-				*(CVector *) dest = proj[2];
+				*(CVectorPacked *) dest = proj[2];
 				dest += sizeof(float[3]);
 				*(CVector2f *) dest = envMap[2];
 				dest += sizeof(float[2]);
@ -1557,11 +1558,11 @@ uint CWaterModel::fillVBHard(void *datas, uint startTri)
 			}
 			for(uint l = 0; l < numVerts - 2; ++l)
 			{
-				*(CVector *) dest = unprojectedTri[0];
+				*(CVectorPacked *) dest = unprojectedTri[0];
 				dest += WATER_VERTEX_HARD_SIZE;
-				*(CVector *) dest = unprojectedTri[l + 1];
+				*(CVectorPacked *) dest = unprojectedTri[l + 1];
 				dest += WATER_VERTEX_HARD_SIZE;
-				*(CVector *) dest = unprojectedTri[l + 2];
+				*(CVectorPacked *) dest = unprojectedTri[l + 2];
 				dest += WATER_VERTEX_HARD_SIZE;
 			}
 		}
@ -1581,17 +1582,17 @@ uint CWaterModel::fillVBHard(void *datas, uint startTri)
 				computeWaterVertexHard((float) (x + 1), (float) (y + 1), proj[2], camI, camJ, camK, denom);
 				computeWaterVertexHard((float) x, (float) (y + 1), proj[3], camI, camJ, camK, denom);
 				//
-				*(CVector *) dest = proj[0];
+				*(CVectorPacked *) dest = proj[0];
 				dest += WATER_VERTEX_HARD_SIZE;
-				*(CVector *) dest = proj[2];
+				*(CVectorPacked *) dest = proj[2];
 				dest += WATER_VERTEX_HARD_SIZE;
-				*(CVector *) dest = proj[1];
+				*(CVectorPacked *) dest = proj[1];
 				dest += WATER_VERTEX_HARD_SIZE;
-				*(CVector *) dest = proj[0];
+				*(CVectorPacked *) dest = proj[0];
 				dest += WATER_VERTEX_HARD_SIZE;
-				*(CVector *) dest = proj[3];
+				*(CVectorPacked *) dest = proj[3];
 				dest += WATER_VERTEX_HARD_SIZE;
-				*(CVector *) dest = proj[2];
+				*(CVectorPacked *) dest = proj[2];
 				dest += WATER_VERTEX_HARD_SIZE;
 			}
 		}
--- a/code/nel/src/3d/zone_lighter.cpp
+++ b/code/nel/src/3d/zone_lighter.cpp
@ -3111,7 +3111,7 @@ void CZoneLighter::addWaterShape(CWaterShape *shape, const NLMISC::CMatrix &MT)
 }

 // ***********************************************************
-void CZoneLighter::makeQuadGridFromWaterShapes(NLMISC::CAABBox zoneBBox)
+void CZoneLighter::makeQuadGridFromWaterShapes(const NLMISC::CAABBox &zoneBBox)
 {
 	if (!_WaterShapes.size()) return;

--- a/code/nel/src/ligo/primitive.cpp
+++ b/code/nel/src/ligo/primitive.cpp
@ -875,7 +875,7 @@ bool CPrimZone::contains (const NLMISC::CVector &v, const std::vector<CVector> &

 // ***************************************************************************

-float CPrimZone::getSegmentDist(const NLMISC::CVector v, const NLMISC::CVector &p1, const NLMISC::CVector &p2, NLMISC::CVector &nearPos)
+float CPrimZone::getSegmentDist(const NLMISC::CVector &v, const NLMISC::CVector &p1, const NLMISC::CVector &p2, NLMISC::CVector &nearPos)
 {
 	// two points, compute distance to the segment.
 	CVector V = (p2-p1).normed();
--- a/code/nel/src/misc/polygon.cpp
+++ b/code/nel/src/misc/polygon.cpp
@ -249,7 +249,7 @@ public:
 		Back = NULL;
 		Front = NULL;
 	}
-	CBSPNode2v ( const CPlane &plane, CVector p0, CVector p1, uint v0, uint v1 ) : Plane (plane), P0 (p0), P1 (p1)
+	CBSPNode2v ( const CPlane &plane, const CVector &p0, const CVector &p1, uint v0, uint v1 ) : Plane (plane), P0 (p0), P1 (p1)
 	{
 		Back = NULL;
 		Front = NULL;
--- a/code/nel/src/pacs/chain_quad.cpp
+++ b/code/nel/src/pacs/chain_quad.cpp
@ -344,8 +344,11 @@ sint			CChainQuad::selectEdges(const NLMISC::CAABBox &bbox, CCollisionSurfaceTem
 	return nRes;
 }

-sint		CChainQuad::selectEdges(CVector start, CVector end, CCollisionSurfaceTemp &cst) const
+sint		CChainQuad::selectEdges(const CVector &startp, const CVector &endp, CCollisionSurfaceTemp &cst) const
 {
+	CVector start = startp;
+	CVector end = endp;
+
 	sint	nRes=0;
 	sint	i;
 	uint16	*ochainLUT= cst.OChainLUT;
--- a/code/nel/src/pacs/edge_quad.cpp
+++ b/code/nel/src/pacs/edge_quad.cpp
@ -453,8 +453,11 @@ sint			CEdgeQuad::selectEdges(const NLMISC::CAABBox &bbox, CCollisionSurfaceTemp
 	return nRes;
 }

-sint		CEdgeQuad::selectEdges(CVector start, CVector end, CCollisionSurfaceTemp &cst) const
+sint		CEdgeQuad::selectEdges(const CVector &startp, const CVector &endp, CCollisionSurfaceTemp &cst) const
 {
+	CVector start = startp;
+	CVector end = endp;
+
 	sint	nRes=0;
 	sint	i;
 	uint16	*indexLUT= cst.OChainLUT;
--- a/code/nel/src/pacs/local_retriever.cpp
+++ b/code/nel/src/pacs/local_retriever.cpp
@ -1052,7 +1052,7 @@ bool	NLPACS::CLocalRetriever::testPosition(NLPACS::ULocalPosition &local, CColli
 }


-void	NLPACS::CLocalRetriever::retrievePosition(CVector estimated, CCollisionSurfaceTemp &cst) const
+void	NLPACS::CLocalRetriever::retrievePosition(const CVector &estimated, CCollisionSurfaceTemp &cst) const
 {
 	if (!_Loaded)
 		return;
@ -2200,7 +2200,7 @@ void	NLPACS::CLocalRetriever::replaceChain(uint32 chainId, const std::vector<NLP
 /*
 * Check surface integrity
 */
-bool	NLPACS::CLocalRetriever::checkSurfacesIntegrity(NLMISC::CVector translation, bool verbose) const
+bool	NLPACS::CLocalRetriever::checkSurfacesIntegrity(const NLMISC::CVector &translation, bool verbose) const
 {
 	bool	success = true;
 	uint	surf;
@ -2225,7 +2225,7 @@ bool	NLPACS::CLocalRetriever::checkSurfacesIntegrity(NLMISC::CVector translation
 /**
 * Check surface integrity
 */
-bool	NLPACS::CLocalRetriever::checkSurfaceIntegrity(uint surf, NLMISC::CVector translation, bool verbose) const
+bool	NLPACS::CLocalRetriever::checkSurfaceIntegrity(uint surf, const NLMISC::CVector &translation, bool verbose) const
 {
 	if (surf >= _Surfaces.size())
 		return false;
--- a/code/nel/src/sound/audio_mixer_user.cpp
+++ b/code/nel/src/sound/audio_mixer_user.cpp
@ -1475,7 +1475,7 @@ void CAudioMixerUser::freeTrack(CTrack *track)

 // ******************************************************************

-void CAudioMixerUser::getPlayingSoundsPos(bool virtualPos, std::vector<std::pair<bool, NLMISC::CVector> > &pos)
+void CAudioMixerUser::getPlayingSoundsPos(bool virtualPos, std::vector<CPlayingSoundPos> &pos)
 {
 	int nbplay = 0;
 	int	nbmute = 0;
@ -1493,9 +1493,9 @@ void CAudioMixerUser::getPlayingSoundsPos(bool virtualPos, std::vector<std::pair
 			if (source->isPlaying())
 			{
 				if (virtualPos)
-					pos.push_back(make_pair(source->getTrack() == 0, source->getVirtualPos()));
+					pos.push_back(CPlayingSoundPos(source->getTrack() == 0, source->getVirtualPos()));
 				else
-					pos.push_back(make_pair(source->getTrack() == 0,
+					pos.push_back(CPlayingSoundPos(source->getTrack() == 0,
 						source->getSourceRelativeMode()
 						? source->getPos() + _ListenPosition
 						: source->getPos()));
@ -1517,9 +1517,9 @@ void CAudioMixerUser::getPlayingSoundsPos(bool virtualPos, std::vector<std::pair
 			if (source->isPlaying())
 			{
 				if (virtualPos)
-					pos.push_back(make_pair(source->getTrack() == 0, source->getVirtualPos()));
+					pos.push_back(CPlayingSoundPos(source->getTrack() == 0, source->getVirtualPos()));
 				else
-					pos.push_back(make_pair(source->getTrack() == 0,
+					pos.push_back(CPlayingSoundPos(source->getTrack() == 0,
 						source->getSourceRelativeMode()
 						? source->getPos() + _ListenPosition
 						: source->getPos()));
@ -1705,7 +1705,7 @@ void				CAudioMixerUser::update()
 						{
 							// there is some data here, update the virtual position of the sound.
 							float dist = (css->Position - source->getPos()).norm();
-							CVector vpos(_ListenPosition + css->Direction * (css->Dist + dist));
+							CVector vpos(_ListenPosition + CVector(css->Direction) * (css->Dist + dist));
 //							_Tracks[i]->DrvSource->setPos(source->getPos() * (1-css->PosAlpha) + css->Position*(css->PosAlpha));
 							_Tracks[i]->getPhysicalSource()->setPos(source->getPos() * (1-css->PosAlpha) + vpos*(css->PosAlpha));
 							// update the relative gain
--- a/code/nel/src/sound/clustered_sound.cpp
+++ b/code/nel/src/sound/clustered_sound.cpp
@ -263,7 +263,7 @@ void CClusteredSound::update(const CVector &listenerPos, const CVector &/* view
 				{
 					// this one is better !
 					cs.Distance = css.Dist;
-					cs.Source->setPos(listenerPos + css.Direction * css.Dist + CVector(0,0,2));
+					cs.Source->setPos(listenerPos + CVector(css.Direction) * css.Dist + CVector(0,0,2));
 					if (css.DistFactor < 1.0f)
 						cs.Source->setRelativeGain(css.Gain * (1.0f - (css.DistFactor*css.DistFactor*css.DistFactor*css.DistFactor)));
 					else
@ -289,7 +289,7 @@ void CClusteredSound::update(const CVector &listenerPos, const CVector &/* view
 					cs.Source = CAudioMixerUser::instance()->createSource(soundName, false, NULL, NULL, cluster);
 					if (cs.Source != 0)
 					{
-						cs.Source->setPos(listenerPos + css.Direction * css.Dist + CVector(0,0,2));
+						cs.Source->setPos(listenerPos + CVector(css.Direction) * css.Dist + CVector(0,0,2));
 						if (css.DistFactor < 1.0f)
 							cs.Source->setRelativeGain(css.Gain * (1.0f - (css.DistFactor*css.DistFactor/**css.DistFactor*css.DistFactor*/)));
 						else
@ -769,7 +769,7 @@ void CClusteredSound::soundTraverse(const std::vector<CCluster *> &clusters, CSo
 								stc.Alpha = alpha;
 								stc.PreviousVector = (nearPos - travContext.ListenerPos).normed();
 								addNextTraverse(c, stc);
-								_AudioPath.push_back(make_pair(travContext.ListenerPos, nearPos));
+								_AudioPath.push_back(make_pair(CVectorPacked(travContext.ListenerPos), CVectorPacked(nearPos)));
 							}
 						}
 					}
@ -867,7 +867,7 @@ bool CClusteredSound::addAudibleCluster(CCluster *cluster, CClusterSoundStatus &
 {
 	TClusterStatusMap::iterator it(_AudibleClusters.find(cluster));
 	nlassert(soundStatus.Dist < _MaxEarDistance);
-	nlassert(soundStatus.Direction.norm() <= 1.01f);
+	nlassert(CVector(soundStatus.Direction).norm() <= 1.01f);

 	if (it != _AudibleClusters.end())
 	{
--- a/code/nel/src/sound/simple_source.cpp
+++ b/code/nel/src/sound/simple_source.cpp
@ -109,7 +109,7 @@ CVector CSimpleSource::getVirtualPos() const
 		{
 			// there is some data here, update the virtual position of the sound.
 			float dist = (css->Position - getPos()).norm();
-			CVector vpos(CAudioMixerUser::instance()->getListenPosVector() + css->Direction * (css->Dist + dist));
+			CVector vpos(CAudioMixerUser::instance()->getListenPosVector() + CVector(css->Direction) * (css->Dist + dist));
 			vpos = _Position * (1-css->PosAlpha) + vpos*(css->PosAlpha);
 			return vpos;
 		}
--- a/code/nel/src/sound/stream_source.cpp
+++ b/code/nel/src/sound/stream_source.cpp
@ -134,7 +134,7 @@ CVector CStreamSource::getVirtualPos() const
 		{
 			// there is some data here, update the virtual position of the sound.
 			float dist = (css->Position - getPos()).norm();
-			CVector vpos(CAudioMixerUser::instance()->getListenPosVector() + css->Direction * (css->Dist + dist));
+			CVector vpos(CAudioMixerUser::instance()->getListenPosVector() + CVector(css->Direction) * (css->Dist + dist));
 			vpos = _Position * (1-css->PosAlpha) + vpos*(css->PosAlpha);
 			return vpos;
 		}
--- a/code/ryzom/client/src/decal.cpp
+++ b/code/ryzom/client/src/decal.cpp
@ -434,7 +434,7 @@ void CDecal::renderTriCache(NL3D::IDriver &drv,   NL3D::CShadowPolyReceiver &/*
 			do
 			{
 				dest->V = srcVert->V;
-				float dist = (camPos - srcVert->V).norm();
+				float dist = (camPos - srcVert->asVector()).norm();
 				float intensity = scale * dist + bias;
 				float bottomBlend = srcVert->V.z * bottomBlendScale + bottomBlendBias;
 				clamp(bottomBlend, 0.f, 1.f);
--- a/code/ryzom/client/src/landscape_poly_drawer.cpp
+++ b/code/ryzom/client/src/landscape_poly_drawer.cpp
@ -486,7 +486,7 @@ void CLandscapePolyDrawer::drawShadowVolume(uint poly, bool firstPass)

 		uint i;
 		CVector2f vertex;
-		CVector * vertexVB = NULL;
+		CVectorPacked * vertexVB = NULL;
 		const CVector cameraPos = Scene->getCam().getPos();

 		float height = 2000.0;
--- a/code/ryzom/client/src/sound_manager.cpp
+++ b/code/ryzom/client/src/sound_manager.cpp
@ -343,7 +343,7 @@ void CSoundManager::drawSounds(float camHeight)
 			const CClusteredSound::CClusterSoundStatus &css = first->second;
 			if (css.Direction != CVector::Null)
 			{
-				CVector dest = pos+css.Direction*css.Dist;
+				CVector dest = pos+CVector(css.Direction)*css.Dist;

 				NL3D::CDRU::drawLine(pos, dest, CRGBA(0,255,255,255), *idriver);
 				NL3D::CDRU::drawLine(dest+CVector(0.5f,0.5f,0), dest+CVector(-0.5f,-0.5f,0), CRGBA(0, 255,255,255), *idriver);
@ -354,8 +354,8 @@ void CSoundManager::drawSounds(float camHeight)
 	// draw the audio path
 	{
 		idriver->setupMaterial(mat);
-		const std::vector<std::pair<NLMISC::CVector, NLMISC::CVector> > &lines = cs->getAudioPath();
-		std::vector<std::pair<NLMISC::CVector, NLMISC::CVector> >::const_iterator first(lines.begin()), last(lines.end());
+		const std::vector<std::pair<NLMISC::CVectorPacked, NLMISC::CVectorPacked> > &lines = cs->getAudioPath();
+		std::vector<std::pair<NLMISC::CVectorPacked, NLMISC::CVectorPacked> >::const_iterator first(lines.begin()), last(lines.end());
 		for (; first != last; ++first)
 		{
 			NL3D::CDRU::drawLine(first->first, first->second, CRGBA(0,255,0,255), *idriver);
@@ -363,10 +363,10 @@ void CSoundManager::drawSounds(float camHeight)
 	}
 	// draw the sound source position
 	{
-		std::vector<std::pair<bool, CVector> > soundPos;
+		std::vector<UAudioMixer::CPlayingSoundPos> soundPos;
 		_AudioMixer->getPlayingSoundsPos(true, soundPos);

-		std::vector<std::pair<bool, CVector> >::iterator first(soundPos.begin()), last(soundPos.end());
+		std::vector<UAudioMixer::CPlayingSoundPos>::iterator first(soundPos.begin()), last(soundPos.end());
 		for (; first != last; ++first)
 		{
 			NL3D::CDRU::drawLine(first->second + CVector(0.5f,0.5f,0), first->second + CVector(-0.5f,-0.5f,0), CRGBA(255,0,255,255), *idriver);
Author	SHA1	Message	Date
kaetemi	406346e23f	Merge with default --HG-- branch : sse2	11 years ago
kaetemi	02fd3d68eb	SSE2: Discard previous branch head --HG-- branch : sse2	11 years ago
kaetemi	923e5a3473	SSE2: Vectorize CVector --HG-- branch : sse2	11 years ago
kaetemi	267e544bc4	SSE2: Vectorize CMatrix3x4 --HG-- branch : sse2	11 years ago
kaetemi	a73faa4115	SSE2: Align CMatrix3x4 --HG-- branch : sse2	11 years ago
kaetemi	9ad4da9127	SSE2: Align CVector --HG-- branch : sse2	11 years ago
kaetemi	0bbb8cae2f	SSE2: Fix for MinGW --HG-- branch : sse2	11 years ago
kaetemi	b074ec61b5	Merge with default --HG-- branch : sse2	11 years ago
kaetemi	dc3515592e	SSE2: Adjust accessor --HG-- branch : sse2	11 years ago
kaetemi	e48dd29b13	SSE2: Replace prefetch --HG-- branch : sse2	11 years ago
kaetemi	dc37b9cc48	SSE2: Discard previous branch head --HG-- branch : sse2	11 years ago
kaetemi	26f9b25f65	SSE2: Add a few useful functions --HG-- branch : sse2	11 years ago
kaetemi	80f3516ef5	SSE2: Ensure correct allocator is used --HG-- branch : sse2	11 years ago
kaetemi	885f10eb43	SSE2: Close this branch --HG-- branch : sse2	11 years ago
kaetemi	70eaca1c8e	SSE2: Remove dead code --HG-- branch : sse2	11 years ago
kaetemi	e66485bca9	SSE2: Add converter function for vectors --HG-- branch : sse2	11 years ago
kaetemi	3ec065c9a2	SSE2: Link regular matrix class with fast matrix --HG-- branch : sse2	11 years ago
kaetemi	c5d7664ce5	SSE2: Add fast new matrix class --HG-- branch : sse2	11 years ago
kaetemi	cb14815832	SSE2: Add fast new vector classes --HG-- branch : sse2	11 years ago
kaetemi	60879d87e4	SSE2: Vectorize some code --HG-- branch : sse2	11 years ago
kaetemi	afa315b1b5	SSE2: Implement some more of CVector --HG-- branch : sse2	11 years ago
kaetemi	d18159616e	SSE2: Fix hopefully the last few alignment issues --HG-- branch : sse2	11 years ago
kaetemi	5ec363a8a9	SSE2: Don't divide by zero when normalizing --HG-- branch : sse2	11 years ago
kaetemi	7867db4654	SSE2: Non-SSE2 copy to avoid some issues with STL pairs --HG-- branch : sse2	11 years ago
kaetemi	d787c54567	SSE2: Implement alignment for arena allocator --HG-- branch : sse2	11 years ago
kaetemi	d3847e10cc	SSE2: Workaround alignment issue related to std::pair --HG-- branch : sse2	11 years ago
kaetemi	00b8ad4c91	SSE2: More alignment workarounds --HG-- branch : sse2	11 years ago
kaetemi	35737498b5	SSE2: Implement CVector --HG-- branch : sse2	11 years ago
kaetemi	1ceaed828a	SSE2: More alignment fixes --HG-- branch : sse2	11 years ago
kaetemi	7c7db53c72	SSE2: Another workaround for a stupid uint8 alloc --HG-- branch : sse2	11 years ago
kaetemi	f8b6d81b25	SSE2: More alignment workarounds --HG-- branch : sse2	11 years ago
kaetemi	ba2231f068	SSE2: Some initial CVector SSE2 math --HG-- branch : sse2	11 years ago
kaetemi	d94a49b3d8	SSE2: More CVector alignment fixes --HG-- branch : sse2	11 years ago
kaetemi	31b2141b12	SSE2: CVector alignment fixes for particle systems --HG-- branch : sse2	11 years ago
kaetemi	95fb48fbfc	SSE2: Prepare for CVector alignment --HG-- branch : sse2	11 years ago
kaetemi	dbb966c8a5	SSE2: Some reference --HG-- branch : sse2	11 years ago
kaetemi	4c86f536ae	Disable outdated assembly --HG-- branch : sse2	11 years ago
kaetemi	e8852d630e	SSE2: Add FIXME_SSE2 notes --HG-- branch : sse2	11 years ago
kaetemi	8e8d6ac940	SSE2: Align CVector --HG-- branch : sse2	11 years ago
kaetemi	5f54f75802	SSE2: Initial testing implementation --HG-- branch : sse2	11 years ago