Merge branch 'master' of https://github.com/g-truc/glm

10 years ago · f16dd36dfb
parent c65918b2a1 9d7b180596
commit f16dd36dfb
22 changed files with 605 additions and 274 deletions
--- a/glm/detail/_swizzle.hpp
+++ b/glm/detail/_swizzle.hpp
@ -20,25 +20,25 @@ namespace detail
 		char    _buffer[1];
 	};
-	template <int N, typename T, precision P, template <typename, precision> class vecType, int E0, int E1, int E2, int E3>
+	template <int N, typename T, precision P, template <typename, precision> class vecType, int E0, int E1, int E2, int E3, bool Aligned>
 	struct _swizzle_base1 : public _swizzle_base0<T, N>
 	{
 	};
-	template <typename T, precision P, template <typename, precision> class vecType, int E0, int E1>
+	template <typename T, precision P, template <typename, precision> class vecType, int E0, int E1, bool Aligned>
-	struct _swizzle_base1<2, T, P, vecType, E0,E1,-1,-2> : public _swizzle_base0<T, 2>
+	struct _swizzle_base1<2, T, P, vecType, E0,E1,-1,-2, Aligned> : public _swizzle_base0<T, 2>
 	{
 		GLM_FUNC_QUALIFIER vecType<T, P> operator ()()  const { return vecType<T, P>(this->elem(E0), this->elem(E1)); }
 	};
-	template <typename T, precision P, template <typename, precision> class vecType, int E0, int E1, int E2>
+	template <typename T, precision P, template <typename, precision> class vecType, int E0, int E1, int E2, bool Aligned>
-	struct _swizzle_base1<3, T, P, vecType, E0,E1,E2,-1> : public _swizzle_base0<T, 3>
+	struct _swizzle_base1<3, T, P, vecType, E0,E1,E2,-1, Aligned> : public _swizzle_base0<T, 3>
 	{
 		GLM_FUNC_QUALIFIER vecType<T, P> operator ()()  const { return vecType<T, P>(this->elem(E0), this->elem(E1), this->elem(E2)); }
 	};
-	template <typename T, precision P, template <typename, precision> class vecType, int E0, int E1, int E2, int E3>
+	template <typename T, precision P, template <typename, precision> class vecType, int E0, int E1, int E2, int E3, bool Aligned>
-	struct _swizzle_base1<4, T, P, vecType, E0,E1,E2,E3> : public _swizzle_base0<T, 4>
+	struct _swizzle_base1<4, T, P, vecType, E0,E1,E2,E3, Aligned> : public _swizzle_base0<T, 4>
 	{ 
 		GLM_FUNC_QUALIFIER vecType<T, P> operator ()()  const { return vecType<T, P>(this->elem(E0), this->elem(E1), this->elem(E2), this->elem(E3)); }
 	};
@ -56,7 +56,7 @@ namespace detail
 			containing duplicate elements so that they cannot be used as r-values).            
 	*/
 	template <int N, typename T, precision P, template <typename, precision> class vecType, int E0, int E1, int E2, int E3, int DUPLICATE_ELEMENTS>
-	struct _swizzle_base2 : public _swizzle_base1<N, T, P, vecType, E0,E1,E2,E3>
+	struct _swizzle_base2 : public _swizzle_base1<N, T, P, vecType, E0,E1,E2,E3, detail::is_aligned<P>::value>
 	{
 		GLM_FUNC_QUALIFIER _swizzle_base2& operator= (const T& t)
 		{
@ -134,7 +134,7 @@ namespace detail
 	// Specialization for swizzles containing duplicate elements.  These cannot be modified.
 	template <int N, typename T, precision P, template <typename, precision> class vecType, int E0, int E1, int E2, int E3>
-	struct _swizzle_base2<N, T, P, vecType, E0,E1,E2,E3, 1> : public _swizzle_base1<N, T, P, vecType, E0,E1,E2,E3>
+	struct _swizzle_base2<N, T, P, vecType, E0,E1,E2,E3, 1> : public _swizzle_base1<N, T, P, vecType, E0,E1,E2,E3, detail::is_aligned<P>::value>
 	{
 		struct Stub {};
--- a/glm/detail/func_common.inl
+++ b/glm/detail/func_common.inl
@ -107,7 +107,7 @@ namespace detail
 		}
 	};
-	template <typename T, precision P, template <typename, precision> class vecType>
+	template <typename T, precision P, template <typename, precision> class vecType, bool Aligned>
 	struct compute_abs_vector
 	{
 		GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & x)
@ -116,7 +116,7 @@ namespace detail
 		}
 	};
-	template <typename T, typename U, precision P, template <typename, precision> class vecType>
+	template <typename T, typename U, precision P, template <typename, precision> class vecType, bool Aligned>
 	struct compute_mix_vector
 	{
 		GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & x, vecType<T, P> const & y, vecType<U, P> const & a)
@ -127,8 +127,8 @@ namespace detail
 		}
 	};
-	template <typename T, precision P, template <typename, precision> class vecType>
+	template <typename T, precision P, template <typename, precision> class vecType, bool Aligned>
-	struct compute_mix_vector<T, bool, P, vecType>
+	struct compute_mix_vector<T, bool, P, vecType, Aligned>
 	{
 		GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & x, vecType<T, P> const & y, vecType<bool, P> const & a)
 		{
@ -139,7 +139,7 @@ namespace detail
 		}
 	};
-	template <typename T, typename U, precision P, template <typename, precision> class vecType>
+	template <typename T, typename U, precision P, template <typename, precision> class vecType, bool Aligned>
 	struct compute_mix_scalar
 	{
 		GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & x, vecType<T, P> const & y, U const & a)
@ -150,8 +150,8 @@ namespace detail
 		}
 	};
-	template <typename T, precision P, template <typename, precision> class vecType>
+	template <typename T, precision P, template <typename, precision> class vecType, bool Aligned>
-	struct compute_mix_scalar<T, bool, P, vecType>
+	struct compute_mix_scalar<T, bool, P, vecType, Aligned>
 	{
 		GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & x, vecType<T, P> const & y, bool const & a)
 		{
@ -179,7 +179,7 @@ namespace detail
 		}
 	};
-	template <typename T, precision P, template <typename, precision> class vecType, bool isFloat = true>
+	template <typename T, precision P, template <typename, precision> class vecType, bool isFloat, bool Aligned>
 	struct compute_sign
 	{
 		GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & x)
@ -189,8 +189,8 @@ namespace detail
 	};
 #	if GLM_ARCH == GLM_ARCH_X86
-	template <typename T, precision P, template <typename, precision> class vecType>
+	template <typename T, precision P, template <typename, precision> class vecType, bool Aligned>
-	struct compute_sign<T, P, vecType, false>
+	struct compute_sign<T, P, vecType, Aligned>
 	{
 		GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & x)
 		{
@ -202,7 +202,7 @@ namespace detail
 	};
 #	endif
-	template <typename T, precision P, template <typename, precision> class vecType>
+	template <typename T, precision P, template <typename, precision> class vecType, bool Aligned>
 	struct compute_floor
 	{
 		GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & x)
@ -211,7 +211,7 @@ namespace detail
 		}
 	};
-	template <typename T, precision P, template <typename, precision> class vecType>
+	template <typename T, precision P, template <typename, precision> class vecType, bool Aligned>
 	struct compute_ceil
 	{
 		GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & x)
@ -220,7 +220,7 @@ namespace detail
 		}
 	};
-	template <typename T, precision P, template <typename, precision> class vecType>
+	template <typename T, precision P, template <typename, precision> class vecType, bool Aligned>
 	struct compute_fract
 	{
 		GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & x)
@ -229,7 +229,7 @@ namespace detail
 		}
 	};
-	template <typename T, precision P, template <typename, precision> class vecType>
+	template <typename T, precision P, template <typename, precision> class vecType, bool Aligned>
 	struct compute_trunc
 	{
 		GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & x)
@ -238,7 +238,7 @@ namespace detail
 		}
 	};
-	template <typename T, precision P, template <typename, precision> class vecType>
+	template <typename T, precision P, template <typename, precision> class vecType, bool Aligned>
 	struct compute_round
 	{
 		GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & x)
@ -247,7 +247,7 @@ namespace detail
 		}
 	};
-	template <typename T, precision P, template <typename, precision> class vecType>
+	template <typename T, precision P, template <typename, precision> class vecType, bool Aligned>
 	struct compute_mod
 	{
 		GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & a, vecType<T, P> const & b)
@ -257,7 +257,7 @@ namespace detail
 		}
 	};
-	template <typename T, precision P, template <typename, precision> class vecType>
+	template <typename T, precision P, template <typename, precision> class vecType, bool Aligned>
 	struct compute_min_vector
 	{
 		GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & x, vecType<T, P> const & y)
@ -266,7 +266,7 @@ namespace detail
 		}
 	};
-	template <typename T, precision P, template <typename, precision> class vecType>
+	template <typename T, precision P, template <typename, precision> class vecType, bool Aligned>
 	struct compute_max_vector
 	{
 		GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & x, vecType<T, P> const & y)
@ -275,7 +275,7 @@ namespace detail
 		}
 	};
-	template <typename T, precision P, template <typename, precision> class vecType>
+	template <typename T, precision P, template <typename, precision> class vecType, bool Aligned>
 	struct compute_clamp_vector
 	{
 		GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & x, vecType<T, P> const & minVal, vecType<T, P> const & maxVal)
@ -284,7 +284,7 @@ namespace detail
 		}
 	};
-	template <typename T, precision P, template <typename, precision> class vecType>
+	template <typename T, precision P, template <typename, precision> class vecType, bool Aligned>
 	struct compute_step_vector
 	{
 		GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & edge, vecType<T, P> const & x)
@ -293,7 +293,7 @@ namespace detail
 		}
 	};
-	template <typename T, precision P, template <typename, precision> class vecType>
+	template <typename T, precision P, template <typename, precision> class vecType, bool Aligned>
 	struct compute_smoothstep_vector
 	{
 		GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & edge0, vecType<T, P> const & edge1, vecType<T, P> const & x)
@ -314,7 +314,7 @@ namespace detail
 	template <typename T, precision P, template <typename, precision> class vecType>
 	GLM_FUNC_QUALIFIER vecType<T, P> abs(vecType<T, P> const & x)
 	{
-		return detail::compute_abs_vector<T, P, vecType>::call(x);
+		return detail::compute_abs_vector<T, P, vecType, detail::is_aligned<P>::value>::call(x);
 	}
 	// sign
@ -326,7 +326,7 @@ namespace detail
 			std::numeric_limits<genFIType>::is_iec559 || (std::numeric_limits<genFIType>::is_signed && std::numeric_limits<genFIType>::is_integer),
 			"'sign' only accept signed inputs");
-		return detail::compute_sign<genFIType, defaultp, tvec1, std::numeric_limits<genFIType>::is_iec559>::call(tvec1<genFIType>(x)).x;
+		return detail::compute_sign<genFIType, defaultp, tvec1, std::numeric_limits<genFIType>::is_iec559, highp>::call(tvec1<genFIType>(x)).x;
 	}
 	template <typename T, precision P, template <typename, precision> class vecType>
@ -336,7 +336,7 @@ namespace detail
 			std::numeric_limits<T>::is_iec559 || (std::numeric_limits<T>::is_signed && std::numeric_limits<T>::is_integer),
 			"'sign' only accept signed inputs");
-		return detail::compute_sign<T, P, vecType, std::numeric_limits<T>::is_iec559>::call(x);
+		return detail::compute_sign<T, P, vecType, std::numeric_limits<T>::is_iec559, detail::is_aligned<P>::value>::call(x);
 	}
 	// floor
@ -345,21 +345,21 @@ namespace detail
 	GLM_FUNC_QUALIFIER vecType<T, P> floor(vecType<T, P> const & x)
 	{
 		GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'floor' only accept floating-point inputs.");
-		return detail::compute_floor<T, P, vecType>::call(x);
+		return detail::compute_floor<T, P, vecType, detail::is_aligned<P>::value>::call(x);
 	}
 	template <typename T, precision P, template <typename, precision> class vecType>
 	GLM_FUNC_QUALIFIER vecType<T, P> trunc(vecType<T, P> const & x)
 	{
 		GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'trunc' only accept floating-point inputs");
-		return detail::compute_trunc<T, P, vecType>::call(x);
+		return detail::compute_trunc<T, P, vecType, detail::is_aligned<P>::value>::call(x);
 	}
 	template <typename T, precision P, template <typename, precision> class vecType>
 	GLM_FUNC_QUALIFIER vecType<T, P> round(vecType<T, P> const & x)
 	{
 		GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'round' only accept floating-point inputs");
-		return detail::compute_round<T, P, vecType>::call(x);
+		return detail::compute_round<T, P, vecType, detail::is_aligned<P>::value>::call(x);
 	}
 /*
@ -432,7 +432,7 @@ namespace detail
 	GLM_FUNC_QUALIFIER vecType<T, P> fract(vecType<T, P> const & x)
 	{
 		GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'fract' only accept floating-point inputs");
-		return detail::compute_fract<T, P, vecType>::call(x);
+		return detail::compute_fract<T, P, vecType, detail::is_aligned<P>::value>::call(x);
 	}
 	// mod
@ -445,13 +445,13 @@ namespace detail
 	template <typename T, precision P, template <typename, precision> class vecType>
 	GLM_FUNC_QUALIFIER vecType<T, P> mod(vecType<T, P> const & x, T y)
 	{
-		return detail::compute_mod<T, P, vecType>::call(x, vecType<T, P>(y));
+		return detail::compute_mod<T, P, vecType, detail::is_aligned<P>::value>::call(x, vecType<T, P>(y));
 	}
 	template <typename T, precision P, template <typename, precision> class vecType>
 	GLM_FUNC_QUALIFIER vecType<T, P> mod(vecType<T, P> const & x, vecType<T, P> const & y)
 	{
-		return detail::compute_mod<T, P, vecType>::call(x, y);
+		return detail::compute_mod<T, P, vecType, detail::is_aligned<P>::value>::call(x, y);
 	}
 	// modf
@ -509,26 +509,26 @@ namespace detail
 	GLM_FUNC_QUALIFIER vecType<T, P> min(vecType<T, P> const & a, T b)
 	{
 		GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'min' only accept floating-point inputs for the interpolator a");
-		return detail::compute_min_vector<T, P, vecType>::call(a, vecType<T, P>(b));
+		return detail::compute_min_vector<T, P, vecType, detail::is_aligned<P>::value>::call(a, vecType<T, P>(b));
 	}
 	template <typename T, precision P, template <typename, precision> class vecType>
 	GLM_FUNC_QUALIFIER vecType<T, P> min(vecType<T, P> const & a, vecType<T, P> const & b)
 	{
-		return detail::compute_min_vector<T, P, vecType>::call(a, b);
+		return detail::compute_min_vector<T, P, vecType, detail::is_aligned<P>::value>::call(a, b);
 	}
 	// max
 	template <typename T, precision P, template <typename, precision> class vecType>
 	GLM_FUNC_QUALIFIER vecType<T, P> max(vecType<T, P> const & a, T b)
 	{
-		return detail::compute_max_vector<T, P, vecType>::call(a, vecType<T, P>(b));
+		return detail::compute_max_vector<T, P, vecType, detail::is_aligned<P>::value>::call(a, vecType<T, P>(b));
 	}
 	template <typename T, precision P, template <typename, precision> class vecType>
 	GLM_FUNC_QUALIFIER vecType<T, P> max(vecType<T, P> const & a, vecType<T, P> const & b)
 	{
-		return detail::compute_max_vector<T, P, vecType>::call(a, b);
+		return detail::compute_max_vector<T, P, vecType, detail::is_aligned<P>::value>::call(a, b);
 	}
 	// clamp
@ -543,14 +543,14 @@ namespace detail
 	GLM_FUNC_QUALIFIER vecType<T, P> clamp(vecType<T, P> const & x, T minVal, T maxVal)
 	{
 		GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559 || std::numeric_limits<T>::is_integer, "'clamp' only accept floating-point or integer inputs");
-		return detail::compute_clamp_vector<T, P, vecType>::call(x, vecType<T, P>(minVal), vecType<T, P>(maxVal));
+		return detail::compute_clamp_vector<T, P, vecType, detail::is_aligned<P>::value>::call(x, vecType<T, P>(minVal), vecType<T, P>(maxVal));
 	}
 	template <typename T, precision P, template <typename, precision> class vecType>
 	GLM_FUNC_QUALIFIER vecType<T, P> clamp(vecType<T, P> const & x, vecType<T, P> const & minVal, vecType<T, P> const & maxVal)
 	{
 		GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559 || std::numeric_limits<T>::is_integer, "'clamp' only accept floating-point or integer inputs");
-		return detail::compute_clamp_vector<T, P, vecType>::call(x, minVal, maxVal);
+		return detail::compute_clamp_vector<T, P, vecType, detail::is_aligned<P>::value>::call(x, minVal, maxVal);
 	}
 	template <typename genTypeT, typename genTypeU>
@ -562,13 +562,13 @@ namespace detail
 	template <typename T, typename U, precision P, template <typename, precision> class vecType>
 	GLM_FUNC_QUALIFIER vecType<T, P> mix(vecType<T, P> const & x, vecType<T, P> const & y, U a)
 	{
-		return detail::compute_mix_scalar<T, U, P, vecType>::call(x, y, a);
+		return detail::compute_mix_scalar<T, U, P, vecType, detail::is_aligned<P>::value>::call(x, y, a);
 	}
 	template <typename T, typename U, precision P, template <typename, precision> class vecType>
 	GLM_FUNC_QUALIFIER vecType<T, P> mix(vecType<T, P> const & x, vecType<T, P> const & y, vecType<U, P> const & a)
 	{
-		return detail::compute_mix_vector<T, U, P, vecType>::call(x, y, a);
+		return detail::compute_mix_vector<T, U, P, vecType, detail::is_aligned<P>::value>::call(x, y, a);
 	}
 	// step
@ -581,13 +581,13 @@ namespace detail
 	template <template <typename, precision> class vecType, typename T, precision P>
 	GLM_FUNC_QUALIFIER vecType<T, P> step(T edge, vecType<T, P> const & x)
 	{
-		return detail::compute_step_vector<T, P, vecType>::call(vecType<T, P>(edge), x);
+		return detail::compute_step_vector<T, P, vecType, detail::is_aligned<P>::value>::call(vecType<T, P>(edge), x);
 	}
 	template <template <typename, precision> class vecType, typename T, precision P>
 	GLM_FUNC_QUALIFIER vecType<T, P> step(vecType<T, P> const & edge, vecType<T, P> const & x)
 	{
-		return detail::compute_step_vector<T, P, vecType>::call(edge, x);
+		return detail::compute_step_vector<T, P, vecType, detail::is_aligned<P>::value>::call(edge, x);
 	}
 	// smoothstep
@ -603,13 +603,13 @@ namespace detail
 	template <typename T, precision P, template <typename, precision> class vecType>
 	GLM_FUNC_QUALIFIER vecType<T, P> smoothstep(T edge0, T edge1, vecType<T, P> const & x)
 	{
-		return detail::compute_smoothstep_vector<T, P, vecType>::call(vecType<T, P>(edge0), vecType<T, P>(edge1), x);
+		return detail::compute_smoothstep_vector<T, P, vecType, detail::is_aligned<P>::value>::call(vecType<T, P>(edge0), vecType<T, P>(edge1), x);
 	}
 	template <typename T, precision P, template <typename, precision> class vecType>
 	GLM_FUNC_QUALIFIER vecType<T, P> smoothstep(vecType<T, P> const & edge0, vecType<T, P> const & edge1, vecType<T, P> const & x)
 	{
-		return detail::compute_smoothstep_vector<T, P, vecType>::call(edge0, edge1, x);
+		return detail::compute_smoothstep_vector<T, P, vecType, detail::is_aligned<P>::value>::call(edge0, edge1, x);
 	}
 #	if GLM_HAS_CXX11_STL
--- a/glm/detail/func_common_simd.inl
+++ b/glm/detail/func_common_simd.inl
@ -11,7 +11,7 @@ namespace glm{
 namespace detail
 {
 	template <precision P>
-	struct compute_abs_vector<float, P, tvec4>
+	struct compute_abs_vector<float, P, tvec4, true>
 	{
 		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & v)
 		{
@ -22,7 +22,7 @@ namespace detail
 	};
 	template <precision P>
-	struct compute_abs_vector<int, P, tvec4>
+	struct compute_abs_vector<int, P, tvec4, true>
 	{
 		GLM_FUNC_QUALIFIER static tvec4<int, P> call(tvec4<int, P> const & v)
 		{
@ -33,7 +33,7 @@ namespace detail
 	};
 	template <precision P>
-	struct compute_floor<float, P, tvec4>
+	struct compute_floor<float, P, tvec4, true>
 	{
 		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & v)
 		{
@ -44,7 +44,7 @@ namespace detail
 	};
 	template <precision P>
-	struct compute_ceil<float, P, tvec4>
+	struct compute_ceil<float, P, tvec4, true>
 	{
 		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & v)
 		{
@ -55,7 +55,7 @@ namespace detail
 	};
 	template <precision P>
-	struct compute_fract<float, P, tvec4>
+	struct compute_fract<float, P, tvec4, true>
 	{
 		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & v)
 		{
@ -66,7 +66,7 @@ namespace detail
 	};
 	template <precision P>
-	struct compute_round<float, P, tvec4>
+	struct compute_round<float, P, tvec4, true>
 	{
 		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & v)
 		{
@ -77,7 +77,7 @@ namespace detail
 	};
 	template <precision P>
-	struct compute_mod<float, P, tvec4>
+	struct compute_mod<float, P, tvec4, true>
 	{
 		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & x, tvec4<float, P> const & y)
 		{
@ -88,7 +88,7 @@ namespace detail
 	};
 	template <precision P>
-	struct compute_min_vector<float, P, tvec4>
+	struct compute_min_vector<float, P, tvec4, true>
 	{
 		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & v1, tvec4<float, P> const & v2)
 		{
@ -99,7 +99,7 @@ namespace detail
 	};
 	template <precision P>
-	struct compute_min_vector<int32, P, tvec4>
+	struct compute_min_vector<int32, P, tvec4, true>
 	{
 		GLM_FUNC_QUALIFIER static tvec4<int32, P> call(tvec4<int32, P> const & v1, tvec4<int32, P> const & v2)
 		{
@ -110,7 +110,7 @@ namespace detail
 	};
 	template <precision P>
-	struct compute_min_vector<uint32, P, tvec4>
+	struct compute_min_vector<uint32, P, tvec4, true>
 	{
 		GLM_FUNC_QUALIFIER static tvec4<int32, P> call(tvec4<uint32, P> const & v1, tvec4<uint32, P> const & v2)
 		{
@ -121,7 +121,7 @@ namespace detail
 	};
 	template <precision P>
-	struct compute_max_vector<float, P, tvec4>
+	struct compute_max_vector<float, P, tvec4, true>
 	{
 		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & v1, tvec4<float, P> const & v2)
 		{
@ -132,7 +132,7 @@ namespace detail
 	};
 	template <precision P>
-	struct compute_max_vector<int32, P, tvec4>
+	struct compute_max_vector<int32, P, tvec4, true>
 	{
 		GLM_FUNC_QUALIFIER static tvec4<int32, P> call(tvec4<int32, P> const & v1, tvec4<int32, P> const & v2)
 		{
@ -143,7 +143,7 @@ namespace detail
 	};
 	template <precision P>
-	struct compute_max_vector<uint32, P, tvec4>
+	struct compute_max_vector<uint32, P, tvec4, true>
 	{
 		GLM_FUNC_QUALIFIER static tvec4<uint32, P> call(tvec4<uint32, P> const & v1, tvec4<uint32, P> const & v2)
 		{
@ -154,7 +154,7 @@ namespace detail
 	};
 	template <precision P>
-	struct compute_clamp_vector<float, P, tvec4>
+	struct compute_clamp_vector<float, P, tvec4, true>
 	{
 		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & x, tvec4<float, P> const & minVal, tvec4<float, P> const & maxVal)
 		{
@ -165,7 +165,7 @@ namespace detail
 	};
 	template <precision P>
-	struct compute_clamp_vector<int32, P, tvec4>
+	struct compute_clamp_vector<int32, P, tvec4, true>
 	{
 		GLM_FUNC_QUALIFIER static tvec4<int32, P> call(tvec4<int32, P> const & x, tvec4<int32, P> const & minVal, tvec4<int32, P> const & maxVal)
 		{
@ -176,7 +176,7 @@ namespace detail
 	};
 	template <precision P>
-	struct compute_clamp_vector<uint32, P, tvec4>
+	struct compute_clamp_vector<uint32, P, tvec4, true>
 	{
 		GLM_FUNC_QUALIFIER static tvec4<uint32, P> call(tvec4<uint32, P> const & x, tvec4<uint32, P> const & minVal, tvec4<uint32, P> const & maxVal)
 		{
@ -187,7 +187,7 @@ namespace detail
 	};
 	template <precision P>
-	struct compute_mix_vector<float, bool, P, tvec4>
+	struct compute_mix_vector<float, bool, P, tvec4, true>
 	{
 		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & x, tvec4<float, P> const & y, tvec4<bool, P> const & a)
 		{
@ -210,18 +210,18 @@ namespace detail
 		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const& edge, tvec4<float, P> const& x)
 		{
 			tvec4<float, P> result(uninitialize);
-			result.data = glm_f32v4_stp(edge.data, x.data);
+			result.data = glm_vec4_step(edge.data, x.data);
 			return result;
 		}
 	};
 */
 	template <precision P>
-	struct compute_smoothstep_vector<float, P, tvec4>
+	struct compute_smoothstep_vector<float, P, tvec4, true>
 	{
 		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const& edge0, tvec4<float, P> const& edge1, tvec4<float, P> const& x)
 		{
 			tvec4<float, P> result(uninitialize);
-			result.data = glm_f32v4_ssp(edge0.data, edge1.data, x.data);
+			result.data = glm_vec4_smoothstep(edge0.data, edge1.data, x.data);
 			return result;
 		}
 	};
--- a/glm/detail/func_exponential.inl
+++ b/glm/detail/func_exponential.inl
@ -20,7 +20,7 @@ namespace detail
 		}
 #	endif
-	template <typename T, precision P, template <class, precision> class vecType, bool isFloat = true>
+	template <typename T, precision P, template <class, precision> class vecType, bool isFloat, bool Aligned>
 	struct compute_log2
 	{
 		GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & vec)
@ -29,7 +29,7 @@ namespace detail
 		}
 	};
-	template <template <class, precision> class vecType, typename T, precision P>
+	template <template <class, precision> class vecType, typename T, precision P, bool Aligned>
 	struct compute_sqrt
 	{
 		GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & x)
@ -38,7 +38,7 @@ namespace detail
 		}
 	};
-	template <template <class, precision> class vecType, typename T, precision P>
+	template <template <class, precision> class vecType, typename T, precision P, bool Aligned>
 	struct compute_inversesqrt
 	{
 		GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & x)
@ -47,8 +47,8 @@ namespace detail
 		}
 	};
-	template <template <class, precision> class vecType>
+	template <template <class, precision> class vecType, bool Aligned>
-	struct compute_inversesqrt<vecType, float, lowp>
+	struct compute_inversesqrt<vecType, float, lowp, Aligned>
 	{
 		GLM_FUNC_QUALIFIER static vecType<float, lowp> call(vecType<float, lowp> const & x)
 		{
@ -113,7 +113,7 @@ namespace detail
 	template <typename T, precision P, template <typename, precision> class vecType>
 	GLM_FUNC_QUALIFIER vecType<T, P> log2(vecType<T, P> const & x)
 	{
-		return detail::compute_log2<T, P, vecType, std::numeric_limits<T>::is_iec559>::call(x);
+		return detail::compute_log2<T, P, vecType, std::numeric_limits<T>::is_iec559, detail::is_aligned<P>::value>::call(x);
 	}
 	// sqrt
@ -122,7 +122,7 @@ namespace detail
 	GLM_FUNC_QUALIFIER vecType<T, P> sqrt(vecType<T, P> const & x)
 	{
 		GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'sqrt' only accept floating-point inputs");
-		return detail::compute_sqrt<vecType, T, P>::call(x);
+		return detail::compute_sqrt<vecType, T, P, detail::is_aligned<P>::value>::call(x);
 	}
 	// inversesqrt
@ -136,7 +136,7 @@ namespace detail
 	GLM_FUNC_QUALIFIER vecType<T, P> inversesqrt(vecType<T, P> const & x)
 	{
 		GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'inversesqrt' only accept floating-point inputs");
-		return detail::compute_inversesqrt<vecType, T, P>::call(x);
+		return detail::compute_inversesqrt<vecType, T, P, detail::is_aligned<P>::value>::call(x);
 	}
 }//namespace glm
--- a/glm/detail/func_exponential_simd.inl
+++ b/glm/detail/func_exponential_simd.inl
@ -9,7 +9,7 @@ namespace glm{
 namespace detail
 {
 	template <precision P>
-	struct compute_sqrt<tvec4, float, P>
+	struct compute_sqrt<tvec4, float, P, true>
 	{
 		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & v)
 		{
@ -20,11 +20,11 @@ namespace detail
 	};
 	template <>
-	struct compute_sqrt<tvec4, float, lowp>
+	struct compute_sqrt<tvec4, float, aligned_lowp, true>
 	{
-		GLM_FUNC_QUALIFIER static tvec4<float, lowp> call(tvec4<float, lowp> const & v)
+		GLM_FUNC_QUALIFIER static tvec4<float, aligned_lowp> call(tvec4<float, aligned_lowp> const & v)
 		{
-			tvec4<float, lowp> result(uninitialize);
+			tvec4<float, aligned_lowp> result(uninitialize);
 			result.data = glm_vec4_sqrt_lowp(v.data);
 			return result;
 		}
--- a/glm/detail/func_geometric.inl
+++ b/glm/detail/func_geometric.inl
@ -10,7 +10,7 @@
 namespace glm{
 namespace detail
 {
-	template <template <typename, precision> class vecType, typename T, precision P>
+	template <template <typename, precision> class vecType, typename T, precision P, bool Aligned>
 	struct compute_length
 	{
 		GLM_FUNC_QUALIFIER static T call(vecType<T, P> const & v)
@ -19,7 +19,7 @@ namespace detail
 		}
 	};
-	template <template <typename, precision> class vecType, typename T, precision P>
+	template <template <typename, precision> class vecType, typename T, precision P, bool Aligned>
 	struct compute_distance
 	{
 		GLM_FUNC_QUALIFIER static T call(vecType<T, P> const & p0, vecType<T, P> const & p1)
@ -28,11 +28,11 @@ namespace detail
 		}
 	};
-	template <template <class, precision> class vecType, typename T, precision P>
+	template <template <class, precision> class vecType, typename T, precision P, bool Aligned>
 	struct compute_dot{};
-	template <typename T, precision P>
+	template <typename T, precision P, bool Aligned>
-	struct compute_dot<tvec1, T, P>
+	struct compute_dot<tvec1, T, P, Aligned>
 	{
 		GLM_FUNC_QUALIFIER static T call(tvec1<T, P> const & a, tvec1<T, P> const & b)
 		{
@ -40,8 +40,8 @@ namespace detail
 		}
 	};
-	template <typename T, precision P>
+	template <typename T, precision P, bool Aligned>
-	struct compute_dot<tvec2, T, P>
+	struct compute_dot<tvec2, T, P, Aligned>
 	{
 		GLM_FUNC_QUALIFIER static T call(tvec2<T, P> const & x, tvec2<T, P> const & y)
 		{
@ -50,8 +50,8 @@ namespace detail
 		}
 	};
-	template <typename T, precision P>
+	template <typename T, precision P, bool Aligned>
-	struct compute_dot<tvec3, T, P>
+	struct compute_dot<tvec3, T, P, Aligned>
 	{
 		GLM_FUNC_QUALIFIER static T call(tvec3<T, P> const & x, tvec3<T, P> const & y)
 		{
@ -60,8 +60,8 @@ namespace detail
 		}
 	};
-	template <typename T, precision P>
+	template <typename T, precision P, bool Aligned>
-	struct compute_dot<tvec4, T, P>
+	struct compute_dot<tvec4, T, P, Aligned>
 	{
 		GLM_FUNC_QUALIFIER static T call(tvec4<T, P> const & x, tvec4<T, P> const & y)
 		{
@ -70,7 +70,7 @@ namespace detail
 		}
 	};
-	template <typename T, precision P>
+	template <typename T, precision P, bool Aligned>
 	struct compute_cross
 	{
 		GLM_FUNC_QUALIFIER static tvec3<T, P> call(tvec3<T, P> const & x, tvec3<T, P> const & y)
@ -84,7 +84,7 @@ namespace detail
 		}
 	};
-	template <typename T, precision P, template <typename, precision> class vecType>
+	template <typename T, precision P, template <typename, precision> class vecType, bool Aligned>
 	struct compute_normalize
 	{
 		GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & v)
@ -95,7 +95,7 @@ namespace detail
 		}
 	};
-	template <typename T, precision P, template <typename, precision> class vecType>
+	template <typename T, precision P, template <typename, precision> class vecType, bool Aligned>
 	struct compute_faceforward
 	{
 		GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & N, vecType<T, P> const & I, vecType<T, P> const & Nref)
@ -106,7 +106,7 @@ namespace detail
 		}
 	};
-	template <typename T, precision P, template <typename, precision> class vecType>
+	template <typename T, precision P, template <typename, precision> class vecType, bool Aligned>
 	struct compute_reflect
 	{
 		GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & I, vecType<T, P> const & N)
@ -115,7 +115,7 @@ namespace detail
 		}
 	};
-	template <typename T, precision P, template <typename, precision> class vecType>
+	template <typename T, precision P, template <typename, precision> class vecType, bool Aligned>
 	struct compute_refract
 	{
 		GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & I, vecType<T, P> const & N, T eta)
@ -141,7 +141,7 @@ namespace detail
 	{
 		GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'length' accepts only floating-point inputs");
-		return detail::compute_length<vecType, T, P>::call(v);
+		return detail::compute_length<vecType, T, P, detail::is_aligned<P>::value>::call(v);
 	}
 	// distance
@ -156,7 +156,7 @@ namespace detail
 	template <typename T, precision P, template <typename, precision> class vecType>
 	GLM_FUNC_QUALIFIER T distance(vecType<T, P> const & p0, vecType<T, P> const & p1)
 	{
-		return detail::compute_distance<vecType, T, P>::call(p0, p1);
+		return detail::compute_distance<vecType, T, P, detail::is_aligned<P>::value>::call(p0, p1);
 	}
 	// dot
@ -171,14 +171,14 @@ namespace detail
 	GLM_FUNC_QUALIFIER T dot(vecType<T, P> const & x, vecType<T, P> const & y)
 	{
 		GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'dot' accepts only floating-point inputs");
-		return detail::compute_dot<vecType, T, P>::call(x, y);
+		return detail::compute_dot<vecType, T, P, detail::is_aligned<P>::value>::call(x, y);
 	}
 	// cross
 	template <typename T, precision P>
 	GLM_FUNC_QUALIFIER tvec3<T, P> cross(tvec3<T, P> const & x, tvec3<T, P> const & y)
 	{
-		return detail::compute_cross<T, P>::call(x, y);
+		return detail::compute_cross<T, P, detail::is_aligned<P>::value>::call(x, y);
 	}
 	// normalize
@ -195,7 +195,7 @@ namespace detail
 	{
 		GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'normalize' accepts only floating-point inputs");
-		return detail::compute_normalize<T, P, vecType>::call(x);
+		return detail::compute_normalize<T, P, vecType, detail::is_aligned<P>::value>::call(x);
 	}
 	// faceforward
@ -208,7 +208,7 @@ namespace detail
 	template <typename T, precision P, template <typename, precision> class vecType>
 	GLM_FUNC_QUALIFIER vecType<T, P> faceforward(vecType<T, P> const & N, vecType<T, P> const & I, vecType<T, P> const & Nref)
 	{
-		return detail::compute_faceforward<T, P, vecType>::call(N, I, Nref);
+		return detail::compute_faceforward<T, P, vecType, detail::is_aligned<P>::value>::call(N, I, Nref);
 	}
 	// reflect
@ -221,7 +221,7 @@ namespace detail
 	template <typename T, precision P, template <typename, precision> class vecType>
 	GLM_FUNC_QUALIFIER vecType<T, P> reflect(vecType<T, P> const & I, vecType<T, P> const & N)
 	{
-		return detail::compute_reflect<T, P, vecType>::call(I, N);
+		return detail::compute_reflect<T, P, vecType, detail::is_aligned<P>::value>::call(I, N);
 	}
 	// refract
@ -238,7 +238,7 @@ namespace detail
 	GLM_FUNC_QUALIFIER vecType<T, P> refract(vecType<T, P> const & I, vecType<T, P> const & N, T eta)
 	{
 		GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'refract' accepts only floating-point inputs");
-		return detail::compute_refract<T, P, vecType>::call(I, N, eta);
+		return detail::compute_refract<T, P, vecType, detail::is_aligned<P>::value>::call(I, N, eta);
 	}
 }//namespace glm
--- a/glm/detail/func_geometric_simd.inl
+++ b/glm/detail/func_geometric_simd.inl
@ -9,7 +9,7 @@ namespace glm{
 namespace detail
 {
 	template <precision P>
-	struct compute_length<tvec4, float, P>
+	struct compute_length<tvec4, float, P, true>
 	{
 		GLM_FUNC_QUALIFIER static float call(tvec4<float, P> const & v)
 		{
@ -18,7 +18,7 @@ namespace detail
 	};
 	template <precision P>
-	struct compute_distance<tvec4, float, P>
+	struct compute_distance<tvec4, float, P, true>
 	{
 		GLM_FUNC_QUALIFIER static float call(tvec4<float, P> const & p0, tvec4<float, P> const & p1)
 		{
@ -27,7 +27,7 @@ namespace detail
 	};
 	template <precision P>
-	struct compute_dot<tvec4, float, P>
+	struct compute_dot<tvec4, float, P, true>
 	{
 		GLM_FUNC_QUALIFIER static float call(tvec4<float, P> const& x, tvec4<float, P> const& y)
 		{
@ -36,7 +36,7 @@ namespace detail
 	};
 	template <precision P>
-	struct compute_cross<float, P>
+	struct compute_cross<float, P, true>
 	{
 		GLM_FUNC_QUALIFIER static tvec3<float, P> call(tvec3<float, P> const & a, tvec3<float, P> const & b)
 		{
@ -51,7 +51,7 @@ namespace detail
 	};
 	template <precision P>
-	struct compute_normalize<float, P, tvec4>
+	struct compute_normalize<float, P, tvec4, true>
 	{
 		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & v)
 		{
@ -62,7 +62,7 @@ namespace detail
 	};
 	template <precision P>
-	struct compute_faceforward<float, P, tvec4>
+	struct compute_faceforward<float, P, tvec4, true>
 	{
 		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const& N, tvec4<float, P> const& I, tvec4<float, P> const& Nref)
 		{
@ -73,7 +73,7 @@ namespace detail
 	};
 	template <precision P>
-	struct compute_reflect<float, P, tvec4>
+	struct compute_reflect<float, P, tvec4, true>
 	{
 		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const& I, tvec4<float, P> const& N)
 		{
@ -84,7 +84,7 @@ namespace detail
 	};
 	template <precision P>
-	struct compute_refract<float, P, tvec4>
+	struct compute_refract<float, P, tvec4, true>
 	{
 		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const& I, tvec4<float, P> const& N, float eta)
 		{
--- a/glm/detail/func_integer.inl
+++ b/glm/detail/func_integer.inl
@ -138,7 +138,7 @@ namespace detail
 			return IsNotNull ? int(Result) : -1;
 		}
-		template <typename T, glm::precision P, template <class, glm::precision> class vecType>
+		template <typename T, glm::precision P, template<typename, glm::precision> class vecType>
 		struct compute_findMSB_vec<T, P, vecType, 32>
 		{
 			GLM_FUNC_QUALIFIER static vecType<int, P> call(vecType<T, P> const & x)
--- a/glm/detail/func_matrix_simd.inl
+++ b/glm/detail/func_matrix_simd.inl
@ -59,25 +59,25 @@ namespace detail
 }//namespace detail
 	template<>
-	GLM_FUNC_QUALIFIER tmat4x4<float, lowp> outerProduct<float, lowp, tvec4, tvec4>(tvec4<float, lowp> const & c, tvec4<float, lowp> const & r)
+	GLM_FUNC_QUALIFIER tmat4x4<float, aligned_lowp> outerProduct<float, aligned_lowp, tvec4, tvec4>(tvec4<float, aligned_lowp> const & c, tvec4<float, aligned_lowp> const & r)
 	{
-		tmat4x4<float, lowp> m(uninitialize);
+		tmat4x4<float, aligned_lowp> m(uninitialize);
 		glm_mat4_outerProduct(c.data, r.data, *reinterpret_cast<__m128(*)[4]>(&m[0].data));
 		return m;
 	}
 	template<>
-	GLM_FUNC_QUALIFIER tmat4x4<float, mediump> outerProduct<float, mediump, tvec4, tvec4>(tvec4<float, mediump> const & c, tvec4<float, mediump> const & r)
+	GLM_FUNC_QUALIFIER tmat4x4<float, aligned_mediump> outerProduct<float, aligned_mediump, tvec4, tvec4>(tvec4<float, aligned_mediump> const & c, tvec4<float, aligned_mediump> const & r)
 	{
-		tmat4x4<float, mediump> m(uninitialize);
+		tmat4x4<float, aligned_mediump> m(uninitialize);
 		glm_mat4_outerProduct(c.data, r.data, *reinterpret_cast<__m128(*)[4]>(&m[0].data));
 		return m;
 	}
 	template<>
-	GLM_FUNC_QUALIFIER tmat4x4<float, highp> outerProduct<float, highp, tvec4, tvec4>(tvec4<float, highp> const & c, tvec4<float, highp> const & r)
+	GLM_FUNC_QUALIFIER tmat4x4<float, aligned_highp> outerProduct<float, aligned_highp, tvec4, tvec4>(tvec4<float, aligned_highp> const & c, tvec4<float, aligned_highp> const & r)
 	{
-		tmat4x4<float, highp> m(uninitialize);
+		tmat4x4<float, aligned_highp> m(uninitialize);
 		glm_mat4_outerProduct(c.data, r.data, *reinterpret_cast<__m128(*)[4]>(&m[0].data));
 		return m;
 	}
--- a/glm/detail/precision.hpp
+++ b/glm/detail/precision.hpp
@ -10,6 +10,41 @@ namespace glm
 		highp,
 		mediump,
 		lowp,
-		defaultp = highp
+		aligned_highp,
 		aligned_mediump,
 		aligned_lowp,
 		aligned = aligned_highp,
 #		ifdef GLM_FORCE_ALIGNED
 			defaultp = aligned_highp
 #		else
 			defaultp = highp
 #		endif
 	};
 namespace detail
 {
 	template <precision P>
 	struct is_aligned
 	{
 		static const bool value = false;
 	};
 	template<>
 	struct is_aligned<aligned_lowp>
 	{
 		static const bool value = true;
 	};
 	template<>
 	struct is_aligned<aligned_mediump>
 	{
 		static const bool value = true;
 	};
 	template<>
 	struct is_aligned<aligned_highp>
 	{
 		static const bool value = true;
 	};
 }//namespace detail
 }//namespace glm
--- a/glm/detail/type_vec4.hpp
+++ b/glm/detail/type_vec4.hpp
@ -17,33 +17,33 @@
 namespace glm{
 namespace detail
 {
-	template <int Value>
+	template <typename T, bool aligned>
-	struct shuffle_mask
+	struct simd_data
 	{
-		enum{value = Value};
+		typedef T type[4];
 	};
-
+/*
 	template <typename T>
-	struct simd_data
+	GLM_ALIGNED_STRUCT(16) struct simd_data<T, true>
 	{
 		typedef T type[4];
 	};
-
+*/
 #	if (GLM_ARCH & GLM_ARCH_SSE2_BIT)
 		template <>
-		struct simd_data<float>
+		struct simd_data<float, true>
 		{
 			typedef glm_vec4 type;
 		};
 		template <>
-		struct simd_data<int>
+		struct simd_data<int, true>
 		{
 			typedef glm_ivec4 type;
 		};
 		template <>
-		struct simd_data<unsigned int>
+		struct simd_data<unsigned int, true>
 		{
 			typedef glm_uvec4 type;
 		};
@ -51,7 +51,7 @@ namespace detail
 #	if (GLM_ARCH & GLM_ARCH_AVX_BIT)
 		template <>
-		struct simd_data<double>
+		struct simd_data<double, true>
 		{
 			typedef glm_dvec4 type;
 		};
@ -59,13 +59,13 @@ namespace detail
 #	if (GLM_ARCH & GLM_ARCH_AVX2_BIT)
 		template <>
-		struct simd_data<int64>
+		struct simd_data<int64, true>
 		{
 			typedef glm_i64vec4 type;
 		};
 		template <>
-		struct simd_data<uint64>
+		struct simd_data<uint64, true>
 		{
 			typedef glm_u64vec4 type;
 		};
@ -90,7 +90,7 @@ namespace detail
 				struct { T r, g, b, a; };
 				struct { T s, t, p, q; };
-				typename detail::simd_data<T>::type data;
+				typename detail::simd_data<T, detail::is_aligned<P>::value>::type data;
 #				ifdef GLM_SWIZZLE
 					_GLM_SWIZZLE4_2_MEMBERS(T, P, glm::tvec2, x, y, z, w)
--- a/glm/detail/type_vec4.inl
+++ b/glm/detail/type_vec4.inl
@ -34,7 +34,7 @@ namespace detail
 		enum test {value = ~0};
 	};
-	template <typename T, precision P>
+	template <typename T, precision P, bool Aligned>
 	struct compute_vec4_add
 	{
 		static tvec4<T, P> call(tvec4<T, P> const & a, tvec4<T, P> const & b)
@ -43,7 +43,7 @@ namespace detail
 		}
 	};
-	template <typename T, precision P>
+	template <typename T, precision P, bool Aligned>
 	struct compute_vec4_sub
 	{
 		static tvec4<T, P> call(tvec4<T, P> const & a, tvec4<T, P> const & b)
@ -52,7 +52,7 @@ namespace detail
 		}
 	};
-	template <typename T, precision P>
+	template <typename T, precision P, bool Aligned>
 	struct compute_vec4_mul
 	{
 		static tvec4<T, P> call(tvec4<T, P> const & a, tvec4<T, P> const & b)
@ -61,7 +61,7 @@ namespace detail
 		}
 	};
-	template <typename T, precision P>
+	template <typename T, precision P, bool Aligned>
 	struct compute_vec4_div
 	{
 		static tvec4<T, P> call(tvec4<T, P> const & a, tvec4<T, P> const & b)
@ -70,7 +70,7 @@ namespace detail
 		}
 	};
-	template <typename T, precision P>
+	template <typename T, precision P, bool Aligned>
 	struct compute_vec4_mod
 	{
 		static tvec4<T, P> call(tvec4<T, P> const & a, tvec4<T, P> const & b)
@ -79,7 +79,7 @@ namespace detail
 		}
 	};
-	template <typename T, precision P, int IsInt, std::size_t Size>
+	template <typename T, precision P, int IsInt, std::size_t Size, bool Aligned>
 	struct compute_vec4_and
 	{
 		static tvec4<T, P> call(tvec4<T, P> const & a, tvec4<T, P> const & b)
@ -88,7 +88,7 @@ namespace detail
 		}
 	};
-	template <typename T, precision P, int IsInt, std::size_t Size>
+	template <typename T, precision P, int IsInt, std::size_t Size, bool Aligned>
 	struct compute_vec4_or
 	{
 		static tvec4<T, P> call(tvec4<T, P> const & a, tvec4<T, P> const & b)
@ -97,7 +97,7 @@ namespace detail
 		}
 	};
-	template <typename T, precision P, int IsInt, std::size_t Size>
+	template <typename T, precision P, int IsInt, std::size_t Size, bool Aligned>
 	struct compute_vec4_xor
 	{
 		static tvec4<T, P> call(tvec4<T, P> const & a, tvec4<T, P> const & b)
@ -106,7 +106,7 @@ namespace detail
 		}
 	};
-	template <typename T, precision P, int IsInt, std::size_t Size>
+	template <typename T, precision P, int IsInt, std::size_t Size, bool Aligned>
 	struct compute_vec4_shift_left
 	{
 		static tvec4<T, P> call(tvec4<T, P> const & a, tvec4<T, P> const & b)
@ -115,7 +115,7 @@ namespace detail
 		}
 	};
-	template <typename T, precision P, int IsInt, std::size_t Size>
+	template <typename T, precision P, int IsInt, std::size_t Size, bool Aligned>
 	struct compute_vec4_shift_right
 	{
 		static tvec4<T, P> call(tvec4<T, P> const & a, tvec4<T, P> const & b)
@ -124,7 +124,7 @@ namespace detail
 		}
 	};
-	template <typename T, precision P, int IsInt, std::size_t Size>
+	template <typename T, precision P, int IsInt, std::size_t Size, bool Aligned>
 	struct compute_vec4_equal
 	{
 		static bool call(tvec4<T, P> const & v1, tvec4<T, P> const & v2)
@ -133,7 +133,7 @@ namespace detail
 		}
 	};
-	template <typename T, precision P, int IsInt, std::size_t Size>
+	template <typename T, precision P, int IsInt, std::size_t Size, bool Aligned>
 	struct compute_vec4_nequal
 	{
 		static bool call(tvec4<T, P> const & v1, tvec4<T, P> const & v2)
@ -142,7 +142,7 @@ namespace detail
 		}
 	};
-	template <typename T, precision P, int IsInt, std::size_t Size>
+	template <typename T, precision P, int IsInt, std::size_t Size, bool Aligned>
 	struct compute_vec4_bitwise_not
 	{
 		static tvec4<T, P> call(tvec4<T, P> const & v)
@ -373,84 +373,84 @@ namespace detail
 	template <typename U>
 	GLM_FUNC_QUALIFIER tvec4<T, P> & tvec4<T, P>::operator+=(U scalar)
 	{
-		return (*this = detail::compute_vec4_add<T, P>::call(*this, tvec4<T, P>(scalar)));
+		return (*this = detail::compute_vec4_add<T, P, detail::is_aligned<P>::value>::call(*this, tvec4<T, P>(scalar)));
 	}
 	template <typename T, precision P>
 	template <typename U>
 	GLM_FUNC_QUALIFIER tvec4<T, P> & tvec4<T, P>::operator+=(tvec1<U, P> const & v)
 	{
-		return (*this = detail::compute_vec4_add<T, P>::call(*this, tvec4<T, P>(v.x)));
+		return (*this = detail::compute_vec4_add<T, P, detail::is_aligned<P>::value>::call(*this, tvec4<T, P>(v.x)));
 	}
 	template <typename T, precision P>
 	template <typename U>
 	GLM_FUNC_QUALIFIER tvec4<T, P> & tvec4<T, P>::operator+=(tvec4<U, P> const & v)
 	{
-		return (*this = detail::compute_vec4_add<T, P>::call(*this, tvec4<T, P>(v)));
+		return (*this = detail::compute_vec4_add<T, P, detail::is_aligned<P>::value>::call(*this, tvec4<T, P>(v)));
 	}
 	template <typename T, precision P>
 	template <typename U>
 	GLM_FUNC_QUALIFIER tvec4<T, P> & tvec4<T, P>::operator-=(U scalar)
 	{
-		return (*this = detail::compute_vec4_sub<T, P>::call(*this, tvec4<T, P>(scalar)));
+		return (*this = detail::compute_vec4_sub<T, P, detail::is_aligned<P>::value>::call(*this, tvec4<T, P>(scalar)));
 	}
 	template <typename T, precision P>
 	template <typename U>
 	GLM_FUNC_QUALIFIER tvec4<T, P> & tvec4<T, P>::operator-=(tvec1<U, P> const & v)
 	{
-		return (*this = detail::compute_vec4_sub<T, P>::call(*this, tvec4<T, P>(v.x)));
+		return (*this = detail::compute_vec4_sub<T, P, detail::is_aligned<P>::value>::call(*this, tvec4<T, P>(v.x)));
 	}
 	template <typename T, precision P>
 	template <typename U>
 	GLM_FUNC_QUALIFIER tvec4<T, P> & tvec4<T, P>::operator-=(tvec4<U, P> const & v)
 	{
-		return (*this = detail::compute_vec4_sub<T, P>::call(*this, tvec4<T, P>(v)));
+		return (*this = detail::compute_vec4_sub<T, P, detail::is_aligned<P>::value>::call(*this, tvec4<T, P>(v)));
 	}
 	template <typename T, precision P>
 	template <typename U>
 	GLM_FUNC_QUALIFIER tvec4<T, P> & tvec4<T, P>::operator*=(U scalar)
 	{
-		return (*this = detail::compute_vec4_mul<T, P>::call(*this, tvec4<T, P>(scalar)));
+		return (*this = detail::compute_vec4_mul<T, P, detail::is_aligned<P>::value>::call(*this, tvec4<T, P>(scalar)));
 	}
 	template <typename T, precision P>
 	template <typename U>
 	GLM_FUNC_QUALIFIER tvec4<T, P> & tvec4<T, P>::operator*=(tvec1<U, P> const & v)
 	{
-		return (*this = detail::compute_vec4_mul<T, P>::call(*this, tvec4<T, P>(v.x)));
+		return (*this = detail::compute_vec4_mul<T, P, detail::is_aligned<P>::value>::call(*this, tvec4<T, P>(v.x)));
 	}
 	template <typename T, precision P>
 	template <typename U>
 	GLM_FUNC_QUALIFIER tvec4<T, P> & tvec4<T, P>::operator*=(tvec4<U, P> const & v)
 	{
-		return (*this = detail::compute_vec4_mul<T, P>::call(*this, tvec4<T, P>(v)));
+		return (*this = detail::compute_vec4_mul<T, P, detail::is_aligned<P>::value>::call(*this, tvec4<T, P>(v)));
 	}
 	template <typename T, precision P>
 	template <typename U> 
 	GLM_FUNC_QUALIFIER tvec4<T, P> & tvec4<T, P>::operator/=(U scalar)
 	{
-		return (*this = detail::compute_vec4_div<T, P>::call(*this, tvec4<T, P>(scalar)));
+		return (*this = detail::compute_vec4_div<T, P, detail::is_aligned<P>::value>::call(*this, tvec4<T, P>(scalar)));
 	}
 	template <typename T, precision P>
 	template <typename U> 
 	GLM_FUNC_QUALIFIER tvec4<T, P> & tvec4<T, P>::operator/=(tvec1<U, P> const & v)
 	{
-		return (*this = detail::compute_vec4_div<T, P>::call(*this, tvec4<T, P>(v.x)));
+		return (*this = detail::compute_vec4_div<T, P, detail::is_aligned<P>::value>::call(*this, tvec4<T, P>(v.x)));
 	}
 	template <typename T, precision P>
 	template <typename U> 
 	GLM_FUNC_QUALIFIER tvec4<T, P> & tvec4<T, P>::operator/=(tvec4<U, P> const & v)
 	{
-		return (*this = detail::compute_vec4_div<T, P>::call(*this, tvec4<T, P>(v)));
+		return (*this = detail::compute_vec4_div<T, P, detail::is_aligned<P>::value>::call(*this, tvec4<T, P>(v)));
 	}
 	// -- Increment and decrement operators --
@ -497,126 +497,126 @@ namespace detail
 	template <typename U> 
 	GLM_FUNC_QUALIFIER tvec4<T, P> & tvec4<T, P>::operator%=(U scalar)
 	{
-		return (*this = detail::compute_vec4_mod<T, P>::call(*this, tvec4<T, P>(scalar)));
+		return (*this = detail::compute_vec4_mod<T, P, detail::is_aligned<P>::value>::call(*this, tvec4<T, P>(scalar)));
 	}
 	template <typename T, precision P>
 	template <typename U> 
 	GLM_FUNC_QUALIFIER tvec4<T, P> & tvec4<T, P>::operator%=(tvec1<U, P> const& v)
 	{
-		return (*this = detail::compute_vec4_mod<T, P>::call(*this, tvec4<T, P>(v)));
+		return (*this = detail::compute_vec4_mod<T, P, detail::is_aligned<P>::value>::call(*this, tvec4<T, P>(v)));
 	}
 	template <typename T, precision P>
 	template <typename U> 
 	GLM_FUNC_QUALIFIER tvec4<T, P> & tvec4<T, P>::operator%=(tvec4<U, P> const& v)
 	{
-		return (*this = detail::compute_vec4_mod<T, P>::call(*this, tvec4<T, P>(v)));
+		return (*this = detail::compute_vec4_mod<T, P, detail::is_aligned<P>::value>::call(*this, tvec4<T, P>(v)));
 	}
 	template <typename T, precision P>
 	template <typename U> 
 	GLM_FUNC_QUALIFIER tvec4<T, P> & tvec4<T, P>::operator&=(U scalar)
 	{
-		return (*this = detail::compute_vec4_and<T, P, detail::is_int<T>::value, sizeof(T) * 8>::call(*this, tvec4<T, P>(scalar)));
+		return (*this = detail::compute_vec4_and<T, P, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<P>::value>::call(*this, tvec4<T, P>(scalar)));
 	}
 	template <typename T, precision P>
 	template <typename U> 
 	GLM_FUNC_QUALIFIER tvec4<T, P> & tvec4<T, P>::operator&=(tvec1<U, P> const & v)
 	{
-		return (*this = detail::compute_vec4_and<T, P, detail::is_int<T>::value, sizeof(T) * 8>::call(*this, tvec4<T, P>(v)));
+		return (*this = detail::compute_vec4_and<T, P, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<P>::value>::call(*this, tvec4<T, P>(v)));
 	}
 	template <typename T, precision P>
 	template <typename U> 
 	GLM_FUNC_QUALIFIER tvec4<T, P> & tvec4<T, P>::operator&=(tvec4<U, P> const & v)
 	{
-		return (*this = detail::compute_vec4_and<T, P, detail::is_int<T>::value, sizeof(T) * 8>::call(*this, tvec4<T, P>(v)));
+		return (*this = detail::compute_vec4_and<T, P, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<P>::value>::call(*this, tvec4<T, P>(v)));
 	}
 	template <typename T, precision P>
 	template <typename U> 
 	GLM_FUNC_QUALIFIER tvec4<T, P> & tvec4<T, P>::operator|=(U scalar)
 	{
-		return (*this = detail::compute_vec4_or<T, P, detail::is_int<T>::value, sizeof(T) * 8>::call(*this, tvec4<T, P>(scalar)));
+		return (*this = detail::compute_vec4_or<T, P, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<P>::value>::call(*this, tvec4<T, P>(scalar)));
 	}
 	template <typename T, precision P>
 	template <typename U> 
 	GLM_FUNC_QUALIFIER tvec4<T, P> & tvec4<T, P>::operator|=(tvec1<U, P> const & v)
 	{
-		return (*this = detail::compute_vec4_or<T, P, detail::is_int<T>::value, sizeof(T) * 8>::call(*this, tvec4<T, P>(v)));
+		return (*this = detail::compute_vec4_or<T, P, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<P>::value>::call(*this, tvec4<T, P>(v)));
 	}
 	template <typename T, precision P>
 	template <typename U> 
 	GLM_FUNC_QUALIFIER tvec4<T, P> & tvec4<T, P>::operator|=(tvec4<U, P> const & v)
 	{
-		return (*this = detail::compute_vec4_or<T, P, detail::is_int<T>::value, sizeof(T) * 8>::call(*this, tvec4<T, P>(v)));
+		return (*this = detail::compute_vec4_or<T, P, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<P>::value>::call(*this, tvec4<T, P>(v)));
 	}
 	template <typename T, precision P>
 	template <typename U> 
 	GLM_FUNC_QUALIFIER tvec4<T, P> & tvec4<T, P>::operator^=(U scalar)
 	{
-		return (*this = detail::compute_vec4_xor<T, P, detail::is_int<T>::value, sizeof(T) * 8>::call(*this, tvec4<T, P>(scalar)));
+		return (*this = detail::compute_vec4_xor<T, P, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<P>::value>::call(*this, tvec4<T, P>(scalar)));
 	}
 	template <typename T, precision P>
 	template <typename U> 
 	GLM_FUNC_QUALIFIER tvec4<T, P> & tvec4<T, P>::operator^=(tvec1<U, P> const & v)
 	{
-		return (*this = detail::compute_vec4_xor<T, P, detail::is_int<T>::value, sizeof(T) * 8>::call(*this, tvec4<T, P>(v)));
+		return (*this = detail::compute_vec4_xor<T, P, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<P>::value>::call(*this, tvec4<T, P>(v)));
 	}
 	template <typename T, precision P>
 	template <typename U> 
 	GLM_FUNC_QUALIFIER tvec4<T, P> & tvec4<T, P>::operator^=(tvec4<U, P> const & v)
 	{
-		return (*this = detail::compute_vec4_xor<T, P, detail::is_int<T>::value, sizeof(T) * 8>::call(*this, tvec4<T, P>(v)));
+		return (*this = detail::compute_vec4_xor<T, P, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<P>::value>::call(*this, tvec4<T, P>(v)));
 	}
 	template <typename T, precision P>
 	template <typename U> 
 	GLM_FUNC_QUALIFIER tvec4<T, P> & tvec4<T, P>::operator<<=(U scalar)
 	{
-		return (*this = detail::compute_vec4_shift_left<T, P, detail::is_int<T>::value, sizeof(T) * 8>::call(*this, tvec4<T, P>(scalar)));
+		return (*this = detail::compute_vec4_shift_left<T, P, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<P>::value>::call(*this, tvec4<T, P>(scalar)));
 	}
 	template <typename T, precision P>
 	template <typename U> 
 	GLM_FUNC_QUALIFIER tvec4<T, P> & tvec4<T, P>::operator<<=(tvec1<U, P> const & v)
 	{
-		return (*this = detail::compute_vec4_shift_left<T, P, detail::is_int<T>::value, sizeof(T) * 8>::call(*this, tvec4<T, P>(v)));
+		return (*this = detail::compute_vec4_shift_left<T, P, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<P>::value>::call(*this, tvec4<T, P>(v)));
 	}
 	template <typename T, precision P>
 	template <typename U> 
 	GLM_FUNC_QUALIFIER tvec4<T, P> & tvec4<T, P>::operator<<=(tvec4<U, P> const & v)
 	{
-		return (*this = detail::compute_vec4_shift_left<T, P, detail::is_int<T>::value, sizeof(T) * 8>::call(*this, tvec4<T, P>(v)));
+		return (*this = detail::compute_vec4_shift_left<T, P, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<P>::value>::call(*this, tvec4<T, P>(v)));
 	}
 	template <typename T, precision P>
 	template <typename U> 
 	GLM_FUNC_QUALIFIER tvec4<T, P> & tvec4<T, P>::operator>>=(U scalar)
 	{
-		return (*this = detail::compute_vec4_shift_right<T, P, detail::is_int<T>::value, sizeof(T) * 8>::call(*this, tvec4<T, P>(scalar)));
+		return (*this = detail::compute_vec4_shift_right<T, P, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<P>::value>::call(*this, tvec4<T, P>(scalar)));
 	}
 	template <typename T, precision P>
 	template <typename U> 
 	GLM_FUNC_QUALIFIER tvec4<T, P> & tvec4<T, P>::operator>>=(tvec1<U, P> const & v)
 	{
-		return (*this = detail::compute_vec4_shift_right<T, P, detail::is_int<T>::value, sizeof(T) * 8>::call(*this, tvec4<T, P>(v)));
+		return (*this = detail::compute_vec4_shift_right<T, P, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<P>::value>::call(*this, tvec4<T, P>(v)));
 	}
 	template <typename T, precision P>
 	template <typename U> 
 	GLM_FUNC_QUALIFIER tvec4<T, P> & tvec4<T, P>::operator>>=(tvec4<U, P> const & v)
 	{
-		return (*this = detail::compute_vec4_shift_right<T, P, detail::is_int<T>::value, sizeof(T) * 8>::call(*this, tvec4<T, P>(v)));
+		return (*this = detail::compute_vec4_shift_right<T, P, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<P>::value>::call(*this, tvec4<T, P>(v)));
 	}
 	// -- Unary constant operators --
@ -940,7 +940,7 @@ namespace detail
 	template <typename T, precision P> 
 	GLM_FUNC_QUALIFIER tvec4<T, P> operator~(tvec4<T, P> const & v)
 	{
-		return detail::compute_vec4_bitwise_not<T, P, detail::is_int<T>::value, sizeof(T) * 8>::call(v);
+		return detail::compute_vec4_bitwise_not<T, P, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<P>::value>::call(v);
 	}
 	// -- Boolean operators --
@ -948,13 +948,13 @@ namespace detail
 	template <typename T, precision P>
 	GLM_FUNC_QUALIFIER bool operator==(tvec4<T, P> const & v1, tvec4<T, P> const & v2)
 	{
-		return detail::compute_vec4_equal<T, P, detail::is_int<T>::value, sizeof(T) * 8>::call(v1, v2);
+		return detail::compute_vec4_equal<T, P, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<P>::value>::call(v1, v2);
 	}
 	template <typename T, precision P>
 	GLM_FUNC_QUALIFIER bool operator!=(tvec4<T, P> const & v1, tvec4<T, P> const & v2)
 	{
-		return detail::compute_vec4_nequal<T, P, detail::is_int<T>::value, sizeof(T) * 8>::call(v1, v2);
+		return detail::compute_vec4_nequal<T, P, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<P>::value>::call(v1, v2);
 	}
 	template <precision P>
--- a/glm/detail/type_vec4_simd.inl
+++ b/glm/detail/type_vec4_simd.inl
@ -8,7 +8,7 @@ namespace detail
 {
 #	ifdef GLM_SWIZZLE
 	template <precision P, int E0, int E1, int E2, int E3>
-	struct _swizzle_base1<4, float, P, glm::tvec4, E0,E1,E2,E3> : public _swizzle_base0<float, 4>
+	struct _swizzle_base1<4, float, P, glm::tvec4, E0,E1,E2,E3, true> : public _swizzle_base0<float, 4>
 	{ 
 		GLM_FUNC_QUALIFIER tvec4<float, P> operator ()()  const
 		{
@ -25,7 +25,7 @@ namespace detail
 	};
 	template <precision P, int E0, int E1, int E2, int E3>
-	struct _swizzle_base1<4, int32, P, glm::tvec4, E0,E1,E2,E3> : public _swizzle_base0<int32, 4>
+	struct _swizzle_base1<4, int32, P, glm::tvec4, E0,E1,E2,E3, true> : public _swizzle_base0<int32, 4>
 	{ 
 		GLM_FUNC_QUALIFIER tvec4<int32, P> operator ()()  const
 		{
@ -38,7 +38,7 @@ namespace detail
 	};
 	template <precision P, int E0, int E1, int E2, int E3>
-	struct _swizzle_base1<4, uint32, P, glm::tvec4, E0,E1,E2,E3> : public _swizzle_base0<uint32, 4>
+	struct _swizzle_base1<4, uint32, P, glm::tvec4, E0,E1,E2,E3, true> : public _swizzle_base0<uint32, 4>
 	{ 
 		GLM_FUNC_QUALIFIER tvec4<uint32, P> operator ()()  const
 		{
@ -52,7 +52,7 @@ namespace detail
 #	endif
 	template <precision P>
-	struct compute_vec4_add<float, P>
+	struct compute_vec4_add<float, P, true>
 	{
 		static tvec4<float, P> call(tvec4<float, P> const & a, tvec4<float, P> const & b)
 		{
@ -64,7 +64,7 @@ namespace detail
 #	if GLM_ARCH & GLM_ARCH_AVX_BIT
 	template <precision P>
-	struct compute_vec4_add<double, P>
+	struct compute_vec4_add<double, P, true>
 	{
 		static tvec4<double, P> call(tvec4<double, P> const & a, tvec4<double, P> const & b)
 		{
@ -76,7 +76,7 @@ namespace detail
 #	endif
 	template <precision P>
-	struct compute_vec4_sub<float, P>
+	struct compute_vec4_sub<float, P, true>
 	{
 		static tvec4<float, P> call(tvec4<float, P> const & a, tvec4<float, P> const & b)
 		{
@ -88,7 +88,7 @@ namespace detail
 #	if GLM_ARCH & GLM_ARCH_AVX_BIT
 	template <precision P>
-	struct compute_vec4_sub<double, P>
+	struct compute_vec4_sub<double, P, true>
 	{
 		static tvec4<double, P> call(tvec4<double, P> const & a, tvec4<double, P> const & b)
 		{
@ -100,7 +100,7 @@ namespace detail
 #	endif
 	template <precision P>
-	struct compute_vec4_mul<float, P>
+	struct compute_vec4_mul<float, P, true>
 	{
 		static tvec4<float, P> call(tvec4<float, P> const & a, tvec4<float, P> const & b)
 		{
@ -112,7 +112,7 @@ namespace detail
 #	if GLM_ARCH & GLM_ARCH_AVX_BIT
 	template <precision P>
-	struct compute_vec4_mul<double, P>
+	struct compute_vec4_mul<double, P, true>
 	{
 		static tvec4<double, P> call(tvec4<double, P> const & a, tvec4<double, P> const & b)
 		{
@ -124,7 +124,7 @@ namespace detail
 #	endif
 	template <precision P>
-	struct compute_vec4_div<float, P>
+	struct compute_vec4_div<float, P, true>
 	{
 		static tvec4<float, P> call(tvec4<float, P> const & a, tvec4<float, P> const & b)
 		{
@ -136,7 +136,7 @@ namespace detail
 	#	if GLM_ARCH & GLM_ARCH_AVX_BIT
 	template <precision P>
-	struct compute_vec4_div<double, P>
+	struct compute_vec4_div<double, P, true>
 	{
 		static tvec4<double, P> call(tvec4<double, P> const & a, tvec4<double, P> const & b)
 		{
@ -148,18 +148,18 @@ namespace detail
 #	endif
 	template <>
-	struct compute_vec4_div<float, lowp>
+	struct compute_vec4_div<float, aligned_lowp, true>
 	{
-		static tvec4<float, lowp> call(tvec4<float, lowp> const & a, tvec4<float, lowp> const & b)
+		static tvec4<float, aligned_lowp> call(tvec4<float, aligned_lowp> const & a, tvec4<float, aligned_lowp> const & b)
 		{
-			tvec4<float, lowp> Result(uninitialize);
+			tvec4<float, aligned_lowp> Result(uninitialize);
 			Result.data = _mm_mul_ps(a.data, _mm_rcp_ps(b.data));
 			return Result;
 		}
 	};
 	template <typename T, precision P>
-	struct compute_vec4_and<T, P, true, 32>
+	struct compute_vec4_and<T, P, true, 32, true>
 	{
 		static tvec4<T, P> call(tvec4<T, P> const& a, tvec4<T, P> const& b)
 		{
@ -171,7 +171,7 @@ namespace detail
 #	if GLM_ARCH & GLM_ARCH_AVX2_BIT
 	template <typename T, precision P>
-	struct compute_vec4_and<T, P, true, 64>
+	struct compute_vec4_and<T, P, true, 64, true>
 	{
 		static tvec4<T, P> call(tvec4<T, P> const& a, tvec4<T, P> const& b)
 		{
@ -183,7 +183,7 @@ namespace detail
 #	endif
 	template <typename T, precision P>
-	struct compute_vec4_or<T, P, true, 32>
+	struct compute_vec4_or<T, P, true, 32, true>
 	{
 		static tvec4<T, P> call(tvec4<T, P> const& a, tvec4<T, P> const& b)
 		{
@ -195,7 +195,7 @@ namespace detail
 #	if GLM_ARCH & GLM_ARCH_AVX2_BIT
 	template <typename T, precision P>
-	struct compute_vec4_or<T, P, true, 64>
+	struct compute_vec4_or<T, P, true, 64, true>
 	{
 		static tvec4<T, P> call(tvec4<T, P> const& a, tvec4<T, P> const& b)
 		{
@ -207,7 +207,7 @@ namespace detail
 #	endif
 	template <typename T, precision P>
-	struct compute_vec4_xor<T, P, true, 32>
+	struct compute_vec4_xor<T, P, true, 32, true>
 	{
 		static tvec4<T, P> call(tvec4<T, P> const& a, tvec4<T, P> const& b)
 		{
@ -219,7 +219,7 @@ namespace detail
 #	if GLM_ARCH & GLM_ARCH_AVX2_BIT
 	template <typename T, precision P>
-	struct compute_vec4_xor<T, P, true, 64>
+	struct compute_vec4_xor<T, P, true, 64, true>
 	{
 		static tvec4<T, P> call(tvec4<T, P> const& a, tvec4<T, P> const& b)
 		{
@ -231,7 +231,7 @@ namespace detail
 #	endif
 	template <typename T, precision P>
-	struct compute_vec4_shift_left<T, P, true, 32>
+	struct compute_vec4_shift_left<T, P, true, 32, true>
 	{
 		static tvec4<T, P> call(tvec4<T, P> const& a, tvec4<T, P> const& b)
 		{
@ -243,7 +243,7 @@ namespace detail
 #	if GLM_ARCH & GLM_ARCH_AVX2_BIT
 	template <typename T, precision P>
-	struct compute_vec4_shift_left<T, P, true, 64>
+	struct compute_vec4_shift_left<T, P, true, 64, true>
 	{
 		static tvec4<T, P> call(tvec4<T, P> const& a, tvec4<T, P> const& b)
 		{
@ -255,7 +255,7 @@ namespace detail
 #	endif
 	template <typename T, precision P>
-	struct compute_vec4_shift_right<T, P, true, 32>
+	struct compute_vec4_shift_right<T, P, true, 32, true>
 	{
 		static tvec4<T, P> call(tvec4<T, P> const& a, tvec4<T, P> const& b)
 		{
@ -267,7 +267,7 @@ namespace detail
 #	if GLM_ARCH & GLM_ARCH_AVX2_BIT
 	template <typename T, precision P>
-	struct compute_vec4_shift_right<T, P, true, 64>
+	struct compute_vec4_shift_right<T, P, true, 64, true>
 	{
 		static tvec4<T, P> call(tvec4<T, P> const& a, tvec4<T, P> const& b)
 		{
@ -279,7 +279,7 @@ namespace detail
 #	endif
 	template <typename T, precision P>
-	struct compute_vec4_bitwise_not<T, P, true, 32>
+	struct compute_vec4_bitwise_not<T, P, true, 32, true>
 	{
 		static tvec4<T, P> call(tvec4<T, P> const & v)
 		{
@ -291,7 +291,7 @@ namespace detail
 #	if GLM_ARCH & GLM_ARCH_AVX2_BIT
 	template <typename T, precision P>
-	struct compute_vec4_bitwise_not<T, P, true, 64>
+	struct compute_vec4_bitwise_not<T, P, true, 64, true>
 	{
 		static tvec4<T, P> call(tvec4<T, P> const & v)
 		{
@ -303,7 +303,7 @@ namespace detail
 #	endif
 	template <precision P>
-	struct compute_vec4_equal<float, P, false, 32>
+	struct compute_vec4_equal<float, P, false, 32, true>
 	{
 		static bool call(tvec4<float, P> const & v1, tvec4<float, P> const & v2)
 		{
@ -312,7 +312,7 @@ namespace detail
 	};
 	template <precision P>
-	struct compute_vec4_equal<int32, P, true, 32>
+	struct compute_vec4_equal<int32, P, true, 32, true>
 	{
 		static bool call(tvec4<int32, P> const & v1, tvec4<int32, P> const & v2)
 		{
@ -321,7 +321,7 @@ namespace detail
 	};
 	template <precision P>
-	struct compute_vec4_nequal<float, P, false, 32>
+	struct compute_vec4_nequal<float, P, false, 32, true>
 	{
 		static bool call(tvec4<float, P> const & v1, tvec4<float, P> const & v2)
 		{
@ -330,7 +330,7 @@ namespace detail
 	};
 	template <precision P>
-	struct compute_vec4_nequal<int32, P, true, 32>
+	struct compute_vec4_nequal<int32, P, true, 32, true>
 	{
 		static bool call(tvec4<int32, P> const & v1, tvec4<int32, P> const & v2)
 		{
@ -349,117 +349,117 @@ namespace detail
 #	endif//!GLM_HAS_DEFAULTED_FUNCTIONS
 	// Single-scalar constructors of the aligned float specializations, one per
 	// precision qualifier; _mm_set1_ps replicates s into every element of data.
 	template <>
-	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<float, lowp>::tvec4(float s) :
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<float, aligned_lowp>::tvec4(float s) :
 		data(_mm_set1_ps(s))
 	{}
 	template <>
-	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<float, mediump>::tvec4(float s) :
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<float, aligned_mediump>::tvec4(float s) :
 		data(_mm_set1_ps(s))
 	{}
 	template <>
-	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<float, highp>::tvec4(float s) :
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<float, aligned_highp>::tvec4(float s) :
 		data(_mm_set1_ps(s))
 	{}
 #	if GLM_ARCH & GLM_ARCH_AVX_BIT
 	// Double-precision single-scalar constructors, compiled only when the AVX
 	// bit is set in GLM_ARCH; _mm256_set1_pd replicates s into every element.
 	template <>
-	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<double, lowp>::tvec4(double s) :
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<double, aligned_lowp>::tvec4(double s) :
 		data(_mm256_set1_pd(s))
 	{}
 	template <>
-	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<double, mediump>::tvec4(double s) :
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<double, aligned_mediump>::tvec4(double s) :
 		data(_mm256_set1_pd(s))
 	{}
 	template <>
-	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<double, highp>::tvec4(double s) :
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<double, aligned_highp>::tvec4(double s) :
 		data(_mm256_set1_pd(s))
 	{}
 #	endif
 	// Single-scalar constructors of the aligned int32 specializations, one per
 	// precision qualifier; _mm_set1_epi32 replicates s into every element.
 	template <>
-	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<int32, lowp>::tvec4(int32 s) :
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<int32, aligned_lowp>::tvec4(int32 s) :
 		data(_mm_set1_epi32(s))
 	{}
 	template <>
-	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<int32, mediump>::tvec4(int32 s) :
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<int32, aligned_mediump>::tvec4(int32 s) :
 		data(_mm_set1_epi32(s))
 	{}
 	template <>
-	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<int32, highp>::tvec4(int32 s) :
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<int32, aligned_highp>::tvec4(int32 s) :
 		data(_mm_set1_epi32(s))
 	{}
 #	if GLM_ARCH & GLM_ARCH_AVX2_BIT
 	// 64-bit integer single-scalar constructors, compiled only when the AVX2
 	// bit is set in GLM_ARCH; _mm256_set1_epi64x replicates s into every element.
 	template <>
-	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<int64, lowp>::tvec4(int64 s) :
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<int64, aligned_lowp>::tvec4(int64 s) :
 		data(_mm256_set1_epi64x(s))
 	{}
 	template <>
-	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<int64, mediump>::tvec4(int64 s) :
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<int64, aligned_mediump>::tvec4(int64 s) :
 		data(_mm256_set1_epi64x(s))
 	{}
 	template <>
-	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<int64, highp>::tvec4(int64 s) :
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<int64, aligned_highp>::tvec4(int64 s) :
 		data(_mm256_set1_epi64x(s))
 	{}
 #	endif
 	// Four-scalar constructors of the aligned float specializations.
 	// _mm_set_ps takes its arguments from the highest element down to the
 	// lowest, hence the reversed (d, c, b, a) order: a ends up in element 0.
 	template <>
-	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<float, lowp>::tvec4(float a, float b, float c, float d) :
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<float, aligned_lowp>::tvec4(float a, float b, float c, float d) :
 		data(_mm_set_ps(d, c, b, a))
 	{}
 	template <>
-	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<float, mediump>::tvec4(float a, float b, float c, float d) :
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<float, aligned_mediump>::tvec4(float a, float b, float c, float d) :
 		data(_mm_set_ps(d, c, b, a))
 	{}
 	template <>
-	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<float, highp>::tvec4(float a, float b, float c, float d) :
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<float, aligned_highp>::tvec4(float a, float b, float c, float d) :
 		data(_mm_set_ps(d, c, b, a))
 	{}
 	// Four-scalar constructors of the aligned int32 specializations.
 	// _mm_set_epi32 lists the highest element first, hence the reversed
 	// (d, c, b, a) order: a ends up in element 0.
 	// NOTE(review): the doubled template <> suggests these specialize a member
 	// template of tvec4; its declaration is not visible here -- confirm.
 	template <>
 	template <>
-	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<int32, lowp>::tvec4(int32 a, int32 b, int32 c, int32 d) :
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<int32, aligned_lowp>::tvec4(int32 a, int32 b, int32 c, int32 d) :
 		data(_mm_set_epi32(d, c, b, a))
 	{}
 	template <>
 	template <>
-	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<int32, mediump>::tvec4(int32 a, int32 b, int32 c, int32 d) :
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<int32, aligned_mediump>::tvec4(int32 a, int32 b, int32 c, int32 d) :
 		data(_mm_set_epi32(d, c, b, a))
 	{}
 	template <>
 	template <>
-	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<int32, highp>::tvec4(int32 a, int32 b, int32 c, int32 d) :
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<int32, aligned_highp>::tvec4(int32 a, int32 b, int32 c, int32 d) :
 		data(_mm_set_epi32(d, c, b, a))
 	{}
 /*
 	template <>
 	template <>
-	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<float, lowp>::tvec4(int32 a, int32 b, int32 c, int32 d) :
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<float, aligned_lowp>::tvec4(int32 a, int32 b, int32 c, int32 d) :
 		data(_mm_castsi128_ps(_mm_set_epi32(d, c, b, a)))
 	{}
 	template <>
 	template <>
-	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<float, mediump>::tvec4(int32 a, int32 b, int32 c, int32 d) :
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<float, aligned_mediump>::tvec4(int32 a, int32 b, int32 c, int32 d) :
 		data(_mm_castsi128_ps(_mm_set_epi32(d, c, b, a)))
 	{}
 	template <>
 	template <>
-	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<float, highp>::tvec4(int32 a, int32 b, int32 c, int32 d) :
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<float, aligned_highp>::tvec4(int32 a, int32 b, int32 c, int32 d) :
 		data(_mm_castsi128_ps(_mm_set_epi32(d, c, b, a)))
 	{}
 */
--- a/glm/gtc/integer.inl
+++ b/glm/gtc/integer.inl
@ -4,8 +4,8 @@
 namespace glm{
 namespace detail
 {
-	template <typename T, precision P, template <class, precision> class vecType>
+	template <typename T, precision P, template <typename, precision> class vecType, bool Aligned>
-	struct compute_log2<T, P, vecType, false>
+	struct compute_log2<T, P, vecType, false, Aligned>
 	{
 		GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & vec)
 		{
@ -16,8 +16,8 @@ namespace detail
 	};
 #	if GLM_HAS_BITSCAN_WINDOWS
-		template <precision P>
+		template <precision P, bool Aligned>
-		struct compute_log2<int, P, tvec4, false>
+		struct compute_log2<int, P, tvec4, false, Aligned>
 		{
 			GLM_FUNC_QUALIFIER static tvec4<int, P> call(tvec4<int, P> const & vec)
 			{
--- a/glm/gtc/quaternion.inl
+++ b/glm/gtc/quaternion.inl
@ -9,8 +9,8 @@
 namespace glm{
 namespace detail
 {
-	template <typename T, precision P>
+	template <typename T, precision P, bool Aligned>
-	struct compute_dot<tquat, T, P>
+	struct compute_dot<tquat, T, P, Aligned>
 	{
 		static GLM_FUNC_QUALIFIER T call(tquat<T, P> const & x, tquat<T, P> const & y)
 		{
@ -115,7 +115,7 @@ namespace detail
 	GLM_FUNC_QUALIFIER tquat<T, P>::tquat(tvec3<T, P> const & u, tvec3<T, P> const & v)
 	{
 		tvec3<T, P> const LocalW(cross(u, v));
-		T Dot = detail::compute_dot<tvec3, T, P>::call(u, v);
+		T Dot = detail::compute_dot<tvec3, T, P, detail::is_aligned<P>::value>::call(u, v);
 		tquat<T, P> q(T(1) + Dot, LocalW.x, LocalW.y, LocalW.z);
 		*this = normalize(q);
--- a/glm/gtc/type_aligned.hpp
+++ b/glm/gtc/type_aligned.hpp
@ -0,0 +1,303 @@
 /// @ref gtc_type_aligned
 /// @file glm/gtc/type_aligned.hpp
 #pragma once
 #include "../vec2.hpp"
 #include "../vec3.hpp"
 #include "../vec4.hpp"
 #include "../gtc/vec1.hpp"
 namespace glm
 {
 	template <typename T, precision P> struct tvec1;
 	template <typename T, precision P> struct tvec2;
 	template <typename T, precision P> struct tvec3;
 	template <typename T, precision P> struct tvec4;
 	// One-component vector typedefs for float, double, int, uint and bool, each
 	// declared with the aligned_highp, aligned_mediump and aligned_lowp
 	// precision qualifiers. Unlike the vec2/vec3/vec4 typedefs in this header,
 	// these names carry a _t suffix.
 	typedef tvec1<float, aligned_highp>		aligned_highp_vec1_t;
 	typedef tvec1<float, aligned_mediump>	aligned_mediump_vec1_t;
 	typedef tvec1<float, aligned_lowp>		aligned_lowp_vec1_t;
 	typedef tvec1<double, aligned_highp>	aligned_highp_dvec1_t;
 	typedef tvec1<double, aligned_mediump>	aligned_mediump_dvec1_t;
 	typedef tvec1<double, aligned_lowp>		aligned_lowp_dvec1_t;
 	typedef tvec1<int, aligned_highp>		aligned_highp_ivec1_t;
 	typedef tvec1<int, aligned_mediump>		aligned_mediump_ivec1_t;
 	typedef tvec1<int, aligned_lowp>		aligned_lowp_ivec1_t;
 	typedef tvec1<uint, aligned_highp>		aligned_highp_uvec1_t;
 	typedef tvec1<uint, aligned_mediump>	aligned_mediump_uvec1_t;
 	typedef tvec1<uint, aligned_lowp>		aligned_lowp_uvec1_t;
 	typedef tvec1<bool, aligned_highp>		aligned_highp_bvec1_t;
 	typedef tvec1<bool, aligned_mediump>	aligned_mediump_bvec1_t;
 	typedef tvec1<bool, aligned_lowp>		aligned_lowp_bvec1_t;
 	/// @addtogroup gtc_type_aligned
 	/// @{
 	/// 2 components vector of high single-precision floating-point numbers.
 	/// There is no guarantee on the actual precision.
 	typedef tvec2<float, aligned_highp>		aligned_highp_vec2;
 	/// 2 components vector of medium single-precision floating-point numbers.
 	/// There is no guarantee on the actual precision.
 	typedef tvec2<float, aligned_mediump>	aligned_mediump_vec2;
 	/// 2 components vector of low single-precision floating-point numbers.
 	/// There is no guarantee on the actual precision.
 	typedef tvec2<float, aligned_lowp>		aligned_lowp_vec2;
 	/// 2 components vector of high double-precision floating-point numbers.
 	/// There is no guarantee on the actual precision.
 	typedef tvec2<double, aligned_highp>	aligned_highp_dvec2;
 	/// 2 components vector of medium double-precision floating-point numbers.
 	/// There is no guarantee on the actual precision.
 	typedef tvec2<double, aligned_mediump>	aligned_mediump_dvec2;
 	/// 2 components vector of low double-precision floating-point numbers.
 	/// There is no guarantee on the actual precision.
 	typedef tvec2<double, aligned_lowp>		aligned_lowp_dvec2;
 	/// 2 components vector of high precision signed integer numbers.
 	/// There is no guarantee on the actual precision.
 	typedef tvec2<int, aligned_highp>		aligned_highp_ivec2;
 	/// 2 components vector of medium precision signed integer numbers.
 	/// There is no guarantee on the actual precision.
 	typedef tvec2<int, aligned_mediump>		aligned_mediump_ivec2;
 	/// 2 components vector of low precision signed integer numbers.
 	/// There is no guarantee on the actual precision.
 	typedef tvec2<int, aligned_lowp>		aligned_lowp_ivec2;
 	/// 2 components vector of high precision unsigned integer numbers.
 	/// There is no guarantee on the actual precision.
 	typedef tvec2<uint, aligned_highp>		aligned_highp_uvec2;
 	/// 2 components vector of medium precision unsigned integer numbers.
 	/// There is no guarantee on the actual precision.
 	typedef tvec2<uint, aligned_mediump>	aligned_mediump_uvec2;
 	/// 2 components vector of low precision unsigned integer numbers.
 	/// There is no guarantee on the actual precision.
 	typedef tvec2<uint, aligned_lowp>		aligned_lowp_uvec2;
 	/// 2 components vector of high precision bool numbers.
 	/// There is no guarantee on the actual precision.
 	typedef tvec2<bool, aligned_highp>		aligned_highp_bvec2;
 	/// 2 components vector of medium precision bool numbers.
 	/// There is no guarantee on the actual precision.
 	typedef tvec2<bool, aligned_mediump>	aligned_mediump_bvec2;
 	/// 2 components vector of low precision bool numbers.
 	/// There is no guarantee on the actual precision.
 	typedef tvec2<bool, aligned_lowp>		aligned_lowp_bvec2;
 	/// 3 components vector of high single-precision floating-point numbers.
 	/// There is no guarantee on the actual precision.
 	typedef tvec3<float, aligned_highp>		aligned_highp_vec3;
 	/// 3 components vector of medium single-precision floating-point numbers.
 	/// There is no guarantee on the actual precision.
 	typedef tvec3<float, aligned_mediump>	aligned_mediump_vec3;
 	/// 3 components vector of low single-precision floating-point numbers.
 	/// There is no guarantee on the actual precision.
 	typedef tvec3<float, aligned_lowp>		aligned_lowp_vec3;
 	/// 3 components vector of high double-precision floating-point numbers.
 	/// There is no guarantee on the actual precision.
 	typedef tvec3<double, aligned_highp>	aligned_highp_dvec3;
 	/// 3 components vector of medium double-precision floating-point numbers.
 	/// There is no guarantee on the actual precision.
 	typedef tvec3<double, aligned_mediump>	aligned_mediump_dvec3;
 	/// 3 components vector of low double-precision floating-point numbers.
 	/// There is no guarantee on the actual precision.
 	typedef tvec3<double, aligned_lowp>		aligned_lowp_dvec3;
 	/// 3 components vector of high precision signed integer numbers.
 	/// There is no guarantee on the actual precision.
 	typedef tvec3<int, aligned_highp>		aligned_highp_ivec3;
 	/// 3 components vector of medium precision signed integer numbers.
 	/// There is no guarantee on the actual precision.
 	typedef tvec3<int, aligned_mediump>		aligned_mediump_ivec3;
 	/// 3 components vector of low precision signed integer numbers.
 	/// There is no guarantee on the actual precision.
 	typedef tvec3<int, aligned_lowp>		aligned_lowp_ivec3;
 	/// 3 components vector of high precision unsigned integer numbers.
 	/// There is no guarantee on the actual precision.
 	typedef tvec3<uint, aligned_highp>		aligned_highp_uvec3;
 	/// 3 components vector of medium precision unsigned integer numbers.
 	/// There is no guarantee on the actual precision.
 	typedef tvec3<uint, aligned_mediump>	aligned_mediump_uvec3;
 	/// 3 components vector of low precision unsigned integer numbers.
 	/// There is no guarantee on the actual precision.
 	typedef tvec3<uint, aligned_lowp>		aligned_lowp_uvec3;
 	/// 3 components vector of high precision bool numbers.
 	typedef tvec3<bool, aligned_highp>		aligned_highp_bvec3;
 	/// 3 components vector of medium precision bool numbers.
 	typedef tvec3<bool, aligned_mediump>	aligned_mediump_bvec3;
 	/// 3 components vector of low precision bool numbers.
 	typedef tvec3<bool, aligned_lowp>		aligned_lowp_bvec3;
 	/// 4 components vector of high single-precision floating-point numbers.
 	typedef tvec4<float, aligned_highp>		aligned_highp_vec4;
 	/// 4 components vector of medium single-precision floating-point numbers.
 	typedef tvec4<float, aligned_mediump>	aligned_mediump_vec4;
 	/// 4 components vector of low single-precision floating-point numbers.
 	typedef tvec4<float, aligned_lowp>		aligned_lowp_vec4;
 	/// 4 components vector of high double-precision floating-point numbers.
 	typedef tvec4<double, aligned_highp>	aligned_highp_dvec4;
 	/// 4 components vector of medium double-precision floating-point numbers.
 	typedef tvec4<double, aligned_mediump>	aligned_mediump_dvec4;
 	/// 4 components vector of low double-precision floating-point numbers.
 	typedef tvec4<double, aligned_lowp>		aligned_lowp_dvec4;
 	/// 4 components vector of high precision signed integer numbers.
 	typedef tvec4<int, aligned_highp>		aligned_highp_ivec4;
 	/// 4 components vector of medium precision signed integer numbers.
 	typedef tvec4<int, aligned_mediump>		aligned_mediump_ivec4;
 	/// 4 components vector of low precision signed integer numbers.
 	typedef tvec4<int, aligned_lowp>		aligned_lowp_ivec4;
 	/// 4 components vector of high precision unsigned integer numbers.
 	typedef tvec4<uint, aligned_highp>		aligned_highp_uvec4;
 	/// 4 components vector of medium precision unsigned integer numbers.
 	typedef tvec4<uint, aligned_mediump>	aligned_mediump_uvec4;
 	/// 4 components vector of low precision unsigned integer numbers.
 	typedef tvec4<uint, aligned_lowp>		aligned_lowp_uvec4;
 	/// 4 components vector of high precision bool numbers.
 	typedef tvec4<bool, aligned_highp>		aligned_highp_bvec4;
 	/// 4 components vector of medium precision bool numbers.
 	typedef tvec4<bool, aligned_mediump>	aligned_mediump_bvec4;
 	/// 4 components vector of low precision bool numbers.
 	typedef tvec4<bool, aligned_lowp>		aligned_lowp_bvec4;
 #if(defined(GLM_PRECISION_LOWP_FLOAT))
 	// Default aligned float vectors alias the lowp variants.
 	typedef aligned_lowp_vec2			aligned_vec2;
 	typedef aligned_lowp_vec3			aligned_vec3;
 	typedef aligned_lowp_vec4			aligned_vec4;
 #elif(defined(GLM_PRECISION_MEDIUMP_FLOAT))
 	// Default aligned float vectors alias the mediump variants.
 	typedef aligned_mediump_vec2		aligned_vec2;
 	typedef aligned_mediump_vec3		aligned_vec3;
 	typedef aligned_mediump_vec4		aligned_vec4;
 #else //defined(GLM_PRECISION_HIGHP_FLOAT)
 	// Fallback when neither macro above is defined: alias the highp variants.
 	/// 2 components vector of floating-point numbers.
 	///
 	/// @see <a href="http://www.opengl.org/registry/doc/GLSLangSpec.4.20.8.pdf">GLSL 4.20.8 specification, section 4.1.5 Vectors</a>
 	typedef aligned_highp_vec2			aligned_vec2;
 	/// 3 components vector of floating-point numbers.
 	///
 	/// @see <a href="http://www.opengl.org/registry/doc/GLSLangSpec.4.20.8.pdf">GLSL 4.20.8 specification, section 4.1.5 Vectors</a>
 	typedef aligned_highp_vec3			aligned_vec3;
 	/// 4 components vector of floating-point numbers.
 	///
 	/// @see <a href="http://www.opengl.org/registry/doc/GLSLangSpec.4.20.8.pdf">GLSL 4.20.8 specification, section 4.1.5 Vectors</a>
 	typedef aligned_highp_vec4			aligned_vec4;
 #endif//GLM_PRECISION
 #if(defined(GLM_PRECISION_LOWP_DOUBLE))
 	// Default aligned double vectors alias the lowp variants.
 	typedef aligned_lowp_dvec2			aligned_dvec2;
 	typedef aligned_lowp_dvec3			aligned_dvec3;
 	typedef aligned_lowp_dvec4			aligned_dvec4;
 #elif(defined(GLM_PRECISION_MEDIUMP_DOUBLE))
 	// Default aligned double vectors alias the mediump variants.
 	typedef aligned_mediump_dvec2		aligned_dvec2;
 	typedef aligned_mediump_dvec3		aligned_dvec3;
 	typedef aligned_mediump_dvec4		aligned_dvec4;
 #else //defined(GLM_PRECISION_HIGHP_DOUBLE)
 	// Fallback when neither macro above is defined: alias the highp variants.
 	/// 2 components vector of double-precision floating-point numbers.
 	typedef aligned_highp_dvec2			aligned_dvec2;
 	/// 3 components vector of double-precision floating-point numbers.
 	typedef aligned_highp_dvec3			aligned_dvec3;
 	/// 4 components vector of double-precision floating-point numbers.
 	typedef aligned_highp_dvec4			aligned_dvec4;
 #endif//GLM_PRECISION
 #if(defined(GLM_PRECISION_LOWP_INT))
 	// Default aligned signed integer vectors alias the lowp variants.
 	typedef aligned_lowp_ivec2			aligned_ivec2;
 	typedef aligned_lowp_ivec3			aligned_ivec3;
 	typedef aligned_lowp_ivec4			aligned_ivec4;
 #elif(defined(GLM_PRECISION_MEDIUMP_INT))
 	// Default aligned signed integer vectors alias the mediump variants.
 	typedef aligned_mediump_ivec2		aligned_ivec2;
 	typedef aligned_mediump_ivec3		aligned_ivec3;
 	typedef aligned_mediump_ivec4		aligned_ivec4;
 #else //defined(GLM_PRECISION_HIGHP_INT)
 	// Fallback when neither macro above is defined: alias the highp variants.
 	/// 2 components vector of signed integer numbers.
 	typedef aligned_highp_ivec2			aligned_ivec2;
 	/// 3 components vector of signed integer numbers.
 	typedef aligned_highp_ivec3			aligned_ivec3;
 	/// 4 components vector of signed integer numbers.
 	typedef aligned_highp_ivec4			aligned_ivec4;
 #endif//GLM_PRECISION
 	// -- Unsigned integer definition --
 #if(defined(GLM_PRECISION_LOWP_UINT))
 	// Default aligned unsigned integer vectors alias the lowp variants.
 	typedef aligned_lowp_uvec2			aligned_uvec2;
 	typedef aligned_lowp_uvec3			aligned_uvec3;
 	typedef aligned_lowp_uvec4			aligned_uvec4;
 #elif(defined(GLM_PRECISION_MEDIUMP_UINT))
 	// Default aligned unsigned integer vectors alias the mediump variants.
 	typedef aligned_mediump_uvec2		aligned_uvec2;
 	typedef aligned_mediump_uvec3		aligned_uvec3;
 	typedef aligned_mediump_uvec4		aligned_uvec4;
 #else //defined(GLM_PRECISION_HIGHP_UINT)
 	// Fallback when neither macro above is defined: alias the highp variants.
 	/// 2 components vector of unsigned integer numbers.
 	typedef aligned_highp_uvec2			aligned_uvec2;
 	/// 3 components vector of unsigned integer numbers.
 	typedef aligned_highp_uvec3			aligned_uvec3;
 	/// 4 components vector of unsigned integer numbers.
 	typedef aligned_highp_uvec4			aligned_uvec4;
 #endif//GLM_PRECISION
 #if(defined(GLM_PRECISION_LOWP_BOOL))
 	// Default aligned boolean vectors alias the lowp variants.
 	typedef aligned_lowp_bvec2			aligned_bvec2;
 	typedef aligned_lowp_bvec3			aligned_bvec3;
 	typedef aligned_lowp_bvec4			aligned_bvec4;
 #elif(defined(GLM_PRECISION_MEDIUMP_BOOL))
 	// Default aligned boolean vectors alias the mediump variants.
 	typedef aligned_mediump_bvec2		aligned_bvec2;
 	typedef aligned_mediump_bvec3		aligned_bvec3;
 	typedef aligned_mediump_bvec4		aligned_bvec4;
 #else //defined(GLM_PRECISION_HIGHP_BOOL)
 	// Fallback when neither macro above is defined: alias the highp variants.
 	/// 2 components vector of boolean.
 	typedef aligned_highp_bvec2			aligned_bvec2;
 	/// 3 components vector of boolean.
 	typedef aligned_highp_bvec3			aligned_bvec3;
 	/// 4 components vector of boolean.
 	typedef aligned_highp_bvec4			aligned_bvec4;
 #endif//GLM_PRECISION
 	/// @}
 }//namespace glm
--- a/glm/gtx/fast_square_root.inl
+++ b/glm/gtx/fast_square_root.inl
@ -23,17 +23,17 @@ namespace glm
 	GLM_FUNC_QUALIFIER genType fastInverseSqrt(genType x)
 	{
 		// Scalar overload: wraps x in a one-component lowp vector, runs
 		// detail::compute_inversesqrt on it and returns the single component.
 		// NOTE(review): T and P in the CUDA branch are not declared by the visible
 		// signature -- confirm that branch compiles under nvcc.
 #		ifdef __CUDACC__ // Workaround for a CUDA compiler bug up to CUDA6
-			tvec1<T, P> tmp(detail::compute_inversesqrt<tvec1, genType, lowp>::call(tvec1<genType, lowp>(x)));
+			tvec1<T, P> tmp(detail::compute_inversesqrt<tvec1, genType, lowp, detail::is_aligned<lowp>::value>::call(tvec1<genType, lowp>(x)));
 			return tmp.x;
 #		else
-			return detail::compute_inversesqrt<tvec1, genType, lowp>::call(tvec1<genType, lowp>(x)).x;
 			// The precision stays lowp so it matches the tvec1<genType, lowp>
 			// argument, the CUDA branch and the line this change replaces; only
 			// the is_aligned argument is new.
+			return detail::compute_inversesqrt<tvec1, genType, lowp, detail::is_aligned<lowp>::value>::call(tvec1<genType, lowp>(x)).x;
 #		endif
 	}
 	// Vector overload: forwards x unchanged to detail::compute_inversesqrt,
 	// passing detail::is_aligned<P>::value as its new fourth template argument.
 	template <typename T, precision P, template <typename, precision> class vecType>
 	GLM_FUNC_QUALIFIER vecType<T, P> fastInverseSqrt(vecType<T, P> const & x)
 	{
-		return detail::compute_inversesqrt<vecType, T, P>::call(x);
+		return detail::compute_inversesqrt<vecType, T, P, detail::is_aligned<P>::value>::call(x);
 	}
 	// fastLength
--- a/readme.md
+++ b/readme.md
@ -63,6 +63,8 @@ glm::mat4 camera(float Translate, glm::vec2 const & Rotate)
 - Added missing bvec* && and || operators
 - Added iround and uround to GTC_integer, fast round on positive values
 - Added raw SIMD API
 - Added 'aligned' qualifiers
 - Added GTC_type_aligned with aligned *vec* types
 ##### Improvements:
 - Improved SIMD and swizzle operators interactions with GCC and Clang #474
--- a/test/core/core_type_vec4.cpp
+++ b/test/core/core_type_vec4.cpp
@ -29,6 +29,7 @@
 /// @author Christophe Riccio
 ///////////////////////////////////////////////////////////////////////////////////
 #define GLM_FORCE_ALIGNED
 #define GLM_SWIZZLE
 #include <glm/vector_relational.hpp>
 #include <glm/vec2.hpp>
@ -334,7 +335,7 @@ int test_vec4_equal()
 int test_vec4_size()
 {
 	int Error = 0;
-	
+
 	Error += sizeof(glm::vec4) == sizeof(glm::lowp_vec4) ? 0 : 1;
 	Error += sizeof(glm::vec4) == sizeof(glm::mediump_vec4) ? 0 : 1;
 	Error += sizeof(glm::vec4) == sizeof(glm::highp_vec4) ? 0 : 1;
@ -345,7 +346,14 @@ int test_vec4_size()
 	Error += 32 == sizeof(glm::highp_dvec4) ? 0 : 1;
 	Error += glm::vec4().length() == 4 ? 0 : 1;
 	Error += glm::dvec4().length() == 4 ? 0 : 1;
-	
+
 	struct my_struct
 	{
 		glm::uint32 a;
 		glm::vec4 b;
 	};
 	GLM_STATIC_ASSERT(sizeof(my_struct) == sizeof(glm::uint32) + sizeof(glm::vec4), "glm::vec4 alignment is not correct");
 	return Error;
 }
--- a/test/gtc/CMakeLists.txt
+++ b/test/gtc/CMakeLists.txt
@ -13,6 +13,7 @@ glmCreateTestGTC(gtc_quaternion)
 glmCreateTestGTC(gtc_random)
 glmCreateTestGTC(gtc_round)
 glmCreateTestGTC(gtc_reciprocal)
 glmCreateTestGTC(gtc_type_aligned)
 glmCreateTestGTC(gtc_type_precision)
 glmCreateTestGTC(gtc_type_ptr)
 glmCreateTestGTC(gtc_ulp)
--- a/test/gtc/gtc_type_aligned.cpp
+++ b/test/gtc/gtc_type_aligned.cpp
@ -0,0 +1,11 @@
 /// @file test/gtc/gtc_type_aligned.cpp
 #define GLM_FORCE_ALIGNED
 #include <glm/gtc/type_aligned.hpp>
 // Placeholder entry point: no checks are performed yet, so Error stays 0 and
 // the test only confirms that <glm/gtc/type_aligned.hpp> compiles with
 // GLM_FORCE_ALIGNED defined.
 int main()
 {
 	int Error = 0;
 	return Error;
 }
--- a/test/gtc/gtc_type_precision.cpp
+++ b/test/gtc/gtc_type_precision.cpp
@ -1,33 +1,4 @@
 ///////////////////////////////////////////////////////////////////////////////////
 /// OpenGL Mathematics (glm.g-truc.net)
 ///
 /// Copyright (c) 2005 - 2015 G-Truc Creation (www.g-truc.net)
 /// Permission is hereby granted, free of charge, to any person obtaining a copy
 /// of this software and associated documentation files (the "Software"), to deal
 /// in the Software without restriction, including without limitation the rights
 /// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 /// copies of the Software, and to permit persons to whom the Software is
 /// furnished to do so, subject to the following conditions:
 /// 
 /// The above copyright notice and this permission notice shall be included in
 /// all copies or substantial portions of the Software.
 /// 
 /// Restrictions:
 ///		By making use of the Software for military purposes, you choose to make
 ///		a Bunny unhappy.
 /// 
 /// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 /// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 /// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 /// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 /// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 /// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 /// THE SOFTWARE.
 ///
 /// @file test/gtc/gtc_type_precision.cpp
 /// @date 2010-09-16 / 2014-11-25
 /// @author Christophe Riccio
 ///////////////////////////////////////////////////////////////////////////////////
 #include <glm/gtc/type_precision.hpp>
 #include <glm/gtc/quaternion.hpp>