| 
						
						
							
								
							
						
						
					 | 
					 | 
					@ -577,7 +577,20 @@ namespace detail { | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b) | 
					 | 
					 | 
					 | 
							static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							{ | 
					 | 
					 | 
					 | 
							{ | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								vec<4, float, Q> Result; | 
					 | 
					 | 
					 | 
								vec<4, float, Q> Result; | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					#if GLM_ARCH & GLM_ARCH_ARMV8_BIT | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								Result.data = vdivq_f32(a.data, b.data); | 
					 | 
					 | 
					 | 
								Result.data = vdivq_f32(a.data, b.data); | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					#else | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
								/* Arm assembler reference: | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
								 * | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
								 * The Newton-Raphson iteration: x[n+1] = x[n] * (2 - d * x[n]) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
								 * converges to (1/d) if x0 is the result of VRECPE applied to d. | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
								 * | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
								 * Note: The precision usually improves with two iterations, but more than two iterations are not helpful. */ | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
								float32x4_t x = vrecpeq_f32(b.data); | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
								x = vmulq_f32(vrecpsq_f32(b.data, x), x); | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
								x = vmulq_f32(vrecpsq_f32(b.data, x), x); | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
								Result.data = vmulq_f32(a.data, x); | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					#endif | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								return Result; | 
					 | 
					 | 
					 | 
								return Result; | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							} | 
					 | 
					 | 
					 | 
							} | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
						}; | 
					 | 
					 | 
					 | 
						}; | 
				
			
			
		
	
	
		
		
			
				
					| 
						
							
								
							
						
						
						
					 | 
					 | 
					
  |