Merge branch 'master' of https://github.com/g-truc/glm
	
		
	
				
					
				
			
						commit
						4fce0f0de5
					
				
				 8 changed files with 360 additions and 86 deletions
			
			
		| @ -0,0 +1,155 @@ | ||||
| /// @ref simd_neon
 | ||||
| /// @file glm/simd/neon.h
 | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #if GLM_ARCH & GLM_ARCH_NEON_BIT | ||||
| #include <arm_neon.h> | ||||
| 
 | ||||
| namespace glm { | ||||
| 	namespace neon { | ||||
| 		static float32x4_t dupq_lane(float32x4_t vsrc, int lane) { | ||||
| 			switch(lane) { | ||||
| #if GLM_ARCH & GLM_ARCH_ARMV8_BIT | ||||
| 				case 0: return vdupq_laneq_f32(vsrc, 0); | ||||
| 				case 1: return vdupq_laneq_f32(vsrc, 1); | ||||
| 				case 2: return vdupq_laneq_f32(vsrc, 2); | ||||
| 				case 3: return vdupq_laneq_f32(vsrc, 3); | ||||
| #else | ||||
| 				case 0: return vdupq_n_f32(vgetq_lane_f32(vsrc, 0)); | ||||
| 				case 1: return vdupq_n_f32(vgetq_lane_f32(vsrc, 1)); | ||||
| 				case 2: return vdupq_n_f32(vgetq_lane_f32(vsrc, 2)); | ||||
| 				case 3: return vdupq_n_f32(vgetq_lane_f32(vsrc, 3)); | ||||
| #endif | ||||
| 			} | ||||
| 			assert(!"Unreachable code executed!"); | ||||
| 			return vdupq_n_f32(0.0f); | ||||
| 		} | ||||
| 
 | ||||
| 		static float32x2_t dup_lane(float32x4_t vsrc, int lane) { | ||||
| 			switch(lane) { | ||||
| #if GLM_ARCH & GLM_ARCH_ARMV8_BIT | ||||
| 				case 0: return vdup_laneq_f32(vsrc, 0); | ||||
| 				case 1: return vdup_laneq_f32(vsrc, 1); | ||||
| 				case 2: return vdup_laneq_f32(vsrc, 2); | ||||
| 				case 3: return vdup_laneq_f32(vsrc, 3); | ||||
| #else | ||||
| 				case 0: return vdup_n_f32(vgetq_lane_f32(vsrc, 0)); | ||||
| 				case 1: return vdup_n_f32(vgetq_lane_f32(vsrc, 1)); | ||||
| 				case 2: return vdup_n_f32(vgetq_lane_f32(vsrc, 2)); | ||||
| 				case 3: return vdup_n_f32(vgetq_lane_f32(vsrc, 3)); | ||||
| #endif | ||||
| 			} | ||||
| 			assert(!"Unreachable code executed!"); | ||||
| 			return vdup_n_f32(0.0f); | ||||
| 		} | ||||
| 
 | ||||
| 		static float32x4_t copy_lane(float32x4_t vdst, int dlane, float32x4_t vsrc, int slane) { | ||||
| #if GLM_ARCH & GLM_ARCH_ARMV8_BIT | ||||
| 			switch(dlane) { | ||||
| 				case 0: | ||||
| 					switch(slane) { | ||||
| 						case 0: return vcopyq_laneq_f32(vdst, 0, vsrc, 0); | ||||
| 						case 1: return vcopyq_laneq_f32(vdst, 0, vsrc, 1); | ||||
| 						case 2: return vcopyq_laneq_f32(vdst, 0, vsrc, 2); | ||||
| 						case 3: return vcopyq_laneq_f32(vdst, 0, vsrc, 3); | ||||
| 					} | ||||
| 					assert(!"Unreachable code executed!"); | ||||
| 				case 1: | ||||
| 					switch(slane) { | ||||
| 						case 0: return vcopyq_laneq_f32(vdst, 1, vsrc, 0); | ||||
| 						case 1: return vcopyq_laneq_f32(vdst, 1, vsrc, 1); | ||||
| 						case 2: return vcopyq_laneq_f32(vdst, 1, vsrc, 2); | ||||
| 						case 3: return vcopyq_laneq_f32(vdst, 1, vsrc, 3); | ||||
| 					} | ||||
| 					assert(!"Unreachable code executed!"); | ||||
| 				case 2: | ||||
| 					switch(slane) { | ||||
| 						case 0: return vcopyq_laneq_f32(vdst, 2, vsrc, 0); | ||||
| 						case 1: return vcopyq_laneq_f32(vdst, 2, vsrc, 1); | ||||
| 						case 2: return vcopyq_laneq_f32(vdst, 2, vsrc, 2); | ||||
| 						case 3: return vcopyq_laneq_f32(vdst, 2, vsrc, 3); | ||||
| 					} | ||||
| 					assert(!"Unreachable code executed!"); | ||||
| 				case 3: | ||||
| 					switch(slane) { | ||||
| 						case 0: return vcopyq_laneq_f32(vdst, 3, vsrc, 0); | ||||
| 						case 1: return vcopyq_laneq_f32(vdst, 3, vsrc, 1); | ||||
| 						case 2: return vcopyq_laneq_f32(vdst, 3, vsrc, 2); | ||||
| 						case 3: return vcopyq_laneq_f32(vdst, 3, vsrc, 3); | ||||
| 					} | ||||
| 					assert(!"Unreachable code executed!"); | ||||
| 			} | ||||
| #else | ||||
| 
 | ||||
| 			float l; | ||||
| 			switch(slane) { | ||||
| 				case 0: l = vgetq_lane_f32(vsrc, 0); break; | ||||
| 				case 1: l = vgetq_lane_f32(vsrc, 1); break; | ||||
| 				case 2: l = vgetq_lane_f32(vsrc, 2); break; | ||||
| 				case 3: l = vgetq_lane_f32(vsrc, 3); break; | ||||
| 				default: 
 | ||||
| 					assert(!"Unreachable code executed!"); | ||||
| 			} | ||||
| 			switch(dlane) { | ||||
| 				case 0: return vsetq_lane_f32(l, vdst, 0); | ||||
| 				case 1: return vsetq_lane_f32(l, vdst, 1); | ||||
| 				case 2: return vsetq_lane_f32(l, vdst, 2); | ||||
| 				case 3: return vsetq_lane_f32(l, vdst, 3); | ||||
| 			} | ||||
| #endif | ||||
| 			assert(!"Unreachable code executed!"); | ||||
| 			return vdupq_n_f32(0.0f); | ||||
| 		} | ||||
| 
 | ||||
| 		static float32x4_t mul_lane(float32x4_t v, float32x4_t vlane, int lane) { | ||||
| #if GLM_ARCH & GLM_ARCH_ARMV8_BIT | ||||
| 			switch(lane) { 
 | ||||
| 				case 0: return vmulq_laneq_f32(v, vlane, 0); break; | ||||
| 				case 1: return vmulq_laneq_f32(v, vlane, 1); break; | ||||
| 				case 2: return vmulq_laneq_f32(v, vlane, 2); break; | ||||
| 				case 3: return vmulq_laneq_f32(v, vlane, 3); break; | ||||
| 				default: 
 | ||||
| 					assert(!"Unreachable code executed!"); | ||||
| 			} | ||||
| 			assert(!"Unreachable code executed!"); | ||||
| 			return vdupq_n_f32(0.0f); | ||||
| #else | ||||
| 			return vmulq_f32(v, dupq_lane(vlane, lane)); | ||||
| #endif | ||||
| 		} | ||||
| 
 | ||||
| 		static float32x4_t madd_lane(float32x4_t acc, float32x4_t v, float32x4_t vlane, int lane) { | ||||
| #if GLM_ARCH & GLM_ARCH_ARMV8_BIT | ||||
| #ifdef GLM_CONFIG_FORCE_FMA | ||||
| #	define FMADD_LANE(acc, x, y, L) do { asm volatile ("fmla %0.4s, %1.4s, %2.4s" : "+w"(acc) : "w"(x), "w"(dup_lane(y, L))); } while(0) | ||||
| #else | ||||
| #	define FMADD_LANE(acc, x, y, L) do { acc = vmlaq_laneq_f32(acc, x, y, L); } while(0) | ||||
| #endif | ||||
| 
 | ||||
| 			switch(lane) { 
 | ||||
| 				case 0: 
 | ||||
| 					FMADD_LANE(acc, v, vlane, 0); | ||||
| 					return acc; | ||||
| 				case 1: | ||||
| 					FMADD_LANE(acc, v, vlane, 1); | ||||
| 					return acc; | ||||
| 				case 2: | ||||
| 					FMADD_LANE(acc, v, vlane, 2); | ||||
| 					return acc; | ||||
| 				case 3: | ||||
| 					FMADD_LANE(acc, v, vlane, 3); | ||||
| 					return acc; | ||||
| 				default: 
 | ||||
| 					assert(!"Unreachable code executed!"); | ||||
| 			} | ||||
| 			assert(!"Unreachable code executed!"); | ||||
| 			return vdupq_n_f32(0.0f); | ||||
| #	undef FMADD_LANE | ||||
| #else | ||||
| 			return vaddq_f32(acc, vmulq_f32(v, dupq_lane(vlane, lane))); | ||||
| #endif | ||||
| 		} | ||||
| 	} //namespace neon
 | ||||
| } // namespace glm
 | ||||
| #endif // GLM_ARCH & GLM_ARCH_NEON_BIT
 | ||||
					Loading…
					
					
				
		Reference in New Issue