@@ -71,13 +71,48 @@ simde_vcmla_rot180_f16(simde_float16x4_t r, simde_float16x4_t a, simde_float16x4
7171 #define vcmla_rot180_f16 (r , a , b ) simde_vcmla_rot180_f16(r, a, b)
7272#endif
7373
74+ SIMDE_FUNCTION_ATTRIBUTES
75+ simde_float32x2_t
76+ simde_vcmla_rot180_f32 (simde_float32x2_t r , simde_float32x2_t a , simde_float32x2_t b ) {
77+ #if defined(SIMDE_ARM_NEON_A32V8_NATIVE ) && defined(SIMDE_ARCH_ARM_COMPLEX ) && \
78+ (!defined(HEDLEY_GCC_VERSION ) || HEDLEY_GCC_VERSION_CHECK (9 , 0 , 0 )) && \
79+ (!defined(__clang__ ) || SIMDE_DETECT_CLANG_VERSION_CHECK (12 , 0 , 0 ))
80+ return vcmla_rot180_f32 (r , a , b );
81+ #else
82+ simde_float32x2_private
83+ r_ = simde_float32x2_to_private (r ),
84+ a_ = simde_float32x2_to_private (a ),
85+ b_ = simde_float32x2_to_private (b );
86+
87+ #if defined(SIMDE_SHUFFLE_VECTOR_ )
88+ a_ .values = SIMDE_SHUFFLE_VECTOR_ (32 , 8 , a_ .values , a_ .values , 0 , 0 );
89+ b_ .values = SIMDE_SHUFFLE_VECTOR_ (32 , 8 , - b_ .values , - b_ .values , 0 , 1 );
90+ r_ .values += b_ .values * a_ .values ;
91+ #else
92+ SIMDE_VECTORIZE
93+ for (size_t i = 0 ; i < (sizeof (r_ .values ) / (2 * sizeof (r_ .values [0 ]))) ; i ++ ) {
94+ r_ .values [2 * i ] += - (b_ .values [2 * i ]) * a_ .values [2 * i ];
95+ r_ .values [2 * i + 1 ] += - (b_ .values [2 * i + 1 ]) * a_ .values [2 * i ];
96+ }
97+ #endif
98+
99+ return simde_float32x2_from_private (r_ );
100+ #endif
101+ }
102+ #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES ) || (defined(SIMDE_ENABLE_NATIVE_ALIASES ) && \
103+ !(defined(SIMDE_ARCH_ARM_COMPLEX ) && \
104+ (!defined(HEDLEY_GCC_VERSION ) || HEDLEY_GCC_VERSION_CHECK (9 ,0 ,0 )) && \
105+ (!defined(__clang__ ) || SIMDE_DETECT_CLANG_VERSION_CHECK (12 ,0 ,0 ))))
106+ #undef vcmla_rot180_f32
107+ #define vcmla_rot180_f32 (r , a , b ) simde_vcmla_rot180_f32(r, a, b)
108+ #endif
109+
74110SIMDE_FUNCTION_ATTRIBUTES
75111simde_float16x8_t
76112simde_vcmlaq_rot180_f16 (simde_float16x8_t r , simde_float16x8_t a , simde_float16x8_t b ) {
77- #if defined(SIMDE_ARM_NEON_A32V8_NATIVE ) && \
113+ #if defined(SIMDE_ARM_NEON_A32V8_NATIVE ) && defined( SIMDE_ARM_NEON_FP16 ) && defined( SIMDE_ARCH_ARM_COMPLEX ) && \
78114 (!defined(HEDLEY_GCC_VERSION ) || HEDLEY_GCC_VERSION_CHECK (8 ,5 ,0 )) && \
79- (!defined(__clang__ ) || SIMDE_DETECT_CLANG_VERSION_CHECK (12 ,0 ,0 )) && \
80- defined(SIMDE_ARM_NEON_FP16 ) && defined(SIMDE_ARCH_ARM_COMPLEX )
115+ (!defined(__clang__ ) || SIMDE_DETECT_CLANG_VERSION_CHECK (12 ,0 ,0 ))
81116 return vcmlaq_rot180_f16 (r , a , b );
82117 #else
83118 simde_float16x8_private
@@ -101,51 +136,13 @@ simde_vcmlaq_rot180_f16(simde_float16x8_t r, simde_float16x8_t a, simde_float16x
101136 #endif
102137}
103138#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES ) || (defined(SIMDE_ENABLE_NATIVE_ALIASES ) && \
104- !((! defined(HEDLEY_GCC_VERSION ) || HEDLEY_GCC_VERSION_CHECK ( 8 , 5 , 0 ) ) && \
105- (!defined(__clang__ ) || SIMDE_DETECT_CLANG_VERSION_CHECK ( 12 , 0 ,0 )) && \
106- defined(SIMDE_ARM_NEON_FP16 ) && defined( SIMDE_ARCH_ARM_COMPLEX )))
139+ !(defined(SIMDE_ARCH_ARM_COMPLEX ) && defined( SIMDE_ARM_NEON_FP16 ) && \
140+ (!defined(HEDLEY_GCC_VERSION ) || HEDLEY_GCC_VERSION_CHECK ( 8 , 5 ,0 )) && \
141+ (! defined(__clang__ ) || SIMDE_DETECT_CLANG_VERSION_CHECK ( 12 , 0 , 0 ) )))
107142 #undef vcmlaq_rot180_f16
108143 #define vcmlaq_rot180_f16 (r , a , b ) simde_vcmlaq_rot180_f16(r, a, b)
109144#endif
110145
111-
112- SIMDE_FUNCTION_ATTRIBUTES
113- simde_float32x2_t
114- simde_vcmla_rot180_f32 (simde_float32x2_t r , simde_float32x2_t a , simde_float32x2_t b ) {
115- #if defined(SIMDE_ARM_NEON_A32V8_NATIVE ) && \
116- (!defined(HEDLEY_GCC_VERSION ) || HEDLEY_GCC_VERSION_CHECK (9 ,0 ,0 )) && \
117- (!defined(__clang__ ) || SIMDE_DETECT_CLANG_VERSION_CHECK (12 ,0 ,0 )) && \
118- defined(SIMDE_ARCH_ARM_COMPLEX )
119- return vcmla_rot180_f32 (r , a , b );
120- #else
121- simde_float32x2_private
122- r_ = simde_float32x2_to_private (r ),
123- a_ = simde_float32x2_to_private (a ),
124- b_ = simde_float32x2_to_private (b );
125-
126- #if defined(SIMDE_SHUFFLE_VECTOR_ )
127- a_ .values = SIMDE_SHUFFLE_VECTOR_ (32 , 8 , a_ .values , a_ .values , 0 , 0 );
128- b_ .values = SIMDE_SHUFFLE_VECTOR_ (32 , 8 , - b_ .values , - b_ .values , 0 , 1 );
129- r_ .values += b_ .values * a_ .values ;
130- #else
131- SIMDE_VECTORIZE
132- for (size_t i = 0 ; i < (sizeof (r_ .values ) / (2 * sizeof (r_ .values [0 ]))) ; i ++ ) {
133- r_ .values [2 * i ] += - (b_ .values [2 * i ]) * a_ .values [2 * i ];
134- r_ .values [2 * i + 1 ] += - (b_ .values [2 * i + 1 ]) * a_ .values [2 * i ];
135- }
136- #endif
137-
138- return simde_float32x2_from_private (r_ );
139- #endif
140- }
141- #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES ) || (defined(SIMDE_ENABLE_NATIVE_ALIASES ) && \
142- !((!defined(HEDLEY_GCC_VERSION ) || HEDLEY_GCC_VERSION_CHECK (9 ,0 ,0 )) && \
143- (!defined(__clang__ ) || SIMDE_DETECT_CLANG_VERSION_CHECK (12 ,0 ,0 )) && \
144- defined(SIMDE_ARCH_ARM_COMPLEX )))
145- #undef vcmla_rot180_f32
146- #define vcmla_rot180_f32 (r , a , b ) simde_vcmla_rot180_f32(r, a, b)
147- #endif
148-
149146SIMDE_FUNCTION_ATTRIBUTES
150147simde_float32x4_t
151148simde_vcmlaq_rot180_f32 (simde_float32x4_t r , simde_float32x4_t a , simde_float32x4_t b ) {
@@ -180,9 +177,9 @@ simde_vcmlaq_rot180_f32(simde_float32x4_t r, simde_float32x4_t a, simde_float32x
180177 #endif
181178}
182179#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES ) || (defined(SIMDE_ENABLE_NATIVE_ALIASES ) && \
183- !((! defined(HEDLEY_GCC_VERSION ) || HEDLEY_GCC_VERSION_CHECK ( 9 , 0 , 0 ) ) && \
184- (!defined(__clang__ ) || SIMDE_DETECT_CLANG_VERSION_CHECK ( 12 ,0 ,0 )) && \
185- defined(SIMDE_ARCH_ARM_COMPLEX )))
180+ !(defined(SIMDE_ARCH_ARM_COMPLEX ) && \
181+ (!defined(HEDLEY_GCC_VERSION ) || HEDLEY_GCC_VERSION_CHECK ( 9 ,0 ,0 )) && \
182+ (! defined(__clang__ ) || SIMDE_DETECT_CLANG_VERSION_CHECK ( 12 , 0 , 0 ) )))
186183 #undef vcmlaq_rot180_f32
187184 #define vcmlaq_rot180_f32 (r , a , b ) simde_vcmlaq_rot180_f32(r, a, b)
188185#endif
0 commit comments