Skip to content

Commit d542caa

Browse files
authored
x86 avx: fix cvtt bounds
1 parent 2a0486c commit d542caa

File tree

2 files changed

+60
-18
lines changed

2 files changed

+60
-18
lines changed

simde/x86/avx.h

Lines changed: 26 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3823,7 +3823,7 @@ simde__m128i
38233823
simde_mm256_cvttpd_epi32 (simde__m256d a) {
38243824
#if defined(SIMDE_X86_AVX_NATIVE)
38253825
return _mm256_cvttpd_epi32(a);
3826-
#elif defined(SIMDE_LOONGARCH_LASX_NATIVE)
3826+
#elif defined(SIMDE_LOONGARCH_LASX_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE)
38273827
simde__m256i_private a_;
38283828
a_.i256 = __lasx_xvftintrz_w_d(a, a);
38293829
a_.i256 = __lasx_xvpermi_d(a_.i256, 0xd8);
@@ -3832,13 +3832,20 @@ simde_mm256_cvttpd_epi32 (simde__m256d a) {
38323832
simde__m128i_private r_;
38333833
simde__m256d_private a_ = simde__m256d_to_private(a);
38343834

3835-
#if defined(simde_math_trunc)
3835+
#if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
3836+
r_.m64[0] = simde_mm_cvttpd_pi32(a_.m128d[0]);
3837+
r_.m64[1] = simde_mm_cvttpd_pi32(a_.m128d[1]);
3838+
#else
38363839
SIMDE_VECTORIZE
3837-
for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) {
3838-
r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_trunc(a_.f64[i]));
3840+
for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
3841+
simde_float64 v = simde_math_trunc(a_.f64[i]);
3842+
#if defined(SIMDE_FAST_CONVERSION_RANGE)
3843+
r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v);
3844+
#else
3845+
r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ?
3846+
SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN;
3847+
#endif
38393848
}
3840-
#else
3841-
HEDLEY_UNREACHABLE();
38423849
#endif
38433850

38443851
return simde__m128i_from_private(r_);
@@ -3854,19 +3861,26 @@ simde__m256i
38543861
simde_mm256_cvttps_epi32 (simde__m256 a) {
38553862
#if defined(SIMDE_X86_AVX_NATIVE)
38563863
return _mm256_cvttps_epi32(a);
3857-
#elif defined(SIMDE_LOONGARCH_LASX_NATIVE)
3864+
#elif defined(SIMDE_LOONGARCH_LASX_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE)
38583865
return __lasx_xvftintrz_w_s(a);
38593866
#else
38603867
simde__m256i_private r_;
38613868
simde__m256_private a_ = simde__m256_to_private(a);
38623869

3863-
#if defined(simde_math_truncf)
3870+
#if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
3871+
r_.m128i[0] = simde_mm_cvttps_epi32(a_.m128[0]);
3872+
r_.m128i[1] = simde_mm_cvttps_epi32(a_.m128[1]);
3873+
#else
38643874
SIMDE_VECTORIZE
3865-
for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) {
3866-
r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_truncf(a_.f32[i]));
3875+
for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
3876+
simde_float32 v = simde_math_truncf(a_.f32[i]);
3877+
#if defined(SIMDE_FAST_CONVERSION_RANGE)
3878+
r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v);
3879+
#else
3880+
r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ?
3881+
SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN;
3882+
#endif
38673883
}
3868-
#else
3869-
HEDLEY_UNREACHABLE();
38703884
#endif
38713885

38723886
return simde__m256i_from_private(r_);

test/x86/avx.c

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7125,9 +7125,9 @@ test_simde_mm256_cvtpd_epi32(SIMDE_MUNIT_TEST_ARGS) {
71257125
#endif
71267126
#if !defined(SIMDE_FAST_CONVERSION_RANGE)
71277127
{ simde_mm256_set_pd(
7128-
HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) + 1),
7128+
HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) + 1),
71297129
HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) - 100),
7130-
HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) - 1),
7130+
HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) - 1),
71317131
HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) + 100)),
71327132
simde_mm_set_epi32(
71337133
INT32_MIN, INT32_C(2147483547), INT32_MIN, -INT32_C(2147483548)) },
@@ -7218,9 +7218,9 @@ test_simde_mm256_cvtps_epi32(SIMDE_MUNIT_TEST_ARGS) {
72187218
#endif
72197219
#if !defined(SIMDE_FAST_CONVERSION_RANGE)
72207220
{ simde_mm256_set_ps(
7221-
HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) + 1),
7221+
HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) + 1),
72227222
HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) - 100),
7223-
HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) - 1),
7223+
HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) - 1),
72247224
HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) + 100),
72257225
0.f, 0.f, 0.f, 0.f),
72267226
simde_mm256_set_epi32(
@@ -7436,7 +7436,20 @@ test_simde_mm256_cvttpd_epi32(SIMDE_MUNIT_TEST_ARGS) {
74367436
const struct {
74377437
simde__m256d a;
74387438
simde__m128i r;
7439-
} test_vec[8] = {
7439+
} test_vec[] = {
7440+
#if !defined(SIMDE_FAST_NANS)
7441+
{ simde_mm256_set_pd(SIMDE_MATH_NAN, -SIMDE_MATH_NAN, 0.0, 0.0),
7442+
simde_mm_set_epi32( INT32_MIN, INT32_MIN, 0, 0) },
7443+
#endif
7444+
#if !defined(SIMDE_FAST_CONVERSION_RANGE)
7445+
{ simde_mm256_set_pd(
7446+
HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) + 1),
7447+
HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) - 100),
7448+
HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) - 1),
7449+
HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) + 100)),
7450+
simde_mm_set_epi32(
7451+
INT32_MIN, INT32_C(2147483547), INT32_MIN, -INT32_C(2147483548)) },
7452+
#endif
74407453
{ simde_mm256_set_pd(SIMDE_FLOAT64_C( -175.82), SIMDE_FLOAT64_C( -91.19),
74417454
SIMDE_FLOAT64_C( -855.64), SIMDE_FLOAT64_C(-1000.00)),
74427455
simde_mm_set_epi32(INT32_C(-175), INT32_C( -91), INT32_C(-855), INT32_C(-1000)) },
@@ -7476,7 +7489,22 @@ test_simde_mm256_cvttps_epi32(SIMDE_MUNIT_TEST_ARGS) {
74767489
const struct {
74777490
simde__m256 a;
74787491
simde__m256i r;
7479-
} test_vec[8] = {
7492+
} test_vec[] = {
7493+
#if !defined(SIMDE_FAST_NANS)
7494+
{ simde_mm256_set_ps(SIMDE_MATH_NAN, -SIMDE_MATH_NAN, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f),
7495+
simde_mm256_set_epi32( INT32_MIN, INT32_MIN, 0, 0, 0, 0, 0, 0) },
7496+
#endif
7497+
#if !defined(SIMDE_FAST_CONVERSION_RANGE)
7498+
{ simde_mm256_set_ps(
7499+
HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) + 1),
7500+
HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) - 100),
7501+
HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) - 1),
7502+
HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) + 100),
7503+
0.f, 0.f, 0.f, 0.f),
7504+
simde_mm256_set_epi32(
7505+
INT32_MIN, INT32_C(2147483520), INT32_MIN, -INT32_C(2147483520),
7506+
0, 0, 0, 0) },
7507+
#endif
74807508
{ simde_mm256_set_ps(SIMDE_FLOAT32_C( -135.75), SIMDE_FLOAT32_C( 534.39),
74817509
SIMDE_FLOAT32_C( -81.93), SIMDE_FLOAT32_C( -234.94),
74827510
SIMDE_FLOAT32_C( -390.94), SIMDE_FLOAT32_C( -625.05),

0 commit comments

Comments
 (0)