From be50440fad233232c58e87173585f8b45a3f24c9 Mon Sep 17 00:00:00 2001 From: Pradipta Ghosh Date: Tue, 24 Jan 2023 09:19:38 +0000 Subject: [PATCH 1/2] BUG: Fix for npyv_s32 npyv__trunc_s32_f32 (VXE) np.sin() and np.cos() give erroneous results for float32 (VXE). This PR fixes `npyv_s32 npyv__trunc_s32_f32(npyv_f32 a)` to resolve the issue. --- numpy/core/src/common/simd/vec/conversion.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/numpy/core/src/common/simd/vec/conversion.h b/numpy/core/src/common/simd/vec/conversion.h index f0d625c553e9..34020d4d2435 100644 --- a/numpy/core/src/common/simd/vec/conversion.h +++ b/numpy/core/src/common/simd/vec/conversion.h @@ -170,7 +170,8 @@ npyv_pack_b8_b64(npyv_b64 a, npyv_b64 b, npyv_b64 c, npyv_b64 d, #ifdef NPY_HAVE_VXE2 return vec_signed(a); #elif defined(NPY_HAVE_VXE) - return vec_packs(vec_signed(npyv_doublee(a)), vec_signed(npyv_doublee(vec_mergel(a, a)))); + return vec_packs(vec_signed(__builtin_s390_vflls(vec_mergeh(a,a))), + vec_signed(__builtin_s390_vflls(vec_mergel(a, a)))); // VSX #elif defined(__IBMC__) return vec_cts(a, 0); From 289d2403c992719df27686c6abad45ec4d8bc2fd Mon Sep 17 00:00:00 2001 From: Pradipta Ghosh Date: Tue, 24 Jan 2023 14:07:45 +0000 Subject: [PATCH 2/2] replace __builtin_s390_vflls with npyv_doublee as before --- numpy/core/src/common/simd/vec/conversion.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/numpy/core/src/common/simd/vec/conversion.h b/numpy/core/src/common/simd/vec/conversion.h index 34020d4d2435..0c0d5b3ac50e 100644 --- a/numpy/core/src/common/simd/vec/conversion.h +++ b/numpy/core/src/common/simd/vec/conversion.h @@ -170,8 +170,8 @@ npyv_pack_b8_b64(npyv_b64 a, npyv_b64 b, npyv_b64 c, npyv_b64 d, #ifdef NPY_HAVE_VXE2 return vec_signed(a); #elif defined(NPY_HAVE_VXE) - return vec_packs(vec_signed(__builtin_s390_vflls(vec_mergeh(a,a))), - vec_signed(__builtin_s390_vflls(vec_mergel(a, a)))); + return 
vec_packs(vec_signed(npyv_doublee(vec_mergeh(a,a))), + vec_signed(npyv_doublee(vec_mergel(a, a)))); // VSX #elif defined(__IBMC__) return vec_cts(a, 0);