diff --git a/ggml.c b/ggml.c index 6c946d0..e473ba0 100644 --- a/ggml.c +++ b/ggml.c @@ -1083,8 +1083,8 @@ static void dequantize_row_q4_1(const void * restrict vx, float * restrict y, in const uint8x16_t vq = vcombine_u8(vx_0, vx_1); // convert to 2x uint16x8_t - const uint16x8_t vi_0 = vmovl_s8(vget_low_u8 (vq)); - const uint16x8_t vi_1 = vmovl_s8(vget_high_u8(vq)); + const uint16x8_t vi_0 = vmovl_u8(vget_low_u8 (vq)); + const uint16x8_t vi_1 = vmovl_u8(vget_high_u8(vq)); // convert to 4x float32x4_t const float32x4_t vf_0 = vcvtq_f32_u32(vmovl_u16(vget_low_u16 (vi_0))); @@ -1301,8 +1301,8 @@ static void dequantize_row_q4_1_o(const void * restrict vx, float * restrict y, const uint8x16_t vq = vcombine_u8(vx_0, vx_1); // convert to 2x uint16x8_t - const uint16x8_t vi_0 = vmovl_s8(vget_low_u8 (vq)); - const uint16x8_t vi_1 = vmovl_s8(vget_high_u8(vq)); + const uint16x8_t vi_0 = vmovl_u8(vget_low_u8 (vq)); + const uint16x8_t vi_1 = vmovl_u8(vget_high_u8(vq)); // convert to 4x float32x4_t const float32x4_t vf_0 = vcvtq_f32_u32(vmovl_u16(vget_low_u16 (vi_0)));