DSP: Fix vector type mismatches
This commit fixes vector type mismatches in the MVE function
implementations. GCC treats such mismatches as errors unless the
`-flax-vector-conversions` option is specified.
Note that most of these mismatches were already fixed upstream.
Signed-off-by: Stephanos Ioannidis <root@stephanos.io>
GitOrigin-RevId: 5f86244bad4ad5a590e084f0e72ba7a1416c2edf
Change-Id: Ia0d6d8e93b07b6334a5f5ca90b4017049f145013
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/zephyr/cmsis/+/3627934
Tested-by: CopyBot Service Account <copybot.service@gmail.com>
Reviewed-by: Jack Rosenthal <jrosenth@chromium.org>
Tested-by: Jack Rosenthal <jrosenth@chromium.org>
Commit-Queue: Jack Rosenthal <jrosenth@chromium.org>
diff --git a/CMSIS/DSP/PrivateInclude/arm_vec_fft.h b/CMSIS/DSP/PrivateInclude/arm_vec_fft.h
index 30dcb0e..1006920 100644
--- a/CMSIS/DSP/PrivateInclude/arm_vec_fft.h
+++ b/CMSIS/DSP/PrivateInclude/arm_vec_fft.h
@@ -140,7 +140,7 @@
{
uint32_t *src = (uint32_t *) pSrc;
int32_t blkCnt; /* loop counters */
- uint32x4_t bitRevTabOff;
+ uint16x8_t bitRevTabOff;
uint16x8_t one = vdupq_n_u16(1);
uint32x4_t bitRevOff1Low, bitRevOff0Low;
uint32x4_t bitRevOff1High, bitRevOff0High;
@@ -152,8 +152,8 @@
bitRevOff0Low = vmullbq_int_u16(bitRevTabOff, one);
bitRevOff0High = vmulltq_int_u16(bitRevTabOff, one);
- bitRevOff0Low = vshrq_n_u16(bitRevOff0Low, 3);
- bitRevOff0High = vshrq_n_u16(bitRevOff0High, 3);
+ bitRevOff0Low = (uint32x4_t)vshrq_n_u16((uint16x8_t)bitRevOff0Low, 3);
+ bitRevOff0High = (uint32x4_t)vshrq_n_u16((uint16x8_t)bitRevOff0High, 3);
blkCnt = (bitRevLen / 16);
while (blkCnt > 0) {
@@ -162,8 +162,8 @@
bitRevOff1Low = vmullbq_int_u16(bitRevTabOff, one);
bitRevOff1High = vmulltq_int_u16(bitRevTabOff, one);
- bitRevOff1Low = vshrq_n_u16(bitRevOff1Low, 3);
- bitRevOff1High = vshrq_n_u16(bitRevOff1High, 3);
+ bitRevOff1Low = (uint32x4_t)vshrq_n_u16((uint16x8_t)bitRevOff1Low, 3);
+ bitRevOff1High = (uint32x4_t)vshrq_n_u16((uint16x8_t)bitRevOff1High, 3);
inLow = vldrwq_gather_shifted_offset_u32(src, bitRevOff0Low);
inHigh = vldrwq_gather_shifted_offset_u32(src, bitRevOff0High);
@@ -177,8 +177,8 @@
bitRevOff0Low = vmullbq_int_u16(bitRevTabOff, one);
bitRevOff0High = vmulltq_int_u16(bitRevTabOff, one);
- bitRevOff0Low = vshrq_n_u16(bitRevOff0Low, 3);
- bitRevOff0High = vshrq_n_u16(bitRevOff0High, 3);
+ bitRevOff0Low = (uint32x4_t)vshrq_n_u16((uint16x8_t)bitRevOff0Low, 3);
+ bitRevOff0High = (uint32x4_t)vshrq_n_u16((uint16x8_t)bitRevOff0High, 3);
inLow = vldrwq_gather_shifted_offset_u32(src, bitRevOff1Low);
inHigh = vldrwq_gather_shifted_offset_u32(src, bitRevOff1High);
@@ -211,8 +211,8 @@
bitRevOff0Low = vmullbq_int_u16(bitRevTabOff, one);
bitRevOff0High = vmulltq_int_u16(bitRevTabOff, one);
- bitRevOff0Low = vshrq_n_u16(bitRevOff0Low, 3);
- bitRevOff0High = vshrq_n_u16(bitRevOff0High, 3);
+ bitRevOff0Low = (uint32x4_t)vshrq_n_u16((uint16x8_t)bitRevOff0Low, 3);
+ bitRevOff0High = (uint32x4_t)vshrq_n_u16((uint16x8_t)bitRevOff0High, 3);
inLow = vldrwq_gather_shifted_offset_z_u32(src, bitRevOff0Low, p);
inHigh = vldrwq_gather_shifted_offset_z_u32(src, bitRevOff0High, p);
@@ -251,13 +251,13 @@
while (blkCnt > 0) {
uint64x2_t vecIn;
- vecIn = vldrdq_gather_offset_u64(pSrc, (int64x2_t) bitRevOffs0);
+ vecIn = vldrdq_gather_offset_u64(pSrc, (uint64x2_t) bitRevOffs0);
idxOffs0 = idxOffs0 + 16;
vst1q(pDst32, (uint32x4_t) vecIn);
pDst32 += 4;
bitRevOffs0 = vbrsrq(idxOffs0, bitRevPos);
- vecIn = vldrdq_gather_offset_u64(pSrc, (int64x2_t) bitRevOffs1);
+ vecIn = vldrdq_gather_offset_u64(pSrc, (uint64x2_t) bitRevOffs1);
idxOffs1 = idxOffs1 + 16;
vst1q(pDst32, (uint32x4_t) vecIn);
pDst32 += 4;
@@ -297,13 +297,13 @@
while (blkCnt > 0) {
uint32x4_t vecIn;
- vecIn = vldrwq_gather_offset_s32(pSrc, bitRevOffs0);
+ vecIn = (uint32x4_t)vldrwq_gather_offset_s32(pSrc, bitRevOffs0);
idxOffs0 = idxOffs0 + 32;
vst1q(pDst16, (uint16x8_t) vecIn);
pDst16 += 8;
bitRevOffs0 = vbrsrq(idxOffs0, bitRevPos);
- vecIn = vldrwq_gather_offset_s32(pSrc, bitRevOffs1);
+ vecIn = (uint32x4_t)vldrwq_gather_offset_s32(pSrc, bitRevOffs1);
idxOffs1 = idxOffs1 + 32;
vst1q(pDst16, (uint16x8_t) vecIn);
pDst16 += 8;
diff --git a/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_q15.c b/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_q15.c
index c77a05c..5ab13dc 100644
--- a/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_q15.c
+++ b/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_q15.c
@@ -102,9 +102,9 @@
bCoeffs1[7] = a2;
bCoeffs1[6] = a1;
- bCoeffs2 =
+ bCoeffs2 = (q15x8_t)
vsetq_lane_s32(vgetq_lane_s32((q31x4_t) bCoeffs0, 3), (q31x4_t) bCoeffs2, 3);
- bCoeffs3 =
+ bCoeffs3 = (q15x8_t)
vsetq_lane_s32(vgetq_lane_s32((q31x4_t) bCoeffs1, 3), (q31x4_t) bCoeffs3, 3);
diff --git a/CMSIS/DSP/Source/MatrixFunctions/arm_mat_ldlt_f32.c b/CMSIS/DSP/Source/MatrixFunctions/arm_mat_ldlt_f32.c
index bcca830..05fe9fc 100644
--- a/CMSIS/DSP/Source/MatrixFunctions/arm_mat_ldlt_f32.c
+++ b/CMSIS/DSP/Source/MatrixFunctions/arm_mat_ldlt_f32.c
@@ -178,7 +178,7 @@
int32x4_t vecOffs;
int w;
- vecOffs = vidupq_u32((uint32_t)0, 1);
+ vecOffs = (int32x4_t)vidupq_u32((uint32_t)0, 1);
vecOffs = vmulq_n_s32(vecOffs,n);
for(w=k+1; w<n; w+=4)
@@ -204,7 +204,7 @@
//pA[w*n+x] = pA[w*n+x] - pA[w*n+k] * (pA[x*n+k] * invA);
- vecX = vldrwq_gather_shifted_offset_z_f32(&pA[x*n+k], vecOffs, p0);
+ vecX = vldrwq_gather_shifted_offset_z_f32(&pA[x*n+k], (uint32x4_t)vecOffs, p0);
vecX = vmulq_m_n_f32(vuninitializedq_f32(),vecX,invA,p0);
@@ -247,7 +247,7 @@
vecA = vldrwq_z_f32(&pA[w*n+x],p0);
- vecX = vldrwq_gather_shifted_offset_z_f32(&pA[x*n+k], vecOffs, p0);
+ vecX = vldrwq_gather_shifted_offset_z_f32(&pA[x*n+k], (uint32x4_t)vecOffs, p0);
vecX = vmulq_m_n_f32(vuninitializedq_f32(),vecX,invA,p0);
vecA = vfmsq_m(vecA, vecW, vecX, p0);
diff --git a/CMSIS/DSP/Source/SupportFunctions/arm_q15_to_float.c b/CMSIS/DSP/Source/SupportFunctions/arm_q15_to_float.c
index 2dded65..f67ce26 100644
--- a/CMSIS/DSP/Source/SupportFunctions/arm_q15_to_float.c
+++ b/CMSIS/DSP/Source/SupportFunctions/arm_q15_to_float.c
@@ -72,9 +72,9 @@
{
/* C = (float32_t) A / 32768 */
/* convert from q15 to float and then store the results in the destination buffer */
- vecDst = vldrhq_s32(pSrcVec);
+ vecDst = (q15x8_t)vldrhq_s32(pSrcVec);
pSrcVec += 4;
- vstrwq(pDst, vcvtq_n_f32_s32(vecDst, 15));
+ vstrwq(pDst, vcvtq_n_f32_s32((int32x4_t)vecDst, 15));
pDst += 4;
/*
* Decrement the blockSize loop counter
diff --git a/CMSIS/DSP/Source/SupportFunctions/arm_q7_to_float.c b/CMSIS/DSP/Source/SupportFunctions/arm_q7_to_float.c
index 258309e..16f9342 100644
--- a/CMSIS/DSP/Source/SupportFunctions/arm_q7_to_float.c
+++ b/CMSIS/DSP/Source/SupportFunctions/arm_q7_to_float.c
@@ -70,9 +70,9 @@
{
/* C = (float32_t) A / 32768 */
/* convert from q7 to float and then store the results in the destination buffer */
- vecDst = vldrbq_s32(pSrcVec);
+ vecDst = (q7x16_t)vldrbq_s32(pSrcVec);
pSrcVec += 4;
- vstrwq(pDst, vcvtq_n_f32_s32(vecDst, 7));
+ vstrwq(pDst, vcvtq_n_f32_s32((int32x4_t)vecDst, 7));
pDst += 4;
/*
* Decrement the blockSize loop counter
diff --git a/CMSIS/DSP/Source/TransformFunctions/arm_cfft_q15.c b/CMSIS/DSP/Source/TransformFunctions/arm_cfft_q15.c
index 9d4eb96..121cac1 100644
--- a/CMSIS/DSP/Source/TransformFunctions/arm_cfft_q15.c
+++ b/CMSIS/DSP/Source/TransformFunctions/arm_cfft_q15.c
@@ -184,16 +184,16 @@
vecC = (q15x8_t) vldrwq_gather_base_s32(vecScGathAddr, 8);
vecTmp0 = vhaddq(vecSum0, vecSum1);
- vstrwq_scatter_base_s32(vecScGathAddr, -64, (q15x8_t) vecTmp0);
+ vstrwq_scatter_base_s32(vecScGathAddr, -64, (int32x4_t) vecTmp0);
vecTmp0 = vhsubq(vecSum0, vecSum1);
- vstrwq_scatter_base_s32(vecScGathAddr, -64 + 4, (q15x8_t) vecTmp0);
+ vstrwq_scatter_base_s32(vecScGathAddr, -64 + 4, (int32x4_t) vecTmp0);
vecTmp0 = MVE_CMPLX_SUB_FX_A_ixB(vecDiff0, vecDiff1);
- vstrwq_scatter_base_s32(vecScGathAddr, -64 + 8, (q15x8_t) vecTmp0);
+ vstrwq_scatter_base_s32(vecScGathAddr, -64 + 8, (int32x4_t) vecTmp0);
vecTmp0 = MVE_CMPLX_ADD_FX_A_ixB(vecDiff0, vecDiff1);
- vstrwq_scatter_base_s32(vecScGathAddr, -64 + 12, (q15x8_t) vecTmp0);
+ vstrwq_scatter_base_s32(vecScGathAddr, -64 + 12, (int32x4_t) vecTmp0);
blkCnt--;
}
@@ -419,16 +419,16 @@
vecC = (q15x8_t) vldrwq_gather_base_s32(vecScGathAddr, 8);
vecTmp0 = vhaddq(vecSum0, vecSum1);
- vstrwq_scatter_base_s32(vecScGathAddr, -64, (q15x8_t) vecTmp0);
+ vstrwq_scatter_base_s32(vecScGathAddr, -64, (int32x4_t) vecTmp0);
vecTmp0 = vhsubq(vecSum0, vecSum1);
- vstrwq_scatter_base_s32(vecScGathAddr, -64 + 4, (q15x8_t) vecTmp0);
+ vstrwq_scatter_base_s32(vecScGathAddr, -64 + 4, (int32x4_t) vecTmp0);
vecTmp0 = MVE_CMPLX_ADD_FX_A_ixB(vecDiff0, vecDiff1);
- vstrwq_scatter_base_s32(vecScGathAddr, -64 + 8, (q15x8_t) vecTmp0);
+ vstrwq_scatter_base_s32(vecScGathAddr, -64 + 8, (int32x4_t) vecTmp0);
vecTmp0 = MVE_CMPLX_SUB_FX_A_ixB(vecDiff0, vecDiff1);
- vstrwq_scatter_base_s32(vecScGathAddr, -64 + 12, (q15x8_t) vecTmp0);
+ vstrwq_scatter_base_s32(vecScGathAddr, -64 + 12, (int32x4_t) vecTmp0);
blkCnt--;
}