gcc 11 warning: mismatched bound

Clean up a new build warning with gcc 11:
argument 3 of type ‘const uint8_t * const[]’ with
mismatched bound [-Warray-parameter=]

Standardize the SAD function signatures to use explicit array sizes.

Change-Id: Iea4144e61368f6a8279e2f3ae96c78aff06c8b41
This commit is contained in:
Johann
2022-03-23 14:28:29 +09:00
parent b0087f6cd2
commit d60b671a73
6 changed files with 155 additions and 140 deletions
+61 -61
View File
@@ -31,7 +31,7 @@ static INLINE uint8x8_t load_unaligned_2_buffers(const void *const buf0,
static INLINE void sad4x_4d(const uint8_t *const src_ptr, const int src_stride,
const uint8_t *const ref_array[4],
const int ref_stride, const int height,
uint32_t *const res) {
uint32_t sad_array[4]) {
int i;
uint16x8_t abs[2] = { vdupq_n_u16(0), vdupq_n_u16(0) };
#if !defined(__aarch64__)
@@ -61,26 +61,26 @@ static INLINE void sad4x_4d(const uint8_t *const src_ptr, const int src_stride,
a[1] = vpadd_u16(vget_low_u16(abs[1]), vget_high_u16(abs[1]));
r = vpaddlq_u16(vcombine_u16(a[0], a[1]));
#endif
vst1q_u32(res, r);
vst1q_u32(sad_array, r);
}
void vpx_sad4x4x4d_neon(const uint8_t *src_ptr, int src_stride,
const uint8_t *const ref_array[4], int ref_stride,
uint32_t *res) {
sad4x_4d(src_ptr, src_stride, ref_array, ref_stride, 4, res);
uint32_t sad_array[4]) {
sad4x_4d(src_ptr, src_stride, ref_array, ref_stride, 4, sad_array);
}
void vpx_sad4x8x4d_neon(const uint8_t *src_ptr, int src_stride,
const uint8_t *const ref_array[4], int ref_stride,
uint32_t *res) {
sad4x_4d(src_ptr, src_stride, ref_array, ref_stride, 8, res);
uint32_t sad_array[4]) {
sad4x_4d(src_ptr, src_stride, ref_array, ref_stride, 8, sad_array);
}
////////////////////////////////////////////////////////////////////////////////
// Can handle 512 pixels' sad sum (such as 16x32 or 32x16)
static INLINE void sad_512_pel_final_neon(const uint16x8_t *sum /*[4]*/,
uint32_t *const res) {
static INLINE void sad_512_pel_final_neon(const uint16x8_t sum[4],
uint32_t sad_array[4]) {
#if defined(__aarch64__)
const uint16x8_t a0 = vpaddq_u16(sum[0], sum[1]);
const uint16x8_t a1 = vpaddq_u16(sum[2], sum[3]);
@@ -95,21 +95,21 @@ static INLINE void sad_512_pel_final_neon(const uint16x8_t *sum /*[4]*/,
const uint16x4_t b1 = vpadd_u16(a2, a3);
const uint32x4_t r = vpaddlq_u16(vcombine_u16(b0, b1));
#endif
vst1q_u32(res, r);
vst1q_u32(sad_array, r);
}
#if defined(__arm__) || !defined(__ARM_FEATURE_DOTPROD)
// Can handle 1024 pixels' sad sum (such as 32x32)
static INLINE void sad_1024_pel_final_neon(const uint16x8_t *sum /*[4]*/,
uint32_t *const res) {
static INLINE void sad_1024_pel_final_neon(const uint16x8_t sum[4],
uint32_t sad_array[4]) {
#if defined(__aarch64__)
const uint16x8_t a0 = vpaddq_u16(sum[0], sum[1]);
const uint16x8_t a1 = vpaddq_u16(sum[2], sum[3]);
const uint32x4_t b0 = vpaddlq_u16(a0);
const uint32x4_t b1 = vpaddlq_u16(a1);
const uint32x4_t r = vpaddq_u32(b0, b1);
vst1q_u32(res, r);
vst1q_u32(sad_array, r);
#else
const uint16x4_t a0 = vpadd_u16(vget_low_u16(sum[0]), vget_high_u16(sum[0]));
const uint16x4_t a1 = vpadd_u16(vget_low_u16(sum[1]), vget_high_u16(sum[1]));
@@ -119,13 +119,13 @@ static INLINE void sad_1024_pel_final_neon(const uint16x8_t *sum /*[4]*/,
const uint32x4_t b1 = vpaddlq_u16(vcombine_u16(a2, a3));
const uint32x2_t c0 = vpadd_u32(vget_low_u32(b0), vget_high_u32(b0));
const uint32x2_t c1 = vpadd_u32(vget_low_u32(b1), vget_high_u32(b1));
vst1q_u32(res, vcombine_u32(c0, c1));
vst1q_u32(sad_array, vcombine_u32(c0, c1));
#endif
}
// Can handle 2048 pixels' sad sum (such as 32x64 or 64x32)
static INLINE void sad_2048_pel_final_neon(const uint16x8_t *sum /*[4]*/,
uint32_t *const res) {
static INLINE void sad_2048_pel_final_neon(const uint16x8_t sum[4],
uint32_t sad_array[4]) {
#if defined(__aarch64__)
const uint32x4_t a0 = vpaddlq_u16(sum[0]);
const uint32x4_t a1 = vpaddlq_u16(sum[1]);
@@ -134,7 +134,7 @@ static INLINE void sad_2048_pel_final_neon(const uint16x8_t *sum /*[4]*/,
const uint32x4_t b0 = vpaddq_u32(a0, a1);
const uint32x4_t b1 = vpaddq_u32(a2, a3);
const uint32x4_t r = vpaddq_u32(b0, b1);
vst1q_u32(res, r);
vst1q_u32(sad_array, r);
#else
const uint32x4_t a0 = vpaddlq_u16(sum[0]);
const uint32x4_t a1 = vpaddlq_u16(sum[1]);
@@ -146,13 +146,13 @@ static INLINE void sad_2048_pel_final_neon(const uint16x8_t *sum /*[4]*/,
const uint32x2_t b3 = vadd_u32(vget_low_u32(a3), vget_high_u32(a3));
const uint32x2_t c0 = vpadd_u32(b0, b1);
const uint32x2_t c1 = vpadd_u32(b2, b3);
vst1q_u32(res, vcombine_u32(c0, c1));
vst1q_u32(sad_array, vcombine_u32(c0, c1));
#endif
}
// Can handle 4096 pixels' sad sum (such as 64x64)
static INLINE void sad_4096_pel_final_neon(const uint16x8_t *sum /*[8]*/,
uint32_t *const res) {
static INLINE void sad_4096_pel_final_neon(const uint16x8_t sum[8],
uint32_t sad_array[4]) {
#if defined(__aarch64__)
const uint32x4_t a0 = vpaddlq_u16(sum[0]);
const uint32x4_t a1 = vpaddlq_u16(sum[1]);
@@ -169,7 +169,7 @@ static INLINE void sad_4096_pel_final_neon(const uint16x8_t *sum /*[8]*/,
const uint32x4_t c0 = vpaddq_u32(b0, b1);
const uint32x4_t c1 = vpaddq_u32(b2, b3);
const uint32x4_t r = vpaddq_u32(c0, c1);
vst1q_u32(res, r);
vst1q_u32(sad_array, r);
#else
const uint32x4_t a0 = vpaddlq_u16(sum[0]);
const uint32x4_t a1 = vpaddlq_u16(sum[1]);
@@ -189,7 +189,7 @@ static INLINE void sad_4096_pel_final_neon(const uint16x8_t *sum /*[8]*/,
const uint32x2_t c3 = vadd_u32(vget_low_u32(b3), vget_high_u32(b3));
const uint32x2_t d0 = vpadd_u32(c0, c1);
const uint32x2_t d1 = vpadd_u32(c2, c3);
vst1q_u32(res, vcombine_u32(d0, d1));
vst1q_u32(sad_array, vcombine_u32(d0, d1));
#endif
}
@@ -197,7 +197,7 @@ static INLINE void sad_4096_pel_final_neon(const uint16x8_t *sum /*[8]*/,
static INLINE void sad8x_4d(const uint8_t *src_ptr, int src_stride,
const uint8_t *const ref_array[4], int ref_stride,
uint32_t *res, const int height) {
uint32_t sad_array[4], const int height) {
int i, j;
const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2],
ref_array[3] };
@@ -214,25 +214,25 @@ static INLINE void sad8x_4d(const uint8_t *src_ptr, int src_stride,
}
}
sad_512_pel_final_neon(sum, res);
sad_512_pel_final_neon(sum, sad_array);
}
void vpx_sad8x4x4d_neon(const uint8_t *src_ptr, int src_stride,
const uint8_t *const ref_array[4], int ref_stride,
uint32_t *res) {
sad8x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 4);
uint32_t sad_array[4]) {
sad8x_4d(src_ptr, src_stride, ref_array, ref_stride, sad_array, 4);
}
void vpx_sad8x8x4d_neon(const uint8_t *src_ptr, int src_stride,
const uint8_t *const ref_array[4], int ref_stride,
uint32_t *res) {
sad8x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 8);
uint32_t sad_array[4]) {
sad8x_4d(src_ptr, src_stride, ref_array, ref_stride, sad_array, 8);
}
void vpx_sad8x16x4d_neon(const uint8_t *src_ptr, int src_stride,
const uint8_t *const ref_array[4], int ref_stride,
uint32_t *res) {
sad8x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 16);
uint32_t sad_array[4]) {
sad8x_4d(src_ptr, src_stride, ref_array, ref_stride, sad_array, 16);
}
////////////////////////////////////////////////////////////////////////////////
@@ -249,7 +249,7 @@ static INLINE void sad16_neon(const uint8_t *ref_ptr, const uint8x16_t src_ptr,
static INLINE void sad16x_4d(const uint8_t *src_ptr, int src_stride,
const uint8_t *const ref_array[4], int ref_stride,
uint32_t *res, const int height) {
uint32_t sad_array[4], const int height) {
int i;
uint32x4_t r0, r1;
const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2],
@@ -267,7 +267,7 @@ static INLINE void sad16x_4d(const uint8_t *src_ptr, int src_stride,
r0 = vpaddq_u32(sum[0], sum[1]);
r1 = vpaddq_u32(sum[2], sum[3]);
vst1q_u32(res, vpaddq_u32(r0, r1));
vst1q_u32(sad_array, vpaddq_u32(r0, r1));
}
#else
@@ -281,7 +281,7 @@ static INLINE void sad16_neon(const uint8_t *ref_ptr, const uint8x16_t src_ptr,
static INLINE void sad16x_4d(const uint8_t *src_ptr, int src_stride,
const uint8_t *const ref_array[4], int ref_stride,
uint32_t *res, const int height) {
uint32_t sad_array[4], const int height) {
int i;
const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2],
ref_array[3] };
@@ -302,27 +302,27 @@ static INLINE void sad16x_4d(const uint8_t *src_ptr, int src_stride,
ref_loop[3] += ref_stride;
}
sad_512_pel_final_neon(sum, res);
sad_512_pel_final_neon(sum, sad_array);
}
#endif
void vpx_sad16x8x4d_neon(const uint8_t *src_ptr, int src_stride,
const uint8_t *const ref_array[4], int ref_stride,
uint32_t *res) {
sad16x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 8);
uint32_t sad_array[4]) {
sad16x_4d(src_ptr, src_stride, ref_array, ref_stride, sad_array, 8);
}
void vpx_sad16x16x4d_neon(const uint8_t *src_ptr, int src_stride,
const uint8_t *const ref_array[4], int ref_stride,
uint32_t *res) {
sad16x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 16);
uint32_t sad_array[4]) {
sad16x_4d(src_ptr, src_stride, ref_array, ref_stride, sad_array, 16);
}
void vpx_sad16x32x4d_neon(const uint8_t *src_ptr, int src_stride,
const uint8_t *const ref_array[4], int ref_stride,
uint32_t *res) {
sad16x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 32);
uint32_t sad_array[4]) {
sad16x_4d(src_ptr, src_stride, ref_array, ref_stride, sad_array, 32);
}
////////////////////////////////////////////////////////////////////////////////
@@ -332,7 +332,7 @@ void vpx_sad16x32x4d_neon(const uint8_t *src_ptr, int src_stride,
static INLINE void sad32x_4d(const uint8_t *src_ptr, int src_stride,
const uint8_t *const ref_array[4], int ref_stride,
uint32_t *res, const int height) {
uint32_t sad_array[4], const int height) {
int i;
uint32x4_t r0, r1;
const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2],
@@ -365,25 +365,25 @@ static INLINE void sad32x_4d(const uint8_t *src_ptr, int src_stride,
r0 = vpaddq_u32(sum[0], sum[1]);
r1 = vpaddq_u32(sum[2], sum[3]);
vst1q_u32(res, vpaddq_u32(r0, r1));
vst1q_u32(sad_array, vpaddq_u32(r0, r1));
}
void vpx_sad32x16x4d_neon(const uint8_t *src_ptr, int src_stride,
const uint8_t *const ref_array[4], int ref_stride,
uint32_t *res) {
sad32x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 16);
uint32_t sad_array[4]) {
sad32x_4d(src_ptr, src_stride, ref_array, ref_stride, sad_array, 16);
}
void vpx_sad32x32x4d_neon(const uint8_t *src_ptr, int src_stride,
const uint8_t *const ref_array[4], int ref_stride,
uint32_t *res) {
sad32x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 32);
uint32_t sad_array[4]) {
sad32x_4d(src_ptr, src_stride, ref_array, ref_stride, sad_array, 32);
}
void vpx_sad32x64x4d_neon(const uint8_t *src_ptr, int src_stride,
const uint8_t *const ref_array[4], int ref_stride,
uint32_t *res) {
sad32x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 64);
uint32_t sad_array[4]) {
sad32x_4d(src_ptr, src_stride, ref_array, ref_stride, sad_array, 64);
}
#else
@@ -422,26 +422,26 @@ static INLINE void sad32x_4d(const uint8_t *src_ptr, int src_stride,
void vpx_sad32x16x4d_neon(const uint8_t *src_ptr, int src_stride,
const uint8_t *const ref_array[4], int ref_stride,
uint32_t *res) {
uint32_t sad_array[4]) {
uint16x8_t sum[4];
sad32x_4d(src_ptr, src_stride, ref_array, ref_stride, 16, sum);
sad_512_pel_final_neon(sum, res);
sad_512_pel_final_neon(sum, sad_array);
}
void vpx_sad32x32x4d_neon(const uint8_t *src_ptr, int src_stride,
const uint8_t *const ref_array[4], int ref_stride,
uint32_t *res) {
uint32_t sad_array[4]) {
uint16x8_t sum[4];
sad32x_4d(src_ptr, src_stride, ref_array, ref_stride, 32, sum);
sad_1024_pel_final_neon(sum, res);
sad_1024_pel_final_neon(sum, sad_array);
}
void vpx_sad32x64x4d_neon(const uint8_t *src_ptr, int src_stride,
const uint8_t *const ref_array[4], int ref_stride,
uint32_t *res) {
uint32_t sad_array[4]) {
uint16x8_t sum[4];
sad32x_4d(src_ptr, src_stride, ref_array, ref_stride, 64, sum);
sad_2048_pel_final_neon(sum, res);
sad_2048_pel_final_neon(sum, sad_array);
}
#endif
@@ -453,7 +453,7 @@ void vpx_sad32x64x4d_neon(const uint8_t *src_ptr, int src_stride,
void vpx_sad64x32x4d_neon(const uint8_t *src_ptr, int src_stride,
const uint8_t *const ref_array[4], int ref_stride,
uint32_t *res) {
uint32_t sad_array[4]) {
int i;
uint32x4_t r0, r1;
const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2],
@@ -497,12 +497,12 @@ void vpx_sad64x32x4d_neon(const uint8_t *src_ptr, int src_stride,
r0 = vpaddq_u32(sum[0], sum[1]);
r1 = vpaddq_u32(sum[2], sum[3]);
vst1q_u32(res, vpaddq_u32(r0, r1));
vst1q_u32(sad_array, vpaddq_u32(r0, r1));
}
void vpx_sad64x64x4d_neon(const uint8_t *src_ptr, int src_stride,
const uint8_t *const ref_array[4], int ref_stride,
uint32_t *res) {
uint32_t sad_array[4]) {
int i;
uint32x4_t r0, r1, r2, r3;
const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2],
@@ -551,14 +551,14 @@ void vpx_sad64x64x4d_neon(const uint8_t *src_ptr, int src_stride,
r3 = vpaddq_u32(sum[6], sum[7]);
r0 = vpaddq_u32(r0, r1);
r1 = vpaddq_u32(r2, r3);
vst1q_u32(res, vpaddq_u32(r0, r1));
vst1q_u32(sad_array, vpaddq_u32(r0, r1));
}
#else
void vpx_sad64x32x4d_neon(const uint8_t *src_ptr, int src_stride,
const uint8_t *const ref_array[4], int ref_stride,
uint32_t *res) {
uint32_t sad_array[4]) {
int i;
const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2],
ref_array[3] };
@@ -599,12 +599,12 @@ void vpx_sad64x32x4d_neon(const uint8_t *src_ptr, int src_stride,
ref_loop[3] += ref_stride;
}
sad_2048_pel_final_neon(sum, res);
sad_2048_pel_final_neon(sum, sad_array);
}
void vpx_sad64x64x4d_neon(const uint8_t *src_ptr, int src_stride,
const uint8_t *const ref_array[4], int ref_stride,
uint32_t *res) {
uint32_t sad_array[4]) {
int i;
const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2],
ref_array[3] };
@@ -646,7 +646,7 @@ void vpx_sad64x64x4d_neon(const uint8_t *src_ptr, int src_stride,
ref_loop[3] += ref_stride;
}
sad_4096_pel_final_neon(sum, res);
sad_4096_pel_final_neon(sum, sad_array);
}
#endif
+16 -16
View File
@@ -1040,77 +1040,77 @@ static uint32_t avgsad_64width_msa(const uint8_t *src, int32_t src_stride,
#define VPX_SAD_4xHEIGHTx3_MSA(height) \
void vpx_sad4x##height##x3_msa(const uint8_t *src, int32_t src_stride, \
const uint8_t *ref, int32_t ref_stride, \
uint32_t *sads) { \
uint32_t sads[3]) { \
sad_4width_x3_msa(src, src_stride, ref, ref_stride, height, sads); \
}
#define VPX_SAD_8xHEIGHTx3_MSA(height) \
void vpx_sad8x##height##x3_msa(const uint8_t *src, int32_t src_stride, \
const uint8_t *ref, int32_t ref_stride, \
uint32_t *sads) { \
uint32_t sads[3]) { \
sad_8width_x3_msa(src, src_stride, ref, ref_stride, height, sads); \
}
#define VPX_SAD_16xHEIGHTx3_MSA(height) \
void vpx_sad16x##height##x3_msa(const uint8_t *src, int32_t src_stride, \
const uint8_t *ref, int32_t ref_stride, \
uint32_t *sads) { \
uint32_t sads[3]) { \
sad_16width_x3_msa(src, src_stride, ref, ref_stride, height, sads); \
}
#define VPX_SAD_4xHEIGHTx8_MSA(height) \
void vpx_sad4x##height##x8_msa(const uint8_t *src, int32_t src_stride, \
const uint8_t *ref, int32_t ref_stride, \
uint32_t *sads) { \
uint32_t sads[8]) { \
sad_4width_x8_msa(src, src_stride, ref, ref_stride, height, sads); \
}
#define VPX_SAD_8xHEIGHTx8_MSA(height) \
void vpx_sad8x##height##x8_msa(const uint8_t *src, int32_t src_stride, \
const uint8_t *ref, int32_t ref_stride, \
uint32_t *sads) { \
uint32_t sads[8]) { \
sad_8width_x8_msa(src, src_stride, ref, ref_stride, height, sads); \
}
#define VPX_SAD_16xHEIGHTx8_MSA(height) \
void vpx_sad16x##height##x8_msa(const uint8_t *src, int32_t src_stride, \
const uint8_t *ref, int32_t ref_stride, \
uint32_t *sads) { \
uint32_t sads[8]) { \
sad_16width_x8_msa(src, src_stride, ref, ref_stride, height, sads); \
}
#define VPX_SAD_4xHEIGHTx4D_MSA(height) \
void vpx_sad4x##height##x4d_msa(const uint8_t *src, int32_t src_stride, \
const uint8_t *const refs[], \
int32_t ref_stride, uint32_t *sads) { \
const uint8_t *const refs[4], \
int32_t ref_stride, uint32_t sads[4]) { \
sad_4width_x4d_msa(src, src_stride, refs, ref_stride, height, sads); \
}
#define VPX_SAD_8xHEIGHTx4D_MSA(height) \
void vpx_sad8x##height##x4d_msa(const uint8_t *src, int32_t src_stride, \
const uint8_t *const refs[], \
int32_t ref_stride, uint32_t *sads) { \
const uint8_t *const refs[4], \
int32_t ref_stride, uint32_t sads[4]) { \
sad_8width_x4d_msa(src, src_stride, refs, ref_stride, height, sads); \
}
#define VPX_SAD_16xHEIGHTx4D_MSA(height) \
void vpx_sad16x##height##x4d_msa(const uint8_t *src, int32_t src_stride, \
const uint8_t *const refs[], \
int32_t ref_stride, uint32_t *sads) { \
const uint8_t *const refs[4], \
int32_t ref_stride, uint32_t sads[4]) { \
sad_16width_x4d_msa(src, src_stride, refs, ref_stride, height, sads); \
}
#define VPX_SAD_32xHEIGHTx4D_MSA(height) \
void vpx_sad32x##height##x4d_msa(const uint8_t *src, int32_t src_stride, \
const uint8_t *const refs[], \
int32_t ref_stride, uint32_t *sads) { \
const uint8_t *const refs[4], \
int32_t ref_stride, uint32_t sads[4]) { \
sad_32width_x4d_msa(src, src_stride, refs, ref_stride, height, sads); \
}
#define VPX_SAD_64xHEIGHTx4D_MSA(height) \
void vpx_sad64x##height##x4d_msa(const uint8_t *src, int32_t src_stride, \
const uint8_t *const refs[], \
int32_t ref_stride, uint32_t *sads) { \
const uint8_t *const refs[4], \
int32_t ref_stride, uint32_t sads[4]) { \
sad_64width_x4d_msa(src, src_stride, refs, ref_stride, height, sads); \
}
+32 -16
View File
@@ -45,23 +45,39 @@ static INLINE unsigned int sad(const uint8_t *src_ptr, int src_stride,
return sad(src_ptr, src_stride, comp_pred, m, m, n); \
}
// depending on call sites, pass **ref_array to avoid & in subsequent call and
// de-dup with 4D below.
// Compare |src_ptr| to |k| adjacent blocks starting at |ref_ptr|.
// |k| == {3,8}. Used in vp8 for an exhaustive search.
// src: ref:
// 0 1 2 3 0 1 2 3 x x
// 4 5 6 7 6 7 8 9 x x
// 8 9 10 11 12 13 14 15 x x
// 12 13 14 15 18 19 20 21 x x
//
// x 1 2 3 4 x
// x 7 8 9 10 x
// x 13 14 15 16 x
// x 19 20 21 22 x
//
// x x 2 3 4 5
// x x 8 9 10 11
// x x 14 15 16 17
// x x 20 21 22 23
//
#define sadMxNxK(m, n, k) \
void vpx_sad##m##x##n##x##k##_c(const uint8_t *src_ptr, int src_stride, \
const uint8_t *ref_ptr, int ref_stride, \
uint32_t *sad_array) { \
uint32_t sad_array[k]) { \
int i; \
for (i = 0; i < k; ++i) \
sad_array[i] = \
vpx_sad##m##x##n##_c(src_ptr, src_stride, &ref_ptr[i], ref_stride); \
vpx_sad##m##x##n##_c(src_ptr, src_stride, ref_ptr + i, ref_stride); \
}
// This appears to be equivalent to the above when k == 4 and refs is const
// Compare |src_ptr| to the 4 distinct references in |ref_array[]|.
#define sadMxNx4D(m, n) \
void vpx_sad##m##x##n##x4d_c(const uint8_t *src_ptr, int src_stride, \
const uint8_t *const ref_array[], \
int ref_stride, uint32_t *sad_array) { \
const uint8_t *const ref_array[4], \
int ref_stride, uint32_t sad_array[4]) { \
int i; \
for (i = 0; i < 4; ++i) \
sad_array[i] = \
@@ -181,15 +197,15 @@ static INLINE unsigned int highbd_sadb(const uint8_t *src8_ptr, int src_stride,
return highbd_sadb(src_ptr, src_stride, comp_pred, m, m, n); \
}
#define highbd_sadMxNx4D(m, n) \
void vpx_highbd_sad##m##x##n##x4d_c(const uint8_t *src_ptr, int src_stride, \
const uint8_t *const ref_array[], \
int ref_stride, uint32_t *sad_array) { \
int i; \
for (i = 0; i < 4; ++i) { \
sad_array[i] = vpx_highbd_sad##m##x##n##_c(src_ptr, src_stride, \
ref_array[i], ref_stride); \
} \
#define highbd_sadMxNx4D(m, n) \
void vpx_highbd_sad##m##x##n##x4d_c(const uint8_t *src_ptr, int src_stride, \
const uint8_t *const ref_array[4], \
int ref_stride, uint32_t sad_array[4]) { \
int i; \
for (i = 0; i < 4; ++i) { \
sad_array[i] = vpx_highbd_sad##m##x##n##_c(src_ptr, src_stride, \
ref_array[i], ref_stride); \
} \
}
/* clang-format off */
+37 -37
View File
@@ -877,80 +877,80 @@ specialize qw/vpx_sad4x4_avg neon msa sse2 mmi/;
# Multi-block SAD, comparing a reference to N blocks 1 pixel apart horizontally
#
# Blocks of 3
add_proto qw/void vpx_sad16x16x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
add_proto qw/void vpx_sad16x16x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t sad_array[3]";
specialize qw/vpx_sad16x16x3 sse3 ssse3 msa mmi/;
add_proto qw/void vpx_sad16x8x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
add_proto qw/void vpx_sad16x8x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t sad_array[3]";
specialize qw/vpx_sad16x8x3 sse3 ssse3 msa mmi/;
add_proto qw/void vpx_sad8x16x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
add_proto qw/void vpx_sad8x16x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t sad_array[3]";
specialize qw/vpx_sad8x16x3 sse3 msa mmi/;
add_proto qw/void vpx_sad8x8x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
add_proto qw/void vpx_sad8x8x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t sad_array[3]";
specialize qw/vpx_sad8x8x3 sse3 msa mmi/;
add_proto qw/void vpx_sad4x4x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
add_proto qw/void vpx_sad4x4x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t sad_array[3]";
specialize qw/vpx_sad4x4x3 sse3 msa mmi/;
# Blocks of 8
add_proto qw/void vpx_sad32x32x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
add_proto qw/void vpx_sad32x32x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t sad_array[8]";
specialize qw/vpx_sad32x32x8 avx2/;
add_proto qw/void vpx_sad16x16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
add_proto qw/void vpx_sad16x16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t sad_array[8]";
specialize qw/vpx_sad16x16x8 sse4_1 msa mmi/;
add_proto qw/void vpx_sad16x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
add_proto qw/void vpx_sad16x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t sad_array[8]";
specialize qw/vpx_sad16x8x8 sse4_1 msa mmi/;
add_proto qw/void vpx_sad8x16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
add_proto qw/void vpx_sad8x16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t sad_array[8]";
specialize qw/vpx_sad8x16x8 sse4_1 msa mmi/;
add_proto qw/void vpx_sad8x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
add_proto qw/void vpx_sad8x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t sad_array[8]";
specialize qw/vpx_sad8x8x8 sse4_1 msa mmi/;
add_proto qw/void vpx_sad4x4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
add_proto qw/void vpx_sad4x4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t sad_array[8]";
specialize qw/vpx_sad4x4x8 sse4_1 msa mmi/;
#
# Multi-block SAD, comparing a reference to N independent blocks
#
add_proto qw/void vpx_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[], int ref_stride, uint32_t *sad_array";
add_proto qw/void vpx_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
specialize qw/vpx_sad64x64x4d avx512 avx2 neon msa sse2 vsx mmi/;
add_proto qw/void vpx_sad64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[], int ref_stride, uint32_t *sad_array";
add_proto qw/void vpx_sad64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
specialize qw/vpx_sad64x32x4d neon msa sse2 vsx mmi/;
add_proto qw/void vpx_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[], int ref_stride, uint32_t *sad_array";
add_proto qw/void vpx_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
specialize qw/vpx_sad32x64x4d neon msa sse2 vsx mmi/;
add_proto qw/void vpx_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[], int ref_stride, uint32_t *sad_array";
add_proto qw/void vpx_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
specialize qw/vpx_sad32x32x4d avx2 neon msa sse2 vsx mmi/;
add_proto qw/void vpx_sad32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[], int ref_stride, uint32_t *sad_array";
add_proto qw/void vpx_sad32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
specialize qw/vpx_sad32x16x4d neon msa sse2 vsx mmi/;
add_proto qw/void vpx_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[], int ref_stride, uint32_t *sad_array";
add_proto qw/void vpx_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
specialize qw/vpx_sad16x32x4d neon msa sse2 vsx mmi/;
add_proto qw/void vpx_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[], int ref_stride, uint32_t *sad_array";
add_proto qw/void vpx_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
specialize qw/vpx_sad16x16x4d neon msa sse2 vsx mmi/;
add_proto qw/void vpx_sad16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[], int ref_stride, uint32_t *sad_array";
add_proto qw/void vpx_sad16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
specialize qw/vpx_sad16x8x4d neon msa sse2 vsx mmi/;
add_proto qw/void vpx_sad8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[], int ref_stride, uint32_t *sad_array";
add_proto qw/void vpx_sad8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
specialize qw/vpx_sad8x16x4d neon msa sse2 mmi/;
add_proto qw/void vpx_sad8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[], int ref_stride, uint32_t *sad_array";
add_proto qw/void vpx_sad8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
specialize qw/vpx_sad8x8x4d neon msa sse2 mmi/;
add_proto qw/void vpx_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[], int ref_stride, uint32_t *sad_array";
add_proto qw/void vpx_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
specialize qw/vpx_sad8x4x4d neon msa sse2 mmi/;
add_proto qw/void vpx_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[], int ref_stride, uint32_t *sad_array";
add_proto qw/void vpx_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
specialize qw/vpx_sad4x8x4d neon msa sse2 mmi/;
add_proto qw/void vpx_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[], int ref_stride, uint32_t *sad_array";
add_proto qw/void vpx_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
specialize qw/vpx_sad4x4x4d neon msa sse2 mmi/;
add_proto qw/uint64_t vpx_sum_squares_2d_i16/, "const int16_t *src, int stride, int size";
@@ -1064,43 +1064,43 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
#
# Multi-block SAD, comparing a reference to N independent blocks
#
add_proto qw/void vpx_highbd_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[], int ref_stride, uint32_t *sad_array";
add_proto qw/void vpx_highbd_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]";
specialize qw/vpx_highbd_sad64x64x4d sse2/;
add_proto qw/void vpx_highbd_sad64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[], int ref_stride, uint32_t *sad_array";
add_proto qw/void vpx_highbd_sad64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]";
specialize qw/vpx_highbd_sad64x32x4d sse2/;
add_proto qw/void vpx_highbd_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[], int ref_stride, uint32_t *sad_array";
add_proto qw/void vpx_highbd_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]";
specialize qw/vpx_highbd_sad32x64x4d sse2/;
add_proto qw/void vpx_highbd_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[], int ref_stride, uint32_t *sad_array";
add_proto qw/void vpx_highbd_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]";
specialize qw/vpx_highbd_sad32x32x4d sse2/;
add_proto qw/void vpx_highbd_sad32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[], int ref_stride, uint32_t *sad_array";
add_proto qw/void vpx_highbd_sad32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]";
specialize qw/vpx_highbd_sad32x16x4d sse2/;
add_proto qw/void vpx_highbd_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[], int ref_stride, uint32_t *sad_array";
add_proto qw/void vpx_highbd_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]";
specialize qw/vpx_highbd_sad16x32x4d sse2/;
add_proto qw/void vpx_highbd_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[], int ref_stride, uint32_t *sad_array";
add_proto qw/void vpx_highbd_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]";
specialize qw/vpx_highbd_sad16x16x4d sse2/;
add_proto qw/void vpx_highbd_sad16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[], int ref_stride, uint32_t *sad_array";
add_proto qw/void vpx_highbd_sad16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]";
specialize qw/vpx_highbd_sad16x8x4d sse2/;
add_proto qw/void vpx_highbd_sad8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[], int ref_stride, uint32_t *sad_array";
add_proto qw/void vpx_highbd_sad8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]";
specialize qw/vpx_highbd_sad8x16x4d sse2/;
add_proto qw/void vpx_highbd_sad8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[], int ref_stride, uint32_t *sad_array";
add_proto qw/void vpx_highbd_sad8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]";
specialize qw/vpx_highbd_sad8x8x4d sse2/;
add_proto qw/void vpx_highbd_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[], int ref_stride, uint32_t *sad_array";
add_proto qw/void vpx_highbd_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]";
specialize qw/vpx_highbd_sad8x4x4d sse2/;
add_proto qw/void vpx_highbd_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[], int ref_stride, uint32_t *sad_array";
add_proto qw/void vpx_highbd_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]";
specialize qw/vpx_highbd_sad4x8x4d sse2/;
add_proto qw/void vpx_highbd_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[], int ref_stride, uint32_t *sad_array";
add_proto qw/void vpx_highbd_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]";
specialize qw/vpx_highbd_sad4x4x4d sse2/;
#
+6 -7
View File
@@ -11,8 +11,7 @@
#include "./vpx_dsp_rtcd.h"
#include "vpx/vpx_integer.h"
static INLINE void calc_final_4(const __m256i *const sums /*[4]*/,
uint32_t *sad_array) {
static INLINE void calc_final_4(const __m256i sums[4], uint32_t sad_array[4]) {
const __m256i t0 = _mm256_hadd_epi32(sums[0], sums[1]);
const __m256i t1 = _mm256_hadd_epi32(sums[2], sums[3]);
const __m256i t2 = _mm256_hadd_epi32(t0, t1);
@@ -22,8 +21,8 @@ static INLINE void calc_final_4(const __m256i *const sums /*[4]*/,
}
void vpx_sad32x32x4d_avx2(const uint8_t *src_ptr, int src_stride,
const uint8_t *const ref_array[/*4*/], int ref_stride,
uint32_t *sad_array /*[4]*/) {
const uint8_t *const ref_array[4], int ref_stride,
uint32_t sad_array[4]) {
int i;
const uint8_t *refs[4];
__m256i sums[4];
@@ -71,7 +70,7 @@ void vpx_sad32x32x4d_avx2(const uint8_t *src_ptr, int src_stride,
void vpx_sad32x32x8_avx2(const uint8_t *src_ptr, int src_stride,
const uint8_t *ref_ptr, int ref_stride,
uint32_t *sad_array) {
uint32_t sad_array[8]) {
int i;
__m256i sums[8];
@@ -127,8 +126,8 @@ void vpx_sad32x32x8_avx2(const uint8_t *src_ptr, int src_stride,
}
void vpx_sad64x64x4d_avx2(const uint8_t *src_ptr, int src_stride,
const uint8_t *const ref_array[/*4*/], int ref_stride,
uint32_t *sad_array /*[4]*/) {
const uint8_t *const ref_array[4], int ref_stride,
uint32_t sad_array[4]) {
__m256i sums[4];
int i;
const uint8_t *refs[4];
+3 -3
View File
@@ -12,8 +12,8 @@
#include "vpx/vpx_integer.h"
void vpx_sad64x64x4d_avx512(const uint8_t *src_ptr, int src_stride,
const uint8_t *const ref_array[/*4*/],
int ref_stride, uint32_t *res /*[4]*/) {
const uint8_t *const ref_array[4], int ref_stride,
uint32_t sad_array[4]) {
__m512i src_reg, ref0_reg, ref1_reg, ref2_reg, ref3_reg;
__m512i sum_ref0, sum_ref1, sum_ref2, sum_ref3;
__m512i sum_mlow, sum_mhigh;
@@ -78,6 +78,6 @@ void vpx_sad64x64x4d_avx512(const uint8_t *src_ptr, int src_stride,
sum128 = _mm_add_epi32(_mm256_castsi256_si128(sum256),
_mm256_extractf128_si256(sum256, 1));
_mm_storeu_si128((__m128i *)(res), sum128);
_mm_storeu_si128((__m128i *)(sad_array), sum128);
}
}