gcc 11 warning: mismatched bound
Clean up a new build warning with gcc 11: "argument 3 of type ‘const uint8_t * const[]’ with mismatched bound [-Warray-parameter=]". Standardize sad functions with array sizes. Change-Id: Iea4144e61368f6a8279e2f3ae96c78aff06c8b41
This commit is contained in:
+61
-61
@@ -31,7 +31,7 @@ static INLINE uint8x8_t load_unaligned_2_buffers(const void *const buf0,
|
||||
static INLINE void sad4x_4d(const uint8_t *const src_ptr, const int src_stride,
|
||||
const uint8_t *const ref_array[4],
|
||||
const int ref_stride, const int height,
|
||||
uint32_t *const res) {
|
||||
uint32_t sad_array[4]) {
|
||||
int i;
|
||||
uint16x8_t abs[2] = { vdupq_n_u16(0), vdupq_n_u16(0) };
|
||||
#if !defined(__aarch64__)
|
||||
@@ -61,26 +61,26 @@ static INLINE void sad4x_4d(const uint8_t *const src_ptr, const int src_stride,
|
||||
a[1] = vpadd_u16(vget_low_u16(abs[1]), vget_high_u16(abs[1]));
|
||||
r = vpaddlq_u16(vcombine_u16(a[0], a[1]));
|
||||
#endif
|
||||
vst1q_u32(res, r);
|
||||
vst1q_u32(sad_array, r);
|
||||
}
|
||||
|
||||
void vpx_sad4x4x4d_neon(const uint8_t *src_ptr, int src_stride,
|
||||
const uint8_t *const ref_array[4], int ref_stride,
|
||||
uint32_t *res) {
|
||||
sad4x_4d(src_ptr, src_stride, ref_array, ref_stride, 4, res);
|
||||
uint32_t sad_array[4]) {
|
||||
sad4x_4d(src_ptr, src_stride, ref_array, ref_stride, 4, sad_array);
|
||||
}
|
||||
|
||||
void vpx_sad4x8x4d_neon(const uint8_t *src_ptr, int src_stride,
|
||||
const uint8_t *const ref_array[4], int ref_stride,
|
||||
uint32_t *res) {
|
||||
sad4x_4d(src_ptr, src_stride, ref_array, ref_stride, 8, res);
|
||||
uint32_t sad_array[4]) {
|
||||
sad4x_4d(src_ptr, src_stride, ref_array, ref_stride, 8, sad_array);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Can handle 512 pixels' sad sum (such as 16x32 or 32x16)
|
||||
static INLINE void sad_512_pel_final_neon(const uint16x8_t *sum /*[4]*/,
|
||||
uint32_t *const res) {
|
||||
static INLINE void sad_512_pel_final_neon(const uint16x8_t sum[4],
|
||||
uint32_t sad_array[4]) {
|
||||
#if defined(__aarch64__)
|
||||
const uint16x8_t a0 = vpaddq_u16(sum[0], sum[1]);
|
||||
const uint16x8_t a1 = vpaddq_u16(sum[2], sum[3]);
|
||||
@@ -95,21 +95,21 @@ static INLINE void sad_512_pel_final_neon(const uint16x8_t *sum /*[4]*/,
|
||||
const uint16x4_t b1 = vpadd_u16(a2, a3);
|
||||
const uint32x4_t r = vpaddlq_u16(vcombine_u16(b0, b1));
|
||||
#endif
|
||||
vst1q_u32(res, r);
|
||||
vst1q_u32(sad_array, r);
|
||||
}
|
||||
|
||||
#if defined(__arm__) || !defined(__ARM_FEATURE_DOTPROD)
|
||||
|
||||
// Can handle 1024 pixels' sad sum (such as 32x32)
|
||||
static INLINE void sad_1024_pel_final_neon(const uint16x8_t *sum /*[4]*/,
|
||||
uint32_t *const res) {
|
||||
static INLINE void sad_1024_pel_final_neon(const uint16x8_t sum[4],
|
||||
uint32_t sad_array[4]) {
|
||||
#if defined(__aarch64__)
|
||||
const uint16x8_t a0 = vpaddq_u16(sum[0], sum[1]);
|
||||
const uint16x8_t a1 = vpaddq_u16(sum[2], sum[3]);
|
||||
const uint32x4_t b0 = vpaddlq_u16(a0);
|
||||
const uint32x4_t b1 = vpaddlq_u16(a1);
|
||||
const uint32x4_t r = vpaddq_u32(b0, b1);
|
||||
vst1q_u32(res, r);
|
||||
vst1q_u32(sad_array, r);
|
||||
#else
|
||||
const uint16x4_t a0 = vpadd_u16(vget_low_u16(sum[0]), vget_high_u16(sum[0]));
|
||||
const uint16x4_t a1 = vpadd_u16(vget_low_u16(sum[1]), vget_high_u16(sum[1]));
|
||||
@@ -119,13 +119,13 @@ static INLINE void sad_1024_pel_final_neon(const uint16x8_t *sum /*[4]*/,
|
||||
const uint32x4_t b1 = vpaddlq_u16(vcombine_u16(a2, a3));
|
||||
const uint32x2_t c0 = vpadd_u32(vget_low_u32(b0), vget_high_u32(b0));
|
||||
const uint32x2_t c1 = vpadd_u32(vget_low_u32(b1), vget_high_u32(b1));
|
||||
vst1q_u32(res, vcombine_u32(c0, c1));
|
||||
vst1q_u32(sad_array, vcombine_u32(c0, c1));
|
||||
#endif
|
||||
}
|
||||
|
||||
// Can handle 2048 pixels' sad sum (such as 32x64 or 64x32)
|
||||
static INLINE void sad_2048_pel_final_neon(const uint16x8_t *sum /*[4]*/,
|
||||
uint32_t *const res) {
|
||||
static INLINE void sad_2048_pel_final_neon(const uint16x8_t sum[4],
|
||||
uint32_t sad_array[4]) {
|
||||
#if defined(__aarch64__)
|
||||
const uint32x4_t a0 = vpaddlq_u16(sum[0]);
|
||||
const uint32x4_t a1 = vpaddlq_u16(sum[1]);
|
||||
@@ -134,7 +134,7 @@ static INLINE void sad_2048_pel_final_neon(const uint16x8_t *sum /*[4]*/,
|
||||
const uint32x4_t b0 = vpaddq_u32(a0, a1);
|
||||
const uint32x4_t b1 = vpaddq_u32(a2, a3);
|
||||
const uint32x4_t r = vpaddq_u32(b0, b1);
|
||||
vst1q_u32(res, r);
|
||||
vst1q_u32(sad_array, r);
|
||||
#else
|
||||
const uint32x4_t a0 = vpaddlq_u16(sum[0]);
|
||||
const uint32x4_t a1 = vpaddlq_u16(sum[1]);
|
||||
@@ -146,13 +146,13 @@ static INLINE void sad_2048_pel_final_neon(const uint16x8_t *sum /*[4]*/,
|
||||
const uint32x2_t b3 = vadd_u32(vget_low_u32(a3), vget_high_u32(a3));
|
||||
const uint32x2_t c0 = vpadd_u32(b0, b1);
|
||||
const uint32x2_t c1 = vpadd_u32(b2, b3);
|
||||
vst1q_u32(res, vcombine_u32(c0, c1));
|
||||
vst1q_u32(sad_array, vcombine_u32(c0, c1));
|
||||
#endif
|
||||
}
|
||||
|
||||
// Can handle 4096 pixels' sad sum (such as 64x64)
|
||||
static INLINE void sad_4096_pel_final_neon(const uint16x8_t *sum /*[8]*/,
|
||||
uint32_t *const res) {
|
||||
static INLINE void sad_4096_pel_final_neon(const uint16x8_t sum[8],
|
||||
uint32_t sad_array[4]) {
|
||||
#if defined(__aarch64__)
|
||||
const uint32x4_t a0 = vpaddlq_u16(sum[0]);
|
||||
const uint32x4_t a1 = vpaddlq_u16(sum[1]);
|
||||
@@ -169,7 +169,7 @@ static INLINE void sad_4096_pel_final_neon(const uint16x8_t *sum /*[8]*/,
|
||||
const uint32x4_t c0 = vpaddq_u32(b0, b1);
|
||||
const uint32x4_t c1 = vpaddq_u32(b2, b3);
|
||||
const uint32x4_t r = vpaddq_u32(c0, c1);
|
||||
vst1q_u32(res, r);
|
||||
vst1q_u32(sad_array, r);
|
||||
#else
|
||||
const uint32x4_t a0 = vpaddlq_u16(sum[0]);
|
||||
const uint32x4_t a1 = vpaddlq_u16(sum[1]);
|
||||
@@ -189,7 +189,7 @@ static INLINE void sad_4096_pel_final_neon(const uint16x8_t *sum /*[8]*/,
|
||||
const uint32x2_t c3 = vadd_u32(vget_low_u32(b3), vget_high_u32(b3));
|
||||
const uint32x2_t d0 = vpadd_u32(c0, c1);
|
||||
const uint32x2_t d1 = vpadd_u32(c2, c3);
|
||||
vst1q_u32(res, vcombine_u32(d0, d1));
|
||||
vst1q_u32(sad_array, vcombine_u32(d0, d1));
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -197,7 +197,7 @@ static INLINE void sad_4096_pel_final_neon(const uint16x8_t *sum /*[8]*/,
|
||||
|
||||
static INLINE void sad8x_4d(const uint8_t *src_ptr, int src_stride,
|
||||
const uint8_t *const ref_array[4], int ref_stride,
|
||||
uint32_t *res, const int height) {
|
||||
uint32_t sad_array[4], const int height) {
|
||||
int i, j;
|
||||
const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2],
|
||||
ref_array[3] };
|
||||
@@ -214,25 +214,25 @@ static INLINE void sad8x_4d(const uint8_t *src_ptr, int src_stride,
|
||||
}
|
||||
}
|
||||
|
||||
sad_512_pel_final_neon(sum, res);
|
||||
sad_512_pel_final_neon(sum, sad_array);
|
||||
}
|
||||
|
||||
void vpx_sad8x4x4d_neon(const uint8_t *src_ptr, int src_stride,
|
||||
const uint8_t *const ref_array[4], int ref_stride,
|
||||
uint32_t *res) {
|
||||
sad8x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 4);
|
||||
uint32_t sad_array[4]) {
|
||||
sad8x_4d(src_ptr, src_stride, ref_array, ref_stride, sad_array, 4);
|
||||
}
|
||||
|
||||
void vpx_sad8x8x4d_neon(const uint8_t *src_ptr, int src_stride,
|
||||
const uint8_t *const ref_array[4], int ref_stride,
|
||||
uint32_t *res) {
|
||||
sad8x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 8);
|
||||
uint32_t sad_array[4]) {
|
||||
sad8x_4d(src_ptr, src_stride, ref_array, ref_stride, sad_array, 8);
|
||||
}
|
||||
|
||||
void vpx_sad8x16x4d_neon(const uint8_t *src_ptr, int src_stride,
|
||||
const uint8_t *const ref_array[4], int ref_stride,
|
||||
uint32_t *res) {
|
||||
sad8x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 16);
|
||||
uint32_t sad_array[4]) {
|
||||
sad8x_4d(src_ptr, src_stride, ref_array, ref_stride, sad_array, 16);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
@@ -249,7 +249,7 @@ static INLINE void sad16_neon(const uint8_t *ref_ptr, const uint8x16_t src_ptr,
|
||||
|
||||
static INLINE void sad16x_4d(const uint8_t *src_ptr, int src_stride,
|
||||
const uint8_t *const ref_array[4], int ref_stride,
|
||||
uint32_t *res, const int height) {
|
||||
uint32_t sad_array[4], const int height) {
|
||||
int i;
|
||||
uint32x4_t r0, r1;
|
||||
const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2],
|
||||
@@ -267,7 +267,7 @@ static INLINE void sad16x_4d(const uint8_t *src_ptr, int src_stride,
|
||||
|
||||
r0 = vpaddq_u32(sum[0], sum[1]);
|
||||
r1 = vpaddq_u32(sum[2], sum[3]);
|
||||
vst1q_u32(res, vpaddq_u32(r0, r1));
|
||||
vst1q_u32(sad_array, vpaddq_u32(r0, r1));
|
||||
}
|
||||
|
||||
#else
|
||||
@@ -281,7 +281,7 @@ static INLINE void sad16_neon(const uint8_t *ref_ptr, const uint8x16_t src_ptr,
|
||||
|
||||
static INLINE void sad16x_4d(const uint8_t *src_ptr, int src_stride,
|
||||
const uint8_t *const ref_array[4], int ref_stride,
|
||||
uint32_t *res, const int height) {
|
||||
uint32_t sad_array[4], const int height) {
|
||||
int i;
|
||||
const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2],
|
||||
ref_array[3] };
|
||||
@@ -302,27 +302,27 @@ static INLINE void sad16x_4d(const uint8_t *src_ptr, int src_stride,
|
||||
ref_loop[3] += ref_stride;
|
||||
}
|
||||
|
||||
sad_512_pel_final_neon(sum, res);
|
||||
sad_512_pel_final_neon(sum, sad_array);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
void vpx_sad16x8x4d_neon(const uint8_t *src_ptr, int src_stride,
|
||||
const uint8_t *const ref_array[4], int ref_stride,
|
||||
uint32_t *res) {
|
||||
sad16x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 8);
|
||||
uint32_t sad_array[4]) {
|
||||
sad16x_4d(src_ptr, src_stride, ref_array, ref_stride, sad_array, 8);
|
||||
}
|
||||
|
||||
void vpx_sad16x16x4d_neon(const uint8_t *src_ptr, int src_stride,
|
||||
const uint8_t *const ref_array[4], int ref_stride,
|
||||
uint32_t *res) {
|
||||
sad16x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 16);
|
||||
uint32_t sad_array[4]) {
|
||||
sad16x_4d(src_ptr, src_stride, ref_array, ref_stride, sad_array, 16);
|
||||
}
|
||||
|
||||
void vpx_sad16x32x4d_neon(const uint8_t *src_ptr, int src_stride,
|
||||
const uint8_t *const ref_array[4], int ref_stride,
|
||||
uint32_t *res) {
|
||||
sad16x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 32);
|
||||
uint32_t sad_array[4]) {
|
||||
sad16x_4d(src_ptr, src_stride, ref_array, ref_stride, sad_array, 32);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
@@ -332,7 +332,7 @@ void vpx_sad16x32x4d_neon(const uint8_t *src_ptr, int src_stride,
|
||||
|
||||
static INLINE void sad32x_4d(const uint8_t *src_ptr, int src_stride,
|
||||
const uint8_t *const ref_array[4], int ref_stride,
|
||||
uint32_t *res, const int height) {
|
||||
uint32_t sad_array[4], const int height) {
|
||||
int i;
|
||||
uint32x4_t r0, r1;
|
||||
const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2],
|
||||
@@ -365,25 +365,25 @@ static INLINE void sad32x_4d(const uint8_t *src_ptr, int src_stride,
|
||||
|
||||
r0 = vpaddq_u32(sum[0], sum[1]);
|
||||
r1 = vpaddq_u32(sum[2], sum[3]);
|
||||
vst1q_u32(res, vpaddq_u32(r0, r1));
|
||||
vst1q_u32(sad_array, vpaddq_u32(r0, r1));
|
||||
}
|
||||
|
||||
void vpx_sad32x16x4d_neon(const uint8_t *src_ptr, int src_stride,
|
||||
const uint8_t *const ref_array[4], int ref_stride,
|
||||
uint32_t *res) {
|
||||
sad32x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 16);
|
||||
uint32_t sad_array[4]) {
|
||||
sad32x_4d(src_ptr, src_stride, ref_array, ref_stride, sad_array, 16);
|
||||
}
|
||||
|
||||
void vpx_sad32x32x4d_neon(const uint8_t *src_ptr, int src_stride,
|
||||
const uint8_t *const ref_array[4], int ref_stride,
|
||||
uint32_t *res) {
|
||||
sad32x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 32);
|
||||
uint32_t sad_array[4]) {
|
||||
sad32x_4d(src_ptr, src_stride, ref_array, ref_stride, sad_array, 32);
|
||||
}
|
||||
|
||||
void vpx_sad32x64x4d_neon(const uint8_t *src_ptr, int src_stride,
|
||||
const uint8_t *const ref_array[4], int ref_stride,
|
||||
uint32_t *res) {
|
||||
sad32x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 64);
|
||||
uint32_t sad_array[4]) {
|
||||
sad32x_4d(src_ptr, src_stride, ref_array, ref_stride, sad_array, 64);
|
||||
}
|
||||
|
||||
#else
|
||||
@@ -422,26 +422,26 @@ static INLINE void sad32x_4d(const uint8_t *src_ptr, int src_stride,
|
||||
|
||||
void vpx_sad32x16x4d_neon(const uint8_t *src_ptr, int src_stride,
|
||||
const uint8_t *const ref_array[4], int ref_stride,
|
||||
uint32_t *res) {
|
||||
uint32_t sad_array[4]) {
|
||||
uint16x8_t sum[4];
|
||||
sad32x_4d(src_ptr, src_stride, ref_array, ref_stride, 16, sum);
|
||||
sad_512_pel_final_neon(sum, res);
|
||||
sad_512_pel_final_neon(sum, sad_array);
|
||||
}
|
||||
|
||||
void vpx_sad32x32x4d_neon(const uint8_t *src_ptr, int src_stride,
|
||||
const uint8_t *const ref_array[4], int ref_stride,
|
||||
uint32_t *res) {
|
||||
uint32_t sad_array[4]) {
|
||||
uint16x8_t sum[4];
|
||||
sad32x_4d(src_ptr, src_stride, ref_array, ref_stride, 32, sum);
|
||||
sad_1024_pel_final_neon(sum, res);
|
||||
sad_1024_pel_final_neon(sum, sad_array);
|
||||
}
|
||||
|
||||
void vpx_sad32x64x4d_neon(const uint8_t *src_ptr, int src_stride,
|
||||
const uint8_t *const ref_array[4], int ref_stride,
|
||||
uint32_t *res) {
|
||||
uint32_t sad_array[4]) {
|
||||
uint16x8_t sum[4];
|
||||
sad32x_4d(src_ptr, src_stride, ref_array, ref_stride, 64, sum);
|
||||
sad_2048_pel_final_neon(sum, res);
|
||||
sad_2048_pel_final_neon(sum, sad_array);
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -453,7 +453,7 @@ void vpx_sad32x64x4d_neon(const uint8_t *src_ptr, int src_stride,
|
||||
|
||||
void vpx_sad64x32x4d_neon(const uint8_t *src_ptr, int src_stride,
|
||||
const uint8_t *const ref_array[4], int ref_stride,
|
||||
uint32_t *res) {
|
||||
uint32_t sad_array[4]) {
|
||||
int i;
|
||||
uint32x4_t r0, r1;
|
||||
const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2],
|
||||
@@ -497,12 +497,12 @@ void vpx_sad64x32x4d_neon(const uint8_t *src_ptr, int src_stride,
|
||||
|
||||
r0 = vpaddq_u32(sum[0], sum[1]);
|
||||
r1 = vpaddq_u32(sum[2], sum[3]);
|
||||
vst1q_u32(res, vpaddq_u32(r0, r1));
|
||||
vst1q_u32(sad_array, vpaddq_u32(r0, r1));
|
||||
}
|
||||
|
||||
void vpx_sad64x64x4d_neon(const uint8_t *src_ptr, int src_stride,
|
||||
const uint8_t *const ref_array[4], int ref_stride,
|
||||
uint32_t *res) {
|
||||
uint32_t sad_array[4]) {
|
||||
int i;
|
||||
uint32x4_t r0, r1, r2, r3;
|
||||
const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2],
|
||||
@@ -551,14 +551,14 @@ void vpx_sad64x64x4d_neon(const uint8_t *src_ptr, int src_stride,
|
||||
r3 = vpaddq_u32(sum[6], sum[7]);
|
||||
r0 = vpaddq_u32(r0, r1);
|
||||
r1 = vpaddq_u32(r2, r3);
|
||||
vst1q_u32(res, vpaddq_u32(r0, r1));
|
||||
vst1q_u32(sad_array, vpaddq_u32(r0, r1));
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
void vpx_sad64x32x4d_neon(const uint8_t *src_ptr, int src_stride,
|
||||
const uint8_t *const ref_array[4], int ref_stride,
|
||||
uint32_t *res) {
|
||||
uint32_t sad_array[4]) {
|
||||
int i;
|
||||
const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2],
|
||||
ref_array[3] };
|
||||
@@ -599,12 +599,12 @@ void vpx_sad64x32x4d_neon(const uint8_t *src_ptr, int src_stride,
|
||||
ref_loop[3] += ref_stride;
|
||||
}
|
||||
|
||||
sad_2048_pel_final_neon(sum, res);
|
||||
sad_2048_pel_final_neon(sum, sad_array);
|
||||
}
|
||||
|
||||
void vpx_sad64x64x4d_neon(const uint8_t *src_ptr, int src_stride,
|
||||
const uint8_t *const ref_array[4], int ref_stride,
|
||||
uint32_t *res) {
|
||||
uint32_t sad_array[4]) {
|
||||
int i;
|
||||
const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2],
|
||||
ref_array[3] };
|
||||
@@ -646,7 +646,7 @@ void vpx_sad64x64x4d_neon(const uint8_t *src_ptr, int src_stride,
|
||||
ref_loop[3] += ref_stride;
|
||||
}
|
||||
|
||||
sad_4096_pel_final_neon(sum, res);
|
||||
sad_4096_pel_final_neon(sum, sad_array);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
+16
-16
@@ -1040,77 +1040,77 @@ static uint32_t avgsad_64width_msa(const uint8_t *src, int32_t src_stride,
|
||||
#define VPX_SAD_4xHEIGHTx3_MSA(height) \
|
||||
void vpx_sad4x##height##x3_msa(const uint8_t *src, int32_t src_stride, \
|
||||
const uint8_t *ref, int32_t ref_stride, \
|
||||
uint32_t *sads) { \
|
||||
uint32_t sads[3]) { \
|
||||
sad_4width_x3_msa(src, src_stride, ref, ref_stride, height, sads); \
|
||||
}
|
||||
|
||||
#define VPX_SAD_8xHEIGHTx3_MSA(height) \
|
||||
void vpx_sad8x##height##x3_msa(const uint8_t *src, int32_t src_stride, \
|
||||
const uint8_t *ref, int32_t ref_stride, \
|
||||
uint32_t *sads) { \
|
||||
uint32_t sads[3]) { \
|
||||
sad_8width_x3_msa(src, src_stride, ref, ref_stride, height, sads); \
|
||||
}
|
||||
|
||||
#define VPX_SAD_16xHEIGHTx3_MSA(height) \
|
||||
void vpx_sad16x##height##x3_msa(const uint8_t *src, int32_t src_stride, \
|
||||
const uint8_t *ref, int32_t ref_stride, \
|
||||
uint32_t *sads) { \
|
||||
uint32_t sads[3]) { \
|
||||
sad_16width_x3_msa(src, src_stride, ref, ref_stride, height, sads); \
|
||||
}
|
||||
|
||||
#define VPX_SAD_4xHEIGHTx8_MSA(height) \
|
||||
void vpx_sad4x##height##x8_msa(const uint8_t *src, int32_t src_stride, \
|
||||
const uint8_t *ref, int32_t ref_stride, \
|
||||
uint32_t *sads) { \
|
||||
uint32_t sads[8]) { \
|
||||
sad_4width_x8_msa(src, src_stride, ref, ref_stride, height, sads); \
|
||||
}
|
||||
|
||||
#define VPX_SAD_8xHEIGHTx8_MSA(height) \
|
||||
void vpx_sad8x##height##x8_msa(const uint8_t *src, int32_t src_stride, \
|
||||
const uint8_t *ref, int32_t ref_stride, \
|
||||
uint32_t *sads) { \
|
||||
uint32_t sads[8]) { \
|
||||
sad_8width_x8_msa(src, src_stride, ref, ref_stride, height, sads); \
|
||||
}
|
||||
|
||||
#define VPX_SAD_16xHEIGHTx8_MSA(height) \
|
||||
void vpx_sad16x##height##x8_msa(const uint8_t *src, int32_t src_stride, \
|
||||
const uint8_t *ref, int32_t ref_stride, \
|
||||
uint32_t *sads) { \
|
||||
uint32_t sads[8]) { \
|
||||
sad_16width_x8_msa(src, src_stride, ref, ref_stride, height, sads); \
|
||||
}
|
||||
|
||||
#define VPX_SAD_4xHEIGHTx4D_MSA(height) \
|
||||
void vpx_sad4x##height##x4d_msa(const uint8_t *src, int32_t src_stride, \
|
||||
const uint8_t *const refs[], \
|
||||
int32_t ref_stride, uint32_t *sads) { \
|
||||
const uint8_t *const refs[4], \
|
||||
int32_t ref_stride, uint32_t sads[4]) { \
|
||||
sad_4width_x4d_msa(src, src_stride, refs, ref_stride, height, sads); \
|
||||
}
|
||||
|
||||
#define VPX_SAD_8xHEIGHTx4D_MSA(height) \
|
||||
void vpx_sad8x##height##x4d_msa(const uint8_t *src, int32_t src_stride, \
|
||||
const uint8_t *const refs[], \
|
||||
int32_t ref_stride, uint32_t *sads) { \
|
||||
const uint8_t *const refs[4], \
|
||||
int32_t ref_stride, uint32_t sads[4]) { \
|
||||
sad_8width_x4d_msa(src, src_stride, refs, ref_stride, height, sads); \
|
||||
}
|
||||
|
||||
#define VPX_SAD_16xHEIGHTx4D_MSA(height) \
|
||||
void vpx_sad16x##height##x4d_msa(const uint8_t *src, int32_t src_stride, \
|
||||
const uint8_t *const refs[], \
|
||||
int32_t ref_stride, uint32_t *sads) { \
|
||||
const uint8_t *const refs[4], \
|
||||
int32_t ref_stride, uint32_t sads[4]) { \
|
||||
sad_16width_x4d_msa(src, src_stride, refs, ref_stride, height, sads); \
|
||||
}
|
||||
|
||||
#define VPX_SAD_32xHEIGHTx4D_MSA(height) \
|
||||
void vpx_sad32x##height##x4d_msa(const uint8_t *src, int32_t src_stride, \
|
||||
const uint8_t *const refs[], \
|
||||
int32_t ref_stride, uint32_t *sads) { \
|
||||
const uint8_t *const refs[4], \
|
||||
int32_t ref_stride, uint32_t sads[4]) { \
|
||||
sad_32width_x4d_msa(src, src_stride, refs, ref_stride, height, sads); \
|
||||
}
|
||||
|
||||
#define VPX_SAD_64xHEIGHTx4D_MSA(height) \
|
||||
void vpx_sad64x##height##x4d_msa(const uint8_t *src, int32_t src_stride, \
|
||||
const uint8_t *const refs[], \
|
||||
int32_t ref_stride, uint32_t *sads) { \
|
||||
const uint8_t *const refs[4], \
|
||||
int32_t ref_stride, uint32_t sads[4]) { \
|
||||
sad_64width_x4d_msa(src, src_stride, refs, ref_stride, height, sads); \
|
||||
}
|
||||
|
||||
|
||||
+32
-16
@@ -45,23 +45,39 @@ static INLINE unsigned int sad(const uint8_t *src_ptr, int src_stride,
|
||||
return sad(src_ptr, src_stride, comp_pred, m, m, n); \
|
||||
}
|
||||
|
||||
// depending on call sites, pass **ref_array to avoid & in subsequent call and
|
||||
// de-dup with 4D below.
|
||||
// Compare |src_ptr| to |k| adjacent blocks starting at |ref_ptr|.
|
||||
// |k| == {3,8}. Used in vp8 for an exhaustive search.
|
||||
// src:           ref:
|
||||
//  0  1  2  3     0  1  2  3  x  x
|
||||
//  4  5  6  7     6  7  8  9  x  x
|
||||
//  8  9 10 11    12 13 14 15  x  x
|
||||
// 12 13 14 15    18 19 20 21  x  x
|
||||
//
|
||||
//                 x  1  2  3  4  x
|
||||
//                 x  7  8  9 10  x
|
||||
//                 x 13 14 15 16  x
|
||||
//                 x 19 20 21 22  x
|
||||
//
|
||||
//                 x  x  2  3  4  5
|
||||
//                 x  x  8  9 10 11
|
||||
//                 x  x 14 15 16 17
|
||||
//                 x  x 20 21 22 23
|
||||
//
|
||||
#define sadMxNxK(m, n, k) \
|
||||
void vpx_sad##m##x##n##x##k##_c(const uint8_t *src_ptr, int src_stride, \
|
||||
const uint8_t *ref_ptr, int ref_stride, \
|
||||
uint32_t *sad_array) { \
|
||||
uint32_t sad_array[k]) { \
|
||||
int i; \
|
||||
for (i = 0; i < k; ++i) \
|
||||
sad_array[i] = \
|
||||
vpx_sad##m##x##n##_c(src_ptr, src_stride, &ref_ptr[i], ref_stride); \
|
||||
vpx_sad##m##x##n##_c(src_ptr, src_stride, ref_ptr + i, ref_stride); \
|
||||
}
|
||||
|
||||
// This appears to be equivalent to the above when k == 4 and refs is const
|
||||
// Compare |src_ptr| to 4 distinct references in |ref_array[]|
|
||||
#define sadMxNx4D(m, n) \
|
||||
void vpx_sad##m##x##n##x4d_c(const uint8_t *src_ptr, int src_stride, \
|
||||
const uint8_t *const ref_array[], \
|
||||
int ref_stride, uint32_t *sad_array) { \
|
||||
const uint8_t *const ref_array[4], \
|
||||
int ref_stride, uint32_t sad_array[4]) { \
|
||||
int i; \
|
||||
for (i = 0; i < 4; ++i) \
|
||||
sad_array[i] = \
|
||||
@@ -181,15 +197,15 @@ static INLINE unsigned int highbd_sadb(const uint8_t *src8_ptr, int src_stride,
|
||||
return highbd_sadb(src_ptr, src_stride, comp_pred, m, m, n); \
|
||||
}
|
||||
|
||||
#define highbd_sadMxNx4D(m, n) \
|
||||
void vpx_highbd_sad##m##x##n##x4d_c(const uint8_t *src_ptr, int src_stride, \
|
||||
const uint8_t *const ref_array[], \
|
||||
int ref_stride, uint32_t *sad_array) { \
|
||||
int i; \
|
||||
for (i = 0; i < 4; ++i) { \
|
||||
sad_array[i] = vpx_highbd_sad##m##x##n##_c(src_ptr, src_stride, \
|
||||
ref_array[i], ref_stride); \
|
||||
} \
|
||||
#define highbd_sadMxNx4D(m, n) \
|
||||
void vpx_highbd_sad##m##x##n##x4d_c(const uint8_t *src_ptr, int src_stride, \
|
||||
const uint8_t *const ref_array[4], \
|
||||
int ref_stride, uint32_t sad_array[4]) { \
|
||||
int i; \
|
||||
for (i = 0; i < 4; ++i) { \
|
||||
sad_array[i] = vpx_highbd_sad##m##x##n##_c(src_ptr, src_stride, \
|
||||
ref_array[i], ref_stride); \
|
||||
} \
|
||||
}
|
||||
|
||||
/* clang-format off */
|
||||
|
||||
@@ -877,80 +877,80 @@ specialize qw/vpx_sad4x4_avg neon msa sse2 mmi/;
|
||||
# Multi-block SAD, comparing a reference to N blocks 1 pixel apart horizontally
|
||||
#
|
||||
# Blocks of 3
|
||||
add_proto qw/void vpx_sad16x16x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
|
||||
add_proto qw/void vpx_sad16x16x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t sad_array[3]";
|
||||
specialize qw/vpx_sad16x16x3 sse3 ssse3 msa mmi/;
|
||||
|
||||
add_proto qw/void vpx_sad16x8x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
|
||||
add_proto qw/void vpx_sad16x8x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t sad_array[3]";
|
||||
specialize qw/vpx_sad16x8x3 sse3 ssse3 msa mmi/;
|
||||
|
||||
add_proto qw/void vpx_sad8x16x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
|
||||
add_proto qw/void vpx_sad8x16x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t sad_array[3]";
|
||||
specialize qw/vpx_sad8x16x3 sse3 msa mmi/;
|
||||
|
||||
add_proto qw/void vpx_sad8x8x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
|
||||
add_proto qw/void vpx_sad8x8x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t sad_array[3]";
|
||||
specialize qw/vpx_sad8x8x3 sse3 msa mmi/;
|
||||
|
||||
add_proto qw/void vpx_sad4x4x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
|
||||
add_proto qw/void vpx_sad4x4x3/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t sad_array[3]";
|
||||
specialize qw/vpx_sad4x4x3 sse3 msa mmi/;
|
||||
|
||||
# Blocks of 8
|
||||
add_proto qw/void vpx_sad32x32x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
|
||||
add_proto qw/void vpx_sad32x32x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t sad_array[8]";
|
||||
specialize qw/vpx_sad32x32x8 avx2/;
|
||||
|
||||
add_proto qw/void vpx_sad16x16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
|
||||
add_proto qw/void vpx_sad16x16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t sad_array[8]";
|
||||
specialize qw/vpx_sad16x16x8 sse4_1 msa mmi/;
|
||||
|
||||
add_proto qw/void vpx_sad16x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
|
||||
add_proto qw/void vpx_sad16x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t sad_array[8]";
|
||||
specialize qw/vpx_sad16x8x8 sse4_1 msa mmi/;
|
||||
|
||||
add_proto qw/void vpx_sad8x16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
|
||||
add_proto qw/void vpx_sad8x16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t sad_array[8]";
|
||||
specialize qw/vpx_sad8x16x8 sse4_1 msa mmi/;
|
||||
|
||||
add_proto qw/void vpx_sad8x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
|
||||
add_proto qw/void vpx_sad8x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t sad_array[8]";
|
||||
specialize qw/vpx_sad8x8x8 sse4_1 msa mmi/;
|
||||
|
||||
add_proto qw/void vpx_sad4x4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
|
||||
add_proto qw/void vpx_sad4x4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t sad_array[8]";
|
||||
specialize qw/vpx_sad4x4x8 sse4_1 msa mmi/;
|
||||
|
||||
#
|
||||
# Multi-block SAD, comparing a reference to N independent blocks
|
||||
#
|
||||
add_proto qw/void vpx_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[], int ref_stride, uint32_t *sad_array";
|
||||
add_proto qw/void vpx_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
|
||||
specialize qw/vpx_sad64x64x4d avx512 avx2 neon msa sse2 vsx mmi/;
|
||||
|
||||
add_proto qw/void vpx_sad64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[], int ref_stride, uint32_t *sad_array";
|
||||
add_proto qw/void vpx_sad64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
|
||||
specialize qw/vpx_sad64x32x4d neon msa sse2 vsx mmi/;
|
||||
|
||||
add_proto qw/void vpx_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[], int ref_stride, uint32_t *sad_array";
|
||||
add_proto qw/void vpx_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
|
||||
specialize qw/vpx_sad32x64x4d neon msa sse2 vsx mmi/;
|
||||
|
||||
add_proto qw/void vpx_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[], int ref_stride, uint32_t *sad_array";
|
||||
add_proto qw/void vpx_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
|
||||
specialize qw/vpx_sad32x32x4d avx2 neon msa sse2 vsx mmi/;
|
||||
|
||||
add_proto qw/void vpx_sad32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[], int ref_stride, uint32_t *sad_array";
|
||||
add_proto qw/void vpx_sad32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
|
||||
specialize qw/vpx_sad32x16x4d neon msa sse2 vsx mmi/;
|
||||
|
||||
add_proto qw/void vpx_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[], int ref_stride, uint32_t *sad_array";
|
||||
add_proto qw/void vpx_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
|
||||
specialize qw/vpx_sad16x32x4d neon msa sse2 vsx mmi/;
|
||||
|
||||
add_proto qw/void vpx_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[], int ref_stride, uint32_t *sad_array";
|
||||
add_proto qw/void vpx_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
|
||||
specialize qw/vpx_sad16x16x4d neon msa sse2 vsx mmi/;
|
||||
|
||||
add_proto qw/void vpx_sad16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[], int ref_stride, uint32_t *sad_array";
|
||||
add_proto qw/void vpx_sad16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
|
||||
specialize qw/vpx_sad16x8x4d neon msa sse2 vsx mmi/;
|
||||
|
||||
add_proto qw/void vpx_sad8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[], int ref_stride, uint32_t *sad_array";
|
||||
add_proto qw/void vpx_sad8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
|
||||
specialize qw/vpx_sad8x16x4d neon msa sse2 mmi/;
|
||||
|
||||
add_proto qw/void vpx_sad8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[], int ref_stride, uint32_t *sad_array";
|
||||
add_proto qw/void vpx_sad8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
|
||||
specialize qw/vpx_sad8x8x4d neon msa sse2 mmi/;
|
||||
|
||||
add_proto qw/void vpx_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[], int ref_stride, uint32_t *sad_array";
|
||||
add_proto qw/void vpx_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
|
||||
specialize qw/vpx_sad8x4x4d neon msa sse2 mmi/;
|
||||
|
||||
add_proto qw/void vpx_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[], int ref_stride, uint32_t *sad_array";
|
||||
add_proto qw/void vpx_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
|
||||
specialize qw/vpx_sad4x8x4d neon msa sse2 mmi/;
|
||||
|
||||
add_proto qw/void vpx_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[], int ref_stride, uint32_t *sad_array";
|
||||
add_proto qw/void vpx_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
|
||||
specialize qw/vpx_sad4x4x4d neon msa sse2 mmi/;
|
||||
|
||||
add_proto qw/uint64_t vpx_sum_squares_2d_i16/, "const int16_t *src, int stride, int size";
|
||||
@@ -1064,43 +1064,43 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
#
|
||||
# Multi-block SAD, comparing a reference to N independent blocks
|
||||
#
|
||||
add_proto qw/void vpx_highbd_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[], int ref_stride, uint32_t *sad_array";
|
||||
add_proto qw/void vpx_highbd_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]";
|
||||
specialize qw/vpx_highbd_sad64x64x4d sse2/;
|
||||
|
||||
add_proto qw/void vpx_highbd_sad64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[], int ref_stride, uint32_t *sad_array";
|
||||
add_proto qw/void vpx_highbd_sad64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]";
|
||||
specialize qw/vpx_highbd_sad64x32x4d sse2/;
|
||||
|
||||
add_proto qw/void vpx_highbd_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[], int ref_stride, uint32_t *sad_array";
|
||||
add_proto qw/void vpx_highbd_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]";
|
||||
specialize qw/vpx_highbd_sad32x64x4d sse2/;
|
||||
|
||||
add_proto qw/void vpx_highbd_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[], int ref_stride, uint32_t *sad_array";
|
||||
add_proto qw/void vpx_highbd_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]";
|
||||
specialize qw/vpx_highbd_sad32x32x4d sse2/;
|
||||
|
||||
add_proto qw/void vpx_highbd_sad32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[], int ref_stride, uint32_t *sad_array";
|
||||
add_proto qw/void vpx_highbd_sad32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]";
|
||||
specialize qw/vpx_highbd_sad32x16x4d sse2/;
|
||||
|
||||
add_proto qw/void vpx_highbd_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[], int ref_stride, uint32_t *sad_array";
|
||||
add_proto qw/void vpx_highbd_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]";
|
||||
specialize qw/vpx_highbd_sad16x32x4d sse2/;
|
||||
|
||||
add_proto qw/void vpx_highbd_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[], int ref_stride, uint32_t *sad_array";
|
||||
add_proto qw/void vpx_highbd_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]";
|
||||
specialize qw/vpx_highbd_sad16x16x4d sse2/;
|
||||
|
||||
add_proto qw/void vpx_highbd_sad16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[], int ref_stride, uint32_t *sad_array";
|
||||
add_proto qw/void vpx_highbd_sad16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]";
|
||||
specialize qw/vpx_highbd_sad16x8x4d sse2/;
|
||||
|
||||
add_proto qw/void vpx_highbd_sad8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[], int ref_stride, uint32_t *sad_array";
|
||||
add_proto qw/void vpx_highbd_sad8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]";
|
||||
specialize qw/vpx_highbd_sad8x16x4d sse2/;
|
||||
|
||||
add_proto qw/void vpx_highbd_sad8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[], int ref_stride, uint32_t *sad_array";
|
||||
add_proto qw/void vpx_highbd_sad8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]";
|
||||
specialize qw/vpx_highbd_sad8x8x4d sse2/;
|
||||
|
||||
add_proto qw/void vpx_highbd_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[], int ref_stride, uint32_t *sad_array";
|
||||
add_proto qw/void vpx_highbd_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]";
|
||||
specialize qw/vpx_highbd_sad8x4x4d sse2/;
|
||||
|
||||
add_proto qw/void vpx_highbd_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[], int ref_stride, uint32_t *sad_array";
|
||||
add_proto qw/void vpx_highbd_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]";
|
||||
specialize qw/vpx_highbd_sad4x8x4d sse2/;
|
||||
|
||||
add_proto qw/void vpx_highbd_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[], int ref_stride, uint32_t *sad_array";
|
||||
add_proto qw/void vpx_highbd_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]";
|
||||
specialize qw/vpx_highbd_sad4x4x4d sse2/;
|
||||
|
||||
#
|
||||
|
||||
@@ -11,8 +11,7 @@
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
static INLINE void calc_final_4(const __m256i *const sums /*[4]*/,
|
||||
uint32_t *sad_array) {
|
||||
static INLINE void calc_final_4(const __m256i sums[4], uint32_t sad_array[4]) {
|
||||
const __m256i t0 = _mm256_hadd_epi32(sums[0], sums[1]);
|
||||
const __m256i t1 = _mm256_hadd_epi32(sums[2], sums[3]);
|
||||
const __m256i t2 = _mm256_hadd_epi32(t0, t1);
|
||||
@@ -22,8 +21,8 @@ static INLINE void calc_final_4(const __m256i *const sums /*[4]*/,
|
||||
}
|
||||
|
||||
void vpx_sad32x32x4d_avx2(const uint8_t *src_ptr, int src_stride,
|
||||
const uint8_t *const ref_array[/*4*/], int ref_stride,
|
||||
uint32_t *sad_array /*[4]*/) {
|
||||
const uint8_t *const ref_array[4], int ref_stride,
|
||||
uint32_t sad_array[4]) {
|
||||
int i;
|
||||
const uint8_t *refs[4];
|
||||
__m256i sums[4];
|
||||
@@ -71,7 +70,7 @@ void vpx_sad32x32x4d_avx2(const uint8_t *src_ptr, int src_stride,
|
||||
|
||||
void vpx_sad32x32x8_avx2(const uint8_t *src_ptr, int src_stride,
|
||||
const uint8_t *ref_ptr, int ref_stride,
|
||||
uint32_t *sad_array) {
|
||||
uint32_t sad_array[8]) {
|
||||
int i;
|
||||
__m256i sums[8];
|
||||
|
||||
@@ -127,8 +126,8 @@ void vpx_sad32x32x8_avx2(const uint8_t *src_ptr, int src_stride,
|
||||
}
|
||||
|
||||
void vpx_sad64x64x4d_avx2(const uint8_t *src_ptr, int src_stride,
|
||||
const uint8_t *const ref_array[/*4*/], int ref_stride,
|
||||
uint32_t *sad_array /*[4]*/) {
|
||||
const uint8_t *const ref_array[4], int ref_stride,
|
||||
uint32_t sad_array[4]) {
|
||||
__m256i sums[4];
|
||||
int i;
|
||||
const uint8_t *refs[4];
|
||||
|
||||
@@ -12,8 +12,8 @@
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
void vpx_sad64x64x4d_avx512(const uint8_t *src_ptr, int src_stride,
|
||||
const uint8_t *const ref_array[/*4*/],
|
||||
int ref_stride, uint32_t *res /*[4]*/) {
|
||||
const uint8_t *const ref_array[4], int ref_stride,
|
||||
uint32_t sad_array[4]) {
|
||||
__m512i src_reg, ref0_reg, ref1_reg, ref2_reg, ref3_reg;
|
||||
__m512i sum_ref0, sum_ref1, sum_ref2, sum_ref3;
|
||||
__m512i sum_mlow, sum_mhigh;
|
||||
@@ -78,6 +78,6 @@ void vpx_sad64x64x4d_avx512(const uint8_t *src_ptr, int src_stride,
|
||||
sum128 = _mm_add_epi32(_mm256_castsi256_si128(sum256),
|
||||
_mm256_extractf128_si256(sum256, 1));
|
||||
|
||||
_mm_storeu_si128((__m128i *)(res), sum128);
|
||||
_mm_storeu_si128((__m128i *)(sad_array), sum128);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user