Namespace ARCH_* defines with a VPX_ prefix
This prevents redefinition warnings if a toolchain sets one of them. BUG=b/117240165 Change-Id: Ib5d8c303cd05b4dbcc8d42c71ecfcba8f6d7b90c
This commit is contained in:
+4
-4
@@ -430,10 +430,10 @@ ifneq ($(call enabled,DIST-SRCS),)
|
||||
DIST-SRCS-$(CONFIG_MSVS) += build/make/gen_msvs_vcxproj.sh
|
||||
DIST-SRCS-$(CONFIG_MSVS) += build/make/msvs_common.sh
|
||||
DIST-SRCS-$(CONFIG_RVCT) += build/make/armlink_adapter.sh
|
||||
DIST-SRCS-$(ARCH_ARM) += build/make/ads2gas.pl
|
||||
DIST-SRCS-$(ARCH_ARM) += build/make/ads2gas_apple.pl
|
||||
DIST-SRCS-$(ARCH_ARM) += build/make/ads2armasm_ms.pl
|
||||
DIST-SRCS-$(ARCH_ARM) += build/make/thumb.pm
|
||||
DIST-SRCS-$(VPX_ARCH_ARM) += build/make/ads2gas.pl
|
||||
DIST-SRCS-$(VPX_ARCH_ARM) += build/make/ads2gas_apple.pl
|
||||
DIST-SRCS-$(VPX_ARCH_ARM) += build/make/ads2armasm_ms.pl
|
||||
DIST-SRCS-$(VPX_ARCH_ARM) += build/make/thumb.pm
|
||||
DIST-SRCS-yes += $(target:-$(TOOLCHAIN)=).mk
|
||||
endif
|
||||
INSTALL-SRCS := $(call cond_enabled,CONFIG_INSTALL_SRCS,INSTALL-SRCS)
|
||||
|
||||
@@ -518,10 +518,10 @@ fmt_deps = sed -e 's;^\([a-zA-Z0-9_]*\)\.o;\${@:.d=.o} \$@;'
|
||||
EOF
|
||||
fi
|
||||
|
||||
print_config_mk ARCH "${1}" ${ARCH_LIST}
|
||||
print_config_mk HAVE "${1}" ${HAVE_LIST}
|
||||
print_config_mk CONFIG "${1}" ${CONFIG_LIST}
|
||||
print_config_mk HAVE "${1}" gnu_strip
|
||||
print_config_mk VPX_ARCH "${1}" ${ARCH_LIST}
|
||||
print_config_mk HAVE "${1}" ${HAVE_LIST}
|
||||
print_config_mk CONFIG "${1}" ${CONFIG_LIST}
|
||||
print_config_mk HAVE "${1}" gnu_strip
|
||||
|
||||
enabled msvs && echo "CONFIG_VS_VERSION=${vs_version}" >> "${1}"
|
||||
|
||||
@@ -538,10 +538,10 @@ write_common_target_config_h() {
|
||||
#define RESTRICT ${RESTRICT}
|
||||
#define INLINE ${INLINE}
|
||||
EOF
|
||||
print_config_h ARCH "${TMP_H}" ${ARCH_LIST}
|
||||
print_config_h HAVE "${TMP_H}" ${HAVE_LIST}
|
||||
print_config_h CONFIG "${TMP_H}" ${CONFIG_LIST}
|
||||
print_config_vars_h "${TMP_H}" ${VAR_LIST}
|
||||
print_config_h VPX_ARCH "${TMP_H}" ${ARCH_LIST}
|
||||
print_config_h HAVE "${TMP_H}" ${HAVE_LIST}
|
||||
print_config_h CONFIG "${TMP_H}" ${CONFIG_LIST}
|
||||
print_config_vars_h "${TMP_H}" ${VAR_LIST}
|
||||
echo "#endif /* VPX_CONFIG_H */" >> ${TMP_H}
|
||||
mkdir -p `dirname "$1"`
|
||||
cmp "$1" ${TMP_H} >/dev/null 2>&1 || mv ${TMP_H} "$1"
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
|
||||
# ARM assembly files are written in RVCT-style. We use some make magic to
|
||||
# filter those files to allow GCC compilation
|
||||
ifeq ($(ARCH_ARM),yes)
|
||||
ifeq ($(VPX_ARCH_ARM),yes)
|
||||
ASM:=$(if $(filter yes,$(CONFIG_GCC)$(CONFIG_MSVS)),.asm.S,.asm)
|
||||
else
|
||||
ASM:=.asm
|
||||
@@ -139,7 +139,7 @@ CODEC_SRCS-yes += vpx_ports/mem_ops_aligned.h
|
||||
CODEC_SRCS-yes += vpx_ports/vpx_once.h
|
||||
CODEC_SRCS-yes += $(BUILD_PFX)vpx_config.c
|
||||
INSTALL-SRCS-no += $(BUILD_PFX)vpx_config.c
|
||||
ifeq ($(ARCH_X86)$(ARCH_X86_64),yes)
|
||||
ifeq ($(VPX_ARCH_X86)$(VPX_ARCH_X86_64),yes)
|
||||
INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += third_party/x86inc/x86inc.asm
|
||||
INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += vpx_dsp/x86/bitdepth_conversion_sse2.asm
|
||||
endif
|
||||
@@ -347,7 +347,7 @@ CLEAN-OBJS += libvpx.syms
|
||||
#
|
||||
# Rule to make assembler configuration file from C configuration file
|
||||
#
|
||||
ifeq ($(ARCH_X86)$(ARCH_X86_64),yes)
|
||||
ifeq ($(VPX_ARCH_X86)$(VPX_ARCH_X86_64),yes)
|
||||
# YASM
|
||||
$(BUILD_PFX)vpx_config.asm: $(BUILD_PFX)vpx_config.h
|
||||
@echo " [CREATE] $@"
|
||||
|
||||
@@ -1133,7 +1133,7 @@ using std::make_tuple;
|
||||
x0_q4, x_step_q4, y0_q4, y_step_q4, w, h, bd); \
|
||||
}
|
||||
|
||||
#if HAVE_SSE2 && ARCH_X86_64
|
||||
#if HAVE_SSE2 && VPX_ARCH_X86_64
|
||||
WRAP(convolve_copy_sse2, 8)
|
||||
WRAP(convolve_avg_sse2, 8)
|
||||
WRAP(convolve_copy_sse2, 10)
|
||||
@@ -1158,7 +1158,7 @@ WRAP(convolve8_vert_sse2, 12)
|
||||
WRAP(convolve8_avg_vert_sse2, 12)
|
||||
WRAP(convolve8_sse2, 12)
|
||||
WRAP(convolve8_avg_sse2, 12)
|
||||
#endif // HAVE_SSE2 && ARCH_X86_64
|
||||
#endif // HAVE_SSE2 && VPX_ARCH_X86_64
|
||||
|
||||
#if HAVE_AVX2
|
||||
WRAP(convolve_copy_avx2, 8)
|
||||
@@ -1278,7 +1278,7 @@ const ConvolveParam kArrayConvolve_c[] = { ALL_SIZES(convolve8_c) };
|
||||
#endif
|
||||
INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::ValuesIn(kArrayConvolve_c));
|
||||
|
||||
#if HAVE_SSE2 && ARCH_X86_64
|
||||
#if HAVE_SSE2 && VPX_ARCH_X86_64
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
const ConvolveFunctions convolve8_sse2(
|
||||
wrap_convolve_copy_sse2_8, wrap_convolve_avg_sse2_8,
|
||||
|
||||
+2
-2
@@ -514,7 +514,7 @@ INSTANTIATE_TEST_CASE_P(
|
||||
::testing::Values(VPX_BITS_8, VPX_BITS_10, VPX_BITS_12)));
|
||||
#endif // HAVE_SSE2
|
||||
|
||||
#if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64
|
||||
#if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH && VPX_ARCH_X86_64
|
||||
// vpx_fdct8x8_ssse3 is only available in 64 bit builds.
|
||||
static const FuncInfo dct_ssse3_func_info = {
|
||||
&fdct_wrapper<vpx_fdct8x8_ssse3>, &idct_wrapper<vpx_idct8x8_64_add_sse2>, 8, 1
|
||||
@@ -524,7 +524,7 @@ static const FuncInfo dct_ssse3_func_info = {
|
||||
INSTANTIATE_TEST_CASE_P(SSSE3, TransDCT,
|
||||
::testing::Values(make_tuple(0, &dct_ssse3_func_info, 0,
|
||||
VPX_BITS_8)));
|
||||
#endif // HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64
|
||||
#endif // HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH && VPX_ARCH_X86_64
|
||||
|
||||
#if HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH
|
||||
static const FuncInfo dct_avx2_func_info = {
|
||||
|
||||
@@ -737,7 +737,7 @@ INSTANTIATE_TEST_CASE_P(
|
||||
make_tuple(&idct8x8_12, &idct8x8_64_add_12_sse2, 6225, VPX_BITS_12)));
|
||||
#endif // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
|
||||
|
||||
#if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH && \
|
||||
#if HAVE_SSSE3 && VPX_ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH && \
|
||||
!CONFIG_EMULATE_HARDWARE
|
||||
INSTANTIATE_TEST_CASE_P(SSSE3, FwdTrans8x8DCT,
|
||||
::testing::Values(make_tuple(&vpx_fdct8x8_ssse3,
|
||||
|
||||
@@ -254,11 +254,11 @@ INSTANTIATE_TEST_CASE_P(
|
||||
HadamardFuncWithSize(&vpx_hadamard_32x32_avx2, 32)));
|
||||
#endif // HAVE_AVX2
|
||||
|
||||
#if HAVE_SSSE3 && ARCH_X86_64
|
||||
#if HAVE_SSSE3 && VPX_ARCH_X86_64
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SSSE3, HadamardLowbdTest,
|
||||
::testing::Values(HadamardFuncWithSize(&vpx_hadamard_8x8_ssse3, 8)));
|
||||
#endif // HAVE_SSSE3 && ARCH_X86_64
|
||||
#endif // HAVE_SSSE3 && VPX_ARCH_X86_64
|
||||
|
||||
#if HAVE_NEON
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
|
||||
@@ -147,7 +147,7 @@ const DecodeParam kVP9InvalidFileTests[] = {
|
||||
// This file will cause a large allocation which is expected to fail in 32-bit
|
||||
// environments. Test x86 for coverage purposes as the allocation failure will
|
||||
// be in platform agnostic code.
|
||||
#if ARCH_X86
|
||||
#if VPX_ARCH_X86
|
||||
{ 1, "invalid-vp90-2-00-quantizer-63.ivf.kf_65527x61446.ivf" },
|
||||
#endif
|
||||
{ 1, "invalid-vp90-2-12-droppable_1.ivf.s3676_r01-05_b6-.ivf" },
|
||||
|
||||
@@ -28,7 +28,7 @@
|
||||
// See platform implementations of RegisterStateCheckXXX for details.
|
||||
//
|
||||
|
||||
#if defined(_WIN64) && ARCH_X86_64
|
||||
#if defined(_WIN64) && VPX_ARCH_X86_64
|
||||
|
||||
#undef NOMINMAX
|
||||
#define NOMINMAX
|
||||
@@ -138,9 +138,9 @@ class RegisterStateCheck {};
|
||||
|
||||
} // namespace libvpx_test
|
||||
|
||||
#endif // _WIN64 && ARCH_X86_64
|
||||
#endif // _WIN64 && VPX_ARCH_X86_64
|
||||
|
||||
#if ARCH_X86 || ARCH_X86_64
|
||||
#if VPX_ARCH_X86 || VPX_ARCH_X86_64
|
||||
#if defined(__GNUC__)
|
||||
|
||||
namespace libvpx_test {
|
||||
@@ -178,7 +178,7 @@ class RegisterStateCheckMMX {
|
||||
} // namespace libvpx_test
|
||||
|
||||
#endif // __GNUC__
|
||||
#endif // ARCH_X86 || ARCH_X86_64
|
||||
#endif // VPX_ARCH_X86 || VPX_ARCH_X86_64
|
||||
|
||||
#ifndef API_REGISTER_STATE_CHECK
|
||||
#define API_REGISTER_STATE_CHECK ASM_REGISTER_STATE_CHECK
|
||||
|
||||
+5
-5
@@ -12,7 +12,7 @@
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
|
||||
#include "./vpx_config.h"
|
||||
#if ARCH_X86 || ARCH_X86_64
|
||||
#if VPX_ARCH_X86 || VPX_ARCH_X86_64
|
||||
#include "vpx_ports/x86.h"
|
||||
#endif
|
||||
extern "C" {
|
||||
@@ -26,7 +26,7 @@ extern void vpx_dsp_rtcd();
|
||||
extern void vpx_scale_rtcd();
|
||||
}
|
||||
|
||||
#if ARCH_X86 || ARCH_X86_64
|
||||
#if VPX_ARCH_X86 || VPX_ARCH_X86_64
|
||||
static void append_negative_gtest_filter(const char *str) {
|
||||
std::string filter = ::testing::FLAGS_gtest_filter;
|
||||
// Negative patterns begin with one '-' followed by a ':' separated list.
|
||||
@@ -34,12 +34,12 @@ static void append_negative_gtest_filter(const char *str) {
|
||||
filter += str;
|
||||
::testing::FLAGS_gtest_filter = filter;
|
||||
}
|
||||
#endif // ARCH_X86 || ARCH_X86_64
|
||||
#endif // VPX_ARCH_X86 || VPX_ARCH_X86_64
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
::testing::InitGoogleTest(&argc, argv);
|
||||
|
||||
#if ARCH_X86 || ARCH_X86_64
|
||||
#if VPX_ARCH_X86 || VPX_ARCH_X86_64
|
||||
const int simd_caps = x86_simd_caps();
|
||||
if (!(simd_caps & HAS_MMX)) append_negative_gtest_filter(":MMX.*:MMX/*");
|
||||
if (!(simd_caps & HAS_SSE)) append_negative_gtest_filter(":SSE.*:SSE/*");
|
||||
@@ -56,7 +56,7 @@ int main(int argc, char **argv) {
|
||||
if (!(simd_caps & HAS_AVX512)) {
|
||||
append_negative_gtest_filter(":AVX512.*:AVX512/*");
|
||||
}
|
||||
#endif // ARCH_X86 || ARCH_X86_64
|
||||
#endif // VPX_ARCH_X86 || VPX_ARCH_X86_64
|
||||
|
||||
#if !CONFIG_SHARED
|
||||
// Shared library builds don't support whitebox tests
|
||||
|
||||
@@ -505,7 +505,7 @@ INSTANTIATE_TEST_CASE_P(
|
||||
#endif // HAVE_SSE2
|
||||
|
||||
#if HAVE_SSSE3
|
||||
#if ARCH_X86_64
|
||||
#if VPX_ARCH_X86_64
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SSSE3, VP9QuantizeTest,
|
||||
::testing::Values(make_tuple(&vpx_quantize_b_ssse3, &vpx_quantize_b_c,
|
||||
@@ -528,7 +528,7 @@ INSTANTIATE_TEST_CASE_P(
|
||||
&vpx_quantize_b_32x32_c, VPX_BITS_8, 32,
|
||||
false)));
|
||||
|
||||
#endif // ARCH_X86_64
|
||||
#endif // VPX_ARCH_X86_64
|
||||
#endif // HAVE_SSSE3
|
||||
|
||||
#if HAVE_AVX
|
||||
@@ -541,7 +541,7 @@ INSTANTIATE_TEST_CASE_P(AVX, VP9QuantizeTest,
|
||||
VPX_BITS_8, 32, false)));
|
||||
#endif // HAVE_AVX
|
||||
|
||||
#if ARCH_X86_64 && HAVE_AVX2
|
||||
#if VPX_ARCH_X86_64 && HAVE_AVX2
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
AVX2, VP9QuantizeTest,
|
||||
::testing::Values(make_tuple(&QuantFPWrapper<vp9_quantize_fp_avx2>,
|
||||
|
||||
@@ -22,7 +22,7 @@
|
||||
namespace libvpx_test {
|
||||
namespace {
|
||||
|
||||
#if ARCH_ARM || (ARCH_MIPS && !HAVE_MIPS64) || ARCH_X86
|
||||
#if VPX_ARCH_ARM || (VPX_ARCH_MIPS && !HAVE_MIPS64) || VPX_ARCH_X86
|
||||
// Avoid OOM failures on 32-bit platforms.
|
||||
const int kNumSizesToTest = 7;
|
||||
#else
|
||||
|
||||
Vendored
+1
@@ -18,3 +18,4 @@ Copy PIC 'GLOBAL' macros from x86_abi_support.asm
|
||||
Use .text instead of .rodata on macho to avoid broken tables in PIC mode.
|
||||
Use .text with no alignment for aout
|
||||
Only use 'hidden' visibility with Chromium
|
||||
Prefix ARCH_* with VPX_.
|
||||
|
||||
Vendored
+12
-12
@@ -45,7 +45,7 @@
|
||||
%endif
|
||||
|
||||
%ifndef STACK_ALIGNMENT
|
||||
%if ARCH_X86_64
|
||||
%if VPX_ARCH_X86_64
|
||||
%define STACK_ALIGNMENT 16
|
||||
%else
|
||||
%define STACK_ALIGNMENT 4
|
||||
@@ -54,7 +54,7 @@
|
||||
|
||||
%define WIN64 0
|
||||
%define UNIX64 0
|
||||
%if ARCH_X86_64
|
||||
%if VPX_ARCH_X86_64
|
||||
%ifidn __OUTPUT_FORMAT__,win32
|
||||
%define WIN64 1
|
||||
%elifidn __OUTPUT_FORMAT__,win64
|
||||
@@ -165,7 +165,7 @@
|
||||
%endif
|
||||
%endif
|
||||
|
||||
%if ARCH_X86_64 == 0
|
||||
%if VPX_ARCH_X86_64 == 0
|
||||
%undef PIC
|
||||
%endif
|
||||
|
||||
@@ -260,7 +260,7 @@
|
||||
%if %0 == 2
|
||||
%define r%1m %2d
|
||||
%define r%1mp %2
|
||||
%elif ARCH_X86_64 ; memory
|
||||
%elif VPX_ARCH_X86_64 ; memory
|
||||
%define r%1m [rstk + stack_offset + %3]
|
||||
%define r%1mp qword r %+ %1 %+ m
|
||||
%else
|
||||
@@ -281,7 +281,7 @@
|
||||
%define e%1h %3
|
||||
%define r%1b %2
|
||||
%define e%1b %2
|
||||
%if ARCH_X86_64 == 0
|
||||
%if VPX_ARCH_X86_64 == 0
|
||||
%define r%1 e%1
|
||||
%endif
|
||||
%endmacro
|
||||
@@ -318,7 +318,7 @@ DECLARE_REG_SIZE bp, bpl, null
|
||||
|
||||
DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
|
||||
|
||||
%if ARCH_X86_64
|
||||
%if VPX_ARCH_X86_64
|
||||
%define gprsize 8
|
||||
%else
|
||||
%define gprsize 4
|
||||
@@ -485,7 +485,7 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
|
||||
%if %1 > 0
|
||||
%assign regs_used (regs_used + 1)
|
||||
%endif
|
||||
%if ARCH_X86_64 && regs_used < 5 + UNIX64 * 3
|
||||
%if VPX_ARCH_X86_64 && regs_used < 5 + UNIX64 * 3
|
||||
; Ensure that we don't clobber any registers containing arguments
|
||||
%assign regs_used 5 + UNIX64 * 3
|
||||
%endif
|
||||
@@ -607,7 +607,7 @@ DECLARE_REG 14, R15, 120
|
||||
AUTO_REP_RET
|
||||
%endmacro
|
||||
|
||||
%elif ARCH_X86_64 ; *nix x64 ;=============================================
|
||||
%elif VPX_ARCH_X86_64 ; *nix x64 ;=============================================
|
||||
|
||||
DECLARE_REG 0, rdi
|
||||
DECLARE_REG 1, rsi
|
||||
@@ -948,7 +948,7 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
|
||||
%endif
|
||||
%endif
|
||||
|
||||
%if ARCH_X86_64 || cpuflag(sse2)
|
||||
%if VPX_ARCH_X86_64 || cpuflag(sse2)
|
||||
%ifdef __NASM_VER__
|
||||
ALIGNMODE k8
|
||||
%else
|
||||
@@ -1005,7 +1005,7 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
|
||||
%define RESET_MM_PERMUTATION INIT_XMM %1
|
||||
%define mmsize 16
|
||||
%define num_mmregs 8
|
||||
%if ARCH_X86_64
|
||||
%if VPX_ARCH_X86_64
|
||||
%define num_mmregs 16
|
||||
%endif
|
||||
%define mova movdqa
|
||||
@@ -1026,7 +1026,7 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
|
||||
%define RESET_MM_PERMUTATION INIT_YMM %1
|
||||
%define mmsize 32
|
||||
%define num_mmregs 8
|
||||
%if ARCH_X86_64
|
||||
%if VPX_ARCH_X86_64
|
||||
%define num_mmregs 16
|
||||
%endif
|
||||
%define mova movdqa
|
||||
@@ -1637,7 +1637,7 @@ FMA4_INSTR fnmsub, pd, ps, sd, ss
|
||||
|
||||
; workaround: vpbroadcastq is broken in x86_32 due to a yasm bug (fixed in 1.3.0)
|
||||
%ifdef __YASM_VER__
|
||||
%if __YASM_VERSION_ID__ < 0x01030000 && ARCH_X86_64 == 0
|
||||
%if __YASM_VERSION_ID__ < 0x01030000 && VPX_ARCH_X86_64 == 0
|
||||
%macro vpbroadcastq 2
|
||||
%if sizeof%1 == 16
|
||||
movddup %1, %2
|
||||
|
||||
+1
-1
@@ -292,7 +292,7 @@ typedef struct macroblockd {
|
||||
|
||||
struct vpx_internal_error_info error_info;
|
||||
|
||||
#if ARCH_X86 || ARCH_X86_64
|
||||
#if VPX_ARCH_X86 || VPX_ARCH_X86_64
|
||||
/* This is an intermediate buffer currently used in sub-pixel motion search
|
||||
* to keep a copy of the reference area. This buffer can be used for other
|
||||
* purposes.
|
||||
|
||||
@@ -10,11 +10,11 @@
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#if ARCH_ARM
|
||||
#if VPX_ARCH_ARM
|
||||
#include "vpx_ports/arm.h"
|
||||
#elif ARCH_X86 || ARCH_X86_64
|
||||
#elif VPX_ARCH_X86 || VPX_ARCH_X86_64
|
||||
#include "vpx_ports/x86.h"
|
||||
#elif ARCH_PPC
|
||||
#elif VPX_ARCH_PPC
|
||||
#include "vpx_ports/ppc.h"
|
||||
#endif
|
||||
#include "vp8/common/onyxc_int.h"
|
||||
@@ -92,11 +92,11 @@ void vp8_machine_specific_config(VP8_COMMON *ctx) {
|
||||
(void)ctx;
|
||||
#endif /* CONFIG_MULTITHREAD */
|
||||
|
||||
#if ARCH_ARM
|
||||
#if VPX_ARCH_ARM
|
||||
ctx->cpu_caps = arm_cpu_caps();
|
||||
#elif ARCH_X86 || ARCH_X86_64
|
||||
#elif VPX_ARCH_X86 || VPX_ARCH_X86_64
|
||||
ctx->cpu_caps = x86_simd_caps();
|
||||
#elif ARCH_PPC
|
||||
#elif VPX_ARCH_PPC
|
||||
ctx->cpu_caps = ppc_simd_caps();
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -26,7 +26,7 @@ extern "C" {
|
||||
|
||||
typedef enum { NORMAL_LOOPFILTER = 0, SIMPLE_LOOPFILTER = 1 } LOOPFILTERTYPE;
|
||||
|
||||
#if ARCH_ARM
|
||||
#if VPX_ARCH_ARM
|
||||
#define SIMD_WIDTH 1
|
||||
#else
|
||||
#define SIMD_WIDTH 16
|
||||
|
||||
@@ -185,7 +185,7 @@ static inline int sem_destroy(sem_t *sem) {
|
||||
|
||||
#endif
|
||||
|
||||
#if ARCH_X86 || ARCH_X86_64
|
||||
#if VPX_ARCH_X86 || VPX_ARCH_X86_64
|
||||
#include "vpx_ports/x86.h"
|
||||
#else
|
||||
#define x86_pause_hint()
|
||||
|
||||
@@ -22,7 +22,7 @@
|
||||
#define prototype_simple_loopfilter(sym) \
|
||||
void sym(unsigned char *y, int ystride, const unsigned char *blimit)
|
||||
|
||||
#if HAVE_SSE2 && ARCH_X86_64
|
||||
#if HAVE_SSE2 && VPX_ARCH_X86_64
|
||||
prototype_loopfilter(vp8_loop_filter_bv_y_sse2);
|
||||
prototype_loopfilter(vp8_loop_filter_bh_y_sse2);
|
||||
#else
|
||||
@@ -68,7 +68,7 @@ void vp8_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
void vp8_loop_filter_bh_sse2(unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr, int y_stride, int uv_stride,
|
||||
loop_filter_info *lfi) {
|
||||
#if ARCH_X86_64
|
||||
#if VPX_ARCH_X86_64
|
||||
vp8_loop_filter_bh_y_sse2(y_ptr, y_stride, lfi->blim, lfi->lim, lfi->hev_thr,
|
||||
2);
|
||||
#else
|
||||
@@ -101,7 +101,7 @@ void vp8_loop_filter_bhs_sse2(unsigned char *y_ptr, int y_stride,
|
||||
void vp8_loop_filter_bv_sse2(unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr, int y_stride, int uv_stride,
|
||||
loop_filter_info *lfi) {
|
||||
#if ARCH_X86_64
|
||||
#if VPX_ARCH_X86_64
|
||||
vp8_loop_filter_bv_y_sse2(y_ptr, y_stride, lfi->blim, lfi->lim, lfi->hev_thr,
|
||||
2);
|
||||
#else
|
||||
|
||||
@@ -37,7 +37,7 @@
|
||||
#if CONFIG_ERROR_CONCEALMENT
|
||||
#include "error_concealment.h"
|
||||
#endif
|
||||
#if ARCH_ARM
|
||||
#if VPX_ARCH_ARM
|
||||
#include "vpx_ports/arm.h"
|
||||
#endif
|
||||
|
||||
|
||||
+3
-3
@@ -251,7 +251,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
|
||||
int pre_stride = x->e_mbd.pre.y_stride;
|
||||
unsigned char *base_pre = x->e_mbd.pre.y_buffer;
|
||||
|
||||
#if ARCH_X86 || ARCH_X86_64
|
||||
#if VPX_ARCH_X86 || VPX_ARCH_X86_64
|
||||
MACROBLOCKD *xd = &x->e_mbd;
|
||||
unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
|
||||
bestmv->as_mv.col;
|
||||
@@ -380,7 +380,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
|
||||
int pre_stride = x->e_mbd.pre.y_stride;
|
||||
unsigned char *base_pre = x->e_mbd.pre.y_buffer;
|
||||
|
||||
#if ARCH_X86 || ARCH_X86_64
|
||||
#if VPX_ARCH_X86 || VPX_ARCH_X86_64
|
||||
MACROBLOCKD *xd = &x->e_mbd;
|
||||
unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
|
||||
bestmv->as_mv.col;
|
||||
@@ -676,7 +676,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
|
||||
int pre_stride = x->e_mbd.pre.y_stride;
|
||||
unsigned char *base_pre = x->e_mbd.pre.y_buffer;
|
||||
|
||||
#if ARCH_X86 || ARCH_X86_64
|
||||
#if VPX_ARCH_X86 || VPX_ARCH_X86_64
|
||||
MACROBLOCKD *xd = &x->e_mbd;
|
||||
unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
|
||||
bestmv->as_mv.col;
|
||||
|
||||
@@ -38,7 +38,7 @@
|
||||
#include "vpx_ports/system_state.h"
|
||||
#include "vpx_ports/vpx_timer.h"
|
||||
#include "vpx_util/vpx_write_yuv_frame.h"
|
||||
#if ARCH_ARM
|
||||
#if VPX_ARCH_ARM
|
||||
#include "vpx_ports/arm.h"
|
||||
#endif
|
||||
#if CONFIG_MULTI_RES_ENCODING
|
||||
@@ -2043,7 +2043,7 @@ struct VP8_COMP *vp8_create_compressor(VP8_CONFIG *oxcf) {
|
||||
cpi->fn_ptr[BLOCK_4X4].sdx8f = vpx_sad4x4x8;
|
||||
cpi->fn_ptr[BLOCK_4X4].sdx4df = vpx_sad4x4x4d;
|
||||
|
||||
#if ARCH_X86 || ARCH_X86_64
|
||||
#if VPX_ARCH_X86 || VPX_ARCH_X86_64
|
||||
cpi->fn_ptr[BLOCK_16X16].copymem = vp8_copy32xn;
|
||||
cpi->fn_ptr[BLOCK_16X8].copymem = vp8_copy32xn;
|
||||
cpi->fn_ptr[BLOCK_8X16].copymem = vp8_copy32xn;
|
||||
|
||||
@@ -18,7 +18,7 @@
|
||||
#include "vpx_scale/vpx_scale.h"
|
||||
#include "vp8/common/alloccommon.h"
|
||||
#include "vp8/common/loopfilter.h"
|
||||
#if ARCH_ARM
|
||||
#if VPX_ARCH_ARM
|
||||
#include "vpx_ports/arm.h"
|
||||
#endif
|
||||
|
||||
|
||||
+3
-3
@@ -69,8 +69,8 @@ VP8_COMMON_SRCS-yes += common/vp8_entropymodedata.h
|
||||
|
||||
VP8_COMMON_SRCS-yes += common/treecoder.c
|
||||
|
||||
VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp8_asm_stubs.c
|
||||
VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/loopfilter_x86.c
|
||||
VP8_COMMON_SRCS-$(VPX_ARCH_X86)$(VPX_ARCH_X86_64) += common/x86/vp8_asm_stubs.c
|
||||
VP8_COMMON_SRCS-$(VPX_ARCH_X86)$(VPX_ARCH_X86_64) += common/x86/loopfilter_x86.c
|
||||
VP8_COMMON_SRCS-$(CONFIG_POSTPROC) += common/mfqe.c
|
||||
VP8_COMMON_SRCS-$(CONFIG_POSTPROC) += common/postproc.h
|
||||
VP8_COMMON_SRCS-$(CONFIG_POSTPROC) += common/postproc.c
|
||||
@@ -92,7 +92,7 @@ ifeq ($(CONFIG_POSTPROC),yes)
|
||||
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/mfqe_sse2.asm
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH_X86_64),yes)
|
||||
ifeq ($(VPX_ARCH_X86_64),yes)
|
||||
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/loopfilter_block_sse2_x86_64.asm
|
||||
endif
|
||||
|
||||
|
||||
@@ -114,7 +114,7 @@ int vp9_diamond_search_sad_avx(const MACROBLOCK *x,
|
||||
// Work out the start point for the search
|
||||
const uint8_t *best_address = in_what;
|
||||
const uint8_t *new_best_address = best_address;
|
||||
#if ARCH_X86_64
|
||||
#if VPX_ARCH_X86_64
|
||||
__m128i v_ba_q = _mm_set1_epi64x((intptr_t)best_address);
|
||||
#else
|
||||
__m128i v_ba_d = _mm_set1_epi32((intptr_t)best_address);
|
||||
@@ -138,7 +138,7 @@ int vp9_diamond_search_sad_avx(const MACROBLOCK *x,
|
||||
for (i = 0, step = 0; step < tot_steps; step++) {
|
||||
for (j = 0; j < cfg->searches_per_step; j += 4, i += 4) {
|
||||
__m128i v_sad_d, v_cost_d, v_outside_d, v_inside_d, v_diff_mv_w;
|
||||
#if ARCH_X86_64
|
||||
#if VPX_ARCH_X86_64
|
||||
__m128i v_blocka[2];
|
||||
#else
|
||||
__m128i v_blocka[1];
|
||||
@@ -175,7 +175,7 @@ int vp9_diamond_search_sad_avx(const MACROBLOCK *x,
|
||||
|
||||
// Compute the SIMD pointer offsets.
|
||||
{
|
||||
#if ARCH_X86_64 // sizeof(intptr_t) == 8
|
||||
#if VPX_ARCH_X86_64 // sizeof(intptr_t) == 8
|
||||
// Load the offsets
|
||||
__m128i v_bo10_q = _mm_loadu_si128((const __m128i *)&ss_os[i + 0]);
|
||||
__m128i v_bo32_q = _mm_loadu_si128((const __m128i *)&ss_os[i + 2]);
|
||||
@@ -186,7 +186,7 @@ int vp9_diamond_search_sad_avx(const MACROBLOCK *x,
|
||||
// Compute the candidate addresses
|
||||
v_blocka[0] = _mm_add_epi64(v_ba_q, v_bo10_q);
|
||||
v_blocka[1] = _mm_add_epi64(v_ba_q, v_bo32_q);
|
||||
#else // ARCH_X86 // sizeof(intptr_t) == 4
|
||||
#else // VPX_ARCH_X86 // sizeof(intptr_t) == 4
|
||||
__m128i v_bo_d = _mm_loadu_si128((const __m128i *)&ss_os[i]);
|
||||
v_bo_d = _mm_and_si128(v_bo_d, v_inside_d);
|
||||
v_blocka[0] = _mm_add_epi32(v_ba_d, v_bo_d);
|
||||
@@ -294,7 +294,7 @@ int vp9_diamond_search_sad_avx(const MACROBLOCK *x,
|
||||
best_address = new_best_address;
|
||||
|
||||
v_bmv_w = _mm_set1_epi32(bmv.as_int);
|
||||
#if ARCH_X86_64
|
||||
#if VPX_ARCH_X86_64
|
||||
v_ba_q = _mm_set1_epi64x((intptr_t)best_address);
|
||||
#else
|
||||
v_ba_d = _mm_set1_epi32((intptr_t)best_address);
|
||||
|
||||
@@ -58,7 +58,7 @@ cglobal block_error, 3, 3, 8, uqc, dqc, size, ssz
|
||||
movhlps m7, m6
|
||||
paddq m4, m5
|
||||
paddq m6, m7
|
||||
%if ARCH_X86_64
|
||||
%if VPX_ARCH_X86_64
|
||||
movq rax, m4
|
||||
movq [sszq], m6
|
||||
%else
|
||||
@@ -105,7 +105,7 @@ cglobal block_error_fp, 3, 3, 6, uqc, dqc, size
|
||||
; accumulate horizontally and store in return value
|
||||
movhlps m5, m4
|
||||
paddq m4, m5
|
||||
%if ARCH_X86_64
|
||||
%if VPX_ARCH_X86_64
|
||||
movq rax, m4
|
||||
%else
|
||||
pshufd m5, m4, 0x1
|
||||
|
||||
+1
-1
@@ -118,7 +118,7 @@ endif
|
||||
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2.asm
|
||||
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_error_sse2.asm
|
||||
|
||||
ifeq ($(ARCH_X86_64),yes)
|
||||
ifeq ($(VPX_ARCH_X86_64),yes)
|
||||
VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_quantize_ssse3_x86_64.asm
|
||||
endif
|
||||
|
||||
|
||||
@@ -173,7 +173,7 @@ vpx_codec_err_t vpx_codec_enc_config_default(vpx_codec_iface_t *iface,
|
||||
return res;
|
||||
}
|
||||
|
||||
#if ARCH_X86 || ARCH_X86_64
|
||||
#if VPX_ARCH_X86 || VPX_ARCH_X86_64
|
||||
/* On X86, disable the x87 unit's internal 80 bit precision for better
|
||||
* consistency with the SSE unit's 64 bit precision.
|
||||
*/
|
||||
|
||||
+2
-2
@@ -260,7 +260,7 @@ void vpx_quantize_b_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
|
||||
15;
|
||||
|
||||
qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
|
||||
#if (ARCH_X86 || ARCH_X86_64) && !CONFIG_VP9_HIGHBITDEPTH
|
||||
#if (VPX_ARCH_X86 || VPX_ARCH_X86_64) && !CONFIG_VP9_HIGHBITDEPTH
|
||||
// When tran_low_t is only 16 bits, dqcoeff can exceed its range. Rather than
|
||||
// truncating with a cast, saturate the value. This is easier to implement
|
||||
// on x86 and preserves the sign of the value.
|
||||
@@ -268,7 +268,7 @@ void vpx_quantize_b_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
|
||||
clamp(qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2, INT16_MIN, INT16_MAX);
|
||||
#else
|
||||
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
|
||||
#endif // ARCH_X86 && CONFIG_VP9_HIGHBITDEPTH
|
||||
#endif // (VPX_ARCH_X86 || VPX_ARCH_X86_64) && !CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
if (tmp) eob = idx_arr[i];
|
||||
}
|
||||
|
||||
+1
-1
@@ -62,7 +62,7 @@ typedef struct variance_vtable {
|
||||
vpx_sad_multi_fn_t sdx3f;
|
||||
vpx_sad_multi_fn_t sdx8f;
|
||||
vpx_sad_multi_d_fn_t sdx4df;
|
||||
#if ARCH_X86 || ARCH_X86_64
|
||||
#if VPX_ARCH_X86 || VPX_ARCH_X86_64
|
||||
vp8_copy32xn_fn_t copymem;
|
||||
#endif
|
||||
} vp8_variance_fn_ptr_t;
|
||||
|
||||
+5
-5
@@ -87,7 +87,7 @@ ifeq ($(CONFIG_VP9),yes)
|
||||
DSP_SRCS-yes += vpx_convolve.c
|
||||
DSP_SRCS-yes += vpx_convolve.h
|
||||
|
||||
DSP_SRCS-$(ARCH_X86)$(ARCH_X86_64) += x86/convolve.h
|
||||
DSP_SRCS-$(VPX_ARCH_X86)$(VPX_ARCH_X86_64) += x86/convolve.h
|
||||
|
||||
DSP_SRCS-$(HAVE_SSE2) += x86/convolve_sse2.h
|
||||
DSP_SRCS-$(HAVE_SSSE3) += x86/convolve_ssse3.h
|
||||
@@ -205,7 +205,7 @@ DSP_SRCS-$(HAVE_SSE2) += x86/fwd_txfm_sse2.h
|
||||
DSP_SRCS-$(HAVE_SSE2) += x86/fwd_txfm_sse2.c
|
||||
DSP_SRCS-$(HAVE_SSE2) += x86/fwd_txfm_impl_sse2.h
|
||||
DSP_SRCS-$(HAVE_SSE2) += x86/fwd_dct32x32_impl_sse2.h
|
||||
ifeq ($(ARCH_X86_64),yes)
|
||||
ifeq ($(VPX_ARCH_X86_64),yes)
|
||||
DSP_SRCS-$(HAVE_SSSE3) += x86/fwd_txfm_ssse3_x86_64.asm
|
||||
endif
|
||||
DSP_SRCS-$(HAVE_AVX2) += x86/fwd_txfm_avx2.c
|
||||
@@ -316,7 +316,7 @@ DSP_SRCS-$(HAVE_AVX2) += x86/avg_intrin_avx2.c
|
||||
DSP_SRCS-$(HAVE_NEON) += arm/avg_neon.c
|
||||
DSP_SRCS-$(HAVE_NEON) += arm/hadamard_neon.c
|
||||
DSP_SRCS-$(HAVE_MSA) += mips/avg_msa.c
|
||||
ifeq ($(ARCH_X86_64),yes)
|
||||
ifeq ($(VPX_ARCH_X86_64),yes)
|
||||
DSP_SRCS-$(HAVE_SSSE3) += x86/avg_ssse3_x86_64.asm
|
||||
endif
|
||||
DSP_SRCS-$(HAVE_VSX) += ppc/hadamard_vsx.c
|
||||
@@ -384,9 +384,9 @@ DSP_SRCS-$(HAVE_SSE2) += x86/variance_sse2.c # Contains SSE2 and SSSE3
|
||||
DSP_SRCS-$(HAVE_AVX2) += x86/variance_avx2.c
|
||||
DSP_SRCS-$(HAVE_VSX) += ppc/variance_vsx.c
|
||||
|
||||
ifeq ($(ARCH_X86_64),yes)
|
||||
ifeq ($(VPX_ARCH_X86_64),yes)
|
||||
DSP_SRCS-$(HAVE_SSE2) += x86/ssim_opt_x86_64.asm
|
||||
endif # ARCH_X86_64
|
||||
endif # VPX_ARCH_X86_64
|
||||
|
||||
DSP_SRCS-$(HAVE_SSE2) += x86/subpel_variance_sse2.asm # Contains SSE2 and SSSE3
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
|
||||
SECTION .text
|
||||
|
||||
%if ARCH_X86_64
|
||||
%if VPX_ARCH_X86_64
|
||||
; matrix transpose
|
||||
%macro TRANSPOSE8X8 10
|
||||
; stage 1
|
||||
|
||||
@@ -27,7 +27,7 @@ TRANSFORM_COEFFS 9102, 13623
|
||||
|
||||
SECTION .text
|
||||
|
||||
%if ARCH_X86_64
|
||||
%if VPX_ARCH_X86_64
|
||||
INIT_XMM ssse3
|
||||
cglobal fdct8x8, 3, 5, 13, input, output, stride
|
||||
|
||||
|
||||
@@ -25,11 +25,11 @@ cglobal highbd_sad%1x%2, 4, %3, 7, src, src_stride, ref, ref_stride, \
|
||||
cglobal highbd_sad%1x%2_avg, 5, 1 + %3, 7, src, src_stride, ref, ref_stride, \
|
||||
second_pred, n_rows
|
||||
%else ; %3 == 7
|
||||
cglobal highbd_sad%1x%2_avg, 5, ARCH_X86_64 + %3, 7, src, src_stride, \
|
||||
cglobal highbd_sad%1x%2_avg, 5, VPX_ARCH_X86_64 + %3, 7, src, src_stride, \
|
||||
ref, ref_stride, \
|
||||
second_pred, \
|
||||
src_stride3, ref_stride3
|
||||
%if ARCH_X86_64
|
||||
%if VPX_ARCH_X86_64
|
||||
%define n_rowsd r7d
|
||||
%else ; x86-32
|
||||
%define n_rowsd dword r0m
|
||||
|
||||
@@ -78,7 +78,7 @@ SECTION .text
|
||||
%endmacro
|
||||
|
||||
%macro INC_SRC_BY_SRC_STRIDE 0
|
||||
%if ARCH_X86=1 && CONFIG_PIC=1
|
||||
%if VPX_ARCH_X86=1 && CONFIG_PIC=1
|
||||
add srcq, src_stridemp
|
||||
add srcq, src_stridemp
|
||||
%else
|
||||
@@ -91,7 +91,7 @@ SECTION .text
|
||||
%define filter_idx_shift 5
|
||||
|
||||
|
||||
%if ARCH_X86_64
|
||||
%if VPX_ARCH_X86_64
|
||||
%if %2 == 1 ; avg
|
||||
cglobal highbd_sub_pixel_avg_variance%1xh, 9, 10, 13, src, src_stride, \
|
||||
x_offset, y_offset, \
|
||||
@@ -268,11 +268,11 @@ SECTION .text
|
||||
|
||||
.x_zero_y_nonhalf:
|
||||
; x_offset == 0 && y_offset == bilin interpolation
|
||||
%if ARCH_X86_64
|
||||
%if VPX_ARCH_X86_64
|
||||
lea bilin_filter, [GLOBAL(bilin_filter_m)]
|
||||
%endif
|
||||
shl y_offsetd, filter_idx_shift
|
||||
%if ARCH_X86_64 && mmsize == 16
|
||||
%if VPX_ARCH_X86_64 && mmsize == 16
|
||||
mova m8, [bilin_filter+y_offsetq]
|
||||
mova m9, [bilin_filter+y_offsetq+16]
|
||||
mova m10, [GLOBAL(pw_8)]
|
||||
@@ -280,7 +280,7 @@ SECTION .text
|
||||
%define filter_y_b m9
|
||||
%define filter_rnd m10
|
||||
%else ; x86-32 or mmx
|
||||
%if ARCH_X86=1 && CONFIG_PIC=1
|
||||
%if VPX_ARCH_X86=1 && CONFIG_PIC=1
|
||||
; x_offset == 0, reuse x_offset reg
|
||||
%define tempq x_offsetq
|
||||
add y_offsetq, g_bilin_filterm
|
||||
@@ -495,11 +495,11 @@ SECTION .text
|
||||
|
||||
.x_half_y_nonhalf:
|
||||
; x_offset == 0.5 && y_offset == bilin interpolation
|
||||
%if ARCH_X86_64
|
||||
%if VPX_ARCH_X86_64
|
||||
lea bilin_filter, [GLOBAL(bilin_filter_m)]
|
||||
%endif
|
||||
shl y_offsetd, filter_idx_shift
|
||||
%if ARCH_X86_64 && mmsize == 16
|
||||
%if VPX_ARCH_X86_64 && mmsize == 16
|
||||
mova m8, [bilin_filter+y_offsetq]
|
||||
mova m9, [bilin_filter+y_offsetq+16]
|
||||
mova m10, [GLOBAL(pw_8)]
|
||||
@@ -507,7 +507,7 @@ SECTION .text
|
||||
%define filter_y_b m9
|
||||
%define filter_rnd m10
|
||||
%else ; x86_32
|
||||
%if ARCH_X86=1 && CONFIG_PIC=1
|
||||
%if VPX_ARCH_X86=1 && CONFIG_PIC=1
|
||||
; x_offset == 0.5. We can reuse x_offset reg
|
||||
%define tempq x_offsetq
|
||||
add y_offsetq, g_bilin_filterm
|
||||
@@ -617,11 +617,11 @@ SECTION .text
|
||||
jnz .x_nonhalf_y_nonzero
|
||||
|
||||
; x_offset == bilin interpolation && y_offset == 0
|
||||
%if ARCH_X86_64
|
||||
%if VPX_ARCH_X86_64
|
||||
lea bilin_filter, [GLOBAL(bilin_filter_m)]
|
||||
%endif
|
||||
shl x_offsetd, filter_idx_shift
|
||||
%if ARCH_X86_64 && mmsize == 16
|
||||
%if VPX_ARCH_X86_64 && mmsize == 16
|
||||
mova m8, [bilin_filter+x_offsetq]
|
||||
mova m9, [bilin_filter+x_offsetq+16]
|
||||
mova m10, [GLOBAL(pw_8)]
|
||||
@@ -629,7 +629,7 @@ SECTION .text
|
||||
%define filter_x_b m9
|
||||
%define filter_rnd m10
|
||||
%else ; x86-32
|
||||
%if ARCH_X86=1 && CONFIG_PIC=1
|
||||
%if VPX_ARCH_X86=1 && CONFIG_PIC=1
|
||||
; y_offset == 0. We can reuse y_offset reg.
|
||||
%define tempq y_offsetq
|
||||
add x_offsetq, g_bilin_filterm
|
||||
@@ -716,11 +716,11 @@ SECTION .text
|
||||
jne .x_nonhalf_y_nonhalf
|
||||
|
||||
; x_offset == bilin interpolation && y_offset == 0.5
|
||||
%if ARCH_X86_64
|
||||
%if VPX_ARCH_X86_64
|
||||
lea bilin_filter, [GLOBAL(bilin_filter_m)]
|
||||
%endif
|
||||
shl x_offsetd, filter_idx_shift
|
||||
%if ARCH_X86_64 && mmsize == 16
|
||||
%if VPX_ARCH_X86_64 && mmsize == 16
|
||||
mova m8, [bilin_filter+x_offsetq]
|
||||
mova m9, [bilin_filter+x_offsetq+16]
|
||||
mova m10, [GLOBAL(pw_8)]
|
||||
@@ -728,7 +728,7 @@ SECTION .text
|
||||
%define filter_x_b m9
|
||||
%define filter_rnd m10
|
||||
%else ; x86-32
|
||||
%if ARCH_X86=1 && CONFIG_PIC=1
|
||||
%if VPX_ARCH_X86=1 && CONFIG_PIC=1
|
||||
; y_offset == 0.5. We can reuse y_offset reg.
|
||||
%define tempq y_offsetq
|
||||
add x_offsetq, g_bilin_filterm
|
||||
@@ -843,12 +843,12 @@ SECTION .text
|
||||
|
||||
.x_nonhalf_y_nonhalf:
|
||||
; loading filter - this is same as in 8-bit depth
|
||||
%if ARCH_X86_64
|
||||
%if VPX_ARCH_X86_64
|
||||
lea bilin_filter, [GLOBAL(bilin_filter_m)]
|
||||
%endif
|
||||
shl x_offsetd, filter_idx_shift ; filter_idx_shift = 5
|
||||
shl y_offsetd, filter_idx_shift
|
||||
%if ARCH_X86_64 && mmsize == 16
|
||||
%if VPX_ARCH_X86_64 && mmsize == 16
|
||||
mova m8, [bilin_filter+x_offsetq]
|
||||
mova m9, [bilin_filter+x_offsetq+16]
|
||||
mova m10, [bilin_filter+y_offsetq]
|
||||
@@ -860,7 +860,7 @@ SECTION .text
|
||||
%define filter_y_b m11
|
||||
%define filter_rnd m12
|
||||
%else ; x86-32
|
||||
%if ARCH_X86=1 && CONFIG_PIC=1
|
||||
%if VPX_ARCH_X86=1 && CONFIG_PIC=1
|
||||
; In this case, there is NO unused register. Used src_stride register. Later,
|
||||
; src_stride has to be loaded from stack when it is needed.
|
||||
%define tempq src_strideq
|
||||
|
||||
@@ -25,11 +25,11 @@ cglobal sad%1x%2, 4, %3, 6, src, src_stride, ref, ref_stride, \
|
||||
cglobal sad%1x%2_avg, 5, 1 + %3, 5, src, src_stride, ref, ref_stride, \
|
||||
second_pred, n_rows
|
||||
%else ; %3 == 7
|
||||
cglobal sad%1x%2_avg, 5, ARCH_X86_64 + %3, 6, src, src_stride, \
|
||||
cglobal sad%1x%2_avg, 5, VPX_ARCH_X86_64 + %3, 6, src, src_stride, \
|
||||
ref, ref_stride, \
|
||||
second_pred, \
|
||||
src_stride3, ref_stride3
|
||||
%if ARCH_X86_64
|
||||
%if VPX_ARCH_X86_64
|
||||
%define n_rowsd r7d
|
||||
%else ; x86-32
|
||||
%define n_rowsd dword r0m
|
||||
|
||||
@@ -95,7 +95,7 @@ SECTION .text
|
||||
%endmacro
|
||||
|
||||
%macro INC_SRC_BY_SRC_STRIDE 0
|
||||
%if ARCH_X86=1 && CONFIG_PIC=1
|
||||
%if VPX_ARCH_X86=1 && CONFIG_PIC=1
|
||||
add srcq, src_stridemp
|
||||
%else
|
||||
add srcq, src_strideq
|
||||
@@ -114,7 +114,7 @@ SECTION .text
|
||||
; 11, not 13, if the registers are ordered correctly. May make a minor speed
|
||||
; difference on Win64
|
||||
|
||||
%if ARCH_X86_64
|
||||
%if VPX_ARCH_X86_64
|
||||
%if %2 == 1 ; avg
|
||||
cglobal sub_pixel_avg_variance%1xh, 9, 10, 13, src, src_stride, \
|
||||
x_offset, y_offset, ref, ref_stride, \
|
||||
@@ -352,11 +352,11 @@ SECTION .text
|
||||
|
||||
.x_zero_y_nonhalf:
|
||||
; x_offset == 0 && y_offset == bilin interpolation
|
||||
%if ARCH_X86_64
|
||||
%if VPX_ARCH_X86_64
|
||||
lea bilin_filter, [GLOBAL(bilin_filter_m)]
|
||||
%endif
|
||||
shl y_offsetd, filter_idx_shift
|
||||
%if ARCH_X86_64 && %1 > 4
|
||||
%if VPX_ARCH_X86_64 && %1 > 4
|
||||
mova m8, [bilin_filter+y_offsetq]
|
||||
%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
|
||||
mova m9, [bilin_filter+y_offsetq+16]
|
||||
@@ -366,7 +366,7 @@ SECTION .text
|
||||
%define filter_y_b m9
|
||||
%define filter_rnd m10
|
||||
%else ; x86-32 or mmx
|
||||
%if ARCH_X86=1 && CONFIG_PIC=1
|
||||
%if VPX_ARCH_X86=1 && CONFIG_PIC=1
|
||||
; x_offset == 0, reuse x_offset reg
|
||||
%define tempq x_offsetq
|
||||
add y_offsetq, g_bilin_filterm
|
||||
@@ -675,11 +675,11 @@ SECTION .text
|
||||
|
||||
.x_half_y_nonhalf:
|
||||
; x_offset == 0.5 && y_offset == bilin interpolation
|
||||
%if ARCH_X86_64
|
||||
%if VPX_ARCH_X86_64
|
||||
lea bilin_filter, [GLOBAL(bilin_filter_m)]
|
||||
%endif
|
||||
shl y_offsetd, filter_idx_shift
|
||||
%if ARCH_X86_64 && %1 > 4
|
||||
%if VPX_ARCH_X86_64 && %1 > 4
|
||||
mova m8, [bilin_filter+y_offsetq]
|
||||
%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
|
||||
mova m9, [bilin_filter+y_offsetq+16]
|
||||
@@ -689,7 +689,7 @@ SECTION .text
|
||||
%define filter_y_b m9
|
||||
%define filter_rnd m10
|
||||
%else ;x86_32
|
||||
%if ARCH_X86=1 && CONFIG_PIC=1
|
||||
%if VPX_ARCH_X86=1 && CONFIG_PIC=1
|
||||
; x_offset == 0.5. We can reuse x_offset reg
|
||||
%define tempq x_offsetq
|
||||
add y_offsetq, g_bilin_filterm
|
||||
@@ -833,11 +833,11 @@ SECTION .text
|
||||
jnz .x_nonhalf_y_nonzero
|
||||
|
||||
; x_offset == bilin interpolation && y_offset == 0
|
||||
%if ARCH_X86_64
|
||||
%if VPX_ARCH_X86_64
|
||||
lea bilin_filter, [GLOBAL(bilin_filter_m)]
|
||||
%endif
|
||||
shl x_offsetd, filter_idx_shift
|
||||
%if ARCH_X86_64 && %1 > 4
|
||||
%if VPX_ARCH_X86_64 && %1 > 4
|
||||
mova m8, [bilin_filter+x_offsetq]
|
||||
%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
|
||||
mova m9, [bilin_filter+x_offsetq+16]
|
||||
@@ -847,7 +847,7 @@ SECTION .text
|
||||
%define filter_x_b m9
|
||||
%define filter_rnd m10
|
||||
%else ; x86-32
|
||||
%if ARCH_X86=1 && CONFIG_PIC=1
|
||||
%if VPX_ARCH_X86=1 && CONFIG_PIC=1
|
||||
;y_offset == 0. We can reuse y_offset reg.
|
||||
%define tempq y_offsetq
|
||||
add x_offsetq, g_bilin_filterm
|
||||
@@ -975,11 +975,11 @@ SECTION .text
|
||||
jne .x_nonhalf_y_nonhalf
|
||||
|
||||
; x_offset == bilin interpolation && y_offset == 0.5
|
||||
%if ARCH_X86_64
|
||||
%if VPX_ARCH_X86_64
|
||||
lea bilin_filter, [GLOBAL(bilin_filter_m)]
|
||||
%endif
|
||||
shl x_offsetd, filter_idx_shift
|
||||
%if ARCH_X86_64 && %1 > 4
|
||||
%if VPX_ARCH_X86_64 && %1 > 4
|
||||
mova m8, [bilin_filter+x_offsetq]
|
||||
%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
|
||||
mova m9, [bilin_filter+x_offsetq+16]
|
||||
@@ -989,7 +989,7 @@ SECTION .text
|
||||
%define filter_x_b m9
|
||||
%define filter_rnd m10
|
||||
%else ; x86-32
|
||||
%if ARCH_X86=1 && CONFIG_PIC=1
|
||||
%if VPX_ARCH_X86=1 && CONFIG_PIC=1
|
||||
; y_offset == 0.5. We can reuse y_offset reg.
|
||||
%define tempq y_offsetq
|
||||
add x_offsetq, g_bilin_filterm
|
||||
@@ -1173,12 +1173,12 @@ SECTION .text
|
||||
STORE_AND_RET %1
|
||||
|
||||
.x_nonhalf_y_nonhalf:
|
||||
%if ARCH_X86_64
|
||||
%if VPX_ARCH_X86_64
|
||||
lea bilin_filter, [GLOBAL(bilin_filter_m)]
|
||||
%endif
|
||||
shl x_offsetd, filter_idx_shift
|
||||
shl y_offsetd, filter_idx_shift
|
||||
%if ARCH_X86_64 && %1 > 4
|
||||
%if VPX_ARCH_X86_64 && %1 > 4
|
||||
mova m8, [bilin_filter+x_offsetq]
|
||||
%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
|
||||
mova m9, [bilin_filter+x_offsetq+16]
|
||||
@@ -1194,7 +1194,7 @@ SECTION .text
|
||||
%define filter_y_b m11
|
||||
%define filter_rnd m12
|
||||
%else ; x86-32
|
||||
%if ARCH_X86=1 && CONFIG_PIC=1
|
||||
%if VPX_ARCH_X86=1 && CONFIG_PIC=1
|
||||
; In this case, there is NO unused register. Used src_stride register. Later,
|
||||
; src_stride has to be loaded from stack when it is needed.
|
||||
%define tempq src_strideq
|
||||
|
||||
@@ -92,7 +92,7 @@ uint64_t vpx_sum_squares_2d_i16_sse2(const int16_t *src, int stride, int size) {
|
||||
|
||||
v_acc_q = _mm_add_epi64(v_acc_q, _mm_srli_si128(v_acc_q, 8));
|
||||
|
||||
#if ARCH_X86_64
|
||||
#if VPX_ARCH_X86_64
|
||||
return (uint64_t)_mm_cvtsi128_si64(v_acc_q);
|
||||
#else
|
||||
{
|
||||
|
||||
@@ -64,7 +64,7 @@
|
||||
dec rcx
|
||||
%endm
|
||||
|
||||
%if ARCH_X86_64
|
||||
%if VPX_ARCH_X86_64
|
||||
%macro HIGH_GET_PARAM 0
|
||||
mov rdx, arg(5) ;filter ptr
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
@@ -197,7 +197,7 @@ sym(vpx_highbd_filter_block1d4_v2_sse2):
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
%if ARCH_X86_64
|
||||
%if VPX_ARCH_X86_64
|
||||
global sym(vpx_highbd_filter_block1d8_v2_sse2) PRIVATE
|
||||
sym(vpx_highbd_filter_block1d8_v2_sse2):
|
||||
push rbp
|
||||
@@ -277,7 +277,7 @@ sym(vpx_highbd_filter_block1d4_v2_avg_sse2):
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
%if ARCH_X86_64
|
||||
%if VPX_ARCH_X86_64
|
||||
global sym(vpx_highbd_filter_block1d8_v2_avg_sse2) PRIVATE
|
||||
sym(vpx_highbd_filter_block1d8_v2_avg_sse2):
|
||||
push rbp
|
||||
@@ -358,7 +358,7 @@ sym(vpx_highbd_filter_block1d4_h2_sse2):
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
%if ARCH_X86_64
|
||||
%if VPX_ARCH_X86_64
|
||||
global sym(vpx_highbd_filter_block1d8_h2_sse2) PRIVATE
|
||||
sym(vpx_highbd_filter_block1d8_h2_sse2):
|
||||
push rbp
|
||||
@@ -439,7 +439,7 @@ sym(vpx_highbd_filter_block1d4_h2_avg_sse2):
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
%if ARCH_X86_64
|
||||
%if VPX_ARCH_X86_64
|
||||
global sym(vpx_highbd_filter_block1d8_h2_avg_sse2) PRIVATE
|
||||
sym(vpx_highbd_filter_block1d8_h2_avg_sse2):
|
||||
push rbp
|
||||
|
||||
@@ -602,7 +602,7 @@ static void vpx_filter_block1d4_v4_sse2(const uint8_t *src_ptr,
|
||||
}
|
||||
}
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64
|
||||
#if CONFIG_VP9_HIGHBITDEPTH && VPX_ARCH_X86_64
|
||||
static void vpx_highbd_filter_block1d4_h4_sse2(
|
||||
const uint16_t *src_ptr, ptrdiff_t src_stride, uint16_t *dst_ptr,
|
||||
ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel, int bd) {
|
||||
@@ -982,7 +982,7 @@ static void vpx_highbd_filter_block1d16_v4_sse2(
|
||||
vpx_highbd_filter_block1d8_v4_sse2(src_ptr + 8, src_stride, dst_ptr + 8,
|
||||
dst_stride, height, kernel, bd);
|
||||
}
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH && VPX_ARCH_X86_64
|
||||
|
||||
// From vpx_subpixel_8t_sse2.asm.
|
||||
filter8_1dfunction vpx_filter_block1d16_v8_sse2;
|
||||
@@ -1060,7 +1060,7 @@ FUN_CONV_1D(avg_vert, y0_q4, y_step_q4, v,
|
||||
FUN_CONV_2D(, sse2, 0);
|
||||
FUN_CONV_2D(avg_, sse2, 1);
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64
|
||||
#if CONFIG_VP9_HIGHBITDEPTH && VPX_ARCH_X86_64
|
||||
// From vpx_dsp/x86/vpx_high_subpixel_8t_sse2.asm.
|
||||
highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v8_sse2;
|
||||
highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h8_sse2;
|
||||
@@ -1158,4 +1158,4 @@ HIGH_FUN_CONV_1D(avg_vert, y0_q4, y_step_q4, v,
|
||||
// int y_step_q4, int w, int h, int bd);
|
||||
HIGH_FUN_CONV_2D(, sse2, 0);
|
||||
HIGH_FUN_CONV_2D(avg_, sse2, 1);
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH && VPX_ARCH_X86_64
|
||||
|
||||
@@ -894,21 +894,21 @@ static void vpx_filter_block1d4_v4_avx2(const uint8_t *src_ptr,
|
||||
|
||||
#if HAVE_AVX2 && HAVE_SSSE3
|
||||
filter8_1dfunction vpx_filter_block1d4_v8_ssse3;
|
||||
#if ARCH_X86_64
|
||||
#if VPX_ARCH_X86_64
|
||||
filter8_1dfunction vpx_filter_block1d8_v8_intrin_ssse3;
|
||||
filter8_1dfunction vpx_filter_block1d8_h8_intrin_ssse3;
|
||||
filter8_1dfunction vpx_filter_block1d4_h8_intrin_ssse3;
|
||||
#define vpx_filter_block1d8_v8_avx2 vpx_filter_block1d8_v8_intrin_ssse3
|
||||
#define vpx_filter_block1d8_h8_avx2 vpx_filter_block1d8_h8_intrin_ssse3
|
||||
#define vpx_filter_block1d4_h8_avx2 vpx_filter_block1d4_h8_intrin_ssse3
|
||||
#else // ARCH_X86
|
||||
#else // VPX_ARCH_X86
|
||||
filter8_1dfunction vpx_filter_block1d8_v8_ssse3;
|
||||
filter8_1dfunction vpx_filter_block1d8_h8_ssse3;
|
||||
filter8_1dfunction vpx_filter_block1d4_h8_ssse3;
|
||||
#define vpx_filter_block1d8_v8_avx2 vpx_filter_block1d8_v8_ssse3
|
||||
#define vpx_filter_block1d8_h8_avx2 vpx_filter_block1d8_h8_ssse3
|
||||
#define vpx_filter_block1d4_h8_avx2 vpx_filter_block1d4_h8_ssse3
|
||||
#endif // ARCH_X86_64
|
||||
#endif // VPX_ARCH_X86_64
|
||||
filter8_1dfunction vpx_filter_block1d8_v8_avg_ssse3;
|
||||
filter8_1dfunction vpx_filter_block1d8_h8_avg_ssse3;
|
||||
filter8_1dfunction vpx_filter_block1d4_v8_avg_ssse3;
|
||||
|
||||
@@ -31,7 +31,7 @@ static INLINE __m128i shuffle_filter_convolve8_8_ssse3(
|
||||
}
|
||||
|
||||
// Used by the avx2 implementation.
|
||||
#if ARCH_X86_64
|
||||
#if VPX_ARCH_X86_64
|
||||
// Use the intrinsics below
|
||||
filter8_1dfunction vpx_filter_block1d4_h8_intrin_ssse3;
|
||||
filter8_1dfunction vpx_filter_block1d8_h8_intrin_ssse3;
|
||||
@@ -39,14 +39,14 @@ filter8_1dfunction vpx_filter_block1d8_v8_intrin_ssse3;
|
||||
#define vpx_filter_block1d4_h8_ssse3 vpx_filter_block1d4_h8_intrin_ssse3
|
||||
#define vpx_filter_block1d8_h8_ssse3 vpx_filter_block1d8_h8_intrin_ssse3
|
||||
#define vpx_filter_block1d8_v8_ssse3 vpx_filter_block1d8_v8_intrin_ssse3
|
||||
#else // ARCH_X86
|
||||
#else // VPX_ARCH_X86
|
||||
// Use the assembly in vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm.
|
||||
filter8_1dfunction vpx_filter_block1d4_h8_ssse3;
|
||||
filter8_1dfunction vpx_filter_block1d8_h8_ssse3;
|
||||
filter8_1dfunction vpx_filter_block1d8_v8_ssse3;
|
||||
#endif
|
||||
|
||||
#if ARCH_X86_64
|
||||
#if VPX_ARCH_X86_64
|
||||
void vpx_filter_block1d4_h8_intrin_ssse3(
|
||||
const uint8_t *src_ptr, ptrdiff_t src_pitch, uint8_t *output_ptr,
|
||||
ptrdiff_t output_pitch, uint32_t output_height, const int16_t *filter) {
|
||||
@@ -198,7 +198,7 @@ void vpx_filter_block1d8_v8_intrin_ssse3(
|
||||
output_ptr += out_pitch;
|
||||
}
|
||||
}
|
||||
#endif // ARCH_X86_64
|
||||
#endif // VPX_ARCH_X86_64
|
||||
|
||||
static void vpx_filter_block1d16_h4_ssse3(const uint8_t *src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
|
||||
@@ -26,7 +26,7 @@ SECTION .text
|
||||
%define LOCAL_VARS_SIZE 16*6
|
||||
|
||||
%macro SETUP_LOCAL_VARS 0
|
||||
; TODO(slavarnway): using xmm registers for these on ARCH_X86_64 +
|
||||
; TODO(slavarnway): using xmm registers for these on VPX_ARCH_X86_64 +
|
||||
; pmaddubsw has a higher latency on some platforms, this might be eased by
|
||||
; interleaving the instructions.
|
||||
%define k0k1 [rsp + 16*0]
|
||||
@@ -48,7 +48,7 @@ SECTION .text
|
||||
mova k2k3, m1
|
||||
mova k4k5, m2
|
||||
mova k6k7, m3
|
||||
%if ARCH_X86_64
|
||||
%if VPX_ARCH_X86_64
|
||||
%define krd m12
|
||||
%define tmp0 [rsp + 16*4]
|
||||
%define tmp1 [rsp + 16*5]
|
||||
@@ -68,7 +68,7 @@ SECTION .text
|
||||
%endm
|
||||
|
||||
;-------------------------------------------------------------------------------
|
||||
%if ARCH_X86_64
|
||||
%if VPX_ARCH_X86_64
|
||||
%define LOCAL_VARS_SIZE_H4 0
|
||||
%else
|
||||
%define LOCAL_VARS_SIZE_H4 16*4
|
||||
@@ -79,7 +79,7 @@ cglobal filter_block1d4_%1, 6, 6, 11, LOCAL_VARS_SIZE_H4, \
|
||||
src, sstride, dst, dstride, height, filter
|
||||
mova m4, [filterq]
|
||||
packsswb m4, m4
|
||||
%if ARCH_X86_64
|
||||
%if VPX_ARCH_X86_64
|
||||
%define k0k1k4k5 m8
|
||||
%define k2k3k6k7 m9
|
||||
%define krd m10
|
||||
@@ -339,7 +339,7 @@ SUBPIX_HFILTER4 h8_avg ; vpx_filter_block1d4_h8_avg_ssse3
|
||||
; TODO(Linfeng): Detect cpu type and choose the code with better performance.
|
||||
%define X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON 1
|
||||
|
||||
%if ARCH_X86_64 && X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON
|
||||
%if VPX_ARCH_X86_64 && X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON
|
||||
%define NUM_GENERAL_REG_USED 9
|
||||
%else
|
||||
%define NUM_GENERAL_REG_USED 6
|
||||
@@ -359,9 +359,9 @@ cglobal filter_block1d%2_%1, 6, NUM_GENERAL_REG_USED, 15, LOCAL_VARS_SIZE, \
|
||||
|
||||
dec heightd
|
||||
|
||||
%if ARCH_X86 || X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON
|
||||
%if VPX_ARCH_X86 || X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON
|
||||
|
||||
%if ARCH_X86_64
|
||||
%if VPX_ARCH_X86_64
|
||||
%define src1q r7
|
||||
%define sstride6q r8
|
||||
%define dst_stride dstrideq
|
||||
@@ -467,7 +467,7 @@ cglobal filter_block1d%2_%1, 6, NUM_GENERAL_REG_USED, 15, LOCAL_VARS_SIZE, \
|
||||
movx [dstq], m0
|
||||
|
||||
%else
|
||||
; ARCH_X86_64
|
||||
; VPX_ARCH_X86_64
|
||||
|
||||
movx m0, [srcq ] ;A
|
||||
movx m1, [srcq + sstrideq ] ;B
|
||||
@@ -567,7 +567,7 @@ cglobal filter_block1d%2_%1, 6, NUM_GENERAL_REG_USED, 15, LOCAL_VARS_SIZE, \
|
||||
%endif
|
||||
movx [dstq], m0
|
||||
|
||||
%endif ; ARCH_X86_64
|
||||
%endif ; VPX_ARCH_X86_64
|
||||
|
||||
.done:
|
||||
REP_RET
|
||||
@@ -581,9 +581,9 @@ cglobal filter_block1d16_%1, 6, NUM_GENERAL_REG_USED, 16, LOCAL_VARS_SIZE, \
|
||||
mova m4, [filterq]
|
||||
SETUP_LOCAL_VARS
|
||||
|
||||
%if ARCH_X86 || X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON
|
||||
%if VPX_ARCH_X86 || X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON
|
||||
|
||||
%if ARCH_X86_64
|
||||
%if VPX_ARCH_X86_64
|
||||
%define src1q r7
|
||||
%define sstride6q r8
|
||||
%define dst_stride dstrideq
|
||||
@@ -654,7 +654,7 @@ cglobal filter_block1d16_%1, 6, NUM_GENERAL_REG_USED, 16, LOCAL_VARS_SIZE, \
|
||||
REP_RET
|
||||
|
||||
%else
|
||||
; ARCH_X86_64
|
||||
; VPX_ARCH_X86_64
|
||||
dec heightd
|
||||
|
||||
movu m1, [srcq ] ;A
|
||||
@@ -790,7 +790,7 @@ cglobal filter_block1d16_%1, 6, NUM_GENERAL_REG_USED, 16, LOCAL_VARS_SIZE, \
|
||||
.done:
|
||||
REP_RET
|
||||
|
||||
%endif ; ARCH_X86_64
|
||||
%endif ; VPX_ARCH_X86_64
|
||||
|
||||
%endm
|
||||
|
||||
|
||||
@@ -17,11 +17,11 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if (ARCH_X86 || ARCH_X86_64) && HAVE_MMX
|
||||
#if (VPX_ARCH_X86 || VPX_ARCH_X86_64) && HAVE_MMX
|
||||
extern void vpx_clear_system_state(void);
|
||||
#else
|
||||
#define vpx_clear_system_state()
|
||||
#endif // (ARCH_X86 || ARCH_X86_64) && HAVE_MMX
|
||||
#endif // (VPX_ARCH_X86 || VPX_ARCH_X86_64) && HAVE_MMX
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
||||
@@ -17,29 +17,29 @@ PORTS_SRCS-yes += msvc.h
|
||||
PORTS_SRCS-yes += system_state.h
|
||||
PORTS_SRCS-yes += vpx_timer.h
|
||||
|
||||
ifeq ($(ARCH_X86),yes)
|
||||
ifeq ($(VPX_ARCH_X86),yes)
|
||||
PORTS_SRCS-$(HAVE_MMX) += emms_mmx.c
|
||||
endif
|
||||
ifeq ($(ARCH_X86_64),yes)
|
||||
ifeq ($(VPX_ARCH_X86_64),yes)
|
||||
# Visual Studio x64 does not support the _mm_empty() intrinsic.
|
||||
PORTS_SRCS-$(HAVE_MMX) += emms_mmx.asm
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH_X86_64),yes)
|
||||
ifeq ($(VPX_ARCH_X86_64),yes)
|
||||
PORTS_SRCS-$(CONFIG_MSVS) += float_control_word.asm
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH_X86)$(ARCH_X86_64),yes)
|
||||
ifeq ($(VPX_ARCH_X86)$(VPX_ARCH_X86_64),yes)
|
||||
PORTS_SRCS-yes += x86.h
|
||||
PORTS_SRCS-yes += x86_abi_support.asm
|
||||
endif
|
||||
|
||||
PORTS_SRCS-$(ARCH_ARM) += arm_cpudetect.c
|
||||
PORTS_SRCS-$(ARCH_ARM) += arm.h
|
||||
PORTS_SRCS-$(VPX_ARCH_ARM) += arm_cpudetect.c
|
||||
PORTS_SRCS-$(VPX_ARCH_ARM) += arm.h
|
||||
|
||||
PORTS_SRCS-$(ARCH_PPC) += ppc_cpudetect.c
|
||||
PORTS_SRCS-$(ARCH_PPC) += ppc.h
|
||||
PORTS_SRCS-$(VPX_ARCH_PPC) += ppc_cpudetect.c
|
||||
PORTS_SRCS-$(VPX_ARCH_PPC) += ppc.h
|
||||
|
||||
ifeq ($(ARCH_MIPS), yes)
|
||||
ifeq ($(VPX_ARCH_MIPS), yes)
|
||||
PORTS_SRCS-yes += asmdefs_mmi.h
|
||||
endif
|
||||
|
||||
+8
-8
@@ -43,7 +43,7 @@ typedef enum {
|
||||
} vpx_cpu_t;
|
||||
|
||||
#if defined(__GNUC__) && __GNUC__ || defined(__ANDROID__)
|
||||
#if ARCH_X86_64
|
||||
#if VPX_ARCH_X86_64
|
||||
#define cpuid(func, func2, ax, bx, cx, dx) \
|
||||
__asm__ __volatile__("cpuid \n\t" \
|
||||
: "=a"(ax), "=b"(bx), "=c"(cx), "=d"(dx) \
|
||||
@@ -59,7 +59,7 @@ typedef enum {
|
||||
#endif
|
||||
#elif defined(__SUNPRO_C) || \
|
||||
defined(__SUNPRO_CC) /* end __GNUC__ or __ANDROID__*/
|
||||
#if ARCH_X86_64
|
||||
#if VPX_ARCH_X86_64
|
||||
#define cpuid(func, func2, ax, bx, cx, dx) \
|
||||
asm volatile( \
|
||||
"xchg %rsi, %rbx \n\t" \
|
||||
@@ -79,7 +79,7 @@ typedef enum {
|
||||
: "a"(func), "c"(func2));
|
||||
#endif
|
||||
#else /* end __SUNPRO__ */
|
||||
#if ARCH_X86_64
|
||||
#if VPX_ARCH_X86_64
|
||||
#if defined(_MSC_VER) && _MSC_VER > 1500
|
||||
#define cpuid(func, func2, a, b, c, d) \
|
||||
do { \
|
||||
@@ -253,7 +253,7 @@ static INLINE unsigned int x86_readtsc(void) {
|
||||
asm volatile("rdtsc\n\t" : "=a"(tsc) :);
|
||||
return tsc;
|
||||
#else
|
||||
#if ARCH_X86_64
|
||||
#if VPX_ARCH_X86_64
|
||||
return (unsigned int)__rdtsc();
|
||||
#else
|
||||
__asm rdtsc;
|
||||
@@ -271,7 +271,7 @@ static INLINE uint64_t x86_readtsc64(void) {
|
||||
asm volatile("rdtsc\n\t" : "=a"(lo), "=d"(hi));
|
||||
return ((uint64_t)hi << 32) | lo;
|
||||
#else
|
||||
#if ARCH_X86_64
|
||||
#if VPX_ARCH_X86_64
|
||||
return (uint64_t)__rdtsc();
|
||||
#else
|
||||
__asm rdtsc;
|
||||
@@ -293,7 +293,7 @@ static INLINE unsigned int x86_readtscp(void) {
|
||||
unsigned int ui;
|
||||
return (unsigned int)__rdtscp(&ui);
|
||||
#else
|
||||
#if ARCH_X86_64
|
||||
#if VPX_ARCH_X86_64
|
||||
return (unsigned int)__rdtscp();
|
||||
#else
|
||||
__asm rdtscp;
|
||||
@@ -319,7 +319,7 @@ static INLINE unsigned int x86_tsc_end(void) {
|
||||
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
|
||||
#define x86_pause_hint() asm volatile("pause \n\t")
|
||||
#else
|
||||
#if ARCH_X86_64
|
||||
#if VPX_ARCH_X86_64
|
||||
#define x86_pause_hint() _mm_pause();
|
||||
#else
|
||||
#define x86_pause_hint() __asm pause
|
||||
@@ -344,7 +344,7 @@ static unsigned short x87_get_control_word(void) {
|
||||
asm volatile("fstcw %0\n\t" : "=m"(*&mode) :);
|
||||
return mode;
|
||||
}
|
||||
#elif ARCH_X86_64
|
||||
#elif VPX_ARCH_X86_64
|
||||
/* No fldcw intrinsics on Windows x64, punt to external asm */
|
||||
extern void vpx_winx64_fldcw(unsigned short mode);
|
||||
extern unsigned short vpx_winx64_fstcw(void);
|
||||
|
||||
@@ -51,16 +51,16 @@ extern "C" {
|
||||
do { \
|
||||
} while (0)
|
||||
#else
|
||||
#if ARCH_X86 || ARCH_X86_64
|
||||
#if VPX_ARCH_X86 || VPX_ARCH_X86_64
|
||||
// Use a compiler barrier on x86, no runtime penalty.
|
||||
#define vpx_atomic_memory_barrier() __asm__ __volatile__("" ::: "memory")
|
||||
#elif ARCH_ARM
|
||||
#elif VPX_ARCH_ARM
|
||||
#define vpx_atomic_memory_barrier() __asm__ __volatile__("dmb ish" ::: "memory")
|
||||
#elif ARCH_MIPS
|
||||
#elif VPX_ARCH_MIPS
|
||||
#define vpx_atomic_memory_barrier() __asm__ __volatile__("sync" ::: "memory")
|
||||
#else
|
||||
#error Unsupported architecture!
|
||||
#endif // ARCH_X86 || ARCH_X86_64
|
||||
#endif // VPX_ARCH_X86 || VPX_ARCH_X86_64
|
||||
#endif // defined(_MSC_VER)
|
||||
#endif // atomic builtin availability check
|
||||
|
||||
|
||||
Reference in New Issue
Block a user