Refactor and extend run-time CPU feature detection on Arm
1) Overhaul the Arm CPU feature detection code, taking inspiration from similar recent changes in libaom. 2) Add neon_dotprod and neon_i8mm arch options in the configure, build and unit test files, adding appropriate conditional options where necessary. 3) Soft-enable run-time CPU feature detection by default for both 32-bit and 64-bit Arm platforms. Change-Id: I3f13317d88324acc5753394351188baa8d18a261
This commit is contained in:
@@ -143,6 +143,12 @@ $(BUILD_PFX)%_avx2.c.o: CFLAGS += -mavx2
|
||||
$(BUILD_PFX)%_avx512.c.d: CFLAGS += -mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl
|
||||
$(BUILD_PFX)%_avx512.c.o: CFLAGS += -mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl
|
||||
|
||||
# AARCH64
|
||||
$(BUILD_PFX)%_neon_dotprod.c.d: CFLAGS += -march=armv8.2-a+dotprod
|
||||
$(BUILD_PFX)%_neon_dotprod.c.o: CFLAGS += -march=armv8.2-a+dotprod
|
||||
$(BUILD_PFX)%_neon_i8mm.c.d: CFLAGS += -march=armv8.2-a+dotprod+i8mm
|
||||
$(BUILD_PFX)%_neon_i8mm.c.o: CFLAGS += -march=armv8.2-a+dotprod+i8mm
|
||||
|
||||
# POWER
|
||||
$(BUILD_PFX)%_vsx.c.d: CFLAGS += -maltivec -mvsx
|
||||
$(BUILD_PFX)%_vsx.c.o: CFLAGS += -maltivec -mvsx
|
||||
|
||||
+15
-2
@@ -973,10 +973,23 @@ process_common_toolchain() {
|
||||
# Process architecture variants
|
||||
case ${toolchain} in
|
||||
arm*)
|
||||
# on arm, isa versions are supersets
|
||||
soft_enable runtime_cpu_detect
|
||||
# Arm ISA extensions are treated as supersets.
|
||||
case ${tgt_isa} in
|
||||
arm64|armv8)
|
||||
soft_enable neon
|
||||
for ext in ${ARCH_EXT_LIST_AARCH64}; do
|
||||
# Disable higher order extensions to simplify dependencies.
|
||||
if [ "$disable_exts" = "yes" ]; then
|
||||
if ! disabled $ext; then
|
||||
RTCD_OPTIONS="${RTCD_OPTIONS}--disable-${ext} "
|
||||
disable_feature $ext
|
||||
fi
|
||||
elif disabled $ext; then
|
||||
disable_exts="yes"
|
||||
else
|
||||
soft_enable $ext
|
||||
fi
|
||||
done
|
||||
;;
|
||||
armv7|armv7s)
|
||||
soft_enable neon
|
||||
|
||||
+1
-1
@@ -487,7 +487,7 @@ if ($opts{arch} eq 'x86') {
|
||||
@ALL_ARCHS = filter(qw/neon_asm neon/);
|
||||
arm;
|
||||
} elsif ($opts{arch} eq 'armv8' || $opts{arch} eq 'arm64' ) {
|
||||
@ALL_ARCHS = filter(qw/neon/);
|
||||
@ALL_ARCHS = filter(qw/neon neon_dotprod neon_i8mm/);
|
||||
@REQUIRES = filter(qw/neon/);
|
||||
&require(@REQUIRES);
|
||||
arm;
|
||||
|
||||
@@ -252,6 +252,13 @@ ARCH_LIST="
|
||||
ppc
|
||||
loongarch
|
||||
"
|
||||
|
||||
ARCH_EXT_LIST_AARCH64="
|
||||
neon
|
||||
neon_dotprod
|
||||
neon_i8mm
|
||||
"
|
||||
|
||||
ARCH_EXT_LIST_X86="
|
||||
mmx
|
||||
sse
|
||||
@@ -271,8 +278,8 @@ ARCH_EXT_LIST_LOONGSON="
|
||||
"
|
||||
|
||||
ARCH_EXT_LIST="
|
||||
neon
|
||||
neon_asm
|
||||
${ARCH_EXT_LIST_AARCH64}
|
||||
|
||||
mips32
|
||||
dspr2
|
||||
|
||||
+22
-2
@@ -12,6 +12,9 @@
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
|
||||
#include "./vpx_config.h"
|
||||
#if VPX_ARCH_ARM
|
||||
#include "vpx_ports/arm.h"
|
||||
#endif
|
||||
#if VPX_ARCH_X86 || VPX_ARCH_X86_64
|
||||
#include "vpx_ports/x86.h"
|
||||
#endif
|
||||
@@ -26,7 +29,7 @@ extern void vpx_dsp_rtcd();
|
||||
extern void vpx_scale_rtcd();
|
||||
}
|
||||
|
||||
#if VPX_ARCH_X86 || VPX_ARCH_X86_64
|
||||
#if (!CONFIG_SHARED && VPX_ARCH_ARM) || VPX_ARCH_X86 || VPX_ARCH_X86_64
|
||||
static void append_negative_gtest_filter(const char *str) {
|
||||
std::string filter = ::testing::FLAGS_gtest_filter;
|
||||
// Negative patterns begin with one '-' followed by a ':' separated list.
|
||||
@@ -34,11 +37,28 @@ static void append_negative_gtest_filter(const char *str) {
|
||||
filter += str;
|
||||
::testing::FLAGS_gtest_filter = filter;
|
||||
}
|
||||
#endif // VPX_ARCH_X86 || VPX_ARCH_X86_64
|
||||
#endif // (!CONFIG_SHARED && VPX_ARCH_ARM) || VPX_ARCH_X86 || VPX_ARCH_X86_64
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
::testing::InitGoogleTest(&argc, argv);
|
||||
|
||||
#if !CONFIG_SHARED
|
||||
#if VPX_ARCH_AARCH64
|
||||
const int caps = arm_cpu_caps();
|
||||
if (!(caps & HAS_NEON_DOTPROD)) {
|
||||
append_negative_gtest_filter(":NEON_DOTPROD.*:NEON_DOTPROD/*");
|
||||
}
|
||||
if (!(caps & HAS_NEON_I8MM)) {
|
||||
append_negative_gtest_filter(":NEON_I8MM.*:NEON_I8MM/*");
|
||||
}
|
||||
#elif VPX_ARCH_ARM
|
||||
const int caps = arm_cpu_caps();
|
||||
if (!(caps & HAS_NEON)) {
|
||||
append_negative_gtest_filter(":NEON.*:NEON/*");
|
||||
}
|
||||
#endif // VPX_ARCH_ARM
|
||||
#endif // !CONFIG_SHARED
|
||||
|
||||
#if VPX_ARCH_X86 || VPX_ARCH_X86_64
|
||||
const int simd_caps = x86_simd_caps();
|
||||
if (!(simd_caps & HAS_MMX)) append_negative_gtest_filter(":MMX.*:MMX/*");
|
||||
|
||||
@@ -0,0 +1,89 @@
|
||||
/*
|
||||
* Copyright (c) 2023 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
// Feature detection code for Armv7-A / AArch32.
|
||||
|
||||
#include "arm_cpudetect.h"
|
||||
|
||||
#if !CONFIG_RUNTIME_CPU_DETECT
|
||||
|
||||
// Reports the SIMD capabilities fixed at configure time (Armv7-A / AArch32,
// run-time CPU detection disabled).
//
// This function should effectively be a no-op: without run-time detection the
// RTCD tables do not exist and the optimised functions are called statically,
// so nothing is probed and the result only mirrors the build configuration.
//
// Returns a bitmask of HAS_* flags.
static int arm_get_cpu_caps(void) {
  int flags = 0;
  // A build that enables only the Neon assembly option still relies on Neon,
  // so it is reported as Neon capable, in line with the run-time detection
  // variants of this function in this file.
#if HAVE_NEON || HAVE_NEON_ASM
  flags |= HAS_NEON;
#endif  // HAVE_NEON || HAVE_NEON_ASM
  return flags;
}
|
||||
|
||||
#elif defined(_MSC_VER) // end !CONFIG_RUNTIME_CPU_DETECT
|
||||
|
||||
// Detects Neon on 32-bit Arm when building with MSVC by executing a Neon
// instruction inside a structured-exception guard.
//
// Returns a bitmask of HAS_* flags; HAS_NEON is set only if the probe
// instruction ran without raising EXCEPTION_ILLEGAL_INSTRUCTION.
static int arm_get_cpu_caps(void) {
  int caps = 0;
#if HAVE_NEON || HAVE_NEON_ASM
  // MSVC offers no inline __asm for Arm, but instructions can be emitted via
  // their assembled hex encoding. The probe below should be essentially a nop.
  __try {
    __emit(0xF2200150);  // VORR q0,q0,q0
    caps = caps | HAS_NEON;
  } __except (GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION) {
    // The instruction is not supported: leave HAS_NEON clear.
  }
#endif  // HAVE_NEON || HAVE_NEON_ASM
  return caps;
}
|
||||
|
||||
#elif defined(ANDROID_USE_CPU_FEATURES_LIB)
|
||||
|
||||
// Detects Neon on 32-bit Arm through the Android cpu-features library.
//
// Returns HAS_NEON when a Neon code path was built and
// android_getCpuFeatures() reports ANDROID_CPU_ARM_FEATURE_NEON, otherwise 0.
static int arm_get_cpu_caps(void) {
#if HAVE_NEON || HAVE_NEON_ASM
  const uint64_t cpu_features = android_getCpuFeatures();
  return (cpu_features & ANDROID_CPU_ARM_FEATURE_NEON) ? HAS_NEON : 0;
#else
  return 0;
#endif  // HAVE_NEON || HAVE_NEON_ASM
}
|
||||
|
||||
#elif defined(__linux__) // end defined(ANDROID_USE_CPU_FEATURES_LIB)
|
||||
|
||||
#include <sys/auxv.h>
|
||||
|
||||
// Define hwcap values ourselves: building with an old auxv header where these
|
||||
// hwcap values are not defined should not prevent features from being enabled.
|
||||
#define VPX_AARCH32_HWCAP_NEON (1 << 12)
|
||||
|
||||
// Detects Neon on 32-bit Arm Linux from the AT_HWCAP auxiliary vector entry.
//
// Returns a bitmask of HAS_* flags; HAS_NEON is set when a Neon code path was
// built and the kernel advertises VPX_AARCH32_HWCAP_NEON.
static int arm_get_cpu_caps(void) {
  int flags = 0;
#if HAVE_NEON || HAVE_NEON_ASM
  // The auxiliary vector is only read when its value is actually needed, so a
  // build with every Neon option disabled neither performs a useless lookup
  // nor triggers an unused-variable warning.
  const unsigned long hwcap = getauxval(AT_HWCAP);
  if (hwcap & VPX_AARCH32_HWCAP_NEON) {
    flags |= HAS_NEON;
  }
#endif  // HAVE_NEON || HAVE_NEON_ASM
  return flags;
}
|
||||
#else // end __linux__
|
||||
#error \
|
||||
"Runtime CPU detection selected, but no CPU detection method available" \
|
||||
"for your platform. Rerun configure with --disable-runtime-cpu-detect."
|
||||
#endif
|
||||
|
||||
// Returns the HAS_* capability bitmask for Armv7-A / AArch32.
//
// If arm_cpu_env_flags() reports an override, the flags it supplies are
// returned unchanged. Otherwise the detected capabilities are filtered through
// the mask returned by arm_cpu_env_mask().
int arm_cpu_caps(void) {
  int env_flags = 0;
  const int overridden = arm_cpu_env_flags(&env_flags);
  return overridden ? env_flags : (arm_get_cpu_caps() & arm_cpu_env_mask());
}
|
||||
@@ -0,0 +1,173 @@
|
||||
/*
|
||||
* Copyright (c) 2023 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "arm_cpudetect.h"
|
||||
|
||||
#if defined(__APPLE__)
|
||||
#include <sys/sysctl.h>
|
||||
#endif
|
||||
|
||||
#if !CONFIG_RUNTIME_CPU_DETECT
|
||||
|
||||
// Reports the SIMD capabilities fixed at configure time (run-time CPU
// detection disabled).
//
// This is effectively a no-op: the RTCD tables do not exist and the functions
// are called statically, so none of this can be adjusted at run time.
//
// Returns HAS_NEON when Neon was enabled in the build, otherwise 0.
static int arm_get_cpu_caps(void) {
#if HAVE_NEON
  return HAS_NEON;
#else
  return 0;
#endif  // HAVE_NEON
}
|
||||
|
||||
#elif defined(__APPLE__) // end !CONFIG_RUNTIME_CPU_DETECT
|
||||
|
||||
// sysctlbyname() parameter documentation for instruction set characteristics:
|
||||
// https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics
|
||||
// Queries a sysctl by name and returns its value.
//
// feature: the sysctl name to look up, e.g. "hw.optional.arm.FEAT_DotProd".
//
// Returns the value written by sysctlbyname(), or 0 when the call fails.
static INLINE int64_t have_feature(const char *feature) {
  int64_t value = 0;
  size_t value_size = sizeof(value);
  const int rc = sysctlbyname(feature, &value, &value_size, NULL, 0);
  return (rc == 0) ? value : 0;
}
|
||||
|
||||
// Detects AArch64 SIMD extensions on Apple platforms via have_feature().
//
// Returns a bitmask of HAS_* flags. HAS_NEON is set whenever Neon was enabled
// in the build; the dot-product and i8mm bits are set only when the
// corresponding extension was built and its sysctl reports a non-zero value.
static int arm_get_cpu_caps(void) {
  int caps = 0;
#if HAVE_NEON
  caps |= HAS_NEON;
#endif  // HAVE_NEON
#if HAVE_NEON_DOTPROD
  if (have_feature("hw.optional.arm.FEAT_DotProd") != 0) {
    caps |= HAS_NEON_DOTPROD;
  }
#endif  // HAVE_NEON_DOTPROD
#if HAVE_NEON_I8MM
  if (have_feature("hw.optional.arm.FEAT_I8MM") != 0) {
    caps |= HAS_NEON_I8MM;
  }
#endif  // HAVE_NEON_I8MM
  return caps;
}
|
||||
|
||||
#elif defined(_MSC_VER) // end __APPLE__
|
||||
|
||||
// Detects AArch64 SIMD extensions when building with MSVC.
//
// Returns a bitmask of HAS_* flags. I8MM is never reported: no detection
// method for it was available on Windows at the time of writing.
static int arm_get_cpu_caps(void) {
  int caps = 0;
#if HAVE_NEON
  caps |= HAS_NEON;  // Neon is mandatory in Armv8.0-A.
#endif  // HAVE_NEON
  // IsProcessorFeaturePresent() parameter documentation:
  // https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent#parameters
  //
  // PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE was added in Windows SDK 20348
  // (supported by Windows 11 and Windows Server 2022), so dot-product
  // detection is compiled in only when the SDK headers define it.
#if HAVE_NEON_DOTPROD && defined(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)
  if (IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)) {
    caps |= HAS_NEON_DOTPROD;
  }
#endif  // HAVE_NEON_DOTPROD && defined(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)
  return caps;
}
|
||||
|
||||
#elif defined(ANDROID_USE_CPU_FEATURES_LIB)
|
||||
|
||||
// Capability reporting for AArch64 builds that use the Android cpu-features
// library. Nothing is probed here; only Neon is reported.
//
// Returns HAS_NEON when Neon was enabled in the build, otherwise 0.
static int arm_get_cpu_caps(void) {
#if HAVE_NEON
  return HAS_NEON;  // Neon is mandatory in Armv8.0-A.
#else
  return 0;
#endif  // HAVE_NEON
}
|
||||
|
||||
#elif defined(__linux__) // end defined(ANDROID_USE_CPU_FEATURES_LIB)
|
||||
|
||||
#include <sys/auxv.h>
|
||||
|
||||
// Define hwcap values ourselves: building with an old auxv header where these
|
||||
// hwcap values are not defined should not prevent features from being enabled.
|
||||
#define VPX_AARCH64_HWCAP_ASIMDDP (1 << 20)
|
||||
#define VPX_AARCH64_HWCAP2_I8MM (1 << 13)
|
||||
|
||||
// Detects AArch64 SIMD extensions on Linux from the AT_HWCAP / AT_HWCAP2
// auxiliary vector entries.
//
// Returns a bitmask of HAS_* flags. HAS_NEON is set whenever Neon was enabled
// in the build; the dot-product and i8mm bits are set only when the
// corresponding extension was built and the kernel advertises it.
static int arm_get_cpu_caps(void) {
  int flags = 0;
#if HAVE_NEON
  flags |= HAS_NEON;  // Neon is mandatory in Armv8.0-A.
#endif  // HAVE_NEON
  // Each auxiliary vector entry is read only inside the section that consumes
  // it, so a build with an extension disabled neither performs a useless
  // lookup nor triggers an unused-variable warning.
#if HAVE_NEON_DOTPROD
  if (getauxval(AT_HWCAP) & VPX_AARCH64_HWCAP_ASIMDDP) {
    flags |= HAS_NEON_DOTPROD;
  }
#endif  // HAVE_NEON_DOTPROD
#if HAVE_NEON_I8MM
  if (getauxval(AT_HWCAP2) & VPX_AARCH64_HWCAP2_I8MM) {
    flags |= HAS_NEON_I8MM;
  }
#endif  // HAVE_NEON_I8MM
  return flags;
}
|
||||
|
||||
#elif defined(__Fuchsia__) // end __linux__
|
||||
|
||||
#include <zircon/features.h>
|
||||
#include <zircon/syscalls.h>
|
||||
|
||||
// Added in https://fuchsia-review.googlesource.com/c/fuchsia/+/894282.
|
||||
#ifndef ZX_ARM64_FEATURE_ISA_I8MM
|
||||
#define ZX_ARM64_FEATURE_ISA_I8MM ((uint32_t)(1u << 19))
|
||||
#endif
|
||||
|
||||
static int arm_get_cpu_caps(void) {
|
||||
int flags = 0;
|
||||
#if HAVE_NEON
|
||||
flags |= HAS_NEON; // Neon is mandatory in Armv8.0-A.
|
||||
#endif // HAVE_NEON
|
||||
uint32_t features;
|
||||
zx_status_t status = zx_system_get_features(ZX_FEATURE_KIND_CPU, &features);
|
||||
if (status != ZX_OK) {
|
||||
return flags;
|
||||
}
|
||||
#if HAVE_NEON_DOTPROD
|
||||
if (features & ZX_ARM64_FEATURE_ISA_DP) {
|
||||
flags |= HAS_NEON_DOTPROD;
|
||||
}
|
||||
#endif // HAVE_NEON_DOTPROD
|
||||
#if HAVE_NEON_I8MM
|
||||
if (features & ZX_ARM64_FEATURE_ISA_I8MM) {
|
||||
flags |= HAS_NEON_I8MM;
|
||||
}
|
||||
#endif // HAVE_NEON_I8MM
|
||||
return flags;
|
||||
}
|
||||
|
||||
#else // end __Fuchsia__
|
||||
#error \
|
||||
"Runtime CPU detection selected, but no CPU detection method available" \
|
||||
"for your platform. Rerun configure with --disable-runtime-cpu-detect."
|
||||
#endif
|
||||
|
||||
int arm_cpu_caps(void) {
|
||||
int flags = 0;
|
||||
if (!arm_cpu_env_flags(&flags)) {
|
||||
flags = arm_get_cpu_caps() & arm_cpu_env_mask();
|
||||
}
|
||||
|
||||
// Restrict flags: FEAT_I8MM assumes that FEAT_DotProd is available.
|
||||
if (!(flags & HAS_NEON_DOTPROD)) {
|
||||
flags &= ~HAS_NEON_I8MM;
|
||||
}
|
||||
|
||||
return flags;
|
||||
}
|
||||
+6
-6
@@ -17,12 +17,12 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*ARMv5TE "Enhanced DSP" instructions.*/
|
||||
#define HAS_EDSP 0x01
|
||||
/*ARMv6 "Parallel" or "Media" instructions.*/
|
||||
#define HAS_MEDIA 0x02
|
||||
/*ARMv7 optional NEON instructions.*/
|
||||
#define HAS_NEON 0x04
|
||||
// Armv7-A optional Neon instructions, mandatory from Armv8.0-A.
|
||||
#define HAS_NEON (1 << 0)
|
||||
// Armv8.2-A optional Neon dot-product instructions, mandatory from Armv8.4-A.
|
||||
#define HAS_NEON_DOTPROD (1 << 1)
|
||||
// Armv8.2-A optional Neon i8mm instructions, mandatory from Armv8.6-A.
|
||||
#define HAS_NEON_I8MM (1 << 2)
|
||||
|
||||
int arm_cpu_caps(void);
|
||||
|
||||
|
||||
@@ -1,154 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "./vpx_config.h"
|
||||
#include "vpx_ports/arm.h"
|
||||
|
||||
#ifdef WINAPI_FAMILY
|
||||
#include <winapifamily.h>
|
||||
#if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
|
||||
#define getenv(x) NULL
|
||||
#endif
|
||||
#endif
|
||||
|
||||
static int arm_cpu_env_flags(int *flags) {
|
||||
char *env;
|
||||
env = getenv("VPX_SIMD_CAPS");
|
||||
if (env && *env) {
|
||||
*flags = (int)strtol(env, NULL, 0);
|
||||
return 0;
|
||||
}
|
||||
*flags = 0;
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int arm_cpu_env_mask(void) {
|
||||
char *env;
|
||||
env = getenv("VPX_SIMD_CAPS_MASK");
|
||||
return env && *env ? (int)strtol(env, NULL, 0) : ~0;
|
||||
}
|
||||
|
||||
#if !CONFIG_RUNTIME_CPU_DETECT
|
||||
|
||||
int arm_cpu_caps(void) {
|
||||
/* This function should actually be a no-op. There is no way to adjust any of
|
||||
* these because the RTCD tables do not exist: the functions are called
|
||||
* statically */
|
||||
int flags;
|
||||
int mask;
|
||||
if (!arm_cpu_env_flags(&flags)) {
|
||||
return flags;
|
||||
}
|
||||
mask = arm_cpu_env_mask();
|
||||
#if HAVE_NEON || HAVE_NEON_ASM
|
||||
flags |= HAS_NEON;
|
||||
#endif /* HAVE_NEON || HAVE_NEON_ASM */
|
||||
return flags & mask;
|
||||
}
|
||||
|
||||
#elif defined(_MSC_VER) /* end !CONFIG_RUNTIME_CPU_DETECT */
|
||||
/*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/
|
||||
#ifndef WIN32_LEAN_AND_MEAN
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#endif
|
||||
#ifndef WIN32_EXTRA_LEAN
|
||||
#define WIN32_EXTRA_LEAN
|
||||
#endif
|
||||
#include <windows.h>
|
||||
|
||||
int arm_cpu_caps(void) {
|
||||
int flags;
|
||||
int mask;
|
||||
if (!arm_cpu_env_flags(&flags)) {
|
||||
return flags;
|
||||
}
|
||||
mask = arm_cpu_env_mask();
|
||||
/* MSVC has no inline __asm support for ARM, but it does let you __emit
|
||||
* instructions via their assembled hex code.
|
||||
* All of these instructions should be essentially nops.
|
||||
*/
|
||||
#if HAVE_NEON || HAVE_NEON_ASM
|
||||
if (mask & HAS_NEON) {
|
||||
__try {
|
||||
/*VORR q0,q0,q0*/
|
||||
__emit(0xF2200150);
|
||||
flags |= HAS_NEON;
|
||||
} __except (GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION) {
|
||||
/*Ignore exception.*/
|
||||
}
|
||||
}
|
||||
#endif /* HAVE_NEON || HAVE_NEON_ASM */
|
||||
return flags & mask;
|
||||
}
|
||||
|
||||
#elif defined(__ANDROID__) /* end _MSC_VER */
|
||||
#include <cpu-features.h>
|
||||
|
||||
int arm_cpu_caps(void) {
|
||||
int flags;
|
||||
int mask;
|
||||
uint64_t features;
|
||||
if (!arm_cpu_env_flags(&flags)) {
|
||||
return flags;
|
||||
}
|
||||
mask = arm_cpu_env_mask();
|
||||
features = android_getCpuFeatures();
|
||||
|
||||
#if HAVE_NEON || HAVE_NEON_ASM
|
||||
if (features & ANDROID_CPU_ARM_FEATURE_NEON) flags |= HAS_NEON;
|
||||
#endif /* HAVE_NEON || HAVE_NEON_ASM */
|
||||
return flags & mask;
|
||||
}
|
||||
|
||||
#elif defined(__linux__) /* end __ANDROID__ */
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
int arm_cpu_caps(void) {
|
||||
FILE *fin;
|
||||
int flags;
|
||||
int mask;
|
||||
if (!arm_cpu_env_flags(&flags)) {
|
||||
return flags;
|
||||
}
|
||||
mask = arm_cpu_env_mask();
|
||||
/* Reading /proc/self/auxv would be easier, but that doesn't work reliably
|
||||
* on Android.
|
||||
* This also means that detection will fail in Scratchbox.
|
||||
*/
|
||||
fin = fopen("/proc/cpuinfo", "r");
|
||||
if (fin != NULL) {
|
||||
/* 512 should be enough for anybody (it's even enough for all the flags
|
||||
* that x86 has accumulated... so far).
|
||||
*/
|
||||
char buf[512];
|
||||
while (fgets(buf, 511, fin) != NULL) {
|
||||
#if HAVE_NEON || HAVE_NEON_ASM
|
||||
if (memcmp(buf, "Features", 8) == 0) {
|
||||
char *p;
|
||||
p = strstr(buf, " neon");
|
||||
if (p != NULL && (p[5] == ' ' || p[5] == '\n')) {
|
||||
flags |= HAS_NEON;
|
||||
}
|
||||
}
|
||||
#endif /* HAVE_NEON || HAVE_NEON_ASM */
|
||||
}
|
||||
fclose(fin);
|
||||
}
|
||||
return flags & mask;
|
||||
}
|
||||
#else /* end __linux__ */
|
||||
#error \
|
||||
"--enable-runtime-cpu-detect selected, but no CPU detection method " \
|
||||
"available for your platform. Reconfigure with --disable-runtime-cpu-detect."
|
||||
#endif
|
||||
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright (c) 2023 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vpx_ports/arm.h"
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#undef WIN32_LEAN_AND_MEAN
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#undef WIN32_EXTRA_LEAN
|
||||
#define WIN32_EXTRA_LEAN
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
#ifdef WINAPI_FAMILY
|
||||
#include <winapifamily.h>
|
||||
#if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
|
||||
#define getenv(x) NULL
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__ANDROID__) && (__ANDROID_API__ < 18)
|
||||
#define ANDROID_USE_CPU_FEATURES_LIB 1
|
||||
// Use getauxval() when targeting (64-bit) Android with API level >= 18.
|
||||
// getauxval() is supported since Android API level 18 (Android 4.3.)
|
||||
// First Android version with 64-bit support was Android 5.x (API level 21).
|
||||
#include <cpu-features.h>
|
||||
#endif
|
||||
|
||||
// Reads an explicit capability override from the VPX_SIMD_CAPS environment
// variable.
//
// flags: receives the parsed value (strtol() with base 0, so decimal, octal
//        and hexadecimal notations are accepted) when the variable is set to
//        a non-empty string; left untouched otherwise.
//
// Returns 1 if an override was stored in *flags, 0 if not.
static INLINE int arm_cpu_env_flags(int *flags) {
  const char *const value = getenv("VPX_SIMD_CAPS");
  if (value == NULL || value[0] == '\0') {
    return 0;
  }
  *flags = (int)strtol(value, NULL, 0);
  return 1;
}
|
||||
|
||||
// Reads a capability mask from the VPX_SIMD_CAPS_MASK environment variable.
//
// Returns the parsed value (strtol() with base 0) when the variable is set to
// a non-empty string, or ~0 (all bits set) otherwise.
static INLINE int arm_cpu_env_mask(void) {
  const char *const value = getenv("VPX_SIMD_CAPS_MASK");
  if (value != NULL && *value != '\0') {
    return (int)strtol(value, NULL, 0);
  }
  return ~0;
}
|
||||
@@ -36,7 +36,12 @@ PORTS_SRCS-yes += x86.h
|
||||
PORTS_SRCS-yes += x86_abi_support.asm
|
||||
endif
|
||||
|
||||
PORTS_SRCS-$(VPX_ARCH_ARM) += arm_cpudetect.c
|
||||
ifeq ($(VPX_ARCH_AARCH64),yes)
|
||||
PORTS_SRCS-yes += aarch64_cpudetect.c
|
||||
else
|
||||
PORTS_SRCS-$(VPX_ARCH_ARM) += aarch32_cpudetect.c
|
||||
endif
|
||||
PORTS_SRCS-$(VPX_ARCH_ARM) += arm_cpudetect.h
|
||||
PORTS_SRCS-$(VPX_ARCH_ARM) += arm.h
|
||||
|
||||
PORTS_SRCS-$(VPX_ARCH_PPC) += ppc_cpudetect.c
|
||||
|
||||
Reference in New Issue
Block a user