317 lines
7.1 KiB
C++
317 lines
7.1 KiB
C++
// SPDX-License-Identifier: Apache-2.0
|
|
// ----------------------------------------------------------------------------
|
|
// Copyright 2020-2024 Arm Limited
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
|
// use this file except in compliance with the License. You may obtain a copy
|
|
// of the License at:
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
// License for the specific language governing permissions and limitations
|
|
// under the License.
|
|
// ----------------------------------------------------------------------------
|
|
|
|
/**
|
|
* @brief Application entry point.
|
|
*
|
|
* This module contains the first command line entry point veneer, used to
|
|
* validate that the host extended ISA availability matches the tool build.
|
|
* It is compiled without any extended ISA support so it's guaranteed to be
|
|
* executable without any invalid instruction errors.
|
|
*/
|
|
|
|
#include <cstdio>
|
|
|
|
/**
 * @brief The main veneer entry point.
 *
 * Only declared here; main() in this file forwards its arguments to it after
 * the host CPU ISA validation has passed.
 *
 * @param argc   The number of arguments.
 * @param argv   The vector of arguments.
 *
 * @return 0 on success, non-zero otherwise.
 */
int astcenc_main_veneer(int argc, char **argv);
|
|
|
|
// x86-64 builds
|
|
#if (ASTCENC_SSE > 20) || (ASTCENC_AVX > 0) || \
|
|
(ASTCENC_POPCNT > 0) || (ASTCENC_F16C > 0)
|
|
|
|
/** Has detect_cpu_isa() run and populated the feature flags below? */
static bool g_init = false;

/** Does this CPU support SSE 4.1? Stays @c false until detection has run. */
static bool g_cpu_has_sse41 = false;

/** Does this CPU support AVX2? Stays @c false until detection has run. */
static bool g_cpu_has_avx2 = false;

/** Does this CPU support POPCNT? Stays @c false until detection has run. */
static bool g_cpu_has_popcnt = false;

/** Does this CPU support F16C? Stays @c false until detection has run. */
static bool g_cpu_has_f16c = false;
|
|
|
|
/* ============================================================================
|
|
Platform code for Visual Studio
|
|
============================================================================ */
|
|
#if !defined(__clang__) && defined(_MSC_VER)
|
|
#define WIN32_LEAN_AND_MEAN
|
|
#include <windows.h>
|
|
#include <intrin.h>
|
|
|
|
/**
|
|
* @brief Detect platform CPU ISA support and update global trackers.
|
|
*/
|
|
static void detect_cpu_isa()
|
|
{
|
|
int data[4];
|
|
|
|
__cpuid(data, 0);
|
|
int num_id = data[0];
|
|
|
|
if (num_id >= 1)
|
|
{
|
|
__cpuidex(data, 1, 0);
|
|
// SSE41 = Bank 1, ECX, bit 19
|
|
g_cpu_has_sse41 = data[2] & (1 << 19) ? true : false;
|
|
// POPCNT = Bank 1, ECX, bit 23
|
|
g_cpu_has_popcnt = data[2] & (1 << 23) ? true : false;
|
|
// F16C = Bank 1, ECX, bit 29
|
|
g_cpu_has_f16c = data[2] & (1 << 29) ? true : false;
|
|
}
|
|
|
|
if (num_id >= 7)
|
|
{
|
|
__cpuidex(data, 7, 0);
|
|
// AVX2 = Bank 7, EBX, bit 5
|
|
g_cpu_has_avx2 = data[1] & (1 << 5) ? true : false;
|
|
}
|
|
|
|
// Ensure state bits are updated before init flag is updated
|
|
MemoryBarrier();
|
|
g_init = true;
|
|
}
|
|
|
|
/* ============================================================================
|
|
Platform code for GCC and Clang
|
|
============================================================================ */
|
|
#else
|
|
#include <cpuid.h>
|
|
|
|
/**
|
|
* @brief Detect platform CPU ISA support and update global trackers.
|
|
*/
|
|
static void detect_cpu_isa()
|
|
{
|
|
unsigned int data[4];
|
|
|
|
if (__get_cpuid_count(1, 0, &data[0], &data[1], &data[2], &data[3]))
|
|
{
|
|
// SSE41 = Bank 1, ECX, bit 19
|
|
g_cpu_has_sse41 = data[2] & (1 << 19) ? true : false;
|
|
// POPCNT = Bank 1, ECX, bit 23
|
|
g_cpu_has_popcnt = data[2] & (1 << 23) ? true : false;
|
|
// F16C = Bank 1, ECX, bit 29
|
|
g_cpu_has_f16c = data[2] & (1 << 29) ? true : false;
|
|
}
|
|
|
|
g_cpu_has_avx2 = 0;
|
|
if (__get_cpuid_count(7, 0, &data[0], &data[1], &data[2], &data[3]))
|
|
{
|
|
// AVX2 = Bank 7, EBX, bit 5
|
|
g_cpu_has_avx2 = data[1] & (1 << 5) ? true : false;
|
|
}
|
|
|
|
// Ensure state bits are updated before init flag is updated
|
|
__sync_synchronize();
|
|
g_init = true;
|
|
}
|
|
#endif
|
|
|
|
#if ASTCENC_POPCNT > 0
|
|
/**
|
|
* @brief Run-time detection if the host CPU supports the POPCNT extension.
|
|
*
|
|
* @return @c true if supported, @c false if not.
|
|
*/
|
|
static bool cpu_supports_popcnt()
|
|
{
|
|
if (!g_init)
|
|
{
|
|
detect_cpu_isa();
|
|
}
|
|
|
|
return g_cpu_has_popcnt;
|
|
}
|
|
#endif
|
|
|
|
#if ASTCENC_F16C > 0
|
|
/**
|
|
* @brief Run-time detection if the host CPU supports F16C extension.
|
|
*
|
|
* @return @c true if supported, @c false if not.
|
|
*/
|
|
static bool cpu_supports_f16c()
|
|
{
|
|
if (!g_init)
|
|
{
|
|
detect_cpu_isa();
|
|
}
|
|
|
|
return g_cpu_has_f16c;
|
|
}
|
|
#endif
|
|
|
|
#if ASTCENC_SSE >= 41
|
|
/**
|
|
* @brief Run-time detection if the host CPU supports SSE 4.1 extension.
|
|
*
|
|
* @return @c true if supported, @c false if not.
|
|
*/
|
|
static bool cpu_supports_sse41()
|
|
{
|
|
if (!g_init)
|
|
{
|
|
detect_cpu_isa();
|
|
}
|
|
|
|
return g_cpu_has_sse41;
|
|
}
|
|
#endif
|
|
|
|
#if ASTCENC_AVX >= 2
|
|
/**
|
|
* @brief Run-time detection if the host CPU supports AVX 2 extension.
|
|
*
|
|
* @return @c true if supported, @c false if not.
|
|
*/
|
|
static bool cpu_supports_avx2()
|
|
{
|
|
if (!g_init)
|
|
{
|
|
detect_cpu_isa();
|
|
}
|
|
|
|
return g_cpu_has_avx2;
|
|
}
|
|
#endif
|
|
|
|
/**
 * @brief Write a message to stderr.
 *
 * @param format   The text to print. It is emitted verbatim and is never
 *                 interpreted as a printf-style format string.
 */
static inline void print_error(const char* format)
{
    fputs(format, stderr);
}
|
|
|
|
/**
|
|
* @brief Validate CPU ISA support meets the requirements of this build of the library.
|
|
*
|
|
* Each library build is statically compiled for a particular set of CPU ISA features, such as the
|
|
* SIMD support or other ISA extensions such as POPCNT. This function checks that the host CPU
|
|
* actually supports everything this build needs.
|
|
*
|
|
* @return Return @c true if validated, @c false otherwise.
|
|
*/
|
|
static bool validate_cpu_isa()
|
|
{
|
|
#if ASTCENC_AVX >= 2
|
|
if (!cpu_supports_avx2())
|
|
{
|
|
print_error("ERROR: Host does not support AVX2 ISA extension\n");
|
|
return false;
|
|
}
|
|
#endif
|
|
|
|
#if ASTCENC_F16C >= 1
|
|
if (!cpu_supports_f16c())
|
|
{
|
|
print_error("ERROR: Host does not support F16C ISA extension\n");
|
|
return false;
|
|
}
|
|
#endif
|
|
|
|
#if ASTCENC_SSE >= 41
|
|
if (!cpu_supports_sse41())
|
|
{
|
|
print_error("ERROR: Host does not support SSE4.1 ISA extension\n");
|
|
return false;
|
|
}
|
|
#endif
|
|
|
|
#if ASTCENC_POPCNT >= 1
|
|
if (!cpu_supports_popcnt())
|
|
{
|
|
print_error("ERROR: Host does not support POPCNT ISA extension\n");
|
|
return false;
|
|
}
|
|
#endif
|
|
|
|
return true;
|
|
}
|
|
|
|
// Validate Arm SVE availability
|
|
#elif ASTCENC_SVE != 0
|
|
|
|
#include <sys/auxv.h>
|
|
static bool cpu_supports_sve()
|
|
{
|
|
long hwcaps = getauxval(AT_HWCAP);
|
|
return (hwcaps & HWCAP_SVE) != 0;
|
|
}
|
|
|
|
/**
 * @brief Write a message to stderr.
 *
 * @param format   The text to print. It is emitted verbatim and is never
 *                 interpreted as a printf-style format string.
 */
static inline void print_error(const char* format)
{
    fputs(format, stderr);
}
|
|
|
|
/**
|
|
* @brief Validate that SVE is supported.
|
|
*
|
|
* Note that this function checks that SVE is supported, but because it
|
|
* runs in the veneer which is compiled without SVE support, we cannot
|
|
* check the SVE width is correct. This is checked later.
|
|
*/
|
|
static bool validate_cpu_isa()
|
|
{
|
|
if (!cpu_supports_sve())
|
|
{
|
|
print_error("ERROR: Host does not support SVE ISA extension\n");
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
#else
|
|
|
|
/**
 * @brief Fallback validation for builds with no dynamic ISA availability.
 *
 * @return Always @c true; there is nothing to check at run time.
 */
static bool validate_cpu_isa()
{
    return true;
}
|
|
|
|
#endif
|
|
|
|
int main(
|
|
int argc,
|
|
char **argv
|
|
) {
|
|
if (!validate_cpu_isa())
|
|
{
|
|
return 1;
|
|
}
|
|
|
|
return astcenc_main_veneer(argc, argv);
|
|
}
|