/* ScummVM - Graphic Adventure Engine
 *
 * ScummVM is the legal property of its developers, whose names
 * are too numerous to list here. Please refer to the COPYRIGHT
 * file distributed with this source distribution.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 */

// This file is derived from instrset_detect.cpp and instrset.h with minor style changes
/*****************************************************************************
 * Author:        Agner Fog
 * Date created:  2012-05-30
 * Last modified: 2022-07-20
 * Version:       2.02.00
 * Project:       vector class library
 * Description:
 * Functions for checking which instruction sets are supported.
 *
 * (c) Copyright 2012-2022 Agner Fog.
 * Apache License version 2.0 or later.
 ******************************************************************************/

#ifndef INSTRSET_DETECT_H
#define INSTRSET_DETECT_H

// Everything below is only available when compiling for 32-bit or 64-bit x86.
#if defined(__x86_64__) || defined(__amd64) || defined(_M_X64) || defined(_M_AMD64) || \
	defined(__i386__) || defined(__i386) || defined(_M_IX86)

#include <stdint.h>     // Define integer types with known size
#include <limits.h>     // Define INT_MAX

// Header files for non-vector intrinsic functions including
// _BitScanReverse(int), __cpuid(int[4],int), _xgetbv(int)
#ifdef _MSC_VER         // Microsoft compiler or compatible Intel compiler
#include <intrin.h>
#pragma warning(disable: 6323 4514 4710 4711) // Disable annoying warnings
#else
#include <x86intrin.h>  // Gcc or Clang compiler
#endif

// Define interface to cpuid instruction.
// input:  functionnumber = leaf (eax), ecxleaf = subleaf (ecx)
// output: output[0] = eax, output[1] = ebx, output[2] = ecx, output[3] = edx
static inline void cpuid(int output[4], int functionnumber, int ecxleaf = 0) {
#if defined(__GNUC__) || defined(__clang__)
	// use inline assembly, Gnu/AT&T syntax
	int a, b, c, d;
	__asm("cpuid" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "a"(functionnumber), "c"(ecxleaf) : );
	output[0] = a;
	output[1] = b;
	output[2] = c;
	output[3] = d;
#elif defined (_MSC_VER)
	// Microsoft compiler, intrin.h included
	__cpuidex(output, functionnumber, ecxleaf); // intrinsic function for CPUID
#else
	// unknown platform. try inline assembly with masm/intel syntax
	__asm {
		mov eax, functionnumber
		mov ecx, ecxleaf
		cpuid;
		mov esi, output
		mov[esi], eax
		mov[esi + 4], ebx
		mov[esi + 8], ecx
		mov[esi + 12], edx
	}
#endif
}

// Define interface to xgetbv instruction.
// input:  ctr = index of the extended control register to read (ecx)
// return: the 64-bit register value, edx in the high half and eax in the low half
static inline uint64_t xgetbv (int ctr) {
#if (defined (_MSC_FULL_VER) && _MSC_FULL_VER >= 160040000) || (defined (__INTEL_COMPILER) && __INTEL_COMPILER >= 1200)
	// Microsoft or Intel compiler supporting _xgetbv intrinsic
	return uint64_t(_xgetbv(ctr)); // intrinsic function for XGETBV
#elif defined(__GNUC__) || defined (__clang__)
	// use inline assembly, Gnu/AT&T syntax
	uint32_t a, d;
	__asm("xgetbv" : "=a"(a),"=d"(d) : "c"(ctr) : );
	return a | (uint64_t(d) << 32);
#else // #elif defined (_WIN32)
	// other compiler. try inline assembly with masm/intel/MS syntax
	uint32_t a, d;
	__asm {
		mov ecx, ctr
		_emit 0x0f
		_emit 0x01
		_emit 0xd0 ; // xgetbv
		mov a, eax
		mov d, edx
	}
	return a | (uint64_t(d) << 32);
#endif
}

// Probe the CPU once and return the instruction set level (see the table
// above instrset_detect). Every level requires all the levels below it, so
// the function returns at the first missing feature.
static inline int instrset_detect_uncached(void) {
	int abcd[4] = {0, 0, 0, 0};                            // cpuid results

	cpuid(abcd, 0);                                        // call cpuid function 0
	if (abcd[0] == 0) return 0;                            // no further cpuid function supported

	cpuid(abcd, 1);                                        // call cpuid function 1 for feature flags
	if ((abcd[3] & (1 <<  0)) == 0) return 0;              // no floating point
	if ((abcd[3] & (1 << 23)) == 0) return 0;              // no MMX
	if ((abcd[3] & (1 << 15)) == 0) return 0;              // no conditional move
	if ((abcd[3] & (1 << 24)) == 0) return 0;              // no FXSAVE
	if ((abcd[3] & (1 << 25)) == 0) return 0;              // no SSE
	if ((abcd[3] & (1 << 26)) == 0) return 1;              // 1: SSE, but no SSE2
	if ((abcd[2] & (1 <<  0)) == 0) return 2;              // 2: SSE2, but no SSE3
	if ((abcd[2] & (1 <<  9)) == 0) return 3;              // 3: SSE3, but no SSSE3
	if ((abcd[2] & (1 << 19)) == 0) return 4;              // 4: SSSE3, but no SSE4.1
	if ((abcd[2] & (1 << 23)) == 0) return 5;              // 5: SSE4.1, but no POPCNT
	if ((abcd[2] & (1 << 20)) == 0) return 5;              // 5: SSE4.1, but no SSE4.2
	if ((abcd[2] & (1 << 27)) == 0) return 6;              // 6: SSE4.2, but no OSXSAVE
	// xgetbv may only be executed once OSXSAVE has been confirmed above
	if ((xgetbv(0) & 6) != 6)       return 6;              // AVX not enabled in O.S.
	if ((abcd[2] & (1 << 28)) == 0) return 6;              // no AVX

	cpuid(abcd, 7);                                        // call cpuid leaf 7 for feature flags
	if ((abcd[1] & (1 <<  5)) == 0) return 7;              // 7: AVX, but no AVX2
	if ((abcd[1] & (1 << 16)) == 0) return 8;              // 8: AVX2, but no AVX512

	// NOTE(review): this tests the state components reported by cpuid leaf 0xD,
	// not the bits read back from xgetbv(0) as done for AVX above - confirm that
	// this is the intended check for operating system support.
	cpuid(abcd, 0xD);                                      // call cpuid leaf 0xD for feature flags
	if ((abcd[0] & 0x60) != 0x60)   return 8;              // no AVX512

	cpuid(abcd, 7);                                        // leaf 7 again, abcd was overwritten
	// 1u keeps the shift out of the sign bit of a signed int
	if ((abcd[1] & (1u << 31)) == 0) return 9;             // 9: AVX512F, but no AVX512VL
	if ((abcd[1] & 0x40020000) != 0x40020000) return 9;    // no AVX512BW, AVX512DQ
	return 10;                                             // 10: AVX512VL, AVX512BW, AVX512DQ
}

/* find supported instruction set
   return value:
   0           = 80386 instruction set
   1  or above = SSE (XMM) supported by CPU (not testing for OS support)
   2  or above = SSE2
   3  or above = SSE3
   4  or above = Supplementary SSE3 (SSSE3)
   5  or above = SSE4.1
   6  or above = SSE4.2
   7  or above = AVX supported by CPU and operating system
   8  or above = AVX2
   9  or above = AVX512F
   10 or above = AVX512VL, AVX512BW, AVX512DQ

   The result is computed on the first call and remembered for later calls.
   The cache is only written once the final value is known, so a caller never
   sees a half-finished result. No locking is done.
*/
static inline int instrset_detect(void) {
	static int iset = -1;                                  // remember value for next call
	if (iset < 0) {
		iset = instrset_detect_uncached();
	}
	return iset;
}

// detect if CPU supports the FMA3 instruction set
static inline bool hasFMA3(void) {
	if (instrset_detect() < 7) return false;               // must have AVX
	int abcd[4];                                           // cpuid results
	cpuid(abcd, 1);                                        // call cpuid function 1
	return ((abcd[2] & (1 << 12)) != 0);                   // ecx bit 12 indicates FMA3
}

// detect if CPU supports the FMA4 instruction set
static inline bool hasFMA4(void) {
	if (instrset_detect() < 7) return false;               // must have AVX
	int abcd[4];                                           // cpuid results
	cpuid(abcd, 0x80000001);                               // call cpuid function 0x80000001
	return ((abcd[2] & (1 << 16)) != 0);                   // ecx bit 16 indicates FMA4
}

// detect if CPU supports the XOP instruction set
static inline bool hasXOP(void) {
	if (instrset_detect() < 7) return false;               // must have AVX
	int abcd[4];                                           // cpuid results
	cpuid(abcd, 0x80000001);                               // call cpuid function 0x80000001
	return ((abcd[2] & (1 << 11)) != 0);                   // ecx bit 11 indicates XOP
}

// detect if CPU supports the AVX512ER instruction set
static inline bool hasAVX512ER(void) {
	if (instrset_detect() < 9) return false;               // must have AVX512F
	int abcd[4];                                           // cpuid results
	cpuid(abcd, 7);                                        // call cpuid function 7
	return ((abcd[1] & (1 << 27)) != 0);                   // ebx bit 27 indicates AVX512ER
}

// detect if CPU supports the AVX512VBMI instruction set
static inline bool hasAVX512VBMI(void) {
	if (instrset_detect() < 10) return false;              // must have AVX512BW
	int abcd[4];                                           // cpuid results
	cpuid(abcd, 7);                                        // call cpuid function 7
	return ((abcd[2] & (1 << 1)) != 0);                    // ecx bit 1 indicates AVX512VBMI
}

// detect if CPU supports the AVX512VBMI2 instruction set
static inline bool hasAVX512VBMI2(void) {
	if (instrset_detect() < 10) return false;              // must have AVX512BW
	int abcd[4];                                           // cpuid results
	cpuid(abcd, 7);                                        // call cpuid function 7
	return ((abcd[2] & (1 << 6)) != 0);                    // ecx bit 6 indicates AVX512VBMI2
}

// detect if CPU supports the F16C instruction set
static inline bool hasF16C(void) {
	if (instrset_detect() < 7) return false;               // must have AVX
	int abcd[4];                                           // cpuid results
	cpuid(abcd, 1);                                        // call cpuid function 1
	return ((abcd[2] & (1 << 29)) != 0);                   // ecx bit 29 indicates F16C
}

// detect if CPU supports the AVX512_FP16 instruction set
static inline bool hasAVX512FP16(void) {
	if (instrset_detect() < 10) return false;              // must have AVX512VL, AVX512BW, AVX512DQ
	int abcd[4];                                           // cpuid results
	cpuid(abcd, 7);                                        // call cpuid function 7
	return ((abcd[3] & (1 << 23)) != 0);                   // edx bit 23 indicates AVX512_FP16
}

#endif // x86 / x86-64

#endif // INSTRSET_DETECT_H