From 29a7a61806002bc1f8c9bcf0f84689f137cd41c0 Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Fri, 11 Mar 2022 09:57:08 -0800 Subject: [PATCH 1/2] common/telemetry: Update `AddField` name type to `string_view` Non-owning `string_view` is flexable and avoids some of the many redundant copies made over `std::string` --- src/common/telemetry.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/common/telemetry.h b/src/common/telemetry.h index 4d632f7eb..3524c857e 100644 --- a/src/common/telemetry.h +++ b/src/common/telemetry.h @@ -8,6 +8,7 @@ #include #include #include +#include #include "common/common_funcs.h" #include "common/common_types.h" @@ -55,8 +56,8 @@ class Field : public FieldInterface { public: YUZU_NON_COPYABLE(Field); - Field(FieldType type_, std::string name_, T value_) - : name(std::move(name_)), type(type_), value(std::move(value_)) {} + Field(FieldType type_, std::string_view name_, T value_) + : name(name_), type(type_), value(std::move(value_)) {} ~Field() override = default; @@ -123,7 +124,7 @@ public: * @param value Value for the field to add. */ template - void AddField(FieldType type, const char* name, T value) { + void AddField(FieldType type, std::string_view name, T value) { return AddField(std::make_unique>(type, name, std::move(value))); } From d248c1203ea15992e6ca3a087a02fac76490deba Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Wed, 9 Mar 2022 18:11:46 -0800 Subject: [PATCH 2/2] cpu_detect: Add additional x86 flags and telemetry Adds detection of additional CPU flags to cpu_detect and additions to telemetry output. This is not exhaustive but guided by features that [dynarmic utilizes](https://github.com/merryhime/dynarmic/blob/bcfe377aaa5138af740e90af5be7a7dff7b62a52/src/dynarmic/backend/x64/host_feature.h#L12-L33) as well as features that are currently utilized but not reported to telemetry(invariant_tsc). This is intended to guide future optimizations. AVX512 in particular is broken up into its individual subsets and some other processor features such as [sha](https://en.wikipedia.org/wiki/Intel_SHA_extensions) and [gfni](https://en.wikipedia.org/wiki/AVX-512#GFNI) are added to have some forward-facing data-points. What used to be a single `CPU_Extension_x64_AVX512` telemetry field is also broken up into individual `CPU_Extension_x64_AVX512{F,VL,CD,...}` fields. --- src/common/telemetry.cpp | 60 +++++++++++++++++++++++++---------- src/common/x64/cpu_detect.cpp | 30 +++++++++++++----- src/common/x64/cpu_detect.h | 21 ++++++++++-- src/yuzu/main.cpp | 4 +-- 4 files changed, 86 insertions(+), 29 deletions(-) diff --git a/src/common/telemetry.cpp b/src/common/telemetry.cpp index 6241d08b3..98c82cd17 100644 --- a/src/common/telemetry.cpp +++ b/src/common/telemetry.cpp @@ -55,22 +55,50 @@ void AppendBuildInfo(FieldCollection& fc) { void AppendCPUInfo(FieldCollection& fc) { #ifdef ARCHITECTURE_x86_64 - fc.AddField(FieldType::UserSystem, "CPU_Model", Common::GetCPUCaps().cpu_string); - fc.AddField(FieldType::UserSystem, "CPU_BrandString", Common::GetCPUCaps().brand_string); - fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AES", Common::GetCPUCaps().aes); - fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX", Common::GetCPUCaps().avx); - fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX2", Common::GetCPUCaps().avx2); - fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX512", Common::GetCPUCaps().avx512); - fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_BMI1", Common::GetCPUCaps().bmi1); - fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_BMI2", Common::GetCPUCaps().bmi2); - fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_FMA", Common::GetCPUCaps().fma); - fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_FMA4", Common::GetCPUCaps().fma4); - fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_SSE", Common::GetCPUCaps().sse); - fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_SSE2", Common::GetCPUCaps().sse2); - fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_SSE3", Common::GetCPUCaps().sse3); - fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_SSSE3", Common::GetCPUCaps().ssse3); - fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_SSE41", Common::GetCPUCaps().sse4_1); - fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_SSE42", Common::GetCPUCaps().sse4_2); + + const auto& caps = Common::GetCPUCaps(); + const auto add_field = [&fc](std::string_view field_name, const auto& field_value) { + fc.AddField(FieldType::UserSystem, field_name, field_value); + }; + add_field("CPU_Model", caps.cpu_string); + add_field("CPU_BrandString", caps.brand_string); + + add_field("CPU_Extension_x64_SSE", caps.sse); + add_field("CPU_Extension_x64_SSE2", caps.sse2); + add_field("CPU_Extension_x64_SSE3", caps.sse3); + add_field("CPU_Extension_x64_SSSE3", caps.ssse3); + add_field("CPU_Extension_x64_SSE41", caps.sse4_1); + add_field("CPU_Extension_x64_SSE42", caps.sse4_2); + + add_field("CPU_Extension_x64_AVX", caps.avx); + add_field("CPU_Extension_x64_AVX_VNNI", caps.avx_vnni); + add_field("CPU_Extension_x64_AVX2", caps.avx2); + + // Skylake-X/SP level AVX512, for compatibility with the previous telemetry field + add_field("CPU_Extension_x64_AVX512", + caps.avx512f && caps.avx512cd && caps.avx512vl && caps.avx512dq && caps.avx512bw); + + add_field("CPU_Extension_x64_AVX512F", caps.avx512f); + add_field("CPU_Extension_x64_AVX512CD", caps.avx512cd); + add_field("CPU_Extension_x64_AVX512VL", caps.avx512vl); + add_field("CPU_Extension_x64_AVX512DQ", caps.avx512dq); + add_field("CPU_Extension_x64_AVX512BW", caps.avx512bw); + add_field("CPU_Extension_x64_AVX512BITALG", caps.avx512bitalg); + add_field("CPU_Extension_x64_AVX512VBMI", caps.avx512vbmi); + + add_field("CPU_Extension_x64_AES", caps.aes); + add_field("CPU_Extension_x64_BMI1", caps.bmi1); + add_field("CPU_Extension_x64_BMI2", caps.bmi2); + add_field("CPU_Extension_x64_F16C", caps.f16c); + add_field("CPU_Extension_x64_FMA", caps.fma); + add_field("CPU_Extension_x64_FMA4", caps.fma4); + add_field("CPU_Extension_x64_GFNI", caps.gfni); + add_field("CPU_Extension_x64_INVARIANT_TSC", caps.invariant_tsc); + add_field("CPU_Extension_x64_LZCNT", caps.lzcnt); + add_field("CPU_Extension_x64_MOVBE", caps.movbe); + add_field("CPU_Extension_x64_PCLMULQDQ", caps.pclmulqdq); + add_field("CPU_Extension_x64_POPCNT", caps.popcnt); + add_field("CPU_Extension_x64_SHA", caps.sha); #else fc.AddField(FieldType::UserSystem, "CPU_Model", "Other"); #endif diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp index 99d87f586..d81edb140 100644 --- a/src/common/x64/cpu_detect.cpp +++ b/src/common/x64/cpu_detect.cpp @@ -93,10 +93,14 @@ static CPUCaps Detect() { caps.sse = Common::Bit<25>(cpu_id[3]); caps.sse2 = Common::Bit<26>(cpu_id[3]); caps.sse3 = Common::Bit<0>(cpu_id[2]); + caps.pclmulqdq = Common::Bit<1>(cpu_id[2]); caps.ssse3 = Common::Bit<9>(cpu_id[2]); caps.sse4_1 = Common::Bit<19>(cpu_id[2]); caps.sse4_2 = Common::Bit<20>(cpu_id[2]); + caps.movbe = Common::Bit<22>(cpu_id[2]); + caps.popcnt = Common::Bit<23>(cpu_id[2]); caps.aes = Common::Bit<25>(cpu_id[2]); + caps.f16c = Common::Bit<29>(cpu_id[2]); // AVX support requires 3 separate checks: // - Is the AVX bit set in CPUID? @@ -112,16 +116,26 @@ static CPUCaps Detect() { if (max_std_fn >= 7) { __cpuidex(cpu_id, 0x00000007, 0x00000000); - // Can't enable AVX2 unless the XSAVE/XGETBV checks above passed - caps.avx2 = caps.avx && Common::Bit<5>(cpu_id[1]); + // Can't enable AVX{2,512} unless the XSAVE/XGETBV checks above passed + if (caps.avx) { + caps.avx2 = Common::Bit<5>(cpu_id[1]); + caps.avx512f = Common::Bit<16>(cpu_id[1]); + caps.avx512dq = Common::Bit<17>(cpu_id[1]); + caps.avx512cd = Common::Bit<28>(cpu_id[1]); + caps.avx512bw = Common::Bit<30>(cpu_id[1]); + caps.avx512vl = Common::Bit<31>(cpu_id[1]); + caps.avx512vbmi = Common::Bit<1>(cpu_id[2]); + caps.avx512bitalg = Common::Bit<12>(cpu_id[2]); + } + caps.bmi1 = Common::Bit<3>(cpu_id[1]); caps.bmi2 = Common::Bit<8>(cpu_id[1]); - // Checks for AVX512F, AVX512CD, AVX512VL, AVX512DQ, AVX512BW (Intel Skylake-X/SP) - if (Common::Bit<16>(cpu_id[1]) && Common::Bit<28>(cpu_id[1]) && - Common::Bit<31>(cpu_id[1]) && Common::Bit<17>(cpu_id[1]) && - Common::Bit<30>(cpu_id[1])) { - caps.avx512 = caps.avx2; - } + caps.sha = Common::Bit<29>(cpu_id[1]); + + caps.gfni = Common::Bit<8>(cpu_id[2]); + + __cpuidex(cpu_id, 0x00000007, 0x00000001); + caps.avx_vnni = caps.avx && Common::Bit<4>(cpu_id[0]); } } diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h index 3e6d808f3..40c48b132 100644 --- a/src/common/x64/cpu_detect.h +++ b/src/common/x64/cpu_detect.h @@ -35,16 +35,31 @@ struct CPUCaps { bool ssse3 : 1; bool sse4_1 : 1; bool sse4_2 : 1; - bool lzcnt : 1; + bool avx : 1; + bool avx_vnni : 1; bool avx2 : 1; - bool avx512 : 1; + bool avx512f : 1; + bool avx512dq : 1; + bool avx512cd : 1; + bool avx512bw : 1; + bool avx512vl : 1; + bool avx512vbmi : 1; + bool avx512bitalg : 1; + + bool aes : 1; bool bmi1 : 1; bool bmi2 : 1; + bool f16c : 1; bool fma : 1; bool fma4 : 1; - bool aes : 1; + bool gfni : 1; bool invariant_tsc : 1; + bool lzcnt : 1; + bool movbe : 1; + bool pclmulqdq : 1; + bool popcnt : 1; + bool sha : 1; }; /** diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index b3a8da0ea..1d459bdb3 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -249,9 +249,9 @@ GMainWindow::GMainWindow() #ifdef ARCHITECTURE_x86_64 const auto& caps = Common::GetCPUCaps(); std::string cpu_string = caps.cpu_string; - if (caps.avx || caps.avx2 || caps.avx512) { + if (caps.avx || caps.avx2 || caps.avx512f) { cpu_string += " | AVX"; - if (caps.avx512) { + if (caps.avx512f) { cpu_string += "512"; } else if (caps.avx2) { cpu_string += '2';