From 99f2cdf9ca10cac4af9a01f4934ea23d8d1bdacb Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Tue, 25 Mar 2014 15:52:59 -0400 Subject: [PATCH] eal: fix %rbx corruption and simplify the code Neil Horman reported that on x86-64 the upper half of %rbx would get clobbered when the code was compiled PIC or PIE, because the i386-specific code to preserve %ebx was incorrectly compiled. However, the code is really way more complex than it needs to be. For one thing, the CPUID instruction only needs %eax (leaf) and %ecx (subleaf) as parameters, and since we are testing for bits, we might as well list the bits explicitly. Furthermore, we can use an array rather than doing a switch statement inside a structure. Reported-by: Neil Horman Signed-off-by: H. Peter Anvin Signed-off-by: Neil Horman Reviewed-by: H. Peter Anvin --- lib/librte_eal/common/eal_common_cpuflags.c | 281 ++++++++++---------- 1 file changed, 136 insertions(+), 145 deletions(-) diff --git a/lib/librte_eal/common/eal_common_cpuflags.c b/lib/librte_eal/common/eal_common_cpuflags.c index 1ebf78cc2a..f9c18402d9 100644 --- a/lib/librte_eal/common/eal_common_cpuflags.c +++ b/lib/librte_eal/common/eal_common_cpuflags.c @@ -59,16 +59,7 @@ enum cpu_register_t { REG_EDX, }; -/** - * Parameters for CPUID instruction - */ -struct cpuid_parameters_t { - uint32_t eax; - uint32_t ebx; - uint32_t ecx; - uint32_t edx; - enum cpu_register_t return_register; -}; +typedef uint32_t cpuid_registers_t[4]; #define CPU_FLAG_NAME_MAX_LEN 64 @@ -76,109 +67,111 @@ struct cpuid_parameters_t { * Struct to hold a processor feature entry */ struct feature_entry { - enum rte_cpu_flag_t feature; /**< feature name */ + uint32_t leaf; /**< cpuid leaf */ + uint32_t subleaf; /**< cpuid subleaf */ + uint32_t reg; /**< cpuid register */ + uint32_t bit; /**< cpuid register bit */ char name[CPU_FLAG_NAME_MAX_LEN]; /**< String for printing */ - struct cpuid_parameters_t params; /**< cpuid parameters */ - uint32_t feature_mask; /**< bitmask for feature 
*/ }; -#define FEAT_DEF(f) RTE_CPUFLAG_##f, #f +#define FEAT_DEF(name, leaf, subleaf, reg, bit) \ + [RTE_CPUFLAG_##name] = {leaf, subleaf, reg, bit, #name }, /** * An array that holds feature entries */ static const struct feature_entry cpu_feature_table[] = { - {FEAT_DEF(SSE3), {0x1, 0, 0, 0, REG_ECX}, 0x00000001}, - {FEAT_DEF(PCLMULQDQ), {0x1, 0, 0, 0, REG_ECX}, 0x00000002}, - {FEAT_DEF(DTES64), {0x1, 0, 0, 0, REG_ECX}, 0x00000004}, - {FEAT_DEF(MONITOR), {0x1, 0, 0, 0, REG_ECX}, 0x00000008}, - {FEAT_DEF(DS_CPL), {0x1, 0, 0, 0, REG_ECX}, 0x00000010}, - {FEAT_DEF(VMX), {0x1, 0, 0, 0, REG_ECX}, 0x00000020}, - {FEAT_DEF(SMX), {0x1, 0, 0, 0, REG_ECX}, 0x00000040}, - {FEAT_DEF(EIST), {0x1, 0, 0, 0, REG_ECX}, 0x00000080}, - {FEAT_DEF(TM2), {0x1, 0, 0, 0, REG_ECX}, 0x00000100}, - {FEAT_DEF(SSSE3), {0x1, 0, 0, 0, REG_ECX}, 0x00000200}, - {FEAT_DEF(CNXT_ID), {0x1, 0, 0, 0, REG_ECX}, 0x00000400}, - {FEAT_DEF(FMA), {0x1, 0, 0, 0, REG_ECX}, 0x00001000}, - {FEAT_DEF(CMPXCHG16B), {0x1, 0, 0, 0, REG_ECX}, 0x00002000}, - {FEAT_DEF(XTPR), {0x1, 0, 0, 0, REG_ECX}, 0x00004000}, - {FEAT_DEF(PDCM), {0x1, 0, 0, 0, REG_ECX}, 0x00008000}, - {FEAT_DEF(PCID), {0x1, 0, 0, 0, REG_ECX}, 0x00020000}, - {FEAT_DEF(DCA), {0x1, 0, 0, 0, REG_ECX}, 0x00040000}, - {FEAT_DEF(SSE4_1), {0x1, 0, 0, 0, REG_ECX}, 0x00080000}, - {FEAT_DEF(SSE4_2), {0x1, 0, 0, 0, REG_ECX}, 0x00100000}, - {FEAT_DEF(X2APIC), {0x1, 0, 0, 0, REG_ECX}, 0x00200000}, - {FEAT_DEF(MOVBE), {0x1, 0, 0, 0, REG_ECX}, 0x00400000}, - {FEAT_DEF(POPCNT), {0x1, 0, 0, 0, REG_ECX}, 0x00800000}, - {FEAT_DEF(TSC_DEADLINE), {0x1, 0, 0, 0, REG_ECX}, 0x01000000}, - {FEAT_DEF(AES), {0x1, 0, 0, 0, REG_ECX}, 0x02000000}, - {FEAT_DEF(XSAVE), {0x1, 0, 0, 0, REG_ECX}, 0x04000000}, - {FEAT_DEF(OSXSAVE), {0x1, 0, 0, 0, REG_ECX}, 0x08000000}, - {FEAT_DEF(AVX), {0x1, 0, 0, 0, REG_ECX}, 0x10000000}, - {FEAT_DEF(F16C), {0x1, 0, 0, 0, REG_ECX}, 0x20000000}, - {FEAT_DEF(RDRAND), {0x1, 0, 0, 0, REG_ECX}, 0x40000000}, + FEAT_DEF(SSE3, 0x00000001, 0, REG_ECX, 0) + 
FEAT_DEF(PCLMULQDQ, 0x00000001, 0, REG_ECX, 1) + FEAT_DEF(DTES64, 0x00000001, 0, REG_ECX, 2) + FEAT_DEF(MONITOR, 0x00000001, 0, REG_ECX, 3) + FEAT_DEF(DS_CPL, 0x00000001, 0, REG_ECX, 4) + FEAT_DEF(VMX, 0x00000001, 0, REG_ECX, 5) + FEAT_DEF(SMX, 0x00000001, 0, REG_ECX, 6) + FEAT_DEF(EIST, 0x00000001, 0, REG_ECX, 7) + FEAT_DEF(TM2, 0x00000001, 0, REG_ECX, 8) + FEAT_DEF(SSSE3, 0x00000001, 0, REG_ECX, 9) + FEAT_DEF(CNXT_ID, 0x00000001, 0, REG_ECX, 10) + FEAT_DEF(FMA, 0x00000001, 0, REG_ECX, 12) + FEAT_DEF(CMPXCHG16B, 0x00000001, 0, REG_ECX, 13) + FEAT_DEF(XTPR, 0x00000001, 0, REG_ECX, 14) + FEAT_DEF(PDCM, 0x00000001, 0, REG_ECX, 15) + FEAT_DEF(PCID, 0x00000001, 0, REG_ECX, 17) + FEAT_DEF(DCA, 0x00000001, 0, REG_ECX, 18) + FEAT_DEF(SSE4_1, 0x00000001, 0, REG_ECX, 19) + FEAT_DEF(SSE4_2, 0x00000001, 0, REG_ECX, 20) + FEAT_DEF(X2APIC, 0x00000001, 0, REG_ECX, 21) + FEAT_DEF(MOVBE, 0x00000001, 0, REG_ECX, 22) + FEAT_DEF(POPCNT, 0x00000001, 0, REG_ECX, 23) + FEAT_DEF(TSC_DEADLINE, 0x00000001, 0, REG_ECX, 24) + FEAT_DEF(AES, 0x00000001, 0, REG_ECX, 25) + FEAT_DEF(XSAVE, 0x00000001, 0, REG_ECX, 26) + FEAT_DEF(OSXSAVE, 0x00000001, 0, REG_ECX, 27) + FEAT_DEF(AVX, 0x00000001, 0, REG_ECX, 28) + FEAT_DEF(F16C, 0x00000001, 0, REG_ECX, 29) + FEAT_DEF(RDRAND, 0x00000001, 0, REG_ECX, 30) - {FEAT_DEF(FPU), {0x1, 0, 0, 0, REG_EDX}, 0x00000001}, - {FEAT_DEF(VME), {0x1, 0, 0, 0, REG_EDX}, 0x00000002}, - {FEAT_DEF(DE), {0x1, 0, 0, 0, REG_EDX}, 0x00000004}, - {FEAT_DEF(PSE), {0x1, 0, 0, 0, REG_EDX}, 0x00000008}, - {FEAT_DEF(TSC), {0x1, 0, 0, 0, REG_EDX}, 0x00000010}, - {FEAT_DEF(MSR), {0x1, 0, 0, 0, REG_EDX}, 0x00000020}, - {FEAT_DEF(PAE), {0x1, 0, 0, 0, REG_EDX}, 0x00000040}, - {FEAT_DEF(MCE), {0x1, 0, 0, 0, REG_EDX}, 0x00000080}, - {FEAT_DEF(CX8), {0x1, 0, 0, 0, REG_EDX}, 0x00000100}, - {FEAT_DEF(APIC), {0x1, 0, 0, 0, REG_EDX}, 0x00000200}, - {FEAT_DEF(SEP), {0x1, 0, 0, 0, REG_EDX}, 0x00000800}, - {FEAT_DEF(MTRR), {0x1, 0, 0, 0, REG_EDX}, 0x00001000}, - {FEAT_DEF(PGE), {0x1, 0, 0, 0, 
REG_EDX}, 0x00002000}, - {FEAT_DEF(MCA), {0x1, 0, 0, 0, REG_EDX}, 0x00004000}, - {FEAT_DEF(CMOV), {0x1, 0, 0, 0, REG_EDX}, 0x00008000}, - {FEAT_DEF(PAT), {0x1, 0, 0, 0, REG_EDX}, 0x00010000}, - {FEAT_DEF(PSE36), {0x1, 0, 0, 0, REG_EDX}, 0x00020000}, - {FEAT_DEF(PSN), {0x1, 0, 0, 0, REG_EDX}, 0x00040000}, - {FEAT_DEF(CLFSH), {0x1, 0, 0, 0, REG_EDX}, 0x00080000}, - {FEAT_DEF(DS), {0x1, 0, 0, 0, REG_EDX}, 0x00200000}, - {FEAT_DEF(ACPI), {0x1, 0, 0, 0, REG_EDX}, 0x00400000}, - {FEAT_DEF(MMX), {0x1, 0, 0, 0, REG_EDX}, 0x00800000}, - {FEAT_DEF(FXSR), {0x1, 0, 0, 0, REG_EDX}, 0x01000000}, - {FEAT_DEF(SSE), {0x1, 0, 0, 0, REG_EDX}, 0x02000000}, - {FEAT_DEF(SSE2), {0x1, 0, 0, 0, REG_EDX}, 0x04000000}, - {FEAT_DEF(SS), {0x1, 0, 0, 0, REG_EDX}, 0x08000000}, - {FEAT_DEF(HTT), {0x1, 0, 0, 0, REG_EDX}, 0x10000000}, - {FEAT_DEF(TM), {0x1, 0, 0, 0, REG_EDX}, 0x20000000}, - {FEAT_DEF(PBE), {0x1, 0, 0, 0, REG_EDX}, 0x80000000}, + FEAT_DEF(FPU, 0x00000001, 0, REG_EDX, 0) + FEAT_DEF(VME, 0x00000001, 0, REG_EDX, 1) + FEAT_DEF(DE, 0x00000001, 0, REG_EDX, 2) + FEAT_DEF(PSE, 0x00000001, 0, REG_EDX, 3) + FEAT_DEF(TSC, 0x00000001, 0, REG_EDX, 4) + FEAT_DEF(MSR, 0x00000001, 0, REG_EDX, 5) + FEAT_DEF(PAE, 0x00000001, 0, REG_EDX, 6) + FEAT_DEF(MCE, 0x00000001, 0, REG_EDX, 7) + FEAT_DEF(CX8, 0x00000001, 0, REG_EDX, 8) + FEAT_DEF(APIC, 0x00000001, 0, REG_EDX, 9) + FEAT_DEF(SEP, 0x00000001, 0, REG_EDX, 11) + FEAT_DEF(MTRR, 0x00000001, 0, REG_EDX, 12) + FEAT_DEF(PGE, 0x00000001, 0, REG_EDX, 13) + FEAT_DEF(MCA, 0x00000001, 0, REG_EDX, 14) + FEAT_DEF(CMOV, 0x00000001, 0, REG_EDX, 15) + FEAT_DEF(PAT, 0x00000001, 0, REG_EDX, 16) + FEAT_DEF(PSE36, 0x00000001, 0, REG_EDX, 17) + FEAT_DEF(PSN, 0x00000001, 0, REG_EDX, 18) + FEAT_DEF(CLFSH, 0x00000001, 0, REG_EDX, 19) + FEAT_DEF(DS, 0x00000001, 0, REG_EDX, 21) + FEAT_DEF(ACPI, 0x00000001, 0, REG_EDX, 22) + FEAT_DEF(MMX, 0x00000001, 0, REG_EDX, 23) + FEAT_DEF(FXSR, 0x00000001, 0, REG_EDX, 24) + FEAT_DEF(SSE, 0x00000001, 0, REG_EDX, 25) + FEAT_DEF(SSE2, 
0x00000001, 0, REG_EDX, 26) + FEAT_DEF(SS, 0x00000001, 0, REG_EDX, 27) + FEAT_DEF(HTT, 0x00000001, 0, REG_EDX, 28) + FEAT_DEF(TM, 0x00000001, 0, REG_EDX, 29) + FEAT_DEF(PBE, 0x00000001, 0, REG_EDX, 31) - {FEAT_DEF(DIGTEMP), {0x6, 0, 0, 0, REG_EAX}, 0x00000001}, - {FEAT_DEF(TRBOBST), {0x6, 0, 0, 0, REG_EAX}, 0x00000002}, - {FEAT_DEF(ARAT), {0x6, 0, 0, 0, REG_EAX}, 0x00000004}, - {FEAT_DEF(PLN), {0x6, 0, 0, 0, REG_EAX}, 0x00000010}, - {FEAT_DEF(ECMD), {0x6, 0, 0, 0, REG_EAX}, 0x00000020}, - {FEAT_DEF(PTM), {0x6, 0, 0, 0, REG_EAX}, 0x00000040}, + FEAT_DEF(DIGTEMP, 0x00000006, 0, REG_EAX, 0) + FEAT_DEF(TRBOBST, 0x00000006, 0, REG_EAX, 1) + FEAT_DEF(ARAT, 0x00000006, 0, REG_EAX, 2) + FEAT_DEF(PLN, 0x00000006, 0, REG_EAX, 4) + FEAT_DEF(ECMD, 0x00000006, 0, REG_EAX, 5) + FEAT_DEF(PTM, 0x00000006, 0, REG_EAX, 6) - {FEAT_DEF(MPERF_APERF_MSR), {0x6, 0, 0, 0, REG_ECX}, 0x00000001}, - {FEAT_DEF(ACNT2), {0x6, 0, 0, 0, REG_ECX}, 0x00000002}, - {FEAT_DEF(ENERGY_EFF), {0x6, 0, 0, 0, REG_ECX}, 0x00000008}, + FEAT_DEF(MPERF_APERF_MSR, 0x00000006, 0, REG_ECX, 0) + FEAT_DEF(ACNT2, 0x00000006, 0, REG_ECX, 1) + FEAT_DEF(ENERGY_EFF, 0x00000006, 0, REG_ECX, 3) - {FEAT_DEF(FSGSBASE), {0x7, 0, 0, 0, REG_EBX}, 0x00000001}, - {FEAT_DEF(BMI1), {0x7, 0, 0, 0, REG_EBX}, 0x00000004}, - {FEAT_DEF(HLE), {0x7, 0, 0, 0, REG_EBX}, 0x00000010}, - {FEAT_DEF(AVX2), {0x7, 0, 0, 0, REG_EBX}, 0x00000020}, - {FEAT_DEF(SMEP), {0x7, 0, 0, 0, REG_EBX}, 0x00000040}, - {FEAT_DEF(BMI2), {0x7, 0, 0, 0, REG_EBX}, 0x00000080}, - {FEAT_DEF(ERMS), {0x7, 0, 0, 0, REG_EBX}, 0x00000100}, - {FEAT_DEF(INVPCID), {0x7, 0, 0, 0, REG_EBX}, 0x00000400}, - {FEAT_DEF(RTM), {0x7, 0, 0, 0, REG_EBX}, 0x00000800}, + FEAT_DEF(FSGSBASE, 0x00000007, 0, REG_EBX, 0) + FEAT_DEF(BMI1, 0x00000007, 0, REG_EBX, 2) + FEAT_DEF(HLE, 0x00000007, 0, REG_EBX, 4) + FEAT_DEF(AVX2, 0x00000007, 0, REG_EBX, 5) + FEAT_DEF(SMEP, 0x00000007, 0, REG_EBX, 6) + FEAT_DEF(BMI2, 0x00000007, 0, REG_EBX, 7) + FEAT_DEF(ERMS, 0x00000007, 0, REG_EBX, 8) + 
FEAT_DEF(INVPCID, 0x00000007, 0, REG_EBX, 10) + FEAT_DEF(RTM, 0x00000007, 0, REG_EBX, 11) - {FEAT_DEF(LAHF_SAHF), {0x80000001, 0, 0, 0, REG_ECX}, 0x00000001}, - {FEAT_DEF(LZCNT), {0x80000001, 0, 0, 0, REG_ECX}, 0x00000010}, + FEAT_DEF(LAHF_SAHF, 0x80000001, 0, REG_ECX, 0) + FEAT_DEF(LZCNT, 0x80000001, 0, REG_ECX, 4) - {FEAT_DEF(SYSCALL), {0x80000001, 0, 0, 0, REG_EDX}, 0x00000800}, - {FEAT_DEF(XD), {0x80000001, 0, 0, 0, REG_EDX}, 0x00100000}, - {FEAT_DEF(1GB_PG), {0x80000001, 0, 0, 0, REG_EDX}, 0x04000000}, - {FEAT_DEF(RDTSCP), {0x80000001, 0, 0, 0, REG_EDX}, 0x08000000}, - {FEAT_DEF(EM64T), {0x80000001, 0, 0, 0, REG_EDX}, 0x20000000}, + FEAT_DEF(SYSCALL, 0x80000001, 0, REG_EDX, 11) + FEAT_DEF(XD, 0x80000001, 0, REG_EDX, 20) + FEAT_DEF(1GB_PG, 0x80000001, 0, REG_EDX, 26) + FEAT_DEF(RDTSCP, 0x80000001, 0, REG_EDX, 27) + FEAT_DEF(EM64T, 0x80000001, 0, REG_EDX, 29) - {FEAT_DEF(INVTSC), {0x80000007, 0, 0, 0, REG_EDX}, 0x00000100}, + FEAT_DEF(INVTSC, 0x80000007, 0, REG_EDX, 8) }; /* @@ -187,51 +180,27 @@ static const struct feature_entry cpu_feature_table[] = { * This function, when compiled with GCC, will generate architecture-neutral * code, as per GCC manual. 
*/ -static inline int -rte_cpu_get_features(struct cpuid_parameters_t params) +static inline void +rte_cpu_get_features(uint32_t leaf, uint32_t subleaf, cpuid_registers_t out) { - int eax, ebx, ecx, edx; /* registers */ - -#ifndef __PIC__ - asm volatile ("cpuid" - /* output */ - : "=a" (eax), - "=b" (ebx), - "=c" (ecx), - "=d" (edx) - /* input */ - : "a" (params.eax), - "b" (params.ebx), - "c" (params.ecx), - "d" (params.edx)); +#if defined(__i386__) && defined(__PIC__) + /* %ebx is a forbidden register if we compile with -fPIC or -fPIE */ + asm volatile("movl %%ebx,%0 ; cpuid ; xchgl %%ebx,%0" + : "=r" (out[REG_EBX]), + "=a" (out[REG_EAX]), + "=c" (out[REG_ECX]), + "=d" (out[REG_EDX]) + : "a" (leaf), "c" (subleaf)); #else - asm volatile ( - "mov %%ebx, %%edi\n" - "cpuid\n" - "xchgl %%ebx, %%edi;\n" - : "=a" (eax), - "=D" (ebx), - "=c" (ecx), - "=d" (edx) - /* input */ - : "a" (params.eax), - "D" (params.ebx), - "c" (params.ecx), - "d" (params.edx)); -#endif - switch (params.return_register) { - case REG_EAX: - return eax; - case REG_EBX: - return ebx; - case REG_ECX: - return ecx; - case REG_EDX: - return edx; - default: - return 0; - } + asm volatile("cpuid" + : "=a" (out[REG_EAX]), + "=b" (out[REG_EBX]), + "=c" (out[REG_ECX]), + "=d" (out[REG_EDX]) + : "a" (leaf), "c" (subleaf)); + +#endif } /* @@ -240,17 +209,30 @@ rte_cpu_get_features(struct cpuid_parameters_t params) int rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature) { - int value; + const struct feature_entry *feat; + cpuid_registers_t regs; + if (feature >= RTE_CPUFLAG_NUMFLAGS) /* Flag does not match anything in the feature tables */ return -ENOENT; - /* get value of the register containing the desired feature */ - value = rte_cpu_get_features(cpu_feature_table[feature].params); + feat = &cpu_feature_table[feature]; + + if (!feat->leaf) + /* This entry in the table wasn't filled out! 
*/ + return -EFAULT; + + rte_cpu_get_features(feat->leaf & 0xffff0000, 0, regs); + if (((regs[REG_EAX] ^ feat->leaf) & 0xffff0000) || + regs[REG_EAX] < feat->leaf) + return 0; + + /* get the cpuid leaf containing the desired feature */ + rte_cpu_get_features(feat->leaf, feat->subleaf, regs); /* check if the feature is enabled */ - return (cpu_feature_table[feature].feature_mask & value) > 0; + return (regs[feat->reg] >> feat->bit) & 1; } /** @@ -271,9 +253,18 @@ rte_cpu_check_supported(void) RTE_COMPILE_TIME_CPUFLAGS }; unsigned i; + int ret; for (i = 0; i < sizeof(compile_time_flags)/sizeof(compile_time_flags[0]); i++) - if (rte_cpu_get_flag_enabled(compile_time_flags[i]) < 1) { + /* The for loop is unbraced and owns one statement only, so both + * checks are chained into a single if/else-if statement. */ + if ((ret = rte_cpu_get_flag_enabled( + compile_time_flags[i])) < 0) { + fprintf(stderr, + "ERROR: CPU feature flag lookup failed with error %d\n", + ret); + exit(1); + } else if (!ret) { fprintf(stderr, "ERROR: This system does not support \"%s\".\n" "Please check that RTE_MACHINE is set correctly.\n", -- 2.20.1