From fe3642edd48630b463f4eaf824035c6c7fb7d576 Mon Sep 17 00:00:00 2001 From: zzz Date: Sat, 12 Dec 2015 22:53:33 +0000 Subject: [PATCH] CPUID: Multiple bug fixes: - Add support for extended feature registers EBX/ECX - No such thing as EBX for 0x80000001 call; remove getExtendedEBXCPUFlags() method, replaced with getExtendedEBXFeatureFlags() - Check for support of 6 required Core i3/i5/i7 instructions to enable Haswell, since GMP Haswell requires Core i3/i5/i7 support. There are Pentium/Celeron Haswells that do not support these instructions. - Fix hasAVX2(), hasAVX512(), and hasADX() using wrong register - Fix hasAVX512() checking wrong bit - Define hasAVX512() as supporting AVX-512 Foundation, not the "full" instruction set as previously specified in the javadocs. - hasAVX2(), hasAVX512(), and hasADX() need not check hasAVX() first - Add missing hasADX() to CPUInfo interface Also: - More diagnostic output in CPUID.main() - More javadocs --- .../freenet/support/CPUInformation/CPUID.java | 58 +++++++++--- .../support/CPUInformation/CPUIDCPUInfo.java | 21 +++-- .../support/CPUInformation/CPUInfo.java | 21 ++++- .../support/CPUInformation/IntelCPUInfo.java | 27 ++++++ .../support/CPUInformation/IntelInfoImpl.java | 89 ++++++++++++++----- 5 files changed, 171 insertions(+), 45 deletions(-) diff --git a/core/java/src/freenet/support/CPUInformation/CPUID.java b/core/java/src/freenet/support/CPUInformation/CPUID.java index 62e82618e..3a9977d3e 100644 --- a/core/java/src/freenet/support/CPUInformation/CPUID.java +++ b/core/java/src/freenet/support/CPUInformation/CPUID.java @@ -190,12 +190,6 @@ public class CPUID { return c.ECX; } - static int getExtendedEBXCPUFlags() - { - CPUIDResult c = doCPUID(0x80000001); - return c.EBX; - } - static int getExtendedECXCPUFlags() { CPUIDResult c = doCPUID(0x80000001); @@ -209,6 +203,31 @@ public class CPUID { return c.EDX; } + /** + * @since 0.9.24 + */ + static int getExtendedEBXFeatureFlags() + { + // Supposed to set ECX to 0 before 
calling? + // But we don't have support for that in jcpuid. + // And it works just fine without that. + CPUIDResult c = doCPUID(7); + return c.EBX; + } + + /** + * There's almost nothing in here. + * @since 0.9.24 + */ + static int getExtendedECXFeatureFlags() + { + // Supposed to set ECX to 0 before calling? + // But we don't have support for that in jcpuid. + // And it works just fine without that. + CPUIDResult c = doCPUID(7); + return c.ECX; + } + /** * The model name string, up to 48 characters, as reported by * the processor itself. @@ -294,19 +313,28 @@ public class CPUID { System.out.println("CPU Family: " + family); System.out.println("CPU Model: " + model); System.out.println("CPU Stepping: " + getCPUStepping()); - System.out.println("CPU Flags: 0x" + Integer.toHexString(getEDXCPUFlags())); + System.out.println("CPU Flags (EDX): 0x" + Integer.toHexString(getEDXCPUFlags())); + System.out.println("CPU Flags (ECX): 0x" + Integer.toHexString(getECXCPUFlags())); + System.out.println("CPU Ext. Info. (EDX): 0x" + Integer.toHexString(getExtendedEDXCPUFlags())); + System.out.println("CPU Ext. Info. (ECX): 0x" + Integer.toHexString(getExtendedECXCPUFlags())); + System.out.println("CPU Ext. Feat. (EBX): 0x" + Integer.toHexString(getExtendedEBXFeatureFlags())); + System.out.println("CPU Ext. Feat. 
(ECX): 0x" + Integer.toHexString(getExtendedECXFeatureFlags())); CPUInfo c = getInfo(); System.out.println("\n **More CPUInfo**"); System.out.println("CPU model string: " + c.getCPUModelString()); - System.out.println("CPU has MMX: " + c.hasMMX()); - System.out.println("CPU has SSE: " + c.hasSSE()); - System.out.println("CPU has SSE2: " + c.hasSSE2()); - System.out.println("CPU has SSE3: " + c.hasSSE3()); + System.out.println("CPU has MMX: " + c.hasMMX()); + System.out.println("CPU has SSE: " + c.hasSSE()); + System.out.println("CPU has SSE2: " + c.hasSSE2()); + System.out.println("CPU has SSE3: " + c.hasSSE3()); System.out.println("CPU has SSE4.1: " + c.hasSSE41()); System.out.println("CPU has SSE4.2: " + c.hasSSE42()); - System.out.println("CPU has SSE4A: " + c.hasSSE4A()); - System.out.println("CPU has AES-NI: " + c.hasAES()); + System.out.println("CPU has SSE4A: " + c.hasSSE4A()); + System.out.println("CPU has AVX: " + c.hasAVX()); + System.out.println("CPU has AVX2: " + c.hasAVX2()); + System.out.println("CPU has AVX512: " + c.hasAVX512()); + System.out.println("CPU has ADX: " + c.hasADX()); + System.out.println("CPU has TBM: " + c.hasTBM()); if(c instanceof IntelCPUInfo){ System.out.println("\n **Intel-info**"); System.out.println("Is PII-compatible: "+((IntelCPUInfo)c).IsPentium2Compatible()); @@ -316,6 +344,10 @@ public class CPUID { System.out.println("Is Pentium M compatible: "+((IntelCPUInfo)c).IsPentiumMCompatible()); System.out.println("Is Core2-compatible: "+((IntelCPUInfo)c).IsCore2Compatible()); System.out.println("Is Corei-compatible: "+((IntelCPUInfo)c).IsCoreiCompatible()); + System.out.println("Is Sandy-compatible: "+((IntelCPUInfo)c).IsSandyCompatible()); + System.out.println("Is Ivy-compatible: "+((IntelCPUInfo)c).IsIvyCompatible()); + System.out.println("Is Haswell-compatible: "+((IntelCPUInfo)c).IsHaswellCompatible()); + System.out.println("Is Broadwell-compatible: "+((IntelCPUInfo)c).IsBroadwellCompatible()); } if(c instanceof AMDCPUInfo){ 
System.out.println("\n **AMD-info**"); diff --git a/core/java/src/freenet/support/CPUInformation/CPUIDCPUInfo.java b/core/java/src/freenet/support/CPUInformation/CPUIDCPUInfo.java index aa3005f0d..efe1dfc00 100644 --- a/core/java/src/freenet/support/CPUInformation/CPUIDCPUInfo.java +++ b/core/java/src/freenet/support/CPUInformation/CPUIDCPUInfo.java @@ -63,18 +63,26 @@ abstract class CPUIDCPUInfo implements CPUInfo */ public boolean hasAVX2() { - return hasAVX() && - (CPUID.getExtendedEBXCPUFlags() & (1 << 5)) != 0; //Extended EBX Bit 5 + return (CPUID.getExtendedEBXFeatureFlags() & (1 << 5)) != 0; //Extended EBX Bit 5 } /** - * @return true iff the CPU supports the AVX512 instruction set. + * Does the CPU support the AVX-512 Foundation instruction set? + * + * Quote wikipedia: + * + * AVX-512 consists of multiple extensions not all meant to be supported + * by all processors implementing them. Only the core extension AVX-512F + * (AVX-512 Foundation) is required by all implementations. + * + * ref: https://en.wikipedia.org/wiki/AVX-512 + * + * @return true iff the CPU supports the AVX-512 Foundation instruction set. 
* @since 0.9.21 */ public boolean hasAVX512() { - return hasAVX() && - (CPUID.getExtendedEBXCPUFlags() & (1 << 5)) != 0; //Extended EBX Bit 5 + return (CPUID.getExtendedEBXFeatureFlags() & (1 << 16)) != 0; //Extended EBX Bit 16 } /** @@ -83,8 +91,7 @@ abstract class CPUIDCPUInfo implements CPUInfo */ public boolean hasADX() { - return hasAVX() && - (CPUID.getExtendedEBXCPUFlags() & (1 << 19)) != 0; //Extended EBX Bit 19 + return (CPUID.getExtendedEBXFeatureFlags() & (1 << 19)) != 0; //Extended EBX Bit 19 } /** diff --git a/core/java/src/freenet/support/CPUInformation/CPUInfo.java b/core/java/src/freenet/support/CPUInformation/CPUInfo.java index 033e8ef90..161f04aba 100644 --- a/core/java/src/freenet/support/CPUInformation/CPUInfo.java +++ b/core/java/src/freenet/support/CPUInformation/CPUInfo.java @@ -59,6 +59,9 @@ public interface CPUInfo public boolean hasSSE42(); /** + * AMD K10 only. Not supported on Intel. + * ref: https://en.wikipedia.org/wiki/SSE4.2#SSE4a + * * @return true iff the CPU support the SSE4A instruction set. */ public boolean hasSSE4A(); @@ -76,11 +79,27 @@ public interface CPUInfo public boolean hasAVX2(); /** - * @return true iff the CPU supports the full AVX512 instruction set. + * Does the CPU support the AVX-512 Foundation instruction set? + * + * Quote wikipedia: + * + * AVX-512 consists of multiple extensions not all meant to be supported + * by all processors implementing them. Only the core extension AVX-512F + * (AVX-512 Foundation) is required by all implementations. + * + * ref: https://en.wikipedia.org/wiki/AVX-512 + * + * @return true iff the CPU supports the AVX-512 Foundation instruction set. * @since 0.9.21 */ public boolean hasAVX512(); + /** + * @return true iff the CPU supports the ADX instruction set. + * @since 0.9.21 + */ + public boolean hasADX(); + /** * @return true iff the CPU supports TBM. 
* @since 0.9.21 diff --git a/core/java/src/freenet/support/CPUInformation/IntelCPUInfo.java b/core/java/src/freenet/support/CPUInformation/IntelCPUInfo.java index 92ca9615d..6f2d3ff88 100644 --- a/core/java/src/freenet/support/CPUInformation/IntelCPUInfo.java +++ b/core/java/src/freenet/support/CPUInformation/IntelCPUInfo.java @@ -66,6 +66,10 @@ public interface IntelCPUInfo extends CPUInfo { /** * Supports the SSE 3, 4.1, 4.2 instructions. * In general, this requires 45nm or smaller process. + * + * This is the Nehalem architecture. + * ref: https://en.wikipedia.org/wiki/Nehalem_%28microarchitecture%29 + * * @return true if the CPU implements at least a Corei level instruction/feature set. */ public boolean IsCoreiCompatible(); @@ -82,6 +86,11 @@ public interface IntelCPUInfo extends CPUInfo { * Supports the SSE 3, 4.1, 4.2 instructions. * Supports the AVX 1 instructions. * In general, this requires 22nm or smaller process. + * + * UNUSED, there is no specific GMP build for Ivy Bridge, + * and this is never called from NativeBigInteger. + * Ivy Bridge is a successor to Sandy Bridge, so use IsSandyCompatible(). + * * @return true if the CPU implements at least a IvyBridge level instruction/feature set. */ public boolean IsIvyCompatible(); @@ -89,6 +98,19 @@ public interface IntelCPUInfo extends CPUInfo { /** * Supports the SSE 3, 4.1, 4.2 instructions. * Supports the AVX 1, 2 instructions. + * Supports the BMI 1, 2 instructions. + * + * WARNING - GMP 6 uses the BMI2 MULX instruction for the "coreihwl" binaries. + * Only Core i3/i5/i7 Haswell processors support BMI2. + * + * Requires support for all 6 of these Corei features: FMA3 MOVBE ABM AVX2 BMI1 BMI2 + * Pentium/Celeron Haswell processors do NOT support BMI2 and are NOT compatible. + * Those processors will be Sandy-compatible if they have AVX 1 support, + * and Corei-compatible if they do not. 
+ * + * ref: https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family + * ref: https://en.wikipedia.org/wiki/Haswell_%28microarchitecture%29 + * * In general, this requires 22nm or smaller process. * @return true if the CPU implements at least a Haswell level instruction/feature set. */ @@ -98,6 +120,11 @@ public interface IntelCPUInfo extends CPUInfo { * Supports the SSE 3, 4.1, 4.2 instructions. * Supports the AVX 1, 2 instructions. * In general, this requires 14nm or smaller process. + * + * NOT FULLY USED as of GMP 6.0. + * All GMP coreibwl binaries are duplicates of binaries for older technologies, + * so we do not distribute any. However, this is called from NativeBigInteger. + * * @return true if the CPU implements at least a Broadwell level instruction/feature set. */ public boolean IsBroadwellCompatible(); diff --git a/core/java/src/freenet/support/CPUInformation/IntelInfoImpl.java b/core/java/src/freenet/support/CPUInformation/IntelInfoImpl.java index 6631f3c0c..0174b16dd 100644 --- a/core/java/src/freenet/support/CPUInformation/IntelInfoImpl.java +++ b/core/java/src/freenet/support/CPUInformation/IntelInfoImpl.java @@ -282,6 +282,8 @@ class IntelInfoImpl extends CPUIDCPUInfo implements IntelCPUInfo modelString = "Atom"; break; // Sandy bridge 32 nm + // 1, 2, or 4 cores + // ref: https://en.wikipedia.org/wiki/Sandy_Bridge_%28microarchitecture%29 case 0x2a: isSandyCompatible = true; modelString = "Sandy Bridge"; @@ -295,6 +297,8 @@ class IntelInfoImpl extends CPUIDCPUInfo implements IntelCPUInfo modelString = "Westmere"; break; // Sandy Bridge 32 nm + // Sandy Bridge-E up to 8 cores + // ref: https://en.wikipedia.org/wiki/Sandy_Bridge_%28microarchitecture%29 case 0x2d: isSandyCompatible = true; modelString = "Sandy Bridge"; @@ -328,18 +332,15 @@ class IntelInfoImpl extends CPUIDCPUInfo implements IntelCPUInfo modelString = "Atom"; break; // Ivy Bridge 22 nm + // ref: 
https://en.wikipedia.org/wiki/Sandy_Bridge_%28microarchitecture%29 case 0x3a: isSandyCompatible = true; isIvyCompatible = true; modelString = "Ivy Bridge"; break; - // Haswell 22 nm - case 0x3c: - isSandyCompatible = true; - isIvyCompatible = true; - isHaswellCompatible = true; - modelString = "Haswell"; - break; + + // case 0x3c: See below + // Broadwell 14 nm case 0x3d: isSandyCompatible = true; @@ -354,32 +355,72 @@ class IntelInfoImpl extends CPUIDCPUInfo implements IntelCPUInfo isIvyCompatible = true; modelString = "Ivy Bridge"; break; - // Haswell 22 nm - case 0x3f: - isSandyCompatible = true; - isIvyCompatible = true; - isHaswellCompatible = true; - modelString = "Haswell"; - break; + + // case 0x3f: See below // following are for extended model == 4 // most flags are set above // isCoreiCompatible = true is the default // Haswell 22 nm + // Pentium and Celeron Haswells do not support new Haswell instructions, + // only Corei ones do, but we can't tell that from the model alone. + // + // We know for sure that GMP coreihwl uses the MULX instruction from BMI2, + // unsure about the others, but let's be safe and check all 6 feature bits, as + // the Intel app note suggests. 
+ // + // ref: https://en.wikipedia.org/wiki/Haswell_%28microarchitecture%29 + // ref: https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family + case 0x3c: + case 0x3f: case 0x45: - isSandyCompatible = true; - isIvyCompatible = true; - isHaswellCompatible = true; - modelString = "Haswell"; - break; - // Haswell 22 nm case 0x46: - isSandyCompatible = true; - isIvyCompatible = true; - isHaswellCompatible = true; - modelString = "Haswell"; + boolean hasNewInstructions = false; + int reg = CPUID.getECXCPUFlags(); + boolean hasFMA3 = (reg & (1 << 12)) != 0; + boolean hasMOVBE = (reg & (1 << 22)) != 0; + // AVX is implied by AVX2, so we don't need to check the value here, + // but we will need it below to enable Sandy Bridge if the Haswell checks fail. + // This is the same as hasAVX(). + boolean hasAVX = (reg & (1 << 28)) != 0 && (reg & (1 << 27)) != 0; + //System.out.println("FMA3 MOVBE: " + + // hasFMA3 + ' ' + hasMOVBE); + if (hasFMA3 && hasMOVBE) { + reg = CPUID.getExtendedECXCPUFlags(); + boolean hasABM = (reg & (1 << 5)) != 0; // aka LZCNT + //System.out.println("FMA3 MOVBE ABM: " + + // hasFMA3 + ' ' + hasMOVBE + ' ' + hasABM); + if (hasABM) { + reg = CPUID.getExtendedEBXFeatureFlags(); + boolean hasAVX2 = (reg & (1 << 5)) != 0; + boolean hasBMI1 = (reg & (1 << 3)) != 0; + boolean hasBMI2 = (reg & (1 << 8)) != 0; + //System.out.println("FMA3 MOVBE ABM AVX2 BMI1 BMI2: " + + // hasFMA3 + ' ' + hasMOVBE + ' ' + hasABM + ' ' + + // hasAVX2 + ' ' + hasBMI1 + ' ' + hasBMI2); + if (hasAVX2 && hasBMI1 && hasBMI2) + hasNewInstructions = true; + } + } + if (hasNewInstructions) { + isSandyCompatible = true; + isIvyCompatible = true; + isHaswellCompatible = true; + modelString = "Haswell Core i3/i5/i7 model " + model; + } else { + // This processor is "corei" compatible, as we define it, + // i.e. SSE4.2 but not necessarily AVX. 
+ if (hasAVX) { + isSandyCompatible = true; + isIvyCompatible = true; + modelString = "Haswell Celeron/Pentium w/ AVX model " + model; + } else { + modelString = "Haswell Celeron/Pentium model " + model; + } + } break; + // Quark 32nm case 0x4a: isCore2Compatible = false;