[clang] 6673d69 - [X86] Don't imply -mprfchw when -m3dnow is specified. Enable prefetchw in the backend with 3dnow feature.

Craig Topper via cfe-commits cfe-commits at lists.llvm.org
Thu Jun 25 12:47:39 PDT 2020


Author: Craig Topper
Date: 2020-06-25T12:46:52-07:00
New Revision: 6673d69226d86f4906dca4b627d6e0582486d072

URL: https://github.com/llvm/llvm-project/commit/6673d69226d86f4906dca4b627d6e0582486d072
DIFF: https://github.com/llvm/llvm-project/commit/6673d69226d86f4906dca4b627d6e0582486d072.diff

LOG: [X86] Don't imply -mprfchw when -m3dnow is specified. Enable prefetchw in the backend with 3dnow feature.

The PREFETCHW instruction was originally part of the 3DNow. But
it was given its own CPUID bit on later CPUs just before 3DNow
was deprecated.

We were setting the -mprfchw flag if -m3dnow was passed or the CPU
supported 3dnow unless -mno-prfchw was passed. But -march=native
on a CPU without the PRFCHW CPUID bit set will pass -mno-prfchw.
So -march=k8 will behave differently than -march=native on a K8
for example.

So remove this implicit setting from the frontend and instead
enable the backend to use PREFETCHW if 3dnow OR prfchw is enabled.

Also enable PRFCHW flag on amdfam10/barcelona which seems to be
where this CPUID bit was introduced. That CPU also supported
3dnow.

Added: 
    

Modified: 
    clang/lib/Basic/Targets/X86.cpp
    clang/test/Preprocessor/predefined-arch-macros.c
    clang/test/Preprocessor/x86_target_features.c
    llvm/lib/Target/X86/X86.td
    llvm/lib/Target/X86/X86InstrInfo.td
    llvm/lib/Target/X86/X86Subtarget.h
    llvm/test/CodeGen/X86/prefetch.ll

Removed: 
    


################################################################################
diff  --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index fcf690847078..cc37c8a5cba0 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -335,6 +335,7 @@ bool X86TargetInfo::initFeatureMap(
     setFeatureEnabledImpl(Features, "lzcnt", true);
     setFeatureEnabledImpl(Features, "popcnt", true);
     setFeatureEnabledImpl(Features, "sahf", true);
+    setFeatureEnabledImpl(Features, "prfchw", true);
     LLVM_FALLTHROUGH;
   case CK_K8SSE3:
     setFeatureEnabledImpl(Features, "sse3", true);
@@ -450,12 +451,6 @@ bool X86TargetInfo::initFeatureMap(
       llvm::find(FeaturesVec, "-popcnt") == FeaturesVec.end())
     Features["popcnt"] = true;
 
-  // Enable prfchw if 3DNow! is enabled and prfchw is not explicitly disabled.
-  I = Features.find("3dnow");
-  if (I != Features.end() && I->getValue() &&
-      llvm::find(FeaturesVec, "-prfchw") == FeaturesVec.end())
-    Features["prfchw"] = true;
-
   // Additionally, if SSE is enabled and mmx is not explicitly disabled,
   // then enable MMX.
   I = Features.find("sse");

diff  --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c
index f7de81028327..4e1535c91c08 100644
--- a/clang/test/Preprocessor/predefined-arch-macros.c
+++ b/clang/test/Preprocessor/predefined-arch-macros.c
@@ -2379,6 +2379,7 @@
 // CHECK_AMDFAM10_M32: #define __LZCNT__ 1
 // CHECK_AMDFAM10_M32: #define __MMX__ 1
 // CHECK_AMDFAM10_M32: #define __POPCNT__ 1
+// CHECK_AMDFAM10_M32: #define __PRFCHW__ 1
 // CHECK_AMDFAM10_M32: #define __SSE2_MATH__ 1
 // CHECK_AMDFAM10_M32: #define __SSE2__ 1
 // CHECK_AMDFAM10_M32: #define __SSE3__ 1
@@ -2399,6 +2400,7 @@
 // CHECK_AMDFAM10_M64: #define __LZCNT__ 1
 // CHECK_AMDFAM10_M64: #define __MMX__ 1
 // CHECK_AMDFAM10_M64: #define __POPCNT__ 1
+// CHECK_AMDFAM10_M64: #define __PRFCHW__ 1
 // CHECK_AMDFAM10_M64: #define __SSE2_MATH__ 1
 // CHECK_AMDFAM10_M64: #define __SSE2__ 1
 // CHECK_AMDFAM10_M64: #define __SSE3__ 1

diff  --git a/clang/test/Preprocessor/x86_target_features.c b/clang/test/Preprocessor/x86_target_features.c
index 8e24c1518720..6f3c0cffb17e 100644
--- a/clang/test/Preprocessor/x86_target_features.c
+++ b/clang/test/Preprocessor/x86_target_features.c
@@ -374,10 +374,12 @@
 
 // RUN: %clang -target i386-unknown-unknown -march=atom -m3dnow -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=3DNOWPRFCHW %s
 
-// 3DNOWPRFCHW: #define __PRFCHW__ 1
+// 3DNOWPRFCHW: #define __3dNOW__ 1
+// 3DNOWPRFCHW-NOT: #define __PRFCHW__ 1
 
 // RUN: %clang -target i386-unknown-unknown -march=atom -mno-prfchw -m3dnow -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=3DNOWNOPRFCHW %s
 
+// 3DNOWNOPRFCHW: #define __3dNOW__ 1
 // 3DNOWNOPRFCHW-NOT: #define __PRFCHW__ 1
 
 // RUN: %clang -target i386-unknown-unknown -march=atom -mprfchw -mno-3dnow -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=NO3DNOWPRFCHW %s

diff  --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index bcbc1ae492b2..3e85d4ab2a4c 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -847,6 +847,7 @@ def ProcessorFeatures {
                                                          FeatureFXSR,
                                                          FeatureNOPL,
                                                          FeatureCMPXCHG16B,
+                                                         FeaturePRFCHW,
                                                          FeatureLZCNT,
                                                          FeaturePOPCNT,
                                                          FeatureSlowSHLD,

diff  --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index 262961af7c51..46ca1896e4e2 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -928,11 +928,10 @@ def HasRTM       : Predicate<"Subtarget->hasRTM()">;
 def HasADX       : Predicate<"Subtarget->hasADX()">;
 def HasSHA       : Predicate<"Subtarget->hasSHA()">;
 def HasSGX       : Predicate<"Subtarget->hasSGX()">;
-def HasPRFCHW    : Predicate<"Subtarget->hasPRFCHW()">;
 def HasRDSEED    : Predicate<"Subtarget->hasRDSEED()">;
 def HasSSEPrefetch : Predicate<"Subtarget->hasSSEPrefetch()">;
 def NoSSEPrefetch : Predicate<"!Subtarget->hasSSEPrefetch()">;
-def HasPrefetchW : Predicate<"Subtarget->hasPRFCHW()">;
+def HasPrefetchW : Predicate<"Subtarget->hasPrefetchW()">;
 def HasPREFETCHWT1 : Predicate<"Subtarget->hasPREFETCHWT1()">;
 def HasLAHFSAHF  : Predicate<"Subtarget->hasLAHFSAHF()">;
 def HasMWAITX    : Predicate<"Subtarget->hasMWAITX()">;

diff  --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index 5c514311db90..16483e835f50 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -647,8 +647,15 @@ class X86Subtarget final : public X86GenSubtargetInfo {
   bool hasRTM() const { return HasRTM; }
   bool hasADX() const { return HasADX; }
   bool hasSHA() const { return HasSHA; }
-  bool hasPRFCHW() const { return HasPRFCHW || HasPREFETCHWT1; }
+  bool hasPRFCHW() const { return HasPRFCHW; }
   bool hasPREFETCHWT1() const { return HasPREFETCHWT1; }
+  bool hasPrefetchW() const {
+    // The PREFETCHW instruction was added with 3DNow but later CPUs gave it
+    // its own CPUID bit as part of deprecating 3DNow. Intel eventually added
+    // it and KNL has another that prefetches to L2 cache. We assume the
+    // L1 version exists if the L2 version does.
+    return has3DNow() || hasPRFCHW() || hasPREFETCHWT1();
+  }
   bool hasSSEPrefetch() const {
     // We implicitly enable these when we have a write prefix supporting cache
     // level OR if we have prfchw, but don't already have a read prefetch from

diff  --git a/llvm/test/CodeGen/X86/prefetch.ll b/llvm/test/CodeGen/X86/prefetch.ll
index 839948174a43..f00788145f4e 100644
--- a/llvm/test/CodeGen/X86/prefetch.ll
+++ b/llvm/test/CodeGen/X86/prefetch.ll
@@ -10,7 +10,7 @@
 ; RUN: llc < %s -mtriple=i686-- -mattr=-sse,+prefetchwt1 | FileCheck %s -check-prefix=PREFETCHWT1
 ; RUN: llc < %s -mtriple=i686-- -mattr=-sse,+3dnow,+prefetchwt1 | FileCheck %s -check-prefix=PREFETCHWT1
 ; RUN: llc < %s -mtriple=i686-- -mattr=+3dnow | FileCheck %s -check-prefix=3DNOW
-; RUN: llc < %s -mtriple=i686-- -mattr=+3dnow,+prfchw | FileCheck %s -check-prefix=PRFCHW3DNOW
+; RUN: llc < %s -mtriple=i686-- -mattr=+3dnow,+prfchw | FileCheck %s -check-prefix=3DNOW
 
 ; Rules:
 ; 3dnow by itself get you just the single prefetch instruction with no hints
@@ -68,24 +68,11 @@ define void @t(i8* %ptr) nounwind  {
 ; 3DNOW-NEXT:    prefetch (%eax)
 ; 3DNOW-NEXT:    prefetch (%eax)
 ; 3DNOW-NEXT:    prefetch (%eax)
-; 3DNOW-NEXT:    prefetch (%eax)
-; 3DNOW-NEXT:    prefetch (%eax)
-; 3DNOW-NEXT:    prefetch (%eax)
-; 3DNOW-NEXT:    prefetch (%eax)
+; 3DNOW-NEXT:    prefetchw (%eax)
+; 3DNOW-NEXT:    prefetchw (%eax)
+; 3DNOW-NEXT:    prefetchw (%eax)
+; 3DNOW-NEXT:    prefetchw (%eax)
 ; 3DNOW-NEXT:    retl
-;
-; PRFCHW3DNOW-LABEL: t:
-; PRFCHW3DNOW:       # %bb.0: # %entry
-; PRFCHW3DNOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; PRFCHW3DNOW-NEXT:    prefetch (%eax)
-; PRFCHW3DNOW-NEXT:    prefetch (%eax)
-; PRFCHW3DNOW-NEXT:    prefetch (%eax)
-; PRFCHW3DNOW-NEXT:    prefetch (%eax)
-; PRFCHW3DNOW-NEXT:    prefetchw (%eax)
-; PRFCHW3DNOW-NEXT:    prefetchw (%eax)
-; PRFCHW3DNOW-NEXT:    prefetchw (%eax)
-; PRFCHW3DNOW-NEXT:    prefetchw (%eax)
-; PRFCHW3DNOW-NEXT:    retl
 entry:
 	tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 1, i32 1 )
 	tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 2, i32 1 )


        


More information about the cfe-commits mailing list