[llvm] dcc5ff3 - [PowerPC] Use PredictableSelectIsExpensive to enable select to branch in CGP

Kang Zhang via llvm-commits llvm-commits at lists.llvm.org
Mon May 11 08:02:51 PDT 2020


Author: Kang Zhang
Date: 2020-05-11T15:02:09Z
New Revision: dcc5ff3bc2e85a07ff2192223c652fa2cdb164ef

URL: https://github.com/llvm/llvm-project/commit/dcc5ff3bc2e85a07ff2192223c652fa2cdb164ef
DIFF: https://github.com/llvm/llvm-project/commit/dcc5ff3bc2e85a07ff2192223c652fa2cdb164ef.diff

LOG: [PowerPC] Use PredictableSelectIsExpensive to enable select to branch in CGP

Summary:
This patch sets the variable PredictableSelectIsExpensive so that
CodeGenPrepare converts selects to branches based on BranchProbability.

When the BranchProbability is more than MinPercentageForPredictableBranch,
PPC will convert SELECT to branch.

Reviewed By: nemanjai

Differential Revision: https://reviews.llvm.org/D71883

Added: 
    llvm/test/CodeGen/PowerPC/select-to-branch.mir

Modified: 
    llvm/lib/Target/PowerPC/PPC.td
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/lib/Target/PowerPC/PPCSubtarget.cpp
    llvm/lib/Target/PowerPC/PPCSubtarget.h

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td
index aeec73271507..1d1f11e498c2 100644
--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@@ -229,6 +229,12 @@ def FeaturePCRelativeMemops :
                    "Enable PC relative Memory Ops",
                    [FeatureISA3_0]>;
 
+def FeaturePredictableSelectIsExpensive :
+  SubtargetFeature<"predictable-select-expensive",
+                   "PredictableSelectIsExpensive",
+                   "true",
+                   "Prefer likely predicted branches over selects">;
+
 // Since new processors generally contain a superset of features of those that
 // came before them, the idea is to make implementations of new processors
 // less error prone and easier to read.
@@ -281,14 +287,18 @@ def ProcessorFeatures {
     !listconcat(P7InheritableFeatures, P7SpecificFeatures);
 
   // Power8
-  list<SubtargetFeature> P8AdditionalFeatures = [DirectivePwr8,
-                                                 FeatureP8Altivec,
-                                                 FeatureP8Vector,
-                                                 FeatureP8Crypto,
-                                                 FeatureHTM,
-                                                 FeatureDirectMove,
-                                                 FeatureICBT,
-                                                 FeaturePartwordAtomic];
+  list<SubtargetFeature> P8AdditionalFeatures =
+    [DirectivePwr8,
+     FeatureP8Altivec,
+     FeatureP8Vector,
+     FeatureP8Crypto,
+     FeatureHTM,
+     FeatureDirectMove,
+     FeatureICBT,
+     FeaturePartwordAtomic,
+     FeaturePredictableSelectIsExpensive
+    ];
+
   list<SubtargetFeature> P8SpecificFeatures = [FeatureAddiLoadFusion,
                                                FeatureAddisLoadFusion];
   list<SubtargetFeature> P8InheritableFeatures =
@@ -297,10 +307,14 @@ def ProcessorFeatures {
     !listconcat(P8InheritableFeatures, P8SpecificFeatures);
 
   // Power9
-  list<SubtargetFeature> P9AdditionalFeatures = [DirectivePwr9,
-                                                 FeatureP9Altivec,
-                                                 FeatureP9Vector,
-                                                 FeatureISA3_0];
+  list<SubtargetFeature> P9AdditionalFeatures =
+    [DirectivePwr9,
+     FeatureP9Altivec,
+     FeatureP9Vector,
+     FeatureISA3_0,
+     FeaturePredictableSelectIsExpensive
+    ];
+
   // Some features are unique to Power9 and there is no reason to assume
   // they will be part of any future CPUs. One example is the narrower
   // dispatch for vector operations than scalar ones. For the time being,

diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 007cd70543be..041be6fdb067 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1323,6 +1323,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
     MaxLoadsPerMemcmp = 8;
     MaxLoadsPerMemcmpOptSize = 4;
   }
+
+  // Let the subtarget (CPU) decide if a predictable select is more expensive
+  // than the corresponding branch. This information is used in CGP to decide
+  // when to convert selects into branches.
+  PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();
 }
 
 /// getMaxByValAlign - Helper for getByValTypeAlignment to determine

diff  --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
index 7819874b74ee..cfc54df13f79 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -120,6 +120,7 @@ void PPCSubtarget::initializeEnvironment() {
   VectorsUseTwoUnits = false;
   UsePPCPreRASchedStrategy = false;
   UsePPCPostRASchedStrategy = false;
+  PredictableSelectIsExpensive = false;
 
   HasPOPCNTD = POPCNTD_Unavailable;
 }

diff  --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h
index 1ed01339571b..be1143f903e8 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -143,6 +143,7 @@ class PPCSubtarget : public PPCGenSubtargetInfo {
   bool VectorsUseTwoUnits;
   bool UsePPCPreRASchedStrategy;
   bool UsePPCPostRASchedStrategy;
+  bool PredictableSelectIsExpensive;
 
   POPCNTDKind HasPOPCNTD;
 
@@ -394,6 +395,10 @@ class PPCSubtarget : public PPCGenSubtargetInfo {
   }
 
   bool isXRaySupported() const override { return IsPPC64 && IsLittleEndian; }
+
+  bool isPredictableSelectIsExpensive() const {
+    return PredictableSelectIsExpensive;
+  }
 };
 } // End llvm namespace
 

diff  --git a/llvm/test/CodeGen/PowerPC/select-to-branch.mir b/llvm/test/CodeGen/PowerPC/select-to-branch.mir
new file mode 100644
index 000000000000..179b784aa331
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/select-to-branch.mir
@@ -0,0 +1,117 @@
+# RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple powerpc64le-unknown-linux-gnu \
+# RUN:   -run-pass=codegenprepare -o - %s | FileCheck %s
+# RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple powerpc64le-unknown-linux-gnu \
+# RUN:   -run-pass=codegenprepare -o - %s | FileCheck %s
+--- |
+  define i32 @weighted_select1(i32 %a, i32 %b) {
+    %cmp = icmp ne i32 %a, 0
+    %sel = select i1 %cmp, i32 %a, i32 %b, !prof !14
+    ret i32 %sel
+
+  ; If branch_weights > 99% or branch_weights < 1%, the select will be converted
+  ; to branch. Here !14 = 99/100 = 99%, which is not > 99%, so it will do nothing.
+  ; CHECK-LABEL: weighted_select1
+  ; CHECK:         %cmp = icmp ne i32 %a, 0
+  ; CHECK-NEXT:    %sel = select i1 %cmp, i32 %a, i32 %b, !prof !14
+  ; CHECK-NEXT:    ret i32 %sel
+  }
+
+  define i32 @weighted_select2(i32 %a, i32 %b) {
+    %cmp = icmp ne i32 %a, 0
+    %sel = select i1 %cmp, i32 %a, i32 %b, !prof !15
+    ret i32 %sel
+
+  ; If branch_weights > 99% or branch_weights < 1%, the select will be converted
+  ; to branch, here !15 = 100/101, !15 > 99%, so it will convert select to
+  ; branch.
+  ; CHECK-LABEL: weighted_select2
+  ; CHECK:         %sel.frozen = freeze i32 %a
+  ; CHECK:         %cmp = icmp ne i32 %sel.frozen, 0
+  ; CHECK-NEXT:    br i1 %cmp, label %select.end, label %select.false, !prof !15
+  ; CHECK:       select.false:
+  ; CHECK-NEXT:    br label %select.end
+  ; CHECK:       select.end:
+  ; CHECK-NEXT:    %sel = phi i32 [ %a, %0 ], [ %b, %select.false ]
+  ; CHECK-NEXT:    ret i32 %sel
+  }
+
+  define i32 @weighted_select3(i32 %a, i32 %b) {
+    %cmp = icmp ne i32 %a, 0
+    %sel = select i1 %cmp, i32 %a, i32 %b, !prof !16
+    ret i32 %sel
+
+  ; If branch_weights > 99% or branch_weights < 1%, the select will be converted
+  ; to branch, here !16 = 1/101, !16 < 1%, so it will convert select to branch.
+  ; CHECK-LABEL: weighted_select3
+  ; CHECK:         %sel.frozen = freeze i32 %a
+  ; CHECK:         %cmp = icmp ne i32 %sel.frozen, 0
+  ; CHECK-NEXT:    br i1 %cmp, label %select.end, label %select.false, !prof !16
+  ; CHECK:       select.false:
+  ; CHECK-NEXT:    br label %select.end
+  ; CHECK:       select.end:
+  ; CHECK-NEXT:    %sel = phi i32 [ %a, %0 ], [ %b, %select.false ]
+  ; CHECK-NEXT:    ret i32 %sel
+  }
+
+  define i32 @unweighted_select(i32 %a, i32 %b) {
+    %cmp = icmp ne i32 %a, 0
+    %sel = select i1 %cmp, i32 %a, i32 %b, !prof !17
+    ret i32 %sel
+
+  ; The branch_weights in !17 are both 0, so there is no usable weight information and it will do nothing.
+  ; CHECK-LABEL: unweighted_select
+  ; CHECK:         %cmp = icmp ne i32 %a, 0
+  ; CHECK-NEXT:    %sel = select i1 %cmp, i32 %a, i32 %b, !prof !17
+  ; CHECK-NEXT:    ret i32 %sel
+  }
+
+  ; Function Attrs: optsize
+  define i32 @weighted_select_optsize(i32 %a, i32 %b) #0 {
+    %cmp = icmp ne i32 %a, 0
+    %sel = select i1 %cmp, i32 %a, i32 %b, !prof !15
+    ret i32 %sel
+
+  ; This function has the optsize attribute, so it will do nothing.
+  ; CHECK-LABEL: weighted_select_optsize
+  ; CHECK:         %cmp = icmp ne i32 %a, 0
+  ; CHECK-NEXT:    %sel = select i1 %cmp, i32 %a, i32 %b, !prof !15
+  ; CHECK-NEXT:    ret i32 %sel
+  }
+
+  define i32 @weighted_select_pgso(i32 %a, i32 %b) !prof !18 {
+    %cmp = icmp ne i32 %a, 0
+    %sel = select i1 %cmp, i32 %a, i32 %b, !prof !15
+    ret i32 %sel
+
+  ; The function_entry_count of this function is 0, so it will do nothing.
+  ; CHECK-LABEL: weighted_select_pgso
+  ; CHECK:         %cmp = icmp ne i32 %a, 0
+  ; CHECK-NEXT:    %sel = select i1 %cmp, i32 %a, i32 %b, !prof !15
+  ; CHECK-NEXT:    ret i32 %sel
+  }
+  
+  attributes #0 = { optsize }
+  
+  !llvm.module.flags = !{!0}
+  
+  !0 = !{i32 1, !"ProfileSummary", !1}
+  !1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
+  !2 = !{!"ProfileFormat", !"InstrProf"}
+  !3 = !{!"TotalCount", i64 10000}
+  !4 = !{!"MaxCount", i64 10}
+  !5 = !{!"MaxInternalCount", i64 1}
+  !6 = !{!"MaxFunctionCount", i64 1000}
+  !7 = !{!"NumCounts", i64 3}
+  !8 = !{!"NumFunctions", i64 3}
+  !9 = !{!"DetailedSummary", !10}
+  !10 = !{!11, !12, !13}
+  !11 = !{i32 10000, i64 100, i32 1}
+  !12 = !{i32 999000, i64 100, i32 1}
+  !13 = !{i32 999999, i64 1, i32 2}
+  !14 = !{!"branch_weights", i32 1, i32 99}
+  !15 = !{!"branch_weights", i32 1, i32 100}
+  !16 = !{!"branch_weights", i32 100, i32 1}
+  !17 = !{!"branch_weights", i32 0, i32 0}
+  !18 = !{!"function_entry_count", i64 0}
+
+...


        


More information about the llvm-commits mailing list