[llvm] 3b090ff - [InstCombine] use demanded vector elements to eliminate partially redundant instructions

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 28 06:45:14 PST 2023


Author: Sanjay Patel
Date: 2023-02-28T09:43:44-05:00
New Revision: 3b090ff2bdb2828b53d6fec9c06b9d52fed09f42

URL: https://github.com/llvm/llvm-project/commit/3b090ff2bdb2828b53d6fec9c06b9d52fed09f42
DIFF: https://github.com/llvm/llvm-project/commit/3b090ff2bdb2828b53d6fec9c06b9d52fed09f42.diff

LOG: [InstCombine] use demanded vector elements to eliminate partially redundant instructions

In issue #60632, we have vector math ops that differ because an
operand is shuffled, but the math has limited demanded elements,
so it can be replaced by another instruction:
https://alive2.llvm.org/ce/z/TKqq7H

I don't think we have anything like this yet - it's like a
CSE/GVN fold, but driven by demanded elements of a vector op.
This is limited to splat-0 as a first step to keep it simple.

Differential Revision: https://reviews.llvm.org/D144760

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
    llvm/test/Transforms/InstCombine/vec_demanded_elts.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 2dbe83264210c..7195edc5d282a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -1713,6 +1713,54 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
   // UB/poison potential, but that should be refined.
   BinaryOperator *BO;
   if (match(I, m_BinOp(BO)) && !BO->isIntDivRem() && !BO->isShift()) {
+    Value *X = BO->getOperand(0);
+    Value *Y = BO->getOperand(1);
+
+    // Look for an equivalent binop except that one operand has been shuffled.
+    // If the demand for this binop only includes elements that are the same as
+    // the other binop, then we may be able to replace this binop with a use of
+    // the earlier one.
+    //
+    // Example:
+    // %other_bo = bo (shuf X, {0}), Y
+    // %this_extracted_bo = extelt (bo X, Y), 0
+    // -->
+    // %other_bo = bo (shuf X, {0}), Y
+    // %this_extracted_bo = extelt %other_bo, 0
+    //
+    // TODO: Handle demand of an arbitrary single element or more than one
+    //       element instead of just element 0.
+    // TODO: Unlike general demanded elements transforms, this should be safe
+    //       for any (div/rem/shift) opcode too.
+    if (DemandedElts == 1 && !X->hasOneUse() && !Y->hasOneUse() &&
+        BO->hasOneUse() ) {
+
+      auto findShufBO = [&](bool MatchShufAsOp0) -> User * {
+        // Try to use shuffle-of-operand in place of an operand:
+        // bo X, Y --> bo (shuf X), Y
+        // bo X, Y --> bo X, (shuf Y)
+        BinaryOperator::BinaryOps Opcode = BO->getOpcode();
+        Value *ShufOp = MatchShufAsOp0 ? X : Y;
+        Value *OtherOp = MatchShufAsOp0 ? Y : X;
+        for (User *U : OtherOp->users()) {
+          auto Shuf = m_Shuffle(m_Specific(ShufOp), m_Value(), m_ZeroMask());
+          if (BO->isCommutative()
+                  ? match(U, m_c_BinOp(Opcode, Shuf, m_Specific(OtherOp)))
+                  : MatchShufAsOp0
+                        ? match(U, m_BinOp(Opcode, Shuf, m_Specific(OtherOp)))
+                        : match(U, m_BinOp(Opcode, m_Specific(OtherOp), Shuf)))
+            if (DT.dominates(U, I))
+              return U;
+        }
+        return nullptr;
+      };
+
+      if (User *ShufBO = findShufBO(/* MatchShufAsOp0 */ true))
+        return ShufBO;
+      if (User *ShufBO = findShufBO(/* MatchShufAsOp0 */ false))
+        return ShufBO;
+    }
+
     simplifyAndSetOp(I, 0, DemandedElts, UndefElts);
     simplifyAndSetOp(I, 1, DemandedElts, UndefElts2);
 

diff --git a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll
index 0f33eb58f080c..ee789612f8b5e 100644
--- a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll
+++ b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll
@@ -850,8 +850,7 @@ define void @common_binop_demand_via_splat_op0(<2 x i4> %x, <2 x i4> %y) {
 ; CHECK-LABEL: @common_binop_demand_via_splat_op0(
 ; CHECK-NEXT:    [[XSHUF:%.*]] = shufflevector <2 x i4> [[X:%.*]], <2 x i4> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[B_XSHUF_Y:%.*]] = mul <2 x i4> [[XSHUF]], [[Y:%.*]]
-; CHECK-NEXT:    [[B_XY:%.*]] = mul <2 x i4> [[X]], [[Y]]
-; CHECK-NEXT:    [[B_XY_SPLAT:%.*]] = shufflevector <2 x i4> [[B_XY]], <2 x i4> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[B_XY_SPLAT:%.*]] = shufflevector <2 x i4> [[B_XSHUF_Y]], <2 x i4> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    call void @use(<2 x i4> [[B_XSHUF_Y]])
 ; CHECK-NEXT:    call void @use(<2 x i4> [[B_XY_SPLAT]])
 ; CHECK-NEXT:    ret void
@@ -870,8 +869,7 @@ define void @common_binop_demand_via_splat_op1(<2 x i4> %p, <2 x i4> %y) {
 ; CHECK-NEXT:    [[X:%.*]] = sub <2 x i4> <i4 0, i4 1>, [[P:%.*]]
 ; CHECK-NEXT:    [[YSHUF:%.*]] = shufflevector <2 x i4> [[Y:%.*]], <2 x i4> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[B_X_YSHUF:%.*]] = mul <2 x i4> [[X]], [[YSHUF]]
-; CHECK-NEXT:    [[B_XY:%.*]] = mul <2 x i4> [[X]], [[Y]]
-; CHECK-NEXT:    [[B_XY_SPLAT:%.*]] = shufflevector <2 x i4> [[B_XY]], <2 x i4> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[B_XY_SPLAT:%.*]] = shufflevector <2 x i4> [[B_X_YSHUF]], <2 x i4> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    call void @use(<2 x i4> [[B_XY_SPLAT]])
 ; CHECK-NEXT:    call void @use(<2 x i4> [[B_X_YSHUF]])
 ; CHECK-NEXT:    ret void
@@ -888,12 +886,11 @@ define void @common_binop_demand_via_splat_op1(<2 x i4> %p, <2 x i4> %y) {
 
 define void @common_binop_demand_via_splat_op0_commute(<2 x i4> %p, <2 x i4> %q) {
 ; CHECK-LABEL: @common_binop_demand_via_splat_op0_commute(
-; CHECK-NEXT:    [[X:%.*]] = sub <2 x i4> <i4 0, i4 1>, [[P:%.*]]
+; CHECK-NEXT:    [[X:%.*]] = sub <2 x i4> <i4 0, i4 poison>, [[P:%.*]]
 ; CHECK-NEXT:    [[Y:%.*]] = sub <2 x i4> <i4 1, i4 2>, [[Q:%.*]]
 ; CHECK-NEXT:    [[XSHUF:%.*]] = shufflevector <2 x i4> [[X]], <2 x i4> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[B_Y_XSHUF:%.*]] = mul <2 x i4> [[Y]], [[XSHUF]]
-; CHECK-NEXT:    [[B_XY:%.*]] = mul <2 x i4> [[X]], [[Y]]
-; CHECK-NEXT:    [[B_XY_SPLAT:%.*]] = shufflevector <2 x i4> [[B_XY]], <2 x i4> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[B_XY_SPLAT:%.*]] = shufflevector <2 x i4> [[B_Y_XSHUF]], <2 x i4> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    call void @use(<2 x i4> [[B_XY_SPLAT]])
 ; CHECK-NEXT:    call void @use(<2 x i4> [[B_Y_XSHUF]])
 ; CHECK-NEXT:    ret void
@@ -912,11 +909,10 @@ define void @common_binop_demand_via_splat_op0_commute(<2 x i4> %p, <2 x i4> %q)
 define void @common_binop_demand_via_splat_op1_commute(<2 x i4> %p, <2 x i4> %q) {
 ; CHECK-LABEL: @common_binop_demand_via_splat_op1_commute(
 ; CHECK-NEXT:    [[X:%.*]] = sub <2 x i4> <i4 0, i4 1>, [[P:%.*]]
-; CHECK-NEXT:    [[Y:%.*]] = sub <2 x i4> <i4 2, i4 3>, [[Q:%.*]]
+; CHECK-NEXT:    [[Y:%.*]] = sub <2 x i4> <i4 2, i4 poison>, [[Q:%.*]]
 ; CHECK-NEXT:    [[YSHUF:%.*]] = shufflevector <2 x i4> [[Y]], <2 x i4> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[B_Y_XSHUF:%.*]] = mul <2 x i4> [[YSHUF]], [[X]]
-; CHECK-NEXT:    [[B_XY:%.*]] = mul <2 x i4> [[X]], [[Y]]
-; CHECK-NEXT:    [[B_XY_SPLAT:%.*]] = shufflevector <2 x i4> [[B_XY]], <2 x i4> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[B_XY_SPLAT:%.*]] = shufflevector <2 x i4> [[B_Y_XSHUF]], <2 x i4> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    call void @use(<2 x i4> [[B_XY_SPLAT]])
 ; CHECK-NEXT:    call void @use(<2 x i4> [[B_Y_XSHUF]])
 ; CHECK-NEXT:    ret void
@@ -932,6 +928,8 @@ define void @common_binop_demand_via_splat_op1_commute(<2 x i4> %p, <2 x i4> %q)
   ret void
 }
 
+; negative test - wrong operands for sub
+
 define void @common_binop_demand_via_splat_op0_wrong_commute(<2 x i4> %x, <2 x i4> %y) {
 ; CHECK-LABEL: @common_binop_demand_via_splat_op0_wrong_commute(
 ; CHECK-NEXT:    [[XSHUF:%.*]] = shufflevector <2 x i4> [[X:%.*]], <2 x i4> poison, <2 x i32> zeroinitializer
@@ -951,6 +949,8 @@ define void @common_binop_demand_via_splat_op0_wrong_commute(<2 x i4> %x, <2 x i
   ret void
 }
 
+; negative test - need to reorder insts?
+
 define void @common_binop_demand_via_splat_op0_not_dominated1(<2 x i4> %x, <2 x i4> %y) {
 ; CHECK-LABEL: @common_binop_demand_via_splat_op0_not_dominated1(
 ; CHECK-NEXT:    [[B_XY:%.*]] = mul <2 x i4> [[X:%.*]], [[Y:%.*]]
@@ -970,6 +970,8 @@ define void @common_binop_demand_via_splat_op0_not_dominated1(<2 x i4> %x, <2 x
   ret void
 }
 
+; negative test - need to reorder insts?
+
 define void @common_binop_demand_via_splat_op0_not_dominated2(<2 x i4> %x, <2 x i4> %y) {
 ; CHECK-LABEL: @common_binop_demand_via_splat_op0_not_dominated2(
 ; CHECK-NEXT:    [[B_XY:%.*]] = mul <2 x i4> [[X:%.*]], [[Y:%.*]]
@@ -993,8 +995,7 @@ define i4 @common_binop_demand_via_extelt_op0(<2 x i4> %x, <2 x i4> %y) {
 ; CHECK-LABEL: @common_binop_demand_via_extelt_op0(
 ; CHECK-NEXT:    [[XSHUF:%.*]] = shufflevector <2 x i4> [[X:%.*]], <2 x i4> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[B_XSHUF_Y:%.*]] = sub <2 x i4> [[XSHUF]], [[Y:%.*]]
-; CHECK-NEXT:    [[B_XY:%.*]] = sub nsw <2 x i4> [[X]], [[Y]]
-; CHECK-NEXT:    [[B_XY0:%.*]] = extractelement <2 x i4> [[B_XY]], i64 0
+; CHECK-NEXT:    [[B_XY0:%.*]] = extractelement <2 x i4> [[B_XSHUF_Y]], i64 0
 ; CHECK-NEXT:    call void @use(<2 x i4> [[B_XSHUF_Y]])
 ; CHECK-NEXT:    ret i4 [[B_XY0]]
 ;
@@ -1011,8 +1012,7 @@ define float @common_binop_demand_via_extelt_op1(<2 x float> %p, <2 x float> %y)
 ; CHECK-NEXT:    [[X:%.*]] = fsub <2 x float> <float 0.000000e+00, float 1.000000e+00>, [[P:%.*]]
 ; CHECK-NEXT:    [[YSHUF:%.*]] = shufflevector <2 x float> [[Y:%.*]], <2 x float> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[B_X_YSHUF:%.*]] = fdiv <2 x float> [[X]], [[YSHUF]]
-; CHECK-NEXT:    [[B_XY:%.*]] = fdiv <2 x float> [[X]], [[Y]]
-; CHECK-NEXT:    [[B_XY0:%.*]] = extractelement <2 x float> [[B_XY]], i64 0
+; CHECK-NEXT:    [[B_XY0:%.*]] = extractelement <2 x float> [[B_X_YSHUF]], i64 0
 ; CHECK-NEXT:    call void @use_fp(<2 x float> [[B_X_YSHUF]])
 ; CHECK-NEXT:    ret float [[B_XY0]]
 ;
@@ -1027,12 +1027,11 @@ define float @common_binop_demand_via_extelt_op1(<2 x float> %p, <2 x float> %y)
 
 define float @common_binop_demand_via_extelt_op0_commute(<2 x float> %p, <2 x float> %q) {
 ; CHECK-LABEL: @common_binop_demand_via_extelt_op0_commute(
-; CHECK-NEXT:    [[X:%.*]] = fsub <2 x float> <float 0.000000e+00, float 1.000000e+00>, [[P:%.*]]
+; CHECK-NEXT:    [[X:%.*]] = fsub <2 x float> <float 0.000000e+00, float poison>, [[P:%.*]]
 ; CHECK-NEXT:    [[Y:%.*]] = fsub <2 x float> <float 3.000000e+00, float 2.000000e+00>, [[Q:%.*]]
 ; CHECK-NEXT:    [[XSHUF:%.*]] = shufflevector <2 x float> [[X]], <2 x float> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[B_Y_XSHUF:%.*]] = fmul nnan <2 x float> [[Y]], [[XSHUF]]
-; CHECK-NEXT:    [[B_XY:%.*]] = fmul ninf <2 x float> [[X]], [[Y]]
-; CHECK-NEXT:    [[B_XY0:%.*]] = extractelement <2 x float> [[B_XY]], i64 0
+; CHECK-NEXT:    [[B_XY0:%.*]] = extractelement <2 x float> [[B_Y_XSHUF]], i64 0
 ; CHECK-NEXT:    call void @use_fp(<2 x float> [[B_Y_XSHUF]])
 ; CHECK-NEXT:    ret float [[B_XY0]]
 ;
@@ -1049,11 +1048,10 @@ define float @common_binop_demand_via_extelt_op0_commute(<2 x float> %p, <2 x fl
 define i4 @common_binop_demand_via_extelt_op1_commute(<2 x i4> %p, <2 x i4> %q) {
 ; CHECK-LABEL: @common_binop_demand_via_extelt_op1_commute(
 ; CHECK-NEXT:    [[X:%.*]] = sub <2 x i4> <i4 0, i4 1>, [[P:%.*]]
-; CHECK-NEXT:    [[Y:%.*]] = sub <2 x i4> <i4 2, i4 3>, [[Q:%.*]]
+; CHECK-NEXT:    [[Y:%.*]] = sub <2 x i4> <i4 2, i4 poison>, [[Q:%.*]]
 ; CHECK-NEXT:    [[YSHUF:%.*]] = shufflevector <2 x i4> [[Y]], <2 x i4> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[B_Y_XSHUF:%.*]] = or <2 x i4> [[YSHUF]], [[X]]
-; CHECK-NEXT:    [[B_XY:%.*]] = or <2 x i4> [[X]], [[Y]]
-; CHECK-NEXT:    [[B_XY0:%.*]] = extractelement <2 x i4> [[B_XY]], i64 0
+; CHECK-NEXT:    [[B_XY0:%.*]] = extractelement <2 x i4> [[B_Y_XSHUF]], i64 0
 ; CHECK-NEXT:    call void @use(<2 x i4> [[B_Y_XSHUF]])
 ; CHECK-NEXT:    ret i4 [[B_XY0]]
 ;
@@ -1067,6 +1065,8 @@ define i4 @common_binop_demand_via_extelt_op1_commute(<2 x i4> %p, <2 x i4> %q)
   ret i4 %b_xy0
 }
 
+; negative test - wrong operands for sub
+
 define i4 @common_binop_demand_via_extelt_op0_wrong_commute(<2 x i4> %x, <2 x i4> %y) {
 ; CHECK-LABEL: @common_binop_demand_via_extelt_op0_wrong_commute(
 ; CHECK-NEXT:    [[XSHUF:%.*]] = shufflevector <2 x i4> [[X:%.*]], <2 x i4> poison, <2 x i32> zeroinitializer
@@ -1084,6 +1084,8 @@ define i4 @common_binop_demand_via_extelt_op0_wrong_commute(<2 x i4> %x, <2 x i4
   ret i4 %b_xy0
 }
 
+; negative test - need to reorder insts?
+
 define i4 @common_binop_demand_via_extelt_op0_not_dominated1(<2 x i4> %x, <2 x i4> %y) {
 ; CHECK-LABEL: @common_binop_demand_via_extelt_op0_not_dominated1(
 ; CHECK-NEXT:    [[B_XY:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]]
@@ -1101,6 +1103,8 @@ define i4 @common_binop_demand_via_extelt_op0_not_dominated1(<2 x i4> %x, <2 x i
   ret i4 %b_xy0
 }
 
+; negative test - need to reorder insts?
+
 define i4 @common_binop_demand_via_extelt_op0_not_dominated2(<2 x i4> %x, <2 x i4> %y) {
 ; CHECK-LABEL: @common_binop_demand_via_extelt_op0_not_dominated2(
 ; CHECK-NEXT:    [[B_XY:%.*]] = mul <2 x i4> [[X:%.*]], [[Y:%.*]]
@@ -1118,6 +1122,8 @@ define i4 @common_binop_demand_via_extelt_op0_not_dominated2(<2 x i4> %x, <2 x i
   ret i4 %b_xy0
 }
 
+; negative test - splat doesn't match demanded element
+
 define i4 @common_binop_demand_via_extelt_op0_mismatch_elt0(<2 x i4> %x, <2 x i4> %y) {
 ; CHECK-LABEL: @common_binop_demand_via_extelt_op0_mismatch_elt0(
 ; CHECK-NEXT:    [[XSHUF:%.*]] = shufflevector <2 x i4> [[X:%.*]], <2 x i4> poison, <2 x i32> <i32 1, i32 1>
@@ -1135,6 +1141,8 @@ define i4 @common_binop_demand_via_extelt_op0_mismatch_elt0(<2 x i4> %x, <2 x i4
   ret i4 %b_xy0
 }
 
+; negative test - splat doesn't match demanded element
+
 define i4 @common_binop_demand_via_extelt_op0_mismatch_elt1(<2 x i4> %x, <2 x i4> %y) {
 ; CHECK-LABEL: @common_binop_demand_via_extelt_op0_mismatch_elt1(
 ; CHECK-NEXT:    [[XSHUF:%.*]] = shufflevector <2 x i4> [[X:%.*]], <2 x i4> poison, <2 x i32> zeroinitializer


        


More information about the llvm-commits mailing list