[llvm] 2144338 - Reapply [InstCombine] Replace one-use select operand based on condition

Tue Jan 19 11:29:42 PST 2021

Author: Nikita Popov
Date: 2021-01-19T20:26:38+01:00
New Revision: 21443381c00d9d5ddd6a73f2f839dc4872d79463

URL: https://github.com/llvm/llvm-project/commit/21443381c00d9d5ddd6a73f2f839dc4872d79463
DIFF: https://github.com/llvm/llvm-project/commit/21443381c00d9d5ddd6a73f2f839dc4872d79463.diff

LOG: Reapply [InstCombine] Replace one-use select operand based on condition

Relative to the original change, this adds a check that the
instruction on which we're replacing operands is safe to speculatively
execute, because that's what we're effectively doing. We're executing
the instruction with the replaced operand, which is fine if it's pure,
but not fine if can cause side-effects or UB (aka is not speculatable).

Additionally, we cannot (generally) replace operands in phi nodes,
as these may refer to a different loop iteration. This is also covered
by the speculation check.

-----

InstCombine already performs a fold where X == Y ? f(X) : Z is
transformed to X == Y ? f(Y) : Z if f(Y) simplifies. However,
if f(X) only has one use, then we can always directly replace the
use inside the instruction. To actually be profitable, limit it to
the case where Y is a non-expr constant.

This could be further extended to replace uses further up a one-use
instruction chain, but for now this only looks one level up.

Among other things, this also subsumes D94860.

Differential Revision: https://reviews.llvm.org/D94862

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
    llvm/test/Transforms/InstCombine/select-binop-cmp.ll
    llvm/test/Transforms/InstCombine/select-safe-transforms.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 5a43b8b20db9..f26c194d31b9 100644

--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1113,10 +1113,27 @@ Instruction *InstCombinerImpl::foldSelectValueEquivalence(SelectInst &Sel,
   // replacement cycle.
   Value *CmpLHS = Cmp.getOperand(0), *CmpRHS = Cmp.getOperand(1);
   if (TrueVal != CmpLHS &&
-      isGuaranteedNotToBeUndefOrPoison(CmpRHS, SQ.AC, &Sel, &DT))
+      isGuaranteedNotToBeUndefOrPoison(CmpRHS, SQ.AC, &Sel, &DT)) {
     if (Value *V = SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, SQ,
                                           /* AllowRefinement */ true))
       return replaceOperand(Sel, Swapped ? 2 : 1, V);
+
+    // Even if TrueVal does not simplify, we can directly replace a use of
+    // CmpLHS with CmpRHS, as long as the instruction is not used anywhere
+    // else and is safe to speculatively execute (we may end up executing it
+    // with 
diff erent operands, which should not cause side-effects or trigger
+    // undefined behavior). Only do this if CmpRHS is a constant, as
+    // profitability is not clear for other cases.
+    // FIXME: The replacement could be performed recursively.
+    if (match(CmpRHS, m_ImmConstant()) && !match(CmpLHS, m_ImmConstant()))
+      if (auto *I = dyn_cast<Instruction>(TrueVal))
+        if (I->hasOneUse() && isSafeToSpeculativelyExecute(I))
+          for (Use &U : I->operands())
+            if (U == CmpLHS) {
+              replaceUse(U, CmpRHS);
+              return &Sel;
+            }
+  }
   if (TrueVal != CmpRHS &&
       isGuaranteedNotToBeUndefOrPoison(CmpLHS, SQ.AC, &Sel, &DT))
     if (Value *V = SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, SQ,

diff  --git a/llvm/test/Transforms/InstCombine/select-binop-cmp.ll b/llvm/test/Transforms/InstCombine/select-binop-cmp.ll
index 99670c4cc1d3..bbf7456ae811 100644
--- a/llvm/test/Transforms/InstCombine/select-binop-cmp.ll
+++ b/llvm/test/Transforms/InstCombine/select-binop-cmp.ll
@@ -502,7 +502,7 @@ define i32 @select_xor_icmp_bad_2(i32 %x, i32 %y, i32 %z, i32 %k) {
 define i32 @select_xor_icmp_bad_3(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: @select_xor_icmp_bad_3(
 ; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], 3
-; CHECK-NEXT:    [[B:%.*]] = xor i32 [[X]], [[Z:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = xor i32 [[Z:%.*]], 3
 ; CHECK-NEXT:    [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
 ; CHECK-NEXT:    ret i32 [[C]]
 ;
@@ -541,7 +541,7 @@ define i32 @select_xor_icmp_bad_5(i32 %x, i32 %y, i32 %z) {
 define i32 @select_xor_icmp_bad_6(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: @select_xor_icmp_bad_6(
 ; CHECK-NEXT:    [[A_NOT:%.*]] = icmp eq i32 [[X:%.*]], 1
-; CHECK-NEXT:    [[B:%.*]] = xor i32 [[X]], [[Z:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = xor i32 [[Z:%.*]], 1
 ; CHECK-NEXT:    [[C:%.*]] = select i1 [[A_NOT]], i32 [[B]], i32 [[Y:%.*]]
 ; CHECK-NEXT:    ret i32 [[C]]
 ;
@@ -554,7 +554,7 @@ define i32 @select_xor_icmp_bad_6(i32 %x, i32 %y, i32 %z) {
 define <2 x i8> @select_xor_icmp_vec_bad(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) {
 ; CHECK-LABEL: @select_xor_icmp_vec_bad(
 ; CHECK-NEXT:    [[A:%.*]] = icmp eq <2 x i8> [[X:%.*]], <i8 5, i8 3>
-; CHECK-NEXT:    [[B:%.*]] = xor <2 x i8> [[X]], [[Z:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = xor <2 x i8> [[Z:%.*]], <i8 5, i8 3>
 ; CHECK-NEXT:    [[C:%.*]] = select <2 x i1> [[A]], <2 x i8> [[B]], <2 x i8> [[Y:%.*]]
 ; CHECK-NEXT:    ret <2 x i8> [[C]]
 ;
@@ -581,7 +581,7 @@ define <2 x i8> @select_xor_icmp_vec_undef(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z
 define i32 @select_mul_icmp_bad(i32 %x, i32 %y, i32 %z, i32 %k) {
 ; CHECK-LABEL: @select_mul_icmp_bad(
 ; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], 3
-; CHECK-NEXT:    [[B:%.*]] = mul i32 [[X]], [[Z:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = mul i32 [[Z:%.*]], 3
 ; CHECK-NEXT:    [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
 ; CHECK-NEXT:    ret i32 [[C]]
 ;
@@ -594,7 +594,7 @@ define i32 @select_mul_icmp_bad(i32 %x, i32 %y, i32 %z, i32 %k) {
 define i32 @select_add_icmp_bad(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: @select_add_icmp_bad(
 ; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], 1
-; CHECK-NEXT:    [[B:%.*]] = add i32 [[X]], [[Z:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = add i32 [[Z:%.*]], 1
 ; CHECK-NEXT:    [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
 ; CHECK-NEXT:    ret i32 [[C]]
 ;
@@ -619,7 +619,7 @@ define i32 @select_and_icmp_zero(i32 %x, i32 %y, i32 %z) {
 define i32 @select_or_icmp_bad(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: @select_or_icmp_bad(
 ; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], 3
-; CHECK-NEXT:    [[B:%.*]] = or i32 [[X]], [[Z:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = or i32 [[Z:%.*]], 3
 ; CHECK-NEXT:    [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
 ; CHECK-NEXT:    ret i32 [[C]]
 ;
@@ -940,7 +940,7 @@ define float @select_fsub_fcmp_bad_2(float %x, float %y, float %z) {
 define i32 @select_sub_icmp_bad(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: @select_sub_icmp_bad(
 ; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], 0
-; CHECK-NEXT:    [[B:%.*]] = sub i32 [[X]], [[Z:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = sub i32 0, [[Z:%.*]]
 ; CHECK-NEXT:    [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
 ; CHECK-NEXT:    ret i32 [[C]]
 ;
@@ -953,7 +953,7 @@ define i32 @select_sub_icmp_bad(i32 %x, i32 %y, i32 %z) {
 define i32 @select_sub_icmp_bad_2(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: @select_sub_icmp_bad_2(
 ; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], 1
-; CHECK-NEXT:    [[B:%.*]] = sub i32 [[Z:%.*]], [[X]]
+; CHECK-NEXT:    [[B:%.*]] = add i32 [[Z:%.*]], -1
 ; CHECK-NEXT:    [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
 ; CHECK-NEXT:    ret i32 [[C]]
 ;
@@ -1022,7 +1022,7 @@ define i32 @select_sub_icmp_bad_5(i32 %x, i32 %y, i32 %z, i32 %k) {
 define i32 @select_shl_icmp_bad(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: @select_shl_icmp_bad(
 ; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], 1
-; CHECK-NEXT:    [[B:%.*]] = shl i32 [[Z:%.*]], [[X]]
+; CHECK-NEXT:    [[B:%.*]] = shl i32 [[Z:%.*]], 1
 ; CHECK-NEXT:    [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
 ; CHECK-NEXT:    ret i32 [[C]]
 ;
@@ -1035,7 +1035,7 @@ define i32 @select_shl_icmp_bad(i32 %x, i32 %y, i32 %z) {
 define i32 @select_lshr_icmp_bad(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: @select_lshr_icmp_bad(
 ; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], 1
-; CHECK-NEXT:    [[B:%.*]] = lshr i32 [[Z:%.*]], [[X]]
+; CHECK-NEXT:    [[B:%.*]] = lshr i32 [[Z:%.*]], 1
 ; CHECK-NEXT:    [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
 ; CHECK-NEXT:    ret i32 [[C]]
 ;
@@ -1048,7 +1048,7 @@ define i32 @select_lshr_icmp_bad(i32 %x, i32 %y, i32 %z) {
 define i32 @select_ashr_icmp_bad(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: @select_ashr_icmp_bad(
 ; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], 1
-; CHECK-NEXT:    [[B:%.*]] = ashr i32 [[Z:%.*]], [[X]]
+; CHECK-NEXT:    [[B:%.*]] = ashr i32 [[Z:%.*]], 1
 ; CHECK-NEXT:    [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
 ; CHECK-NEXT:    ret i32 [[C]]
 ;
@@ -1084,10 +1084,11 @@ define i32 @select_sdiv_icmp_bad(i32 %x, i32 %y, i32 %z) {
   ret i32 %C
 }
 
+; Can replace %x with 0, because sub is only used in the select.
 define i32 @select_replace_one_use(i32 %x, i32 %y) {
 ; CHECK-LABEL: @select_replace_one_use(
 ; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[X:%.*]], 0
-; CHECK-NEXT:    [[SUB:%.*]] = sub i32 [[X]], [[Y:%.*]]
+; CHECK-NEXT:    [[SUB:%.*]] = sub i32 0, [[Y:%.*]]
 ; CHECK-NEXT:    [[S:%.*]] = select i1 [[C]], i32 [[SUB]], i32 [[Y]]
 ; CHECK-NEXT:    ret i32 [[S]]
 ;
@@ -1097,6 +1098,7 @@ define i32 @select_replace_one_use(i32 %x, i32 %y) {
   ret i32 %s
 }
 
+; Can not replace %x with 0, because %sub has other uses as well.
 define i32 @select_replace_multi_use(i32 %x, i32 %y) {
 ; CHECK-LABEL: @select_replace_multi_use(
 ; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[X:%.*]], 0
@@ -1112,11 +1114,11 @@ define i32 @select_replace_multi_use(i32 %x, i32 %y) {
   ret i32 %s
 }
 
+; Case where the replacement allows the instruction to fold away.
 define i32 @select_replace_fold(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: @select_replace_fold(
 ; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[X:%.*]], 0
-; CHECK-NEXT:    [[FSHR:%.*]] = call i32 @llvm.fshr.i32(i32 [[Y:%.*]], i32 [[Z:%.*]], i32 [[X]])
-; CHECK-NEXT:    [[S:%.*]] = select i1 [[C]], i32 [[FSHR]], i32 [[Y]]
+; CHECK-NEXT:    [[S:%.*]] = select i1 [[C]], i32 [[Z:%.*]], i32 [[Y:%.*]]
 ; CHECK-NEXT:    ret i32 [[S]]
 ;
   %c = icmp eq i32 %x, 0
@@ -1125,6 +1127,9 @@ define i32 @select_replace_fold(i32 %x, i32 %y, i32 %z) {
   ret i32 %s
 }
 
+
+; Case where the use of %x is in a nested instruction.
+; FIXME: We only perform replacements one level up right now.
 define i32 @select_replace_nested(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: @select_replace_nested(
 ; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[X:%.*]], 0
@@ -1140,6 +1145,9 @@ define i32 @select_replace_nested(i32 %x, i32 %y, i32 %z) {
   ret i32 %s
 }
 
+; Do not replace with constant expressions. The profitability in this case is
+; unclear, and such replacements have historically lead to infinite combine
+; loops.
 define i32 @select_replace_constexpr(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: @select_replace_constexpr(
 ; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[X:%.*]], ptrtoint (i32* @g to i32)
@@ -1153,6 +1161,8 @@ define i32 @select_replace_constexpr(i32 %x, i32 %y, i32 %z) {
   ret i32 %s
 }
 
+; Don't replace with a potentially undef constant, as undef could evaluate
+; to 
diff erent values for both uses.
 define <2 x i32> @select_replace_undef(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @select_replace_undef(
 ; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i32> [[X:%.*]], <i32 0, i32 undef>
@@ -1166,10 +1176,11 @@ define <2 x i32> @select_replace_undef(<2 x i32> %x, <2 x i32> %y) {
   ret <2 x i32> %s
 }
 
+; We can replace the call arguments, as the call is speculatable.
 define i32 @select_replace_call_speculatable(i32 %x, i32 %y) {
 ; CHECK-LABEL: @select_replace_call_speculatable(
 ; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[X:%.*]], 0
-; CHECK-NEXT:    [[CALL:%.*]] = call i32 @call_speculatable(i32 [[X]], i32 [[X]])
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 @call_speculatable(i32 0, i32 0)
 ; CHECK-NEXT:    [[S:%.*]] = select i1 [[C]], i32 [[CALL]], i32 [[Y:%.*]]
 ; CHECK-NEXT:    ret i32 [[S]]
 ;
@@ -1179,6 +1190,8 @@ define i32 @select_replace_call_speculatable(i32 %x, i32 %y) {
   ret i32 %s
 }
 
+; We can't replace the call arguments, as the call is not speculatable. We
+; may end up changing side-effects or causing undefined behavior.
 define i32 @select_replace_call_non_speculatable(i32 %x, i32 %y) {
 ; CHECK-LABEL: @select_replace_call_non_speculatable(
 ; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[X:%.*]], 0
@@ -1192,6 +1205,8 @@ define i32 @select_replace_call_non_speculatable(i32 %x, i32 %y) {
   ret i32 %s
 }
 
+; We can replace %x by 2 here, because division by two cannot cause UB.
+; FIXME: As we check speculation prior to replacement, we don't catch this.
 define i32 @select_replace_sdiv_speculatable(i32 %x, i32 %y) {
 ; CHECK-LABEL: @select_replace_sdiv_speculatable(
 ; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[X:%.*]], 2
@@ -1205,6 +1220,7 @@ define i32 @select_replace_sdiv_speculatable(i32 %x, i32 %y) {
   ret i32 %s
 }
 
+; We cannot replace %x by -1, because division by -1 can cause UB.
 define i32 @select_replace_sdiv_non_speculatable(i32 %x, i32 %y) {
 ; CHECK-LABEL: @select_replace_sdiv_non_speculatable(
 ; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[X:%.*]], -1
@@ -1218,6 +1234,8 @@ define i32 @select_replace_sdiv_non_speculatable(i32 %x, i32 %y) {
   ret i32 %s
 }
 
+; We can replace %x by 2 here, because division by two cannot cause UB.
+; FIXME: As we check speculation prior to replacement, we don't catch this.
 define i32 @select_replace_udiv_speculatable(i32 %x, i32 %y) {
 ; CHECK-LABEL: @select_replace_udiv_speculatable(
 ; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[X:%.*]], 2
@@ -1231,6 +1249,8 @@ define i32 @select_replace_udiv_speculatable(i32 %x, i32 %y) {
   ret i32 %s
 }
 
+; We can't replace %x by 0 here, because that would cause UB. However,
+; replacing the udiv result by poisong is fine.
 define i32 @select_replace_udiv_non_speculatable(i32 %x, i32 %y) {
 ; CHECK-LABEL: @select_replace_udiv_non_speculatable(
 ; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[X:%.*]], 0
@@ -1243,6 +1263,8 @@ define i32 @select_replace_udiv_non_speculatable(i32 %x, i32 %y) {
   ret i32 %s
 }
 
+; We can't replace %i in the phi node here, because it refers to %i from
+; the previous loop iteration, not the current one.
 define void @select_replace_phi(i32 %x) {
 ; CHECK-LABEL: @select_replace_phi(
 ; CHECK-NEXT:  entry:

diff  --git a/llvm/test/Transforms/InstCombine/select-safe-transforms.ll b/llvm/test/Transforms/InstCombine/select-safe-transforms.ll
index 4bdf516d73f0..35f100302d47 100644
--- a/llvm/test/Transforms/InstCombine/select-safe-transforms.ll
+++ b/llvm/test/Transforms/InstCombine/select-safe-transforms.ll
@@ -17,7 +17,7 @@ define i1 @cond_eq_and(i8 %X, i8 %Y, i8 noundef %C) {
 define i1 @cond_eq_and_const(i8 %X, i8 %Y) {
 ; CHECK-LABEL: @cond_eq_and_const(
 ; CHECK-NEXT:    [[COND:%.*]] = icmp eq i8 [[X:%.*]], 10
-; CHECK-NEXT:    [[LHS:%.*]] = icmp ult i8 [[X]], [[Y:%.*]]
+; CHECK-NEXT:    [[LHS:%.*]] = icmp ugt i8 [[Y:%.*]], 10
 ; CHECK-NEXT:    [[RES:%.*]] = select i1 [[COND]], i1 [[LHS]], i1 false
 ; CHECK-NEXT:    ret i1 [[RES]]
 ;
@@ -43,7 +43,7 @@ define i1 @cond_eq_or(i8 %X, i8 %Y, i8 noundef %C) {
 define i1 @cond_eq_or_const(i8 %X, i8 %Y) {
 ; CHECK-LABEL: @cond_eq_or_const(
 ; CHECK-NEXT:    [[COND:%.*]] = icmp ne i8 [[X:%.*]], 10
-; CHECK-NEXT:    [[LHS:%.*]] = icmp ult i8 [[X]], [[Y:%.*]]
+; CHECK-NEXT:    [[LHS:%.*]] = icmp ugt i8 [[Y:%.*]], 10
 ; CHECK-NEXT:    [[RES:%.*]] = select i1 [[COND]], i1 true, i1 [[LHS]]
 ; CHECK-NEXT:    ret i1 [[RES]]
 ;