[llvm] f5d8952 - [InstCombine] Transform X == 0 ? 0 : X * Y --> X * freeze(Y)

Wed Sep 15 06:05:25 PDT 2021

Author: Filipp Zhinkin
Date: 2021-09-15T09:04:06-04:00
New Revision: f5d89523567b08420ff3fa48a6fc50dbf530afa8

URL: https://github.com/llvm/llvm-project/commit/f5d89523567b08420ff3fa48a6fc50dbf530afa8
DIFF: https://github.com/llvm/llvm-project/commit/f5d89523567b08420ff3fa48a6fc50dbf530afa8.diff

LOG: [InstCombine] Transform X == 0 ? 0 : X * Y --> X * freeze(Y)

Enabled mul folding optimization that was previously disabled
by being incorrect.
To preserve correctness, mul's operand that is not compared
with zero in select's condition is now frozen.

Related bug: https://bugs.llvm.org/show_bug.cgi?id=51286

Correctness:
https://alive2.llvm.org/ce/z/bHef7J
https://alive2.llvm.org/ce/z/QcR7sf
https://alive2.llvm.org/ce/z/vvBLzt
https://alive2.llvm.org/ce/z/jGDXgq
https://alive2.llvm.org/ce/z/3Pe8Z4
https://alive2.llvm.org/ce/z/LGga8M
https://alive2.llvm.org/ce/z/CTG5fs

Differential Revision: https://reviews.llvm.org/D108408

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
    llvm/test/Transforms/InstCombine/select.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index b0c610bf64f33..db111375a9d31 100644

--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -723,6 +723,58 @@ static Instruction *foldSetClearBits(SelectInst &Sel,
   return nullptr;
 }
 
+//   select (x == 0), 0, x * y --> freeze(y) * x
+//   select (y == 0), 0, x * y --> freeze(x) * y
+//   select (x == 0), undef, x * y --> freeze(y) * x
+//   select (x == undef), 0, x * y --> freeze(y) * x
+// Usage of mul instead of 0 will make the result more poisonous,
+// so the operand that was not checked in the condition should be frozen.
+// The latter folding is applied only when a constant compared with x is
+// is a vector consisting of 0 and undefs. If a constant compared with x
+// is a scalar undefined value or undefined vector then an expression
+// should be already folded into a constant.
+static Instruction *foldSelectZeroOrMul(SelectInst &SI, InstCombinerImpl &IC) {
+  auto *CondVal = SI.getCondition();
+  auto *TrueVal = SI.getTrueValue();
+  auto *FalseVal = SI.getFalseValue();
+  Value *X, *Y;
+  ICmpInst::Predicate Predicate;
+
+  // Assuming that constant compared with zero is not undef (but it may be
+  // a vector with some undef elements). Otherwise (when a constant is undef)
+  // the select expression should be already simplified.
+  if (!match(CondVal, m_ICmp(Predicate, m_Value(X), m_Zero())) ||
+      !ICmpInst::isEquality(Predicate))
+    return nullptr;
+
+  if (Predicate == ICmpInst::ICMP_NE)
+    std::swap(TrueVal, FalseVal);
+
+  // Check that TrueVal is a constant instead of matching it with m_Zero()
+  // to handle the case when it is a scalar undef value or a vector containing
+  // non-zero elements that are masked by undef elements in the compare
+  // constant.
+  auto *TrueValC = dyn_cast<Constant>(TrueVal);
+  if (TrueValC == nullptr ||
+      !match(FalseVal, m_c_Mul(m_Specific(X), m_Value(Y))) ||
+      !isa<Instruction>(FalseVal))
+    return nullptr;
+
+  auto *ZeroC = cast<Constant>(cast<Instruction>(CondVal)->getOperand(1));
+  auto *MergedC = Constant::mergeUndefsWith(TrueValC, ZeroC);
+  // If X is compared with 0 then TrueVal could be either zero or undef.
+  // m_Zero match vectors containing some undef elements, but for scalars
+  // m_Undef should be used explicitly.
+  if (!match(MergedC, m_Zero()) && !match(MergedC, m_Undef()))
+    return nullptr;
+
+  auto *FalseValI = cast<Instruction>(FalseVal);
+  auto *FrY = IC.InsertNewInstBefore(new FreezeInst(Y, Y->getName() + ".fr"),
+                                     *FalseValI);
+  IC.replaceOperand(*FalseValI, FalseValI->getOperand(0) == Y ? 0 : 1, FrY);
+  return IC.replaceInstUsesWith(SI, FalseValI);
+}
+
 /// Transform patterns such as (a > b) ? a - b : 0 into usub.sat(a, b).
 /// There are 8 commuted/swapped variants of this pattern.
 /// TODO: Also support a - UMIN(a,b) patterns.
@@ -2930,6 +2982,8 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
     return Add;
   if (Instruction *Or = foldSetClearBits(SI, Builder))
     return Or;
+  if (Instruction *Mul = foldSelectZeroOrMul(SI, *this))
+    return Mul;
 
   // Turn (select C, (op X, Y), (op X, Z)) -> (op X, (select C, Y, Z))
   auto *TI = dyn_cast<Instruction>(TrueVal);

diff  --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll
index fc288d565a7d1..be47e8b1480ed 100644
--- a/llvm/test/Transforms/InstCombine/select.ll
+++ b/llvm/test/Transforms/InstCombine/select.ll
@@ -2844,12 +2844,12 @@ define <2 x i1> @partial_false_undef_condval(<2 x i1> %x) {
   ret <2 x i1> %r
 }
 
+; select (x == 0), 0, x * y --> freeze(y) * x
 define i32 @mul_select_eq_zero(i32 %x, i32 %y) {
 ; CHECK-LABEL: @mul_select_eq_zero(
-; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[X:%.*]], 0
-; CHECK-NEXT:    [[M:%.*]] = mul i32 [[X]], [[Y:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = select i1 [[C]], i32 0, i32 [[M]]
-; CHECK-NEXT:    ret i32 [[R]]
+; CHECK-NEXT:    [[Y_FR:%.*]] = freeze i32 [[Y:%.*]]
+; CHECK-NEXT:    [[M:%.*]] = mul i32 [[Y_FR]], [[X:%.*]]
+; CHECK-NEXT:    ret i32 [[M]]
 ;
   %c = icmp eq i32 %x, 0
   %m = mul i32 %x, %y
@@ -2857,12 +2857,12 @@ define i32 @mul_select_eq_zero(i32 %x, i32 %y) {
   ret i32 %r
 }
 
+; select (y == 0), 0, x * y --> freeze(x) * y
 define i32 @mul_select_eq_zero_commute(i32 %x, i32 %y) {
 ; CHECK-LABEL: @mul_select_eq_zero_commute(
-; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[Y:%.*]], 0
-; CHECK-NEXT:    [[M:%.*]] = mul i32 [[X:%.*]], [[Y]]
-; CHECK-NEXT:    [[R:%.*]] = select i1 [[C]], i32 0, i32 [[M]]
-; CHECK-NEXT:    ret i32 [[R]]
+; CHECK-NEXT:    [[X_FR:%.*]] = freeze i32 [[X:%.*]]
+; CHECK-NEXT:    [[M:%.*]] = mul i32 [[X_FR]], [[Y:%.*]]
+; CHECK-NEXT:    ret i32 [[M]]
 ;
   %c = icmp eq i32 %y, 0
   %m = mul i32 %x, %y
@@ -2870,12 +2870,12 @@ define i32 @mul_select_eq_zero_commute(i32 %x, i32 %y) {
   ret i32 %r
 }
 
+; Check that mul's flags preserved during the transformation.
 define i32 @mul_select_eq_zero_copy_flags(i32 %x, i32 %y) {
 ; CHECK-LABEL: @mul_select_eq_zero_copy_flags(
-; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[X:%.*]], 0
-; CHECK-NEXT:    [[M:%.*]] = mul nuw nsw i32 [[X]], [[Y:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = select i1 [[C]], i32 0, i32 [[M]]
-; CHECK-NEXT:    ret i32 [[R]]
+; CHECK-NEXT:    [[Y_FR:%.*]] = freeze i32 [[Y:%.*]]
+; CHECK-NEXT:    [[M:%.*]] = mul nuw nsw i32 [[Y_FR]], [[X:%.*]]
+; CHECK-NEXT:    ret i32 [[M]]
 ;
   %c = icmp eq i32 %x, 0
   %m = mul nuw nsw i32 %x, %y
@@ -2883,25 +2883,31 @@ define i32 @mul_select_eq_zero_copy_flags(i32 %x, i32 %y) {
   ret i32 %r
 }
 
+; Check that the transformation could be applied after condition's inversion.
+; select (x != 0), x * y, 0 --> freeze(y) * x
 define i32 @mul_select_ne_zero(i32 %x, i32 %y) {
 ; CHECK-LABEL: @mul_select_ne_zero(
-; CHECK-NEXT:    [[C_NOT:%.*]] = icmp eq i32 [[X:%.*]], 0
-; CHECK-NEXT:    [[M:%.*]] = mul i32 [[X]], [[Y:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = select i1 [[C_NOT]], i32 0, i32 [[M]]
-; CHECK-NEXT:    ret i32 [[R]]
+; CHECK-NEXT:    [[C:%.*]] = icmp ne i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[Y_FR:%.*]] = freeze i32 [[Y:%.*]]
+; CHECK-NEXT:    [[M:%.*]] = mul i32 [[Y_FR]], [[X]]
+; CHECK-NEXT:    call void @use(i1 [[C]])
+; CHECK-NEXT:    ret i32 [[M]]
 ;
   %c = icmp ne i32 %x, 0
   %m = mul i32 %x, %y
   %r = select i1 %c, i32 %m, i32 0
+  call void @use(i1 %c)
   ret i32 %r
 }
 
+; Check that if one of a select's branches returns undef then
+; an expression could be folded into mul as if there was a 0 instead of undef.
+; select (x == 0), undef, x * y --> freeze(y) * x
 define i32 @mul_select_eq_zero_sel_undef(i32 %x, i32 %y) {
 ; CHECK-LABEL: @mul_select_eq_zero_sel_undef(
-; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[X:%.*]], 0
-; CHECK-NEXT:    [[M:%.*]] = mul i32 [[X]], [[Y:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = select i1 [[C]], i32 undef, i32 [[M]]
-; CHECK-NEXT:    ret i32 [[R]]
+; CHECK-NEXT:    [[Y_FR:%.*]] = freeze i32 [[Y:%.*]]
+; CHECK-NEXT:    [[M:%.*]] = mul i32 [[Y_FR]], [[X:%.*]]
+; CHECK-NEXT:    ret i32 [[M]]
 ;
   %c = icmp eq i32 %x, 0
   %m = mul i32 %x, %y
@@ -2909,15 +2915,16 @@ define i32 @mul_select_eq_zero_sel_undef(i32 %x, i32 %y) {
   ret i32 %r
 }
 
+; Check that the transformation is applied disregard to a number
+; of expression's users.
 define i32 @mul_select_eq_zero_multiple_users(i32 %x, i32 %y) {
 ; CHECK-LABEL: @mul_select_eq_zero_multiple_users(
-; CHECK-NEXT:    [[M:%.*]] = mul i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[Y_FR:%.*]] = freeze i32 [[Y:%.*]]
+; CHECK-NEXT:    [[M:%.*]] = mul i32 [[Y_FR]], [[X:%.*]]
 ; CHECK-NEXT:    call void @use_i32(i32 [[M]])
-; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[X]], 0
-; CHECK-NEXT:    [[R:%.*]] = select i1 [[C]], i32 0, i32 [[M]]
 ; CHECK-NEXT:    call void @use_i32(i32 [[M]])
-; CHECK-NEXT:    call void @use_i32(i32 [[R]])
-; CHECK-NEXT:    ret i32 [[R]]
+; CHECK-NEXT:    call void @use_i32(i32 [[M]])
+; CHECK-NEXT:    ret i32 [[M]]
 ;
   %m = mul i32 %x, %y
   call void @use_i32(i32 %m)
@@ -2928,6 +2935,8 @@ define i32 @mul_select_eq_zero_multiple_users(i32 %x, i32 %y) {
   ret i32 %r
 }
 
+; Negative test: select's condition is unrelated to multiplied values,
+; so the transformation should not be applied.
 define i32 @mul_select_eq_zero_unrelated_condition(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: @mul_select_eq_zero_unrelated_condition(
 ; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[Z:%.*]], 0
@@ -2941,12 +2950,12 @@ define i32 @mul_select_eq_zero_unrelated_condition(i32 %x, i32 %y, i32 %z) {
   ret i32 %r
 }
 
+; select (<k x elt> x == 0), <k x elt> 0, <k x elt> x * y --> freeze(y) * x
 define <4 x i32> @mul_select_eq_zero_vector(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @mul_select_eq_zero_vector(
-; CHECK-NEXT:    [[C:%.*]] = icmp eq <4 x i32> [[X:%.*]], zeroinitializer
-; CHECK-NEXT:    [[M:%.*]] = mul <4 x i32> [[X]], [[Y:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = select <4 x i1> [[C]], <4 x i32> zeroinitializer, <4 x i32> [[M]]
-; CHECK-NEXT:    ret <4 x i32> [[R]]
+; CHECK-NEXT:    [[Y_FR:%.*]] = freeze <4 x i32> [[Y:%.*]]
+; CHECK-NEXT:    [[M:%.*]] = mul <4 x i32> [[Y_FR]], [[X:%.*]]
+; CHECK-NEXT:    ret <4 x i32> [[M]]
 ;
   %c = icmp eq <4 x i32> %x, zeroinitializer
   %m = mul <4 x i32> %x, %y
@@ -2954,12 +2963,14 @@ define <4 x i32> @mul_select_eq_zero_vector(<4 x i32> %x, <4 x i32> %y) {
   ret <4 x i32> %r
 }
 
+; Check that a select is folded into multiplication if condition's operand
+; is a vector consisting of zeros and undefs.
+; select (<k x elt> x == {0, undef, ...}), <k x elt> 0, <k x elt> x * y --> freeze(y) * x
 define <2 x i32> @mul_select_eq_undef_vector(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @mul_select_eq_undef_vector(
-; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i32> [[X:%.*]], <i32 0, i32 undef>
-; CHECK-NEXT:    [[M:%.*]] = mul <2 x i32> [[X]], [[Y:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[C]], <2 x i32> <i32 0, i32 42>, <2 x i32> [[M]]
-; CHECK-NEXT:    ret <2 x i32> [[R]]
+; CHECK-NEXT:    [[Y_FR:%.*]] = freeze <2 x i32> [[Y:%.*]]
+; CHECK-NEXT:    [[M:%.*]] = mul <2 x i32> [[Y_FR]], [[X:%.*]]
+; CHECK-NEXT:    ret <2 x i32> [[M]]
 ;
   %c = icmp eq <2 x i32> %x, <i32 0, i32 undef>
   %m = mul <2 x i32> %x, %y
@@ -2967,12 +2978,14 @@ define <2 x i32> @mul_select_eq_undef_vector(<2 x i32> %x, <2 x i32> %y) {
   ret <2 x i32> %r
 }
 
+; Check that a select is folded into multiplication if other select's operand
+; is a vector consisting of zeros and undefs.
+; select (<k x elt> x == 0), <k x elt> {0, undef, ...}, <k x elt> x * y --> freeze(y) * x
 define <2 x i32> @mul_select_eq_zero_sel_undef_vector(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @mul_select_eq_zero_sel_undef_vector(
-; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i32> [[X:%.*]], zeroinitializer
-; CHECK-NEXT:    [[M:%.*]] = mul <2 x i32> [[X]], [[Y:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[C]], <2 x i32> <i32 0, i32 undef>, <2 x i32> [[M]]
-; CHECK-NEXT:    ret <2 x i32> [[R]]
+; CHECK-NEXT:    [[Y_FR:%.*]] = freeze <2 x i32> [[Y:%.*]]
+; CHECK-NEXT:    [[M:%.*]] = mul <2 x i32> [[Y_FR]], [[X:%.*]]
+; CHECK-NEXT:    ret <2 x i32> [[M]]
 ;
   %c = icmp eq <2 x i32> %x, zeroinitializer
   %m = mul <2 x i32> %x, %y
@@ -2980,6 +2993,8 @@ define <2 x i32> @mul_select_eq_zero_sel_undef_vector(<2 x i32> %x, <2 x i32> %y
   ret <2 x i32> %r
 }
 
+; Negative test: select should not be folded into mul because
+; condition's operand and select's operand do not merge into zero vector.
 define <2 x i32> @mul_select_eq_undef_vector_not_merging_to_zero(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @mul_select_eq_undef_vector_not_merging_to_zero(
 ; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i32> [[X:%.*]], <i32 0, i32 undef>