[llvm] [InstCombine] Fix a cycle when folding fneg(select) with scalable vector types (PR #112465)

Thu Oct 24 14:06:47 PDT 2024

https://github.com/ssijaric-nv updated https://github.com/llvm/llvm-project/pull/112465

>From 771b15404028c986de3c2f197f02c3ac47a8a48c Mon Sep 17 00:00:00 2001
From: Sanjin Sijaric <ssijaric at nvidia.com>
Date: Tue, 15 Oct 2024 17:48:18 -0700
Subject: [PATCH 1/3] [InstCombine] Fix a cycle when folding fneg(select) with
 scalable vector types

The two folding operations are causing a cycle for the following case with
scalable vector types:

define <vscale x 2 x double> @test_fneg_select_abs(<vscale x 2 x i1> %cond, <vscale x 2 x double> %b) {
  %1 = select <vscale x 2 x i1> %cond, <vscale x 2 x double> zeroinitializer, <vscale x 2 x double> %b
  %2 = fneg fast <vscale x 2 x double> %1
  ret <vscale x 2 x double> %2
}

1) fold fneg:  -(Cond ? C : Y) -> Cond ? -C : -Y

2) fold select: (Cond ? -X : -Y) -> -(Cond ? X : Y)

1) results in the following since '<vscale x 2 x double> zeroinitializer' passes
the check for the immediate constant:

%.neg = fneg fast <vscale x 2 x double> zeroinitializer
%b.neg = fneg fast <vscale x 2 x double> %b
%1 = select fast <vscale x 2 x i1> %cond, <vscale x 2 x double> %.neg, <vscale x 2 x double> %b.neg

and so we end up going back and forth between 1) and 2).

Don't perform 1) if we are dealing with scalable vector constants as there is no
immediate representation for a negative scalable zeroinitializer.
---
 .../lib/Transforms/InstCombine/InstCombineAddSub.cpp |  3 ++-
 llvm/test/Transforms/InstCombine/fneg.ll             | 12 ++++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 21588aca512758..645fad14c1999c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -2878,7 +2878,8 @@ Instruction *InstCombinerImpl::visitFNeg(UnaryOperator &I) {
 
     // -(Cond ? X : C) --> Cond ? -X : -C
     // -(Cond ? C : Y) --> Cond ? -C : -Y
-    if (match(X, m_ImmConstant()) || match(Y, m_ImmConstant())) {
+    if ((match(X, m_ImmConstant()) && !isa<ScalableVectorType>(X->getType())) ||
+        (match(Y, m_ImmConstant()) && !isa<ScalableVectorType>(Y->getType()))) {
       Value *NegX = Builder.CreateFNegFMF(X, &I, X->getName() + ".neg");
       Value *NegY = Builder.CreateFNegFMF(Y, &I, Y->getName() + ".neg");
       SelectInst *NewSel = SelectInst::Create(Cond, NegX, NegY);
diff --git a/llvm/test/Transforms/InstCombine/fneg.ll b/llvm/test/Transforms/InstCombine/fneg.ll
index 3c4088832feaaa..643c8fd8d8a742 100644
--- a/llvm/test/Transforms/InstCombine/fneg.ll
+++ b/llvm/test/Transforms/InstCombine/fneg.ll
@@ -1109,4 +1109,16 @@ define float @test_fneg_select_maxnum(float %x) {
   ret float %neg
 }
 
+; Check that there's no infinite loop.
+define <vscale x 2 x double> @test_fneg_select_svec(<vscale x 2 x i1> %cond, <vscale x 2 x double> %b) {
+; CHECK-LABEL: @test_fneg_select_svec(
+; CHECK-NEXT:    [[TMP1:%.*]] = select <vscale x 2 x i1> [[COND:%.*]], <vscale x 2 x double> zeroinitializer, <vscale x 2 x double> [[B:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = fneg fast <vscale x 2 x double> [[TMP1]]
+; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
+;
+  %1 = select <vscale x 2 x i1> %cond, <vscale x 2 x double> zeroinitializer, <vscale x 2 x double> %b
+  %2 = fneg fast <vscale x 2 x double> %1
+  ret <vscale x 2 x double> %2
+}
+
 !0 = !{}

>From e8719751e700b94e301cbeafb6ef8b090436c394 Mon Sep 17 00:00:00 2001
From: Sanjin Sijaric <ssijaric at nvidia.com>
Date: Wed, 16 Oct 2024 19:04:26 -0700
Subject: [PATCH 2/3] Handle folding of constant splats for scalable vector
 types

---
 llvm/lib/IR/ConstantFold.cpp                  | 29 ++++++++++---------
 .../InstCombine/InstCombineAddSub.cpp         | 23 ++++++++++-----
 llvm/test/Transforms/InstCombine/fneg.ll      | 26 +++++++++++++++--
 llvm/test/Transforms/InstSimplify/fp-nan.ll   |  6 ++--
 4 files changed, 56 insertions(+), 28 deletions(-)

diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp
index 57d9a03c9c22b8..07dfbc41e79b00 100644
--- a/llvm/lib/IR/ConstantFold.cpp
+++ b/llvm/lib/IR/ConstantFold.cpp
@@ -581,26 +581,27 @@ Constant *llvm::ConstantFoldUnaryInstruction(unsigned Opcode, Constant *C) {
     case Instruction::FNeg:
       return ConstantFP::get(C->getContext(), neg(CV));
     }
-  } else if (auto *VTy = dyn_cast<FixedVectorType>(C->getType())) {
-
-    Type *Ty = IntegerType::get(VTy->getContext(), 32);
+  } else if (auto *VTy = dyn_cast<VectorType>(C->getType())) {
     // Fast path for splatted constants.
     if (Constant *Splat = C->getSplatValue())
       if (Constant *Elt = ConstantFoldUnaryInstruction(Opcode, Splat))
         return ConstantVector::getSplat(VTy->getElementCount(), Elt);
 
-    // Fold each element and create a vector constant from those constants.
-    SmallVector<Constant *, 16> Result;
-    for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
-      Constant *ExtractIdx = ConstantInt::get(Ty, i);
-      Constant *Elt = ConstantExpr::getExtractElement(C, ExtractIdx);
-      Constant *Res = ConstantFoldUnaryInstruction(Opcode, Elt);
-      if (!Res)
-        return nullptr;
-      Result.push_back(Res);
-    }
+    if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
+      // Fold each element and create a vector constant from those constants.
+      Type *Ty = IntegerType::get(FVTy->getContext(), 32);
+      SmallVector<Constant *, 16> Result;
+      for (unsigned i = 0, e = FVTy->getNumElements(); i != e; ++i) {
+        Constant *ExtractIdx = ConstantInt::get(Ty, i);
+        Constant *Elt = ConstantExpr::getExtractElement(C, ExtractIdx);
+        Constant *Res = ConstantFoldUnaryInstruction(Opcode, Elt);
+        if (!Res)
+          return nullptr;
+        Result.push_back(Res);
+      }
 
-    return ConstantVector::get(Result);
+      return ConstantVector::get(Result);
+    }
   }
 
   // We don't know how to fold this.
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 645fad14c1999c..abb731d81de340 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -2878,13 +2878,22 @@ Instruction *InstCombinerImpl::visitFNeg(UnaryOperator &I) {
 
     // -(Cond ? X : C) --> Cond ? -X : -C
     // -(Cond ? C : Y) --> Cond ? -C : -Y
-    if ((match(X, m_ImmConstant()) && !isa<ScalableVectorType>(X->getType())) ||
-        (match(Y, m_ImmConstant()) && !isa<ScalableVectorType>(Y->getType()))) {
-      Value *NegX = Builder.CreateFNegFMF(X, &I, X->getName() + ".neg");
-      Value *NegY = Builder.CreateFNegFMF(Y, &I, Y->getName() + ".neg");
-      SelectInst *NewSel = SelectInst::Create(Cond, NegX, NegY);
-      propagateSelectFMF(NewSel, /*CommonOperand=*/true);
-      return NewSel;
+    Constant *XC = nullptr, *YC = nullptr;
+    if (match(X, m_ImmConstant(XC)) || match(Y, m_ImmConstant(YC))) {
+      Value *NegX = nullptr, *NegY = nullptr;
+      if (XC)
+        NegX = ConstantFoldUnaryOpOperand(Instruction::FNeg, XC, DL);
+      if (YC)
+        NegY = ConstantFoldUnaryOpOperand(Instruction::FNeg, YC, DL);
+      if (NegX || NegY) {
+        if (!NegX)
+          NegX = Builder.CreateFNegFMF(X, &I, X->getName() + ".neg");
+        if (!NegY)
+          NegY = Builder.CreateFNegFMF(Y, &I, Y->getName() + ".neg");
+        SelectInst *NewSel = SelectInst::Create(Cond, NegX, NegY);
+        propagateSelectFMF(NewSel, /*CommonOperand=*/true);
+        return NewSel;
+      }
     }
   }
 
diff --git a/llvm/test/Transforms/InstCombine/fneg.ll b/llvm/test/Transforms/InstCombine/fneg.ll
index 643c8fd8d8a742..6a9b3309bb347e 100644
--- a/llvm/test/Transforms/InstCombine/fneg.ll
+++ b/llvm/test/Transforms/InstCombine/fneg.ll
@@ -1112,13 +1112,33 @@ define float @test_fneg_select_maxnum(float %x) {
 ; Check that there's no infinite loop.
 define <vscale x 2 x double> @test_fneg_select_svec(<vscale x 2 x i1> %cond, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: @test_fneg_select_svec(
-; CHECK-NEXT:    [[TMP1:%.*]] = select <vscale x 2 x i1> [[COND:%.*]], <vscale x 2 x double> zeroinitializer, <vscale x 2 x double> [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = fneg fast <vscale x 2 x double> [[TMP1]]
-; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
+; CHECK-NEXT:    [[TMP2:%.*]] = fneg fast <vscale x 2 x double> [[TMP1:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = select fast <vscale x 2 x i1> [[COND:%.*]], <vscale x 2 x double> shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double -0.000000e+00, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x double> [[TMP2]]
+; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP3]]
 ;
   %1 = select <vscale x 2 x i1> %cond, <vscale x 2 x double> zeroinitializer, <vscale x 2 x double> %b
   %2 = fneg fast <vscale x 2 x double> %1
   ret <vscale x 2 x double> %2
 }
 
+define <vscale x 2 x double> @test_fneg_select_svec_2(<vscale x 2 x i1> %cond, <vscale x 2 x double> %a) {
+; CHECK-LABEL: @test_fneg_select_svec_2(
+; CHECK-NEXT:    [[A_NEG:%.*]] = fneg fast <vscale x 2 x double> [[A:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = select fast <vscale x 2 x i1> [[COND:%.*]], <vscale x 2 x double> [[A_NEG]], <vscale x 2 x double> shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double -0.000000e+00, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+;
+  %1 = select <vscale x 2 x i1> %cond, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
+  %2 = fneg fast <vscale x 2 x double> %1
+  ret <vscale x 2 x double> %2
+}
+
+define <vscale x 2 x double> @test_fneg_select_svec_3(<vscale x 2 x i1> %cond, <vscale x 2 x double> %b) {
+; CHECK-LABEL: @test_fneg_select_svec_3(
+; CHECK-NEXT:    ret <vscale x 2 x double> shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double -0.000000e+00, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer)
+;
+  %1 = select <vscale x 2 x i1> %cond, <vscale x 2 x double> zeroinitializer, <vscale x 2 x double> zeroinitializer
+  %2 = fneg fast <vscale x 2 x double> %1
+  ret <vscale x 2 x double> %2
+}
+
 !0 = !{}
diff --git a/llvm/test/Transforms/InstSimplify/fp-nan.ll b/llvm/test/Transforms/InstSimplify/fp-nan.ll
index bb557500822c14..06b23200bafff8 100644
--- a/llvm/test/Transforms/InstSimplify/fp-nan.ll
+++ b/llvm/test/Transforms/InstSimplify/fp-nan.ll
@@ -237,8 +237,7 @@ define <2 x double> @unary_fneg_nan_2(<2 x double> %x) {
 ; FIXME: This doesn't behave the same way as the fixed-length vectors above
 define <vscale x 1 x double> @unary_fneg_nan_2_scalable_vec_0() {
 ; CHECK-LABEL: @unary_fneg_nan_2_scalable_vec_0(
-; CHECK-NEXT:    [[R:%.*]] = fneg <vscale x 1 x double> shufflevector (<vscale x 1 x double> insertelement (<vscale x 1 x double> poison, double 0xFFF1234567890ABC, i64 0), <vscale x 1 x double> poison, <vscale x 1 x i32> zeroinitializer)
-; CHECK-NEXT:    ret <vscale x 1 x double> [[R]]
+; CHECK-NEXT:    ret <vscale x 1 x double> shufflevector (<vscale x 1 x double> insertelement (<vscale x 1 x double> poison, double 0x7FF1234567890ABC, i64 0), <vscale x 1 x double> poison, <vscale x 1 x i32> zeroinitializer)
 ;
   %r = fneg <vscale x 1 x double> splat (double 0xFFF1234567890ABC)
   ret <vscale x 1 x double> %r
@@ -247,8 +246,7 @@ define <vscale x 1 x double> @unary_fneg_nan_2_scalable_vec_0() {
 ; FIXME: This doesn't behave the same way as the fixed-length vectors above
 define <vscale x 1 x double> @unary_fneg_nan_2_scalable_vec_1() {
 ; CHECK-LABEL: @unary_fneg_nan_2_scalable_vec_1(
-; CHECK-NEXT:    [[R:%.*]] = fneg <vscale x 1 x double> shufflevector (<vscale x 1 x double> insertelement (<vscale x 1 x double> poison, double 0x7FF0000000000001, i64 0), <vscale x 1 x double> poison, <vscale x 1 x i32> zeroinitializer)
-; CHECK-NEXT:    ret <vscale x 1 x double> [[R]]
+; CHECK-NEXT:    ret <vscale x 1 x double> shufflevector (<vscale x 1 x double> insertelement (<vscale x 1 x double> poison, double 0xFFF0000000000001, i64 0), <vscale x 1 x double> poison, <vscale x 1 x i32> zeroinitializer)
 ;
   %r = fneg <vscale x 1 x double> splat (double 0x7FF0000000000001)
   ret <vscale x 1 x double> %r

>From f2e4e8bab179eba99f319226d9e64dee4cf1f14b Mon Sep 17 00:00:00 2001
From: Sanjin Sijaric <ssijaric at nvidia.com>
Date: Thu, 17 Oct 2024 09:58:02 -0700
Subject: [PATCH 3/3] Only the constant folding change is needed; revert the
 fneg folding change

---
 .../InstCombine/InstCombineAddSub.cpp         | 22 +++++--------------
 1 file changed, 6 insertions(+), 16 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index abb731d81de340..21588aca512758 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -2878,22 +2878,12 @@ Instruction *InstCombinerImpl::visitFNeg(UnaryOperator &I) {
 
     // -(Cond ? X : C) --> Cond ? -X : -C
     // -(Cond ? C : Y) --> Cond ? -C : -Y
-    Constant *XC = nullptr, *YC = nullptr;
-    if (match(X, m_ImmConstant(XC)) || match(Y, m_ImmConstant(YC))) {
-      Value *NegX = nullptr, *NegY = nullptr;
-      if (XC)
-        NegX = ConstantFoldUnaryOpOperand(Instruction::FNeg, XC, DL);
-      if (YC)
-        NegY = ConstantFoldUnaryOpOperand(Instruction::FNeg, YC, DL);
-      if (NegX || NegY) {
-        if (!NegX)
-          NegX = Builder.CreateFNegFMF(X, &I, X->getName() + ".neg");
-        if (!NegY)
-          NegY = Builder.CreateFNegFMF(Y, &I, Y->getName() + ".neg");
-        SelectInst *NewSel = SelectInst::Create(Cond, NegX, NegY);
-        propagateSelectFMF(NewSel, /*CommonOperand=*/true);
-        return NewSel;
-      }
+    if (match(X, m_ImmConstant()) || match(Y, m_ImmConstant())) {
+      Value *NegX = Builder.CreateFNegFMF(X, &I, X->getName() + ".neg");
+      Value *NegY = Builder.CreateFNegFMF(Y, &I, Y->getName() + ".neg");
+      SelectInst *NewSel = SelectInst::Create(Cond, NegX, NegY);
+      propagateSelectFMF(NewSel, /*CommonOperand=*/true);
+      return NewSel;
     }
   }