[llvm-branch-commits] [llvm] eac91d5 - [PowerPC] Prevent legalization loop from promoting SELECT_CC from v4i32 to v4i32

Mon Jun 22 15:10:35 PDT 2020

Author: Nemanja Ivanovic
Date: 2020-06-22T15:09:29-07:00
New Revision: eac91d5864010a25e7e693ad565b64467dfa6ab9

URL: https://github.com/llvm/llvm-project/commit/eac91d5864010a25e7e693ad565b64467dfa6ab9
DIFF: https://github.com/llvm/llvm-project/commit/eac91d5864010a25e7e693ad565b64467dfa6ab9.diff

LOG: [PowerPC] Prevent legalization loop from promoting SELECT_CC from v4i32 to v4i32

As reported in https://bugs.llvm.org/show_bug.cgi?id=45709 we can hit an
infinite loop in legalization since we set the legalization action for
ISD::SELECT_CC for all fixed length vector types to Promote. Without some
different legalization action for the type being promoted to, the legalizer
simply loops. Since we don't have patterns to match the node, the right
legalization action should be Expand.

Differential revision: https://reviews.llvm.org/D79854

(cherry picked from commit 793cc518b9428a0b7a40c59d4ecd5939a7bc84f7)

Added: 
    llvm/test/CodeGen/PowerPC/pr45709.ll

Modified: 
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index e61d44b5f968..cdefb38ec0ae 100644

--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -694,6 +694,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
         setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
       }
     }
+    setOperationAction(ISD::SELECT_CC, MVT::v4i32, Expand);
     if (!Subtarget.hasP8Vector()) {
       setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
       setOperationAction(ISD::SMIN, MVT::v2i64, Expand);

diff  --git a/llvm/test/CodeGen/PowerPC/pr45709.ll b/llvm/test/CodeGen/PowerPC/pr45709.ll
new file mode 100644
index 000000000000..bc295fafd210
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/pr45709.ll
@@ -0,0 +1,58 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown \
+; RUN:     -mcpu=pwr6 -ppc-asm-full-reg-names -mattr=-vsx \
+; RUN:     -ppc-vsr-nums-as-vr < %s | FileCheck %s
+
+; There is code in the SDAG to expand FMAX/FMIN with fast flags to SELECT_CC.
+; On PPC, we had SELECT_CC legalized using Promote for all vector types
+; (including the type that they are all promoted to - which caused an infinite
+; loop in legalization). This test just ensures that we terminate on such input.
+define dso_local void @_ZN1a1bEv(<4 x float> %in) local_unnamed_addr #0 align 2 {
+; CHECK-LABEL: _ZN1a1bEv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    bclr 12, 4*cr5+lt, 0
+; CHECK-NEXT:  # %bb.1: # %.preheader
+; CHECK-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
+; CHECK-NEXT:    vxor v3, v3, v3
+; CHECK-NEXT:    addi r3, r3, .LCPI0_0@toc@l
+; CHECK-NEXT:    lvx v4, 0, r3
+; CHECK-NEXT:    addi r3, r1, -48
+; CHECK-NEXT:    stvx v3, 0, r3
+; CHECK-NEXT:    addi r3, r1, -32
+; CHECK-NEXT:    vperm v2, v2, v2, v4
+; CHECK-NEXT:    stvx v2, 0, r3
+; CHECK-NEXT:    lwz r3, -48(r1)
+; CHECK-NEXT:    lwz r4, -32(r1)
+; CHECK-NEXT:    cmpw r4, r3
+; CHECK-NEXT:    bc 12, gt, .LBB0_2
+; CHECK-NEXT:    b .LBB0_3
+; CHECK-NEXT:  .LBB0_2: # %.preheader
+; CHECK-NEXT:    addi r3, r4, 0
+; CHECK-NEXT:  .LBB0_3: # %.preheader
+; CHECK-NEXT:    stw r3, -64(r1)
+; CHECK-NEXT:    addi r3, r1, -64
+; CHECK-NEXT:    lvx v2, 0, r3
+; CHECK-NEXT:    addi r3, r1, -16
+; CHECK-NEXT:    stvx v2, 0, r3
+; CHECK-NEXT:    blr
+  br i1 undef, label %7, label %1
+
+1:                                                ; preds = %1, %0
+  br i1 undef, label %2, label %1
+
+2:                                                ; preds = %1
+  %3 = shufflevector <4 x float> %in, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 1, i32 0>
+  %4 = call fast <4 x float> @llvm.maxnum.v4f32(<4 x float> %3, <4 x float> zeroinitializer)
+  %5 = call fast <4 x float> @llvm.maxnum.v4f32(<4 x float> %4, <4 x float> undef)
+  %6 = extractelement <4 x float> %5, i32 0
+  br label %7
+
+7:                                                ; preds = %2, %0
+  %8 = phi float [ %6, %2 ], [ undef, %0 ]
+  %9 = fcmp fast une float %8, 0.000000e+00
+  ret void
+}
+
+declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) #0
+
+attributes #0 = { nounwind }