[llvm] 7f05bdf - [AArch64][SME] Fix an infinite loop in DAGCombine related to adding -force-streaming-compatible-sve flag.

Wed Apr 5 03:11:32 PDT 2023

Author: Dinar Temirbulatov
Date: 2023-04-05T10:10:55Z
New Revision: 7f05bdf4ee1c4a6779ecde1e44a889c4cef72269

URL: https://github.com/llvm/llvm-project/commit/7f05bdf4ee1c4a6779ecde1e44a889c4cef72269
DIFF: https://github.com/llvm/llvm-project/commit/7f05bdf4ee1c4a6779ecde1e44a889c4cef72269.diff

LOG: [AArch64][SME] Fix an infinite loop in DAGCombine related to adding -force-streaming-compatible-sve flag.

Compiler hits infinite loop in DAGCombine. For force-streaming-compatible-sve
mode we have custom lowering for 128-bit vector splats and later in
DAGCombiner::SimplifyVCastOp() we scalarized SPLAT because we have custom
lowering for SME. Later, we restored SPLAT opertion via performMulCombine().

Added: 
    

Modified: 
    llvm/include/llvm/CodeGen/TargetLowering.h
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/lib/Target/AArch64/AArch64ISelLowering.h
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/lib/Target/RISCV/RISCVISelLowering.h
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/lib/Target/X86/X86ISelLowering.h
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index ffc7dbcf81dd9..71d0cb51e6f06 100644

--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -802,7 +802,7 @@ class TargetLoweringBase {
   }
 
   // Return true if the target wants to transform Op(Splat(X)) -> Splat(Op(X))
-  virtual bool preferScalarizeSplat(unsigned Opc) const { return true; }
+  virtual bool preferScalarizeSplat(SDNode *N) const { return true; }
 
   /// Return true if the target wants to use the optimization that
   /// turns ext(promotableInst1(...(promotableInstN(load)))) into

diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index f79a9a1b74d3c..84c06321e4cc4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -25627,7 +25627,7 @@ SDValue DAGCombiner::SimplifyVCastOp(SDNode *N, const SDLoc &DL) {
       (N0.getOpcode() == ISD::SPLAT_VECTOR ||
        TLI.isExtractVecEltCheap(VT, Index0)) &&
       TLI.isOperationLegalOrCustom(Opcode, EltVT) &&
-      TLI.preferScalarizeSplat(Opcode)) {
+      TLI.preferScalarizeSplat(N)) {
     SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
     SDValue Elt =
         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcEltVT, Src0, IndexC);

diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 697240a1afe51..d0890ade0a89d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -24601,3 +24601,14 @@ Value *AArch64TargetLowering::createComplexDeinterleavingIR(
 
   return nullptr;
 }
+
+bool AArch64TargetLowering::preferScalarizeSplat(SDNode *N) const {
+  unsigned Opc = N->getOpcode();
+  if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND ||
+      Opc == ISD::ANY_EXTEND) {
+    if (any_of(N->uses(),
+               [&](SDNode *Use) { return Use->getOpcode() == ISD::MUL; }))
+      return false;
+  }
+  return true;
+}

diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 9bb30d58d2922..82653d473f152 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1222,6 +1222,8 @@ class AArch64TargetLowering : public TargetLowering {
 
   bool isConstantUnsignedBitfieldExtractLegal(unsigned Opc, LLT Ty1,
                                               LLT Ty2) const override;
+
+  bool preferScalarizeSplat(SDNode *N) const override;
 };
 
 namespace AArch64 {

diff  --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 6e622163a177b..8e166d28992a0 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -15017,9 +15017,10 @@ bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
   return OptSize && !VT.isVector();
 }
 
-bool RISCVTargetLowering::preferScalarizeSplat(unsigned Opc) const {
+bool RISCVTargetLowering::preferScalarizeSplat(SDNode *N) const {
   // Scalarize zero_ext and sign_ext might stop match to widening instruction in
   // some situation.
+  unsigned Opc = N->getOpcode();
   if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
     return false;
   return true;

diff  --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 9a3b97f3ce8cc..ecef5aa7786ef 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -408,7 +408,7 @@ class RISCVTargetLowering : public TargetLowering {
 
   bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
 
-  bool preferScalarizeSplat(unsigned Opc) const override;
+  bool preferScalarizeSplat(SDNode *N) const override;
 
   bool softPromoteHalfType() const override { return true; }
 

diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f67a1c4bd81e5..ff097ea5622c0 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -6115,8 +6115,8 @@ bool X86TargetLowering::
   return NewShiftOpcode == ISD::SHL;
 }
 
-bool X86TargetLowering::preferScalarizeSplat(unsigned Opc) const {
-  return Opc != ISD::FP_EXTEND;
+bool X86TargetLowering::preferScalarizeSplat(SDNode *N) const {
+  return N->getOpcode() != ISD::FP_EXTEND;
 }
 
 bool X86TargetLowering::shouldFoldConstantShiftPairToMask(

diff  --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index ca28e32d28426..d431bd6fd2704 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1126,7 +1126,7 @@ namespace llvm {
         unsigned OldShiftOpcode, unsigned NewShiftOpcode,
         SelectionDAG &DAG) const override;
 
-    bool preferScalarizeSplat(unsigned Opc) const override;
+    bool preferScalarizeSplat(SDNode *N) const override;
 
     bool shouldFoldConstantShiftPairToMask(const SDNode *N,
                                            CombineLevel Level) const override;

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll
index 4badbd50d5e9e..95a224ac8ad18 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll
@@ -891,4 +891,37 @@ define void @zext_v8i32_v8i64(ptr %in, ptr %out) #0 {
   ret void
 }
 
+define void @extend_and_mul(i32 %0, <2 x i64> %1, ptr %2) #0 {
+; CHECK-LABEL: extend_and_mul:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.s, w0
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    uunpklo z1.d, z1.s
+; CHECK-NEXT:    mul z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    str q0, [x1]
+; CHECK-NEXT:    ret
+  %broadcast.splatinsert2 = insertelement <2 x i32> poison, i32 %0, i64 0
+  %broadcast.splat3 = shufflevector <2 x i32> %broadcast.splatinsert2, <2 x i32> poison, <2 x i32> zeroinitializer
+  %4 = zext <2 x i32> %broadcast.splat3 to <2 x i64>
+  %5 = mul <2 x i64> %4, %1
+  store <2 x i64> %5, ptr %2, align 2
+  ret void
+}
+
+define void @extend_no_mul(i32 %0, <2 x i64> %1, ptr %2) #0 {
+; CHECK-LABEL: extend_no_mul:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    mov z0.d, x8
+; CHECK-NEXT:    str q0, [x1]
+; CHECK-NEXT:    ret
+entry:
+  %broadcast.splatinsert2 = insertelement <2 x i32> poison, i32 %0, i64 0
+  %broadcast.splat3 = shufflevector <2 x i32> %broadcast.splatinsert2, <2 x i32> poison, <2 x i32> zeroinitializer
+  %3 = zext <2 x i32> %broadcast.splat3 to <2 x i64>
+  store <2 x i64> %3, ptr %2, align 2
+  ret void
+}
+
 attributes #0 = { nounwind "target-features"="+sve" }