[llvm] a3d5f1c - [x86] Fix infinite loop inside DAG combiner with lzcnt feature.

Tue Apr 5 09:32:24 PDT 2022

Author: Pierre Gousseau
Date: 2022-04-05T17:32:10+01:00
New Revision: a3d5f1cf5d88dfbbed931951e07f328d5ceba510

URL: https://github.com/llvm/llvm-project/commit/a3d5f1cf5d88dfbbed931951e07f328d5ceba510
DIFF: https://github.com/llvm/llvm-project/commit/a3d5f1cf5d88dfbbed931951e07f328d5ceba510.diff

LOG: [x86] Fix infinite loop inside DAG combiner with lzcnt feature.

The issue affects targets that support fast-lzcnt, such as btver2.
This removes the extraneous zext/trunc node insertions to fix the
infinite loop.
Fixes https://github.com/llvm/llvm-project/issues/54694

Differential Revision: https://reviews.llvm.org/D122900

Reviewed By: RKSimon, spatel, lebedev.ri

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 5bd5546651f60..362d528914e86 100644

--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -47606,8 +47606,7 @@ static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG,
 //   into:
 //   srl(ctlz x), log2(bitsize(x))
 // Input pattern is checked by caller.
-static SDValue lowerX86CmpEqZeroToCtlzSrl(SDValue Op, EVT ExtTy,
-                                          SelectionDAG &DAG) {
+static SDValue lowerX86CmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) {
   SDValue Cmp = Op.getOperand(1);
   EVT VT = Cmp.getOperand(0).getValueType();
   unsigned Log2b = Log2_32(VT.getSizeInBits());
@@ -47618,7 +47617,7 @@ static SDValue lowerX86CmpEqZeroToCtlzSrl(SDValue Op, EVT ExtTy,
   SDValue Trunc = DAG.getZExtOrTrunc(Clz, dl, MVT::i32);
   SDValue Scc = DAG.getNode(ISD::SRL, dl, MVT::i32, Trunc,
                             DAG.getConstant(Log2b, dl, MVT::i8));
-  return DAG.getZExtOrTrunc(Scc, dl, ExtTy);
+  return Scc;
 }
 
 // Try to transform:
@@ -47678,11 +47677,10 @@ static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG,
   // or(srl(ctlz),srl(ctlz)).
   // The dag combiner can then fold it into:
   // srl(or(ctlz, ctlz)).
-  EVT VT = OR->getValueType(0);
-  SDValue NewLHS = lowerX86CmpEqZeroToCtlzSrl(LHS, VT, DAG);
+  SDValue NewLHS = lowerX86CmpEqZeroToCtlzSrl(LHS, DAG);
   SDValue Ret, NewRHS;
-  if (NewLHS && (NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, VT, DAG)))
-    Ret = DAG.getNode(ISD::OR, SDLoc(OR), VT, NewLHS, NewRHS);
+  if (NewLHS && (NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, DAG)))
+    Ret = DAG.getNode(ISD::OR, SDLoc(OR), MVT::i32, NewLHS, NewRHS);
 
   if (!Ret)
     return SDValue();
@@ -47695,16 +47693,13 @@ static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG,
     // Swap rhs with lhs to match or(setcc(eq, cmp, 0), or).
     if (RHS->getOpcode() == ISD::OR)
       std::swap(LHS, RHS);
-    NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, VT, DAG);
+    NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, DAG);
     if (!NewRHS)
       return SDValue();
-    Ret = DAG.getNode(ISD::OR, SDLoc(OR), VT, Ret, NewRHS);
+    Ret = DAG.getNode(ISD::OR, SDLoc(OR), MVT::i32, Ret, NewRHS);
   }
 
-  if (Ret)
-    Ret = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), Ret);
-
-  return Ret;
+  return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), Ret);
 }
 
 static SDValue foldMaskedMergeImpl(SDValue And0_L, SDValue And0_R,

diff  --git a/llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll b/llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll
index 0c5450a59e422..0662a5c521ab6 100644
--- a/llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll
+++ b/llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll
@@ -154,11 +154,11 @@ entry:
 define i32 @test_zext_cmp6(i32 %a, i32 %b, i32 %c) {
 ; FASTLZCNT-LABEL: test_zext_cmp6:
 ; FASTLZCNT:       # %bb.0: # %entry
-; FASTLZCNT-NEXT:    lzcntl %edi, %eax
-; FASTLZCNT-NEXT:    lzcntl %esi, %ecx
-; FASTLZCNT-NEXT:    orl %eax, %ecx
+; FASTLZCNT-NEXT:    lzcntl %edi, %ecx
 ; FASTLZCNT-NEXT:    lzcntl %edx, %eax
+; FASTLZCNT-NEXT:    lzcntl %esi, %esi
 ; FASTLZCNT-NEXT:    orl %ecx, %eax
+; FASTLZCNT-NEXT:    orl %esi, %eax
 ; FASTLZCNT-NEXT:    shrl $5, %eax
 ; FASTLZCNT-NEXT:    retq
 ;
@@ -189,11 +189,11 @@ entry:
 define i32 @test_zext_cmp7(i32 %a, i32 %b, i32 %c) {
 ; FASTLZCNT-LABEL: test_zext_cmp7:
 ; FASTLZCNT:       # %bb.0: # %entry
-; FASTLZCNT-NEXT:    lzcntl %edi, %eax
-; FASTLZCNT-NEXT:    lzcntl %esi, %ecx
-; FASTLZCNT-NEXT:    orl %eax, %ecx
+; FASTLZCNT-NEXT:    lzcntl %edi, %ecx
 ; FASTLZCNT-NEXT:    lzcntl %edx, %eax
+; FASTLZCNT-NEXT:    lzcntl %esi, %esi
 ; FASTLZCNT-NEXT:    orl %ecx, %eax
+; FASTLZCNT-NEXT:    orl %esi, %eax
 ; FASTLZCNT-NEXT:    shrl $5, %eax
 ; FASTLZCNT-NEXT:    retq
 ;
@@ -335,3 +335,37 @@ entry:
   %conv = zext i1 %0 to i32
   ret i32 %conv
 }
+
+; PR54694 Fix an infinite loop in DAG combiner.
+define i32 @test_zext_cmp12(i32 %0, i32 %1) {
+; FASTLZCNT-LABEL: test_zext_cmp12:
+; FASTLZCNT:       # %bb.0:
+; FASTLZCNT-NEXT:    andl $131072, %edi # imm = 0x20000
+; FASTLZCNT-NEXT:    andl $131072, %esi # imm = 0x20000
+; FASTLZCNT-NEXT:    lzcntl %edi, %eax
+; FASTLZCNT-NEXT:    lzcntl %esi, %ecx
+; FASTLZCNT-NEXT:    orl %eax, %ecx
+; FASTLZCNT-NEXT:    movl $2, %eax
+; FASTLZCNT-NEXT:    shrl $5, %ecx
+; FASTLZCNT-NEXT:    subl %ecx, %eax
+; FASTLZCNT-NEXT:    retq
+;
+; NOFASTLZCNT-LABEL: test_zext_cmp12:
+; NOFASTLZCNT:       # %bb.0:
+; NOFASTLZCNT-NEXT:    testl $131072, %edi # imm = 0x20000
+; NOFASTLZCNT-NEXT:    sete %al
+; NOFASTLZCNT-NEXT:    testl $131072, %esi # imm = 0x20000
+; NOFASTLZCNT-NEXT:    sete %cl
+; NOFASTLZCNT-NEXT:    orb %al, %cl
+; NOFASTLZCNT-NEXT:    movl $2, %eax
+; NOFASTLZCNT-NEXT:    movzbl %cl, %ecx
+; NOFASTLZCNT-NEXT:    subl %ecx, %eax
+; NOFASTLZCNT-NEXT:    retq
+  %3 = and i32 %0, 131072
+  %4 = icmp eq i32 %3, 0
+  %5 = and i32 %1, 131072
+  %6 = icmp eq i32 %5, 0
+  %7 = select i1 %4, i1 true, i1 %6
+  %8 = select i1 %7, i32 1, i32 2
+  ret i32 %8
+}