[llvm] [DAG] Generalize setcc(setcc) fold to use known bits. (PR #66503)

via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 15 05:37:39 PDT 2023


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-aarch64
            
<details>
<summary>Changes</summary>
If we have a `SETCC (SETCC), 0, NE` and ZeroOrOneBooleanContent, we can remove
the outer setcc as it will produce the same value as the inner. This can be
generalized to anything where the top bits are known to be 0, as the value will
remain as 1 or 0.
--
Full diff: https://github.com/llvm/llvm-project/pull/66503.diff

6 Files Affected:

- (modified) llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp (+13-9) 
- (added) llvm/test/CodeGen/AArch64/setcc_knownbits.ll (+81) 
- (modified) llvm/test/CodeGen/WebAssembly/xor_reassociate.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/lzcnt-cmp.ll (+22-24) 
- (modified) llvm/test/CodeGen/X86/umul_fix_sat.ll (+6-8) 
- (modified) llvm/test/CodeGen/X86/xor.ll (+8-3) 


<pre>
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index bd1940994a87f0f..a544cfbe833ddff 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -4652,21 +4652,25 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
                           DAG.getConstant(C1 &amp; Imm, dl, ExtDstTy), Cond);
     } else if ((N1C-&gt;isZero() || N1C-&gt;isOne()) &amp;&amp;
                (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
-      // SETCC (SETCC), [0|1], [EQ|NE]  -&gt; SETCC
-      if (N0.getOpcode() == ISD::SETCC &amp;&amp;
+      // SETCC (X), [0|1], [EQ|NE]  -&gt; X if X is known 0/1
+      if ((N0.getOpcode() == ISD::SETCC || VT != MVT::i1) &amp;&amp;
           isTypeLegal(VT) &amp;&amp; VT.bitsLE(N0.getValueType()) &amp;&amp;
           (N0.getValueType() == MVT::i1 ||
-           getBooleanContents(N0.getOperand(0).getValueType()) ==
-                       ZeroOrOneBooleanContent)) {
+           getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &amp;&amp;
+          DAG.MaskedValueIsZero(
+              N0, APInt::getHighBitsSet(N0.getValueSizeInBits(),
+                                        N0.getValueSizeInBits() - 1))) {
         bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C-&gt;isOne());
         if (TrueWhenTrue)
           return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
         // Invert the condition.
-        ISD::CondCode CC = cast&lt;CondCodeSDNode&gt;(N0.getOperand(2))-&gt;get();
-        CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType());
-        if (DCI.isBeforeLegalizeOps() ||
-            isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
-          return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
+        if (N0.getOpcode() == ISD::SETCC) {
+          ISD::CondCode CC = cast&lt;CondCodeSDNode&gt;(N0.getOperand(2))-&gt;get();
+          CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType());
+          if (DCI.isBeforeLegalizeOps() ||
+              isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
+            return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
+        }
       }
 
       if ((N0.getOpcode() == ISD::XOR ||
diff --git a/llvm/test/CodeGen/AArch64/setcc_knownbits.ll b/llvm/test/CodeGen/AArch64/setcc_knownbits.ll
new file mode 100644
index 000000000000000..9e9c814be0266ef
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/setcc_knownbits.ll
@@ -0,0 +1,81 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc &lt; %s -mtriple=aarch64-none-eabi | FileCheck %s
+
+define i1 @load_bv_v4i8(i1 zeroext %a) {
+; CHECK-LABEL: load_bv_v4i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %b = zext i1 %a to i32
+  %c = icmp eq i32 %b, 1
+  ret i1 %c
+}
+
+define noundef i1 @logger(i32 noundef %logLevel, ptr %ea, ptr %pll) {
+; CHECK-LABEL: logger:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr w8, [x2]
+; CHECK-NEXT:    cmp w8, w0
+; CHECK-NEXT:    b.ls .LBB1_2
+; CHECK-NEXT:  // %bb.1:
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB1_2: // %land.rhs
+; CHECK-NEXT:    ldr x8, [x1]
+; CHECK-NEXT:    ldrb w8, [x8]
+; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    cset w0, ne
+; CHECK-NEXT:    ret
+entry:
+  %0 = load i32, ptr %pll, align 4
+  %cmp.not = icmp ugt i32 %0, %logLevel
+  br i1 %cmp.not, label %land.end, label %land.rhs
+
+land.rhs:                                         ; preds = %entry
+  %1 = load ptr, ptr %ea, align 8
+  %2 = load i8, ptr %1, align 1, !range !14, !noundef !15
+  %tobool.i = icmp ne i8 %2, 0
+  br label %land.end
+
+land.end:                                         ; preds = %land.rhs, %entry
+  %3 = phi i1 [ false, %entry ], [ %tobool.i, %land.rhs ]
+  ret i1 %3
+}
+!14 = !{i8 0, i8 2}
+!15 = !{}
+
+
+declare i64 @llvm.ctlz.i64(i64 %in, i1)
+define i1 @lshr_ctlz_undef_cmpeq_one_i64(i64 %in) {
+; CHECK-LABEL: lshr_ctlz_undef_cmpeq_one_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clz x8, x0
+; CHECK-NEXT:    lsr x0, x8, #6
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    ret
+  %ctlz = call i64 @llvm.ctlz.i64(i64 %in, i1 -1)
+  %lshr = lshr i64 %ctlz, 6
+  %icmp = icmp eq i64 %lshr, 1
+  ret i1 %icmp
+}
+
+define i32 @PR17487(i1 %tobool) {
+; CHECK-LABEL: PR17487:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dup v0.2s, w0
+; CHECK-NEXT:    mov w8, #1 // =0x1
+; CHECK-NEXT:    dup v1.2d, x8
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    bic v0.16b, v1.16b, v0.16b
+; CHECK-NEXT:    mov x8, v0.d[1]
+; CHECK-NEXT:    cmp x8, #1
+; CHECK-NEXT:    cset w0, ne
+; CHECK-NEXT:    ret
+  %tmp = insertelement &lt;2 x i1&gt; undef, i1 %tobool, i32 1
+  %tmp1 = zext &lt;2 x i1&gt; %tmp to &lt;2 x i64&gt;
+  %tmp2 = xor &lt;2 x i64&gt; %tmp1, &lt;i64 1, i64 1&gt;
+  %tmp3 = extractelement &lt;2 x i64&gt; %tmp2, i32 1
+  %add = add nsw i64 0, %tmp3
+  %cmp6 = icmp ne i64 %add, 1
+  %conv7 = zext i1 %cmp6 to i32
+  ret i32 %conv7
+}
diff --git a/llvm/test/CodeGen/WebAssembly/xor_reassociate.ll b/llvm/test/CodeGen/WebAssembly/xor_reassociate.ll
index 9ef9c14084a7d7f..3dd8463d9dd1016 100644
--- a/llvm/test/CodeGen/WebAssembly/xor_reassociate.ll
+++ b/llvm/test/CodeGen/WebAssembly/xor_reassociate.ll
@@ -17,7 +17,7 @@ define i32 @reassociate_xor(float %x, float %y) {
 ; CHECK-NEXT:    local.get 0
 ; CHECK-NEXT:    f32.const 0x1p-23
 ; CHECK-NEXT:    f32.gt
-; CHECK-NEXT:    i32.ne
+; CHECK-NEXT:    i32.xor
 ; CHECK-NEXT:    br_if 0 # 0: down to label0
 ; CHECK-NEXT:  # %bb.1: # %if.then.i
 ; CHECK-NEXT:    i32.const 0
diff --git a/llvm/test/CodeGen/X86/lzcnt-cmp.ll b/llvm/test/CodeGen/X86/lzcnt-cmp.ll
index c1cce6f5d8ca10d..6c8d5c9d55a6d55 100644
--- a/llvm/test/CodeGen/X86/lzcnt-cmp.ll
+++ b/llvm/test/CodeGen/X86/lzcnt-cmp.ll
@@ -50,35 +50,33 @@ define i1 @lshr_ctlz_undef_cmpeq_one_i64(i64 %in) nounwind {
 ; X86-BSR-NEXT:    xorl $31, %eax
 ; X86-BSR-NEXT:    addl $32, %eax
 ; X86-BSR-NEXT:  .LBB1_2:
-; X86-BSR-NEXT:    testl $-64, %eax
-; X86-BSR-NEXT:    setne %al
+; X86-BSR-NEXT:    shrl $6, %eax
+; X86-BSR-NEXT:    # kill: def $al killed $al killed $eax
 ; X86-BSR-NEXT:    retl
 ;
 ; X86-LZCNT-LABEL: lshr_ctlz_undef_cmpeq_one_i64:
 ; X86-LZCNT:       # %bb.0:
-; X86-LZCNT-NEXT:    lzcntl {{[0-9]+}}(%esp), %eax
-; X86-LZCNT-NEXT:    addl $32, %eax
-; X86-LZCNT-NEXT:    xorl %ecx, %ecx
+; X86-LZCNT-NEXT:    lzcntl {{[0-9]+}}(%esp), %ecx
+; X86-LZCNT-NEXT:    addl $32, %ecx
+; X86-LZCNT-NEXT:    xorl %eax, %eax
 ; X86-LZCNT-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
-; X86-LZCNT-NEXT:    cmovel %eax, %ecx
-; X86-LZCNT-NEXT:    testb $64, %cl
-; X86-LZCNT-NEXT:    setne %al
+; X86-LZCNT-NEXT:    cmovel %ecx, %eax
+; X86-LZCNT-NEXT:    shrl $6, %eax
+; X86-LZCNT-NEXT:    # kill: def $al killed $al killed $eax
 ; X86-LZCNT-NEXT:    retl
 ;
 ; X64-BSR-LABEL: lshr_ctlz_undef_cmpeq_one_i64:
 ; X64-BSR:       # %bb.0:
 ; X64-BSR-NEXT:    bsrq %rdi, %rax
 ; X64-BSR-NEXT:    shrl $6, %eax
-; X64-BSR-NEXT:    cmpl $1, %eax
-; X64-BSR-NEXT:    sete %al
+; X64-BSR-NEXT:    # kill: def $al killed $al killed $rax
 ; X64-BSR-NEXT:    retq
 ;
 ; X64-LZCNT-LABEL: lshr_ctlz_undef_cmpeq_one_i64:
 ; X64-LZCNT:       # %bb.0:
 ; X64-LZCNT-NEXT:    lzcntq %rdi, %rax
 ; X64-LZCNT-NEXT:    shrl $6, %eax
-; X64-LZCNT-NEXT:    cmpl $1, %eax
-; X64-LZCNT-NEXT:    sete %al
+; X64-LZCNT-NEXT:    # kill: def $al killed $al killed $rax
 ; X64-LZCNT-NEXT:    retq
   %ctlz = call i64 @llvm.ctlz.i64(i64 %in, i1 -1)
   %lshr = lshr i64 %ctlz, 6
@@ -131,33 +129,33 @@ define i1 @lshr_ctlz_undef_cmpne_zero_i64(i64 %in) nounwind {
 ; X86-BSR-NEXT:    xorl $31, %eax
 ; X86-BSR-NEXT:    addl $32, %eax
 ; X86-BSR-NEXT:  .LBB3_2:
-; X86-BSR-NEXT:    testl $-64, %eax
-; X86-BSR-NEXT:    setne %al
+; X86-BSR-NEXT:    shrl $6, %eax
+; X86-BSR-NEXT:    # kill: def $al killed $al killed $eax
 ; X86-BSR-NEXT:    retl
 ;
 ; X86-LZCNT-LABEL: lshr_ctlz_undef_cmpne_zero_i64:
 ; X86-LZCNT:       # %bb.0:
-; X86-LZCNT-NEXT:    lzcntl {{[0-9]+}}(%esp), %eax
-; X86-LZCNT-NEXT:    addl $32, %eax
-; X86-LZCNT-NEXT:    xorl %ecx, %ecx
+; X86-LZCNT-NEXT:    lzcntl {{[0-9]+}}(%esp), %ecx
+; X86-LZCNT-NEXT:    addl $32, %ecx
+; X86-LZCNT-NEXT:    xorl %eax, %eax
 ; X86-LZCNT-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
-; X86-LZCNT-NEXT:    cmovel %eax, %ecx
-; X86-LZCNT-NEXT:    testb $64, %cl
-; X86-LZCNT-NEXT:    setne %al
+; X86-LZCNT-NEXT:    cmovel %ecx, %eax
+; X86-LZCNT-NEXT:    shrl $6, %eax
+; X86-LZCNT-NEXT:    # kill: def $al killed $al killed $eax
 ; X86-LZCNT-NEXT:    retl
 ;
 ; X64-BSR-LABEL: lshr_ctlz_undef_cmpne_zero_i64:
 ; X64-BSR:       # %bb.0:
 ; X64-BSR-NEXT:    bsrq %rdi, %rax
-; X64-BSR-NEXT:    testl $-64, %eax
-; X64-BSR-NEXT:    setne %al
+; X64-BSR-NEXT:    shrl $6, %eax
+; X64-BSR-NEXT:    # kill: def $al killed $al killed $rax
 ; X64-BSR-NEXT:    retq
 ;
 ; X64-LZCNT-LABEL: lshr_ctlz_undef_cmpne_zero_i64:
 ; X64-LZCNT:       # %bb.0:
 ; X64-LZCNT-NEXT:    lzcntq %rdi, %rax
-; X64-LZCNT-NEXT:    testb $64, %al
-; X64-LZCNT-NEXT:    setne %al
+; X64-LZCNT-NEXT:    shrl $6, %eax
+; X64-LZCNT-NEXT:    # kill: def $al killed $al killed $rax
 ; X64-LZCNT-NEXT:    retq
   %ctlz = call i64 @llvm.ctlz.i64(i64 %in, i1 -1)
   %lshr = lshr i64 %ctlz, 6
diff --git a/llvm/test/CodeGen/X86/umul_fix_sat.ll b/llvm/test/CodeGen/X86/umul_fix_sat.ll
index 33f43c75cad3d7c..6f10aae8909f8a3 100644
--- a/llvm/test/CodeGen/X86/umul_fix_sat.ll
+++ b/llvm/test/CodeGen/X86/umul_fix_sat.ll
@@ -517,15 +517,13 @@ define i64 @func8(i64 %x, i64 %y) nounwind {
 ; X86-NEXT:    adcl $0, %ecx
 ; X86-NEXT:    addl %ebp, %edx
 ; X86-NEXT:    adcl $0, %ecx
+; X86-NEXT:    shldl $1, %edx, %ecx
 ; X86-NEXT:    shrdl $31, %edx, %eax
-; X86-NEXT:    movl %edx, %esi
-; X86-NEXT:    shrl $31, %esi
-; X86-NEXT:    xorl %edi, %edi
-; X86-NEXT:    negl %esi
-; X86-NEXT:    sbbl %edi, %edi
-; X86-NEXT:    orl %edi, %eax
-; X86-NEXT:    shrdl $31, %ecx, %edx
-; X86-NEXT:    orl %edi, %edx
+; X86-NEXT:    testl $-2147483648, %edx # imm = 0x80000000
+; X86-NEXT:    movl $-1, %edx
+; X86-NEXT:    cmovnel %edx, %eax
+; X86-NEXT:    cmovnel %edx, %ecx
+; X86-NEXT:    movl %ecx, %edx
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    popl %edi
 ; X86-NEXT:    popl %ebx
diff --git a/llvm/test/CodeGen/X86/xor.ll b/llvm/test/CodeGen/X86/xor.ll
index 2072568b7ba75dd..8c8b7cc51610560 100644
--- a/llvm/test/CodeGen/X86/xor.ll
+++ b/llvm/test/CodeGen/X86/xor.ll
@@ -403,14 +403,19 @@ define i32 @PR17487(i1 %tobool) {
 ;
 ; X64-LIN-LABEL: PR17487:
 ; X64-LIN:       # %bb.0:
-; X64-LIN-NEXT:    movl %edi, %eax
-; X64-LIN-NEXT:    andl $1, %eax
+; X64-LIN-NEXT:    movd %edi, %xmm0
+; X64-LIN-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; X64-LIN-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-LIN-NEXT:    pextrw $4, %xmm0, %eax
 ; X64-LIN-NEXT:    retq
 ;
 ; X64-WIN-LABEL: PR17487:
 ; X64-WIN:       # %bb.0:
-; X64-WIN-NEXT:    andb $1, %cl
 ; X64-WIN-NEXT:    movzbl %cl, %eax
+; X64-WIN-NEXT:    movd %eax, %xmm0
+; X64-WIN-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; X64-WIN-NEXT:    pand __xmm at 00000000000000010000000000000001(%rip), %xmm0
+; X64-WIN-NEXT:    pextrw $4, %xmm0, %eax
 ; X64-WIN-NEXT:    retq
   %tmp = insertelement &lt;2 x i1&gt; undef, i1 %tobool, i32 1
   %tmp1 = zext &lt;2 x i1&gt; %tmp to &lt;2 x i64&gt;
</pre>
</details>


https://github.com/llvm/llvm-project/pull/66503


More information about the llvm-commits mailing list