[llvm] [DAG] Generalize setcc(setcc) fold to use known bits. (PR #66503)

David Green via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 15 05:36:34 PDT 2023


https://github.com/davemgreen created https://github.com/llvm/llvm-project/pull/66503

If we have a `SETCC (SETCC), 0, NE` and ZeroOrOneBooleanContent, we can remove
the outer setcc as it will produce the same value as the inner. This can be
generalized to anything where the top bits are known to be 0, as the value will
remain as 1 or 0.

>From 774d15ac0378875703bf5ed42392ea488d8b73ec Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Thu, 14 Sep 2023 21:04:53 +0100
Subject: [PATCH 1/2] [AArch64] Add some tests for setcc known bits fold. NFC

---
 llvm/test/CodeGen/AArch64/setcc_knownbits.ll | 84 ++++++++++++++++++++
 1 file changed, 84 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/setcc_knownbits.ll

diff --git a/llvm/test/CodeGen/AArch64/setcc_knownbits.ll b/llvm/test/CodeGen/AArch64/setcc_knownbits.ll
new file mode 100644
index 000000000000000..aa62a7aa176c81d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/setcc_knownbits.ll
@@ -0,0 +1,84 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc < %s -mtriple=aarch64-none-eabi | FileCheck %s
+
+define i1 @load_bv_v4i8(i1 zeroext %a) {
+; CHECK-LABEL: load_bv_v4i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmp w0, #0
+; CHECK-NEXT:    cset w0, ne
+; CHECK-NEXT:    ret
+  %b = zext i1 %a to i32
+  %c = icmp eq i32 %b, 1
+  ret i1 %c
+}
+
+define noundef i1 @logger(i32 noundef %logLevel, ptr %ea, ptr %pll) {
+; CHECK-LABEL: logger:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr w8, [x2]
+; CHECK-NEXT:    cmp w8, w0
+; CHECK-NEXT:    b.ls .LBB1_2
+; CHECK-NEXT:  // %bb.1:
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB1_2: // %land.rhs
+; CHECK-NEXT:    ldr x8, [x1]
+; CHECK-NEXT:    ldrb w8, [x8]
+; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    cset w0, ne
+; CHECK-NEXT:    ret
+entry:
+  %0 = load i32, ptr %pll, align 4
+  %cmp.not = icmp ugt i32 %0, %logLevel
+  br i1 %cmp.not, label %land.end, label %land.rhs
+
+land.rhs:                                         ; preds = %entry
+  %1 = load ptr, ptr %ea, align 8
+  %2 = load i8, ptr %1, align 1, !range !14, !noundef !15
+  %tobool.i = icmp ne i8 %2, 0
+  br label %land.end
+
+land.end:                                         ; preds = %land.rhs, %entry
+  %3 = phi i1 [ false, %entry ], [ %tobool.i, %land.rhs ]
+  ret i1 %3
+}
+!14 = !{i8 0, i8 2}
+!15 = !{}
+
+
+declare i64 @llvm.ctlz.i64(i64 %in, i1)
+define i1 @lshr_ctlz_undef_cmpeq_one_i64(i64 %in) {
+; CHECK-LABEL: lshr_ctlz_undef_cmpeq_one_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clz x8, x0
+; CHECK-NEXT:    lsr x8, x8, #6
+; CHECK-NEXT:    cmp x8, #1
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %ctlz = call i64 @llvm.ctlz.i64(i64 %in, i1 -1)
+  %lshr = lshr i64 %ctlz, 6
+  %icmp = icmp eq i64 %lshr, 1
+  ret i1 %icmp
+}
+
+define i32 @PR17487(i1 %tobool) {
+; CHECK-LABEL: PR17487:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dup v0.2s, w0
+; CHECK-NEXT:    mov w8, #1 // =0x1
+; CHECK-NEXT:    dup v1.2d, x8
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    bic v0.16b, v1.16b, v0.16b
+; CHECK-NEXT:    mov x8, v0.d[1]
+; CHECK-NEXT:    cmp x8, #1
+; CHECK-NEXT:    cset w0, ne
+; CHECK-NEXT:    ret
+  %tmp = insertelement <2 x i1> undef, i1 %tobool, i32 1
+  %tmp1 = zext <2 x i1> %tmp to <2 x i64>
+  %tmp2 = xor <2 x i64> %tmp1, <i64 1, i64 1>
+  %tmp3 = extractelement <2 x i64> %tmp2, i32 1
+  %add = add nsw i64 0, %tmp3
+  %cmp6 = icmp ne i64 %add, 1
+  %conv7 = zext i1 %cmp6 to i32
+  ret i32 %conv7
+}

>From d723c95e1a3c28269c329a490770a27fdde91671 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Fri, 15 Sep 2023 12:48:31 +0100
Subject: [PATCH 2/2] [DAG] Generalize setcc(setcc) fold to use known bits.

If we have a `SETCC (SETCC), 0, NE` and ZeroOrOneBooleanContent, we can remove
the outer setcc as it will produce the same value as the inner. This can be
generalized to anything where the top bits are known to be 0, as the value will
remain as 1 or 0.
---
 .../CodeGen/SelectionDAG/TargetLowering.cpp   | 22 +++++----
 llvm/test/CodeGen/AArch64/setcc_knownbits.ll  |  7 +--
 .../CodeGen/WebAssembly/xor_reassociate.ll    |  2 +-
 llvm/test/CodeGen/X86/lzcnt-cmp.ll            | 46 +++++++++----------
 llvm/test/CodeGen/X86/umul_fix_sat.ll         | 14 +++---
 llvm/test/CodeGen/X86/xor.ll                  | 11 +++--
 6 files changed, 52 insertions(+), 50 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index bd1940994a87f0f..a544cfbe833ddff 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -4652,21 +4652,25 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
                           DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
     } else if ((N1C->isZero() || N1C->isOne()) &&
                (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
-      // SETCC (SETCC), [0|1], [EQ|NE]  -> SETCC
-      if (N0.getOpcode() == ISD::SETCC &&
+      // SETCC (X), [0|1], [EQ|NE]  -> X if X is known 0/1
+      if ((N0.getOpcode() == ISD::SETCC || VT != MVT::i1) &&
           isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
           (N0.getValueType() == MVT::i1 ||
-           getBooleanContents(N0.getOperand(0).getValueType()) ==
-                       ZeroOrOneBooleanContent)) {
+           getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &&
+          DAG.MaskedValueIsZero(
+              N0, APInt::getHighBitsSet(N0.getValueSizeInBits(),
+                                        N0.getValueSizeInBits() - 1))) {
         bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
         if (TrueWhenTrue)
           return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
         // Invert the condition.
-        ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
-        CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType());
-        if (DCI.isBeforeLegalizeOps() ||
-            isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
-          return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
+        if (N0.getOpcode() == ISD::SETCC) {
+          ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+          CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType());
+          if (DCI.isBeforeLegalizeOps() ||
+              isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
+            return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
+        }
       }
 
       if ((N0.getOpcode() == ISD::XOR ||
diff --git a/llvm/test/CodeGen/AArch64/setcc_knownbits.ll b/llvm/test/CodeGen/AArch64/setcc_knownbits.ll
index aa62a7aa176c81d..9e9c814be0266ef 100644
--- a/llvm/test/CodeGen/AArch64/setcc_knownbits.ll
+++ b/llvm/test/CodeGen/AArch64/setcc_knownbits.ll
@@ -4,8 +4,6 @@
 define i1 @load_bv_v4i8(i1 zeroext %a) {
 ; CHECK-LABEL: load_bv_v4i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %b = zext i1 %a to i32
   %c = icmp eq i32 %b, 1
@@ -51,9 +49,8 @@ define i1 @lshr_ctlz_undef_cmpeq_one_i64(i64 %in) {
 ; CHECK-LABEL: lshr_ctlz_undef_cmpeq_one_i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    clz x8, x0
-; CHECK-NEXT:    lsr x8, x8, #6
-; CHECK-NEXT:    cmp x8, #1
-; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    lsr x0, x8, #6
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %ctlz = call i64 @llvm.ctlz.i64(i64 %in, i1 -1)
   %lshr = lshr i64 %ctlz, 6
diff --git a/llvm/test/CodeGen/WebAssembly/xor_reassociate.ll b/llvm/test/CodeGen/WebAssembly/xor_reassociate.ll
index 9ef9c14084a7d7f..3dd8463d9dd1016 100644
--- a/llvm/test/CodeGen/WebAssembly/xor_reassociate.ll
+++ b/llvm/test/CodeGen/WebAssembly/xor_reassociate.ll
@@ -17,7 +17,7 @@ define i32 @reassociate_xor(float %x, float %y) {
 ; CHECK-NEXT:    local.get 0
 ; CHECK-NEXT:    f32.const 0x1p-23
 ; CHECK-NEXT:    f32.gt
-; CHECK-NEXT:    i32.ne
+; CHECK-NEXT:    i32.xor
 ; CHECK-NEXT:    br_if 0 # 0: down to label0
 ; CHECK-NEXT:  # %bb.1: # %if.then.i
 ; CHECK-NEXT:    i32.const 0
diff --git a/llvm/test/CodeGen/X86/lzcnt-cmp.ll b/llvm/test/CodeGen/X86/lzcnt-cmp.ll
index c1cce6f5d8ca10d..6c8d5c9d55a6d55 100644
--- a/llvm/test/CodeGen/X86/lzcnt-cmp.ll
+++ b/llvm/test/CodeGen/X86/lzcnt-cmp.ll
@@ -50,35 +50,33 @@ define i1 @lshr_ctlz_undef_cmpeq_one_i64(i64 %in) nounwind {
 ; X86-BSR-NEXT:    xorl $31, %eax
 ; X86-BSR-NEXT:    addl $32, %eax
 ; X86-BSR-NEXT:  .LBB1_2:
-; X86-BSR-NEXT:    testl $-64, %eax
-; X86-BSR-NEXT:    setne %al
+; X86-BSR-NEXT:    shrl $6, %eax
+; X86-BSR-NEXT:    # kill: def $al killed $al killed $eax
 ; X86-BSR-NEXT:    retl
 ;
 ; X86-LZCNT-LABEL: lshr_ctlz_undef_cmpeq_one_i64:
 ; X86-LZCNT:       # %bb.0:
-; X86-LZCNT-NEXT:    lzcntl {{[0-9]+}}(%esp), %eax
-; X86-LZCNT-NEXT:    addl $32, %eax
-; X86-LZCNT-NEXT:    xorl %ecx, %ecx
+; X86-LZCNT-NEXT:    lzcntl {{[0-9]+}}(%esp), %ecx
+; X86-LZCNT-NEXT:    addl $32, %ecx
+; X86-LZCNT-NEXT:    xorl %eax, %eax
 ; X86-LZCNT-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
-; X86-LZCNT-NEXT:    cmovel %eax, %ecx
-; X86-LZCNT-NEXT:    testb $64, %cl
-; X86-LZCNT-NEXT:    setne %al
+; X86-LZCNT-NEXT:    cmovel %ecx, %eax
+; X86-LZCNT-NEXT:    shrl $6, %eax
+; X86-LZCNT-NEXT:    # kill: def $al killed $al killed $eax
 ; X86-LZCNT-NEXT:    retl
 ;
 ; X64-BSR-LABEL: lshr_ctlz_undef_cmpeq_one_i64:
 ; X64-BSR:       # %bb.0:
 ; X64-BSR-NEXT:    bsrq %rdi, %rax
 ; X64-BSR-NEXT:    shrl $6, %eax
-; X64-BSR-NEXT:    cmpl $1, %eax
-; X64-BSR-NEXT:    sete %al
+; X64-BSR-NEXT:    # kill: def $al killed $al killed $rax
 ; X64-BSR-NEXT:    retq
 ;
 ; X64-LZCNT-LABEL: lshr_ctlz_undef_cmpeq_one_i64:
 ; X64-LZCNT:       # %bb.0:
 ; X64-LZCNT-NEXT:    lzcntq %rdi, %rax
 ; X64-LZCNT-NEXT:    shrl $6, %eax
-; X64-LZCNT-NEXT:    cmpl $1, %eax
-; X64-LZCNT-NEXT:    sete %al
+; X64-LZCNT-NEXT:    # kill: def $al killed $al killed $rax
 ; X64-LZCNT-NEXT:    retq
   %ctlz = call i64 @llvm.ctlz.i64(i64 %in, i1 -1)
   %lshr = lshr i64 %ctlz, 6
@@ -131,33 +129,33 @@ define i1 @lshr_ctlz_undef_cmpne_zero_i64(i64 %in) nounwind {
 ; X86-BSR-NEXT:    xorl $31, %eax
 ; X86-BSR-NEXT:    addl $32, %eax
 ; X86-BSR-NEXT:  .LBB3_2:
-; X86-BSR-NEXT:    testl $-64, %eax
-; X86-BSR-NEXT:    setne %al
+; X86-BSR-NEXT:    shrl $6, %eax
+; X86-BSR-NEXT:    # kill: def $al killed $al killed $eax
 ; X86-BSR-NEXT:    retl
 ;
 ; X86-LZCNT-LABEL: lshr_ctlz_undef_cmpne_zero_i64:
 ; X86-LZCNT:       # %bb.0:
-; X86-LZCNT-NEXT:    lzcntl {{[0-9]+}}(%esp), %eax
-; X86-LZCNT-NEXT:    addl $32, %eax
-; X86-LZCNT-NEXT:    xorl %ecx, %ecx
+; X86-LZCNT-NEXT:    lzcntl {{[0-9]+}}(%esp), %ecx
+; X86-LZCNT-NEXT:    addl $32, %ecx
+; X86-LZCNT-NEXT:    xorl %eax, %eax
 ; X86-LZCNT-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
-; X86-LZCNT-NEXT:    cmovel %eax, %ecx
-; X86-LZCNT-NEXT:    testb $64, %cl
-; X86-LZCNT-NEXT:    setne %al
+; X86-LZCNT-NEXT:    cmovel %ecx, %eax
+; X86-LZCNT-NEXT:    shrl $6, %eax
+; X86-LZCNT-NEXT:    # kill: def $al killed $al killed $eax
 ; X86-LZCNT-NEXT:    retl
 ;
 ; X64-BSR-LABEL: lshr_ctlz_undef_cmpne_zero_i64:
 ; X64-BSR:       # %bb.0:
 ; X64-BSR-NEXT:    bsrq %rdi, %rax
-; X64-BSR-NEXT:    testl $-64, %eax
-; X64-BSR-NEXT:    setne %al
+; X64-BSR-NEXT:    shrl $6, %eax
+; X64-BSR-NEXT:    # kill: def $al killed $al killed $rax
 ; X64-BSR-NEXT:    retq
 ;
 ; X64-LZCNT-LABEL: lshr_ctlz_undef_cmpne_zero_i64:
 ; X64-LZCNT:       # %bb.0:
 ; X64-LZCNT-NEXT:    lzcntq %rdi, %rax
-; X64-LZCNT-NEXT:    testb $64, %al
-; X64-LZCNT-NEXT:    setne %al
+; X64-LZCNT-NEXT:    shrl $6, %eax
+; X64-LZCNT-NEXT:    # kill: def $al killed $al killed $rax
 ; X64-LZCNT-NEXT:    retq
   %ctlz = call i64 @llvm.ctlz.i64(i64 %in, i1 -1)
   %lshr = lshr i64 %ctlz, 6
diff --git a/llvm/test/CodeGen/X86/umul_fix_sat.ll b/llvm/test/CodeGen/X86/umul_fix_sat.ll
index 33f43c75cad3d7c..6f10aae8909f8a3 100644
--- a/llvm/test/CodeGen/X86/umul_fix_sat.ll
+++ b/llvm/test/CodeGen/X86/umul_fix_sat.ll
@@ -517,15 +517,13 @@ define i64 @func8(i64 %x, i64 %y) nounwind {
 ; X86-NEXT:    adcl $0, %ecx
 ; X86-NEXT:    addl %ebp, %edx
 ; X86-NEXT:    adcl $0, %ecx
+; X86-NEXT:    shldl $1, %edx, %ecx
 ; X86-NEXT:    shrdl $31, %edx, %eax
-; X86-NEXT:    movl %edx, %esi
-; X86-NEXT:    shrl $31, %esi
-; X86-NEXT:    xorl %edi, %edi
-; X86-NEXT:    negl %esi
-; X86-NEXT:    sbbl %edi, %edi
-; X86-NEXT:    orl %edi, %eax
-; X86-NEXT:    shrdl $31, %ecx, %edx
-; X86-NEXT:    orl %edi, %edx
+; X86-NEXT:    testl $-2147483648, %edx # imm = 0x80000000
+; X86-NEXT:    movl $-1, %edx
+; X86-NEXT:    cmovnel %edx, %eax
+; X86-NEXT:    cmovnel %edx, %ecx
+; X86-NEXT:    movl %ecx, %edx
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    popl %edi
 ; X86-NEXT:    popl %ebx
diff --git a/llvm/test/CodeGen/X86/xor.ll b/llvm/test/CodeGen/X86/xor.ll
index 2072568b7ba75dd..8c8b7cc51610560 100644
--- a/llvm/test/CodeGen/X86/xor.ll
+++ b/llvm/test/CodeGen/X86/xor.ll
@@ -403,14 +403,19 @@ define i32 @PR17487(i1 %tobool) {
 ;
 ; X64-LIN-LABEL: PR17487:
 ; X64-LIN:       # %bb.0:
-; X64-LIN-NEXT:    movl %edi, %eax
-; X64-LIN-NEXT:    andl $1, %eax
+; X64-LIN-NEXT:    movd %edi, %xmm0
+; X64-LIN-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; X64-LIN-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-LIN-NEXT:    pextrw $4, %xmm0, %eax
 ; X64-LIN-NEXT:    retq
 ;
 ; X64-WIN-LABEL: PR17487:
 ; X64-WIN:       # %bb.0:
-; X64-WIN-NEXT:    andb $1, %cl
 ; X64-WIN-NEXT:    movzbl %cl, %eax
+; X64-WIN-NEXT:    movd %eax, %xmm0
+; X64-WIN-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; X64-WIN-NEXT:    pand __xmm at 00000000000000010000000000000001(%rip), %xmm0
+; X64-WIN-NEXT:    pextrw $4, %xmm0, %eax
 ; X64-WIN-NEXT:    retq
   %tmp = insertelement <2 x i1> undef, i1 %tobool, i32 1
   %tmp1 = zext <2 x i1> %tmp to <2 x i64>



More information about the llvm-commits mailing list