[llvm] [X86] LowerSelect - generalize "select icmp(x,0), lhs, rhs" folding patterns. (PR #107272)

Thu Sep 5 06:58:48 PDT 2024

https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/107272

>From 95a1857a92e3732bbee76351b9afee9ccc2a48bb Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Wed, 4 Sep 2024 18:22:16 +0100
Subject: [PATCH] [X86] LowerSelect - generalize "select icmp(x,0), lhs, rhs"
 folding patterns.

We have many cases where the condition is a raw "(and x, 1)" pattern (e.g. from a bool argument or some other simplified bit logic), and a large number of existing generic/x86 patterns rely on that form, so trying to convert it to a SETCC node can cause infinite loops.

Use the LowerSELECTWithCmpZero helper, simulating the comparison with adjusted operands.
---
 llvm/lib/Target/X86/X86ISelLowering.cpp       |   7 +
 .../pull-conditional-binop-through-shift.ll   | 132 ++++++++----------
 llvm/test/CodeGen/X86/select.ll               |  36 +++--
 3 files changed, 83 insertions(+), 92 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 5f87ffd2f1eabf..46071971bedb7e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -24295,6 +24295,13 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
       isOneConstant(Cond.getOperand(1)))
     Cond = Cond.getOperand(0);
 
+  // Attempt to fold "raw cond" cases by treating them as:
+  // (select (and X, 1), Op1, Op2) --> (select (icmpeq (and X, 1), 0), Op2, Op1)
+  if (Cond.getOpcode() == ISD::AND && isOneConstant(Cond.getOperand(1)))
+    if (SDValue R = LowerSELECTWithCmpZero(Cond, Op2, Op1, X86::COND_E, DL, DAG,
+                                           Subtarget))
+      return R;
+
   // If condition flag is set by a X86ISD::CMP, then use it as the condition
   // setting operand in place of the X86ISD::SETCC.
   unsigned CondOpcode = Cond.getOpcode();
diff --git a/llvm/test/CodeGen/X86/pull-conditional-binop-through-shift.ll b/llvm/test/CodeGen/X86/pull-conditional-binop-through-shift.ll
index 4f39b8f945413c..def4c08a3592ec 100644
--- a/llvm/test/CodeGen/X86/pull-conditional-binop-through-shift.ll
+++ b/llvm/test/CodeGen/X86/pull-conditional-binop-through-shift.ll
@@ -77,12 +77,11 @@ define i32 @or_signbit_select_shl(i32 %x, i1 %cond, ptr %dst) {
 ; X86-LABEL: or_signbit_select_shl:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    testb $1, {{[0-9]+}}(%esp)
-; X86-NEXT:    je .LBB2_2
-; X86-NEXT:  # %bb.1:
-; X86-NEXT:    orl $16711680, %eax # imm = 0xFF0000
-; X86-NEXT:  .LBB2_2:
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    andl $16711680, %eax # imm = 0xFF0000
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    shll $8, %eax
 ; X86-NEXT:    movl %eax, (%ecx)
 ; X86-NEXT:    retl
@@ -106,12 +105,11 @@ define i32 @or_nosignbit_select_shl(i32 %x, i1 %cond, ptr %dst) {
 ; X86-LABEL: or_nosignbit_select_shl:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    testb $1, {{[0-9]+}}(%esp)
-; X86-NEXT:    je .LBB3_2
-; X86-NEXT:  # %bb.1:
-; X86-NEXT:    orl $16711680, %eax # imm = 0xFF0000
-; X86-NEXT:  .LBB3_2:
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    andl $16711680, %eax # imm = 0xFF0000
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    shll $8, %eax
 ; X86-NEXT:    movl %eax, (%ecx)
 ; X86-NEXT:    retl
@@ -136,12 +134,11 @@ define i32 @xor_signbit_select_shl(i32 %x, i1 %cond, ptr %dst) {
 ; X86-LABEL: xor_signbit_select_shl:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    testb $1, {{[0-9]+}}(%esp)
-; X86-NEXT:    je .LBB4_2
-; X86-NEXT:  # %bb.1:
-; X86-NEXT:    xorl $16711680, %eax # imm = 0xFF0000
-; X86-NEXT:  .LBB4_2:
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    andl $16711680, %eax # imm = 0xFF0000
+; X86-NEXT:    xorl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    shll $8, %eax
 ; X86-NEXT:    movl %eax, (%ecx)
 ; X86-NEXT:    retl
@@ -165,12 +162,11 @@ define i32 @xor_nosignbit_select_shl(i32 %x, i1 %cond, ptr %dst) {
 ; X86-LABEL: xor_nosignbit_select_shl:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    testb $1, {{[0-9]+}}(%esp)
-; X86-NEXT:    je .LBB5_2
-; X86-NEXT:  # %bb.1:
-; X86-NEXT:    xorl $16711680, %eax # imm = 0xFF0000
-; X86-NEXT:  .LBB5_2:
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    andl $16711680, %eax # imm = 0xFF0000
+; X86-NEXT:    xorl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    shll $8, %eax
 ; X86-NEXT:    movl %eax, (%ecx)
 ; X86-NEXT:    retl
@@ -315,12 +311,11 @@ define i32 @or_signbit_select_lshr(i32 %x, i1 %cond, ptr %dst) {
 ; X86-LABEL: or_signbit_select_lshr:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    testb $1, {{[0-9]+}}(%esp)
-; X86-NEXT:    je .LBB10_2
-; X86-NEXT:  # %bb.1:
-; X86-NEXT:    orl $-65536, %eax # imm = 0xFFFF0000
-; X86-NEXT:  .LBB10_2:
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    andl $-65536, %eax # imm = 0xFFFF0000
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    shrl $8, %eax
 ; X86-NEXT:    movl %eax, (%ecx)
 ; X86-NEXT:    retl
@@ -344,12 +339,11 @@ define i32 @or_nosignbit_select_lshr(i32 %x, i1 %cond, ptr %dst) {
 ; X86-LABEL: or_nosignbit_select_lshr:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    testb $1, {{[0-9]+}}(%esp)
-; X86-NEXT:    je .LBB11_2
-; X86-NEXT:  # %bb.1:
-; X86-NEXT:    orl $2147418112, %eax # imm = 0x7FFF0000
-; X86-NEXT:  .LBB11_2:
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    andl $2147418112, %eax # imm = 0x7FFF0000
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    shrl $8, %eax
 ; X86-NEXT:    movl %eax, (%ecx)
 ; X86-NEXT:    retl
@@ -374,12 +368,11 @@ define i32 @xor_signbit_select_lshr(i32 %x, i1 %cond, ptr %dst) {
 ; X86-LABEL: xor_signbit_select_lshr:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    testb $1, {{[0-9]+}}(%esp)
-; X86-NEXT:    je .LBB12_2
-; X86-NEXT:  # %bb.1:
-; X86-NEXT:    xorl $-65536, %eax # imm = 0xFFFF0000
-; X86-NEXT:  .LBB12_2:
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    andl $-65536, %eax # imm = 0xFFFF0000
+; X86-NEXT:    xorl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    shrl $8, %eax
 ; X86-NEXT:    movl %eax, (%ecx)
 ; X86-NEXT:    retl
@@ -403,12 +396,11 @@ define i32 @xor_nosignbit_select_lshr(i32 %x, i1 %cond, ptr %dst) {
 ; X86-LABEL: xor_nosignbit_select_lshr:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    testb $1, {{[0-9]+}}(%esp)
-; X86-NEXT:    je .LBB13_2
-; X86-NEXT:  # %bb.1:
-; X86-NEXT:    xorl $2147418112, %eax # imm = 0x7FFF0000
-; X86-NEXT:  .LBB13_2:
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    andl $2147418112, %eax # imm = 0x7FFF0000
+; X86-NEXT:    xorl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    shrl $8, %eax
 ; X86-NEXT:    movl %eax, (%ecx)
 ; X86-NEXT:    retl
@@ -553,12 +545,11 @@ define i32 @or_signbit_select_ashr(i32 %x, i1 %cond, ptr %dst) {
 ; X86-LABEL: or_signbit_select_ashr:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    testb $1, {{[0-9]+}}(%esp)
-; X86-NEXT:    je .LBB18_2
-; X86-NEXT:  # %bb.1:
-; X86-NEXT:    orl $-65536, %eax # imm = 0xFFFF0000
-; X86-NEXT:  .LBB18_2:
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    andl $-65536, %eax # imm = 0xFFFF0000
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    sarl $8, %eax
 ; X86-NEXT:    movl %eax, (%ecx)
 ; X86-NEXT:    retl
@@ -582,12 +573,11 @@ define i32 @or_nosignbit_select_ashr(i32 %x, i1 %cond, ptr %dst) {
 ; X86-LABEL: or_nosignbit_select_ashr:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    testb $1, {{[0-9]+}}(%esp)
-; X86-NEXT:    je .LBB19_2
-; X86-NEXT:  # %bb.1:
-; X86-NEXT:    orl $2147418112, %eax # imm = 0x7FFF0000
-; X86-NEXT:  .LBB19_2:
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    andl $2147418112, %eax # imm = 0x7FFF0000
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    sarl $8, %eax
 ; X86-NEXT:    movl %eax, (%ecx)
 ; X86-NEXT:    retl
@@ -612,12 +602,11 @@ define i32 @xor_signbit_select_ashr(i32 %x, i1 %cond, ptr %dst) {
 ; X86-LABEL: xor_signbit_select_ashr:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    testb $1, {{[0-9]+}}(%esp)
-; X86-NEXT:    je .LBB20_2
-; X86-NEXT:  # %bb.1:
-; X86-NEXT:    xorl $-65536, %eax # imm = 0xFFFF0000
-; X86-NEXT:  .LBB20_2:
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    andl $-65536, %eax # imm = 0xFFFF0000
+; X86-NEXT:    xorl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    sarl $8, %eax
 ; X86-NEXT:    movl %eax, (%ecx)
 ; X86-NEXT:    retl
@@ -641,12 +630,11 @@ define i32 @xor_nosignbit_select_ashr(i32 %x, i1 %cond, ptr %dst) {
 ; X86-LABEL: xor_nosignbit_select_ashr:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    testb $1, {{[0-9]+}}(%esp)
-; X86-NEXT:    je .LBB21_2
-; X86-NEXT:  # %bb.1:
-; X86-NEXT:    xorl $2147418112, %eax # imm = 0x7FFF0000
-; X86-NEXT:  .LBB21_2:
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    andl $2147418112, %eax # imm = 0x7FFF0000
+; X86-NEXT:    xorl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    sarl $8, %eax
 ; X86-NEXT:    movl %eax, (%ecx)
 ; X86-NEXT:    retl
diff --git a/llvm/test/CodeGen/X86/select.ll b/llvm/test/CodeGen/X86/select.ll
index ca5558561a65b9..f370ac0a8c7c55 100644
--- a/llvm/test/CodeGen/X86/select.ll
+++ b/llvm/test/CodeGen/X86/select.ll
@@ -1475,11 +1475,10 @@ define i16 @select_xor_1b(i16 %A, i8 %cond) {
 ;
 ; MCU-LABEL: select_xor_1b:
 ; MCU:       # %bb.0: # %entry
-; MCU-NEXT:    testb $1, %dl
-; MCU-NEXT:    je .LBB29_2
-; MCU-NEXT:  # %bb.1:
-; MCU-NEXT:    xorl $43, %eax
-; MCU-NEXT:  .LBB29_2: # %entry
+; MCU-NEXT:    andl $1, %edx
+; MCU-NEXT:    negl %edx
+; MCU-NEXT:    andl $43, %edx
+; MCU-NEXT:    xorl %edx, %eax
 ; MCU-NEXT:    # kill: def $ax killed $ax killed $eax
 ; MCU-NEXT:    retl
 entry:
@@ -1545,11 +1544,10 @@ define i32 @select_xor_2b(i32 %A, i32 %B, i8 %cond) {
 ;
 ; MCU-LABEL: select_xor_2b:
 ; MCU:       # %bb.0: # %entry
-; MCU-NEXT:    testb $1, %cl
-; MCU-NEXT:    je .LBB31_2
-; MCU-NEXT:  # %bb.1:
-; MCU-NEXT:    xorl %edx, %eax
-; MCU-NEXT:  .LBB31_2: # %entry
+; MCU-NEXT:    andl $1, %ecx
+; MCU-NEXT:    negl %ecx
+; MCU-NEXT:    andl %edx, %ecx
+; MCU-NEXT:    xorl %ecx, %eax
 ; MCU-NEXT:    retl
 entry:
  %and = and i8 %cond, 1
@@ -1614,11 +1612,10 @@ define i32 @select_or_b(i32 %A, i32 %B, i8 %cond) {
 ;
 ; MCU-LABEL: select_or_b:
 ; MCU:       # %bb.0: # %entry
-; MCU-NEXT:    testb $1, %cl
-; MCU-NEXT:    je .LBB33_2
-; MCU-NEXT:  # %bb.1:
-; MCU-NEXT:    orl %edx, %eax
-; MCU-NEXT:  .LBB33_2: # %entry
+; MCU-NEXT:    andl $1, %ecx
+; MCU-NEXT:    negl %ecx
+; MCU-NEXT:    andl %edx, %ecx
+; MCU-NEXT:    orl %ecx, %eax
 ; MCU-NEXT:    retl
 entry:
  %and = and i8 %cond, 1
@@ -1683,11 +1680,10 @@ define i32 @select_or_1b(i32 %A, i32 %B, i32 %cond) {
 ;
 ; MCU-LABEL: select_or_1b:
 ; MCU:       # %bb.0: # %entry
-; MCU-NEXT:    testb $1, %cl
-; MCU-NEXT:    je .LBB35_2
-; MCU-NEXT:  # %bb.1:
-; MCU-NEXT:    orl %edx, %eax
-; MCU-NEXT:  .LBB35_2: # %entry
+; MCU-NEXT:    andl $1, %ecx
+; MCU-NEXT:    negl %ecx
+; MCU-NEXT:    andl %edx, %ecx
+; MCU-NEXT:    orl %ecx, %eax
 ; MCU-NEXT:    retl
 entry:
  %and = and i32 %cond, 1