[llvm] [X86] LowerSELECTWithCmpZero - extend branchless OR/XOR select codegen to handle ADD/SUB as well (PR #107612)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 6 10:13:22 PDT 2024
https://github.com/RKSimon created https://github.com/llvm/llvm-project/pull/107612
Extend the "SELECT ((AND X, 1) != 0), Y, (OR/XOR Y, Z) -> (OR/XOR Y, (AND (NEG(AND X, 1)), Z))" to also handle ADD/SUB.
As SUB is not commutative, we have to be more careful and only accept LHS matches.
>From cf77743be9a4e9074d934a713d4124c29f3855ef Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Fri, 6 Sep 2024 18:12:07 +0100
Subject: [PATCH] [X86] LowerSELECTWithCmpZero - extend branchless OR/XOR
select codegen to handle ADD/SUB as well
Extend the "SELECT ((AND X, 1) != 0), Y, (OR/XOR Y, Z) -> (OR/XOR Y, (AND (NEG(AND X, 1)), Z))" to also handle ADD/SUB.
As SUB is not commutative, we have to be more careful and only accept LHS matches.
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 32 ++++++---
.../pull-conditional-binop-through-shift.ll | 66 ++++++++---------
llvm/test/CodeGen/X86/select.ll | 72 +++++++++----------
3 files changed, 85 insertions(+), 85 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e20633e041072f..839b87dd5d4dd8 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -24084,18 +24084,32 @@ static SDValue LowerSELECTWithCmpZero(SDValue CmpVal, SDValue LHS, SDValue RHS,
if (!CmpVT.isScalarInteger() || !VT.isScalarInteger())
return SDValue();
- // Convert OR/XOR 'identity' patterns (iff X is 0 or 1):
- // select (X != 0), Y, (OR Y, Z) -> (OR Y, (AND (0 - X), Z))
- // select (X != 0), Y, (XOR Y, Z) -> (XOR Y, (AND (0 - X), Z))
+ // Convert 'identity' patterns (iff X is 0 or 1):
+ // SELECT (X != 0), Y, (OR Y, Z) -> (OR Y, (AND (0 - X), Z))
+ // SELECT (X != 0), Y, (XOR Y, Z) -> (XOR Y, (AND (0 - X), Z))
+ // SELECT (X != 0), Y, (ADD Y, Z) -> (ADD Y, (AND (0 - X), Z))
+ // SELECT (X != 0), Y, (SUB Y, Z) -> (SUB Y, (AND (0 - X), Z))
if (!Subtarget.canUseCMOV() && X86CC == X86::COND_E &&
CmpVal.getOpcode() == ISD::AND && isOneConstant(CmpVal.getOperand(1))) {
SDValue Src1, Src2;
auto isIdentityPattern = [&]() {
- if ((RHS.getOpcode() == ISD::XOR || RHS.getOpcode() == ISD::OR) &&
- (RHS.getOperand(0) == LHS || RHS.getOperand(1) == LHS)) {
- Src1 = RHS.getOperand(RHS.getOperand(0) == LHS ? 1 : 0);
- Src2 = LHS;
- return true;
+ switch (RHS.getOpcode()) {
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::ADD:
+ if (RHS.getOperand(0) == LHS || RHS.getOperand(1) == LHS) {
+ Src1 = RHS.getOperand(RHS.getOperand(0) == LHS ? 1 : 0);
+ Src2 = LHS;
+ return true;
+ }
+ break;
+ case ISD::SUB:
+ if (RHS.getOperand(0) == LHS) {
+ Src1 = RHS.getOperand(1);
+ Src2 = LHS;
+ return true;
+ }
+ break;
}
return false;
};
@@ -24113,7 +24127,7 @@ static SDValue LowerSELECTWithCmpZero(SDValue CmpVal, SDValue LHS, SDValue RHS,
DAG.getConstant(1, DL, VT));
SDValue Mask = DAG.getNegative(Neg, DL, VT); // -(and (x, 0x1))
SDValue And = DAG.getNode(ISD::AND, DL, VT, Mask, Src1); // Mask & z
- return DAG.getNode(RHS.getOpcode(), DL, VT, And, Src2); // And Op y
+ return DAG.getNode(RHS.getOpcode(), DL, VT, Src2, And); // y Op And
}
}
diff --git a/llvm/test/CodeGen/X86/pull-conditional-binop-through-shift.ll b/llvm/test/CodeGen/X86/pull-conditional-binop-through-shift.ll
index def4c08a3592ec..86a8a6a53248b7 100644
--- a/llvm/test/CodeGen/X86/pull-conditional-binop-through-shift.ll
+++ b/llvm/test/CodeGen/X86/pull-conditional-binop-through-shift.ll
@@ -191,12 +191,11 @@ define i32 @add_signbit_select_shl(i32 %x, i1 %cond, ptr %dst) {
; X86-LABEL: add_signbit_select_shl:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
-; X86-NEXT: je .LBB6_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: addl $-65536, %eax # imm = 0xFFFF0000
-; X86-NEXT: .LBB6_2:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $1, %eax
+; X86-NEXT: negl %eax
+; X86-NEXT: andl $16711680, %eax # imm = 0xFF0000
+; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shll $8, %eax
; X86-NEXT: movl %eax, (%ecx)
; X86-NEXT: retl
@@ -220,12 +219,11 @@ define i32 @add_nosignbit_select_shl(i32 %x, i1 %cond, ptr %dst) {
; X86-LABEL: add_nosignbit_select_shl:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
-; X86-NEXT: je .LBB7_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: addl $2147418112, %eax # imm = 0x7FFF0000
-; X86-NEXT: .LBB7_2:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $1, %eax
+; X86-NEXT: negl %eax
+; X86-NEXT: andl $16711680, %eax # imm = 0xFF0000
+; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shll $8, %eax
; X86-NEXT: movl %eax, (%ecx)
; X86-NEXT: retl
@@ -425,12 +423,11 @@ define i32 @add_signbit_select_lshr(i32 %x, i1 %cond, ptr %dst) {
; X86-LABEL: add_signbit_select_lshr:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
-; X86-NEXT: je .LBB14_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: addl $-65536, %eax # imm = 0xFFFF0000
-; X86-NEXT: .LBB14_2:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $1, %eax
+; X86-NEXT: negl %eax
+; X86-NEXT: andl $-65536, %eax # imm = 0xFFFF0000
+; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shrl $8, %eax
; X86-NEXT: movl %eax, (%ecx)
; X86-NEXT: retl
@@ -454,12 +451,11 @@ define i32 @add_nosignbit_select_lshr(i32 %x, i1 %cond, ptr %dst) {
; X86-LABEL: add_nosignbit_select_lshr:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
-; X86-NEXT: je .LBB15_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: addl $2147418112, %eax # imm = 0x7FFF0000
-; X86-NEXT: .LBB15_2:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $1, %eax
+; X86-NEXT: negl %eax
+; X86-NEXT: andl $2147418112, %eax # imm = 0x7FFF0000
+; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shrl $8, %eax
; X86-NEXT: movl %eax, (%ecx)
; X86-NEXT: retl
@@ -659,12 +655,11 @@ define i32 @add_signbit_select_ashr(i32 %x, i1 %cond, ptr %dst) {
; X86-LABEL: add_signbit_select_ashr:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
-; X86-NEXT: je .LBB22_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: addl $-65536, %eax # imm = 0xFFFF0000
-; X86-NEXT: .LBB22_2:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $1, %eax
+; X86-NEXT: negl %eax
+; X86-NEXT: andl $-65536, %eax # imm = 0xFFFF0000
+; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
; X86-NEXT: sarl $8, %eax
; X86-NEXT: movl %eax, (%ecx)
; X86-NEXT: retl
@@ -688,12 +683,11 @@ define i32 @add_nosignbit_select_ashr(i32 %x, i1 %cond, ptr %dst) {
; X86-LABEL: add_nosignbit_select_ashr:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
-; X86-NEXT: je .LBB23_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: addl $2147418112, %eax # imm = 0x7FFF0000
-; X86-NEXT: .LBB23_2:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $1, %eax
+; X86-NEXT: negl %eax
+; X86-NEXT: andl $2147418112, %eax # imm = 0x7FFF0000
+; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
; X86-NEXT: sarl $8, %eax
; X86-NEXT: movl %eax, (%ecx)
; X86-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/select.ll b/llvm/test/CodeGen/X86/select.ll
index aa7f509327002f..760c8355aa4572 100644
--- a/llvm/test/CodeGen/X86/select.ll
+++ b/llvm/test/CodeGen/X86/select.ll
@@ -1725,11 +1725,10 @@ define i32 @select_add(i32 %A, i32 %B, i8 %cond) {
;
; MCU-LABEL: select_add:
; MCU: # %bb.0: # %entry
-; MCU-NEXT: testb $1, %cl
-; MCU-NEXT: je .LBB36_2
-; MCU-NEXT: # %bb.1: # %entry
-; MCU-NEXT: addl %edx, %eax
-; MCU-NEXT: .LBB36_2: # %entry
+; MCU-NEXT: andl $1, %ecx
+; MCU-NEXT: negl %ecx
+; MCU-NEXT: andl %edx, %ecx
+; MCU-NEXT: addl %ecx, %eax
; MCU-NEXT: retl
entry:
%and = and i8 %cond, 1
@@ -1773,11 +1772,10 @@ define i32 @select_add_b(i32 %A, i32 %B, i8 %cond) {
;
; MCU-LABEL: select_add_b:
; MCU: # %bb.0: # %entry
-; MCU-NEXT: testb $1, %cl
-; MCU-NEXT: je .LBB37_2
-; MCU-NEXT: # %bb.1:
-; MCU-NEXT: addl %edx, %eax
-; MCU-NEXT: .LBB37_2: # %entry
+; MCU-NEXT: andl $1, %ecx
+; MCU-NEXT: negl %ecx
+; MCU-NEXT: andl %edx, %ecx
+; MCU-NEXT: addl %ecx, %eax
; MCU-NEXT: retl
entry:
%and = and i8 %cond, 1
@@ -1819,11 +1817,10 @@ define i32 @select_add_1(i32 %A, i32 %B, i32 %cond) {
;
; MCU-LABEL: select_add_1:
; MCU: # %bb.0: # %entry
-; MCU-NEXT: testb $1, %cl
-; MCU-NEXT: je .LBB38_2
-; MCU-NEXT: # %bb.1: # %entry
-; MCU-NEXT: addl %edx, %eax
-; MCU-NEXT: .LBB38_2: # %entry
+; MCU-NEXT: andl $1, %ecx
+; MCU-NEXT: negl %ecx
+; MCU-NEXT: andl %edx, %ecx
+; MCU-NEXT: addl %ecx, %eax
; MCU-NEXT: retl
entry:
%and = and i32 %cond, 1
@@ -1867,11 +1864,10 @@ define i32 @select_add_1b(i32 %A, i32 %B, i32 %cond) {
;
; MCU-LABEL: select_add_1b:
; MCU: # %bb.0: # %entry
-; MCU-NEXT: testb $1, %cl
-; MCU-NEXT: je .LBB39_2
-; MCU-NEXT: # %bb.1:
-; MCU-NEXT: addl %edx, %eax
-; MCU-NEXT: .LBB39_2: # %entry
+; MCU-NEXT: andl $1, %ecx
+; MCU-NEXT: negl %ecx
+; MCU-NEXT: andl %edx, %ecx
+; MCU-NEXT: addl %ecx, %eax
; MCU-NEXT: retl
entry:
%and = and i32 %cond, 1
@@ -1901,11 +1897,10 @@ define i32 @select_sub(i32 %A, i32 %B, i8 %cond) {
;
; MCU-LABEL: select_sub:
; MCU: # %bb.0: # %entry
-; MCU-NEXT: testb $1, %cl
-; MCU-NEXT: je .LBB40_2
-; MCU-NEXT: # %bb.1: # %entry
-; MCU-NEXT: subl %eax, %edx
-; MCU-NEXT: .LBB40_2: # %entry
+; MCU-NEXT: andl $1, %ecx
+; MCU-NEXT: negl %ecx
+; MCU-NEXT: andl %eax, %ecx
+; MCU-NEXT: subl %ecx, %edx
; MCU-NEXT: movl %edx, %eax
; MCU-NEXT: retl
entry:
@@ -1938,11 +1933,10 @@ define i32 @select_sub_b(i32 %A, i32 %B, i8 %cond) {
;
; MCU-LABEL: select_sub_b:
; MCU: # %bb.0: # %entry
-; MCU-NEXT: testb $1, %cl
-; MCU-NEXT: je .LBB41_2
-; MCU-NEXT: # %bb.1:
-; MCU-NEXT: subl %eax, %edx
-; MCU-NEXT: .LBB41_2: # %entry
+; MCU-NEXT: andl $1, %ecx
+; MCU-NEXT: negl %ecx
+; MCU-NEXT: andl %eax, %ecx
+; MCU-NEXT: subl %ecx, %edx
; MCU-NEXT: movl %edx, %eax
; MCU-NEXT: retl
entry:
@@ -1973,11 +1967,10 @@ define i32 @select_sub_1(i32 %A, i32 %B, i32 %cond) {
;
; MCU-LABEL: select_sub_1:
; MCU: # %bb.0: # %entry
-; MCU-NEXT: testb $1, %cl
-; MCU-NEXT: je .LBB42_2
-; MCU-NEXT: # %bb.1: # %entry
-; MCU-NEXT: subl %eax, %edx
-; MCU-NEXT: .LBB42_2: # %entry
+; MCU-NEXT: andl $1, %ecx
+; MCU-NEXT: negl %ecx
+; MCU-NEXT: andl %eax, %ecx
+; MCU-NEXT: subl %ecx, %edx
; MCU-NEXT: movl %edx, %eax
; MCU-NEXT: retl
entry:
@@ -2010,11 +2003,10 @@ define i32 @select_sub_1b(i32 %A, i32 %B, i32 %cond) {
;
; MCU-LABEL: select_sub_1b:
; MCU: # %bb.0: # %entry
-; MCU-NEXT: testb $1, %cl
-; MCU-NEXT: je .LBB43_2
-; MCU-NEXT: # %bb.1:
-; MCU-NEXT: subl %eax, %edx
-; MCU-NEXT: .LBB43_2: # %entry
+; MCU-NEXT: andl $1, %ecx
+; MCU-NEXT: negl %ecx
+; MCU-NEXT: andl %eax, %ecx
+; MCU-NEXT: subl %ecx, %edx
; MCU-NEXT: movl %edx, %eax
; MCU-NEXT: retl
entry:
More information about the llvm-commits
mailing list