[PATCH] D106684: [x86] improve CMOV codegen by pushing add into operands, part 2
Sanjay Patel via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 23 10:00:00 PDT 2021
spatel created this revision.
spatel added reviewers: craig.topper, lebedev.ri, pengfei, RKSimon.
Herald added subscribers: hiraditya, mcrosier.
spatel requested review of this revision.
Herald added a project: LLVM.
This is a minimal extension of D106607 <https://reviews.llvm.org/D106607> that allows the fold when both constant operands of the CMOV are non-zero.
In the reduced test examples, we save 1 instruction by rolling the constants into LEA/ADD. In the motivating test from the bullet benchmark, both constant moves are absorbed into the address arithmetic via LEA, so the sequence shrinks by 2 instructions.
https://reviews.llvm.org/D106684
Files:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/add-cmov.ll
Index: llvm/test/CodeGen/X86/add-cmov.ll
===================================================================
--- llvm/test/CodeGen/X86/add-cmov.ll
+++ llvm/test/CodeGen/X86/add-cmov.ll
@@ -74,6 +74,8 @@
ret i32 %r
}
+; Special-case LEA hacks are done before we try to push the add into a CMOV.
+
define i32 @select_40_43_i32(i32 %offset, i64 %x) {
; CHECK-LABEL: select_40_43_i32:
; CHECK: # %bb.0:
@@ -119,11 +121,11 @@
define i32 @select_20_43_i32(i32 %offset, i64 %x) {
; CHECK-LABEL: select_20_43_i32:
; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT: leal 43(%rdi), %eax
+; CHECK-NEXT: addl $20, %edi
; CHECK-NEXT: cmpq $42, %rsi
-; CHECK-NEXT: movl $20, %ecx
-; CHECK-NEXT: movl $43, %eax
-; CHECK-NEXT: cmovgel %ecx, %eax
-; CHECK-NEXT: addl %edi, %eax
+; CHECK-NEXT: cmovgel %edi, %eax
; CHECK-NEXT: retq
%b = icmp sgt i64 %x, 41
%s = select i1 %b, i32 20, i32 43
@@ -134,11 +136,11 @@
define i16 @select_n2_17_i16(i16 %offset, i1 %b) {
; CHECK-LABEL: select_n2_17_i16:
; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT: leal 17(%rdi), %eax
+; CHECK-NEXT: addl $65534, %edi # imm = 0xFFFE
; CHECK-NEXT: testb $1, %sil
-; CHECK-NEXT: movl $65534, %ecx # imm = 0xFFFE
-; CHECK-NEXT: movl $17, %eax
-; CHECK-NEXT: cmovnel %ecx, %eax
-; CHECK-NEXT: addl %edi, %eax
+; CHECK-NEXT: cmovnel %edi, %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
%s = select i1 %b, i16 -2, i16 17
@@ -153,14 +155,12 @@
define i16* @bullet(i1 %b, %class.btAxis* readnone %ptr, i64 %idx) {
; CHECK-LABEL: bullet:
; CHECK: # %bb.0:
-; CHECK-NEXT: leaq (%rdx,%rdx,4), %rcx
-; CHECK-NEXT: shlq $4, %rcx
-; CHECK-NEXT: addq %rsi, %rcx
+; CHECK-NEXT: leaq (%rdx,%rdx,4), %rax
+; CHECK-NEXT: shlq $4, %rax
+; CHECK-NEXT: leaq 60(%rsi,%rax), %rcx
+; CHECK-NEXT: leaq 66(%rsi,%rax), %rax
; CHECK-NEXT: testb $1, %dil
-; CHECK-NEXT: movl $60, %edx
-; CHECK-NEXT: movl $66, %eax
-; CHECK-NEXT: cmovneq %rdx, %rax
-; CHECK-NEXT: addq %rcx, %rax
+; CHECK-NEXT: cmovneq %rcx, %rax
; CHECK-NEXT: retq
%gep2 = getelementptr inbounds %class.btAxis, %class.btAxis* %ptr, i64 %idx, i32 2, i64 0
%gep1 = getelementptr inbounds %class.btAxis, %class.btAxis* %ptr, i64 %idx, i32 1, i64 0
Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -49872,14 +49872,14 @@
/// count. We do this with CMOV rather the generic 'select' because there are
/// earlier folds that may be used to turn select-of-constants into logic hacks.
static SDValue pushAddIntoCmovOfConsts(SDNode *N, SelectionDAG &DAG) {
- // This checks for a zero operand because add-of-0 gets simplified away.
- // TODO: Allow generating an extra add?
+ // If an operand is zero, add-of-0 gets simplified away, so that's clearly
+ // better because we eliminate 2 instructions. This transform is still likely
+ // an improvement without zero operands because we trade 2 move constants and
+ // 1 add for 2 adds (LEA).
auto isSuitableCmov = [](SDValue V) {
- if (V.getOpcode() != X86ISD::CMOV || !V.hasOneUse())
- return false;
- return isa<ConstantSDNode>(V.getOperand(0)) &&
- isa<ConstantSDNode>(V.getOperand(1)) &&
- (isNullConstant(V.getOperand(0)) || isNullConstant(V.getOperand(1)));
+ return V.getOpcode() == X86ISD::CMOV && V.hasOneUse() &&
+ isa<ConstantSDNode>(V.getOperand(0)) &&
+ isa<ConstantSDNode>(V.getOperand(1));
};
// Match an appropriate CMOV as the first operand of the add.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D106684.361263.patch
Type: text/x-patch
Size: 3811 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210723/04631747/attachment.bin>
More information about the llvm-commits
mailing list