[llvm] 1ce05ad - [x86] improve CMOV codegen by pushing add into operands, part 2

Sun Jul 25 07:08:41 PDT 2021

Author: Sanjay Patel
Date: 2021-07-25T10:05:41-04:00
New Revision: 1ce05ad619a5904f15f35a5c96ece27ee1991f1c

URL: https://github.com/llvm/llvm-project/commit/1ce05ad619a5904f15f35a5c96ece27ee1991f1c
DIFF: https://github.com/llvm/llvm-project/commit/1ce05ad619a5904f15f35a5c96ece27ee1991f1c.diff

LOG: [x86] improve CMOV codegen by pushing add into operands, part 2

This is a minimal extension of D106607 to allow folding for
2 non-zero constants that can be materialized as immediates.

In the reduced test examples, we save 1 instruction by rolling
the constants into LEA/ADD. In the motivating test from the bullet
benchmark, we absorb both of the constant moves into add ops via
LEA magic, so we reduce by 2 instructions.

Differential Revision: https://reviews.llvm.org/D106684

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/add-cmov.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 235b27bf99cd..069a9703334e 100644

--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -49873,14 +49873,20 @@ static SDValue matchPMADDWD_2(SelectionDAG &DAG, SDValue N0, SDValue N1,
 /// count. We do this with CMOV rather the generic 'select' because there are
 /// earlier folds that may be used to turn select-of-constants into logic hacks.
 static SDValue pushAddIntoCmovOfConsts(SDNode *N, SelectionDAG &DAG) {
-  // This checks for a zero operand because add-of-0 gets simplified away.
-  // TODO: Allow generating an extra add?
+  // If an operand is zero, add-of-0 gets simplified away, so that's clearly
+  // better because we eliminate 1-2 instructions. This transform is still
+  // an improvement without zero operands because we trade 2 move constants and
+  // 1 add for 2 adds (LEA) as long as the constants can be represented as
+  // immediate asm operands (fit in 32-bits).
   auto isSuitableCmov = [](SDValue V) {
     if (V.getOpcode() != X86ISD::CMOV || !V.hasOneUse())
       return false;
-    return isa<ConstantSDNode>(V.getOperand(0)) &&
-           isa<ConstantSDNode>(V.getOperand(1)) &&
-           (isNullConstant(V.getOperand(0)) || isNullConstant(V.getOperand(1)));
+    if (!isa<ConstantSDNode>(V.getOperand(0)) ||
+        !isa<ConstantSDNode>(V.getOperand(1)))
+      return false;
+    return isNullConstant(V.getOperand(0)) || isNullConstant(V.getOperand(1)) ||
+           (V.getConstantOperandAPInt(0).isSignedIntN(32) &&
+            V.getConstantOperandAPInt(1).isSignedIntN(32));
   };
 
   // Match an appropriate CMOV as the first operand of the add.

diff  --git a/llvm/test/CodeGen/X86/add-cmov.ll b/llvm/test/CodeGen/X86/add-cmov.ll
index 1a7c810739bd..d4e8640890ea 100644
--- a/llvm/test/CodeGen/X86/add-cmov.ll
+++ b/llvm/test/CodeGen/X86/add-cmov.ll
@@ -88,6 +88,8 @@ define i32 @select_consts_use_i32(i32 %offset, i64 %x, i32* %p) {
   ret i32 %r
 }
 
+; Special-case LEA hacks are done before we try to push the add into a CMOV.
+
 define i32 @select_40_43_i32(i32 %offset, i64 %x) {
 ; CHECK-LABEL: select_40_43_i32:
 ; CHECK:       # %bb.0:
@@ -133,11 +135,10 @@ define i32 @select_1_0_i32(i32 %offset, i64 %x) {
 define i64 @select_max32_2_i64(i64 %offset, i64 %x) {
 ; CHECK-LABEL: select_max32_2_i64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    leaq 2(%rdi), %rax
+; CHECK-NEXT:    addq $2147483647, %rdi # imm = 0x7FFFFFFF
 ; CHECK-NEXT:    cmpq $41, %rsi
-; CHECK-NEXT:    movl $2147483647, %ecx # imm = 0x7FFFFFFF
-; CHECK-NEXT:    movl $2, %eax
-; CHECK-NEXT:    cmovneq %rcx, %rax
-; CHECK-NEXT:    addq %rdi, %rax
+; CHECK-NEXT:    cmovneq %rdi, %rax
 ; CHECK-NEXT:    retq
   %b = icmp ne i64 %x, 41
   %s = select i1 %b, i64 2147483647, i64 2
@@ -207,11 +208,11 @@ define i64 @select_big_bigger_i64(i64 %offset, i64 %x) {
 define i32 @select_20_43_i32(i32 %offset, i64 %x) {
 ; CHECK-LABEL: select_20_43_i32:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    leal 43(%rdi), %eax
+; CHECK-NEXT:    addl $20, %edi
 ; CHECK-NEXT:    cmpq $42, %rsi
-; CHECK-NEXT:    movl $20, %ecx
-; CHECK-NEXT:    movl $43, %eax
-; CHECK-NEXT:    cmovgel %ecx, %eax
-; CHECK-NEXT:    addl %edi, %eax
+; CHECK-NEXT:    cmovgel %edi, %eax
 ; CHECK-NEXT:    retq
   %b = icmp sgt i64 %x, 41
   %s = select i1 %b, i32 20, i32 43
@@ -222,11 +223,11 @@ define i32 @select_20_43_i32(i32 %offset, i64 %x) {
 define i16 @select_n2_17_i16(i16 %offset, i1 %b) {
 ; CHECK-LABEL: select_n2_17_i16:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    leal 17(%rdi), %eax
+; CHECK-NEXT:    addl $65534, %edi # imm = 0xFFFE
 ; CHECK-NEXT:    testb $1, %sil
-; CHECK-NEXT:    movl $65534, %ecx # imm = 0xFFFE
-; CHECK-NEXT:    movl $17, %eax
-; CHECK-NEXT:    cmovnel %ecx, %eax
-; CHECK-NEXT:    addl %edi, %eax
+; CHECK-NEXT:    cmovnel %edi, %eax
 ; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    retq
   %s = select i1 %b, i16 -2, i16 17
@@ -241,14 +242,12 @@ define i16 @select_n2_17_i16(i16 %offset, i1 %b) {
 define i16* @bullet(i1 %b, %class.btAxis* readnone %ptr, i64 %idx) {
 ; CHECK-LABEL: bullet:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    leaq (%rdx,%rdx,4), %rcx
-; CHECK-NEXT:    shlq $4, %rcx
-; CHECK-NEXT:    addq %rsi, %rcx
+; CHECK-NEXT:    leaq (%rdx,%rdx,4), %rax
+; CHECK-NEXT:    shlq $4, %rax
+; CHECK-NEXT:    leaq 60(%rsi,%rax), %rcx
+; CHECK-NEXT:    leaq 66(%rsi,%rax), %rax
 ; CHECK-NEXT:    testb $1, %dil
-; CHECK-NEXT:    movl $60, %edx
-; CHECK-NEXT:    movl $66, %eax
-; CHECK-NEXT:    cmovneq %rdx, %rax
-; CHECK-NEXT:    addq %rcx, %rax
+; CHECK-NEXT:    cmovneq %rcx, %rax
 ; CHECK-NEXT:    retq
   %gep2 = getelementptr inbounds %class.btAxis, %class.btAxis* %ptr, i64 %idx, i32 2, i64 0
   %gep1 = getelementptr inbounds %class.btAxis, %class.btAxis* %ptr, i64 %idx, i32 1, i64 0