[llvm] f060aa1 - [x86] improve CMOV codegen by pushing add into operands
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 23 06:41:10 PDT 2021
Author: Sanjay Patel
Date: 2021-07-23T09:39:32-04:00
New Revision: f060aa1cf3f42ca967c3f63e18381d3579bb12d9
URL: https://github.com/llvm/llvm-project/commit/f060aa1cf3f42ca967c3f63e18381d3579bb12d9
DIFF: https://github.com/llvm/llvm-project/commit/f060aa1cf3f42ca967c3f63e18381d3579bb12d9.diff
LOG: [x86] improve CMOV codegen by pushing add into operands
This is not the transform direction we want in general,
but by the time we have a CMOV, we've already tried
everything else that could be better.
The transform increases the uses of the other add operand,
but that is safe according to Alive2:
https://alive2.llvm.org/ce/z/Yn6p-A
We could probably extend this to other binops (not just add).
This is the motivating pattern discussed in:
https://llvm.org/PR51069
The test with i8 shows a missed fold because there's a trunc
sitting in front of the add. That can be handled with a small
follow-up.
Differential Revision: https://reviews.llvm.org/D106607
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/add-cmov.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 35e91dd94009..ffe361899567 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -49867,6 +49867,41 @@ static SDValue matchPMADDWD_2(SelectionDAG &DAG, SDValue N0, SDValue N1,
PMADDBuilder);
}
+/// CMOV of constants requires materializing constant operands in registers.
+/// Try to fold those constants into an 'add' instruction to reduce instruction
+/// count. We do this with CMOV rather than the generic 'select' because there are
+/// earlier folds that may be used to turn select-of-constants into logic hacks.
+static SDValue pushAddIntoCmovOfConsts(SDNode *N, SelectionDAG &DAG) {
+ // Require a zero operand because add-of-0 gets simplified away (one arm then needs no add).
+ // TODO: Allow generating an extra add?
+ auto isSuitableCmov = [](SDValue V) {
+ if (V.getOpcode() != X86ISD::CMOV || !V.hasOneUse()) // must be a single-use CMOV
+ return false;
+ return isa<ConstantSDNode>(V.getOperand(0)) &&
+ isa<ConstantSDNode>(V.getOperand(1)) &&
+ (isNullConstant(V.getOperand(0)) || isNullConstant(V.getOperand(1)));
+ };
+
+ // Accept a suitable CMOV as either operand of the add; swap so that Cmov holds it.
+ SDValue Cmov = N->getOperand(0);
+ SDValue OtherOp = N->getOperand(1);
+ if (!isSuitableCmov(Cmov))
+ std::swap(Cmov, OtherOp);
+ if (!isSuitableCmov(Cmov))
+ return SDValue(); // neither add operand qualifies: no fold
+
+ // add (cmov C, 0), OtherOp --> cmov (add OtherOp, C), OtherOp
+ // add (cmov 0, C), OtherOp --> cmov OtherOp, (add OtherOp, C)
+ SDLoc DL(N);
+ SDValue FalseOp = Cmov.getOperand(0);
+ SDValue TrueOp = Cmov.getOperand(1);
+ EVT VT = N->getValueType(0);
+ FalseOp = DAG.getNode(ISD::ADD, DL, VT, OtherOp, FalseOp);
+ TrueOp = DAG.getNode(ISD::ADD, DL, VT, OtherOp, TrueOp);
+ return DAG.getNode(X86ISD::CMOV, DL, VT, FalseOp, TrueOp, Cmov.getOperand(2),
+ Cmov.getOperand(3)); // operands 2 and 3 of the original CMOV are reused as-is
+}
+
static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
@@ -49874,6 +49909,9 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
+ if (SDValue Select = pushAddIntoCmovOfConsts(N, DAG))
+ return Select;
+
if (SDValue MAdd = matchPMADDWD(DAG, Op0, Op1, SDLoc(N), VT, Subtarget))
return MAdd;
if (SDValue MAdd = matchPMADDWD_2(DAG, Op0, Op1, SDLoc(N), VT, Subtarget))
diff --git a/llvm/test/CodeGen/X86/add-cmov.ll b/llvm/test/CodeGen/X86/add-cmov.ll
index 71ddea4b0c94..bd3d96e3aaea 100644
--- a/llvm/test/CodeGen/X86/add-cmov.ll
+++ b/llvm/test/CodeGen/X86/add-cmov.ll
@@ -4,11 +4,9 @@
define i64 @select_consts_i64(i64 %offset, i32 %x) {
; CHECK-LABEL: select_consts_i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: leaq 42(%rdi), %rax
; CHECK-NEXT: testl %esi, %esi
-; CHECK-NEXT: movl $42, %eax
-; CHECK-NEXT: cmovneq %rcx, %rax
-; CHECK-NEXT: addq %rdi, %rax
+; CHECK-NEXT: cmovneq %rdi, %rax
; CHECK-NEXT: retq
%b = icmp eq i32 %x, 0
%s = select i1 %b, i64 42, i64 0
@@ -19,11 +17,10 @@ define i64 @select_consts_i64(i64 %offset, i32 %x) {
define i32 @select_consts_i32(i32 %offset, i64 %x) {
; CHECK-LABEL: select_consts_i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT: leal 43(%rdi), %eax
; CHECK-NEXT: cmpq $42, %rsi
-; CHECK-NEXT: movl $43, %eax
-; CHECK-NEXT: cmovgel %ecx, %eax
-; CHECK-NEXT: addl %edi, %eax
+; CHECK-NEXT: cmovgel %edi, %eax
; CHECK-NEXT: retq
%b = icmp sgt i64 %x, 41
%s = select i1 %b, i32 0, i32 43
@@ -34,11 +31,10 @@ define i32 @select_consts_i32(i32 %offset, i64 %x) {
define i16 @select_consts_i16(i16 %offset, i1 %b) {
; CHECK-LABEL: select_consts_i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT: leal 44(%rdi), %eax
; CHECK-NEXT: testb $1, %sil
-; CHECK-NEXT: movl $44, %eax
-; CHECK-NEXT: cmovel %ecx, %eax
-; CHECK-NEXT: addl %edi, %eax
+; CHECK-NEXT: cmovel %edi, %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
%s = select i1 %b, i16 44, i16 0
More information about the llvm-commits
mailing list