[llvm] ad7214f - [x86] add load folding restriction to pushAddIntoCmovOfConsts()
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 22 05:03:51 PST 2022
Author: Sanjay Patel
Date: 2022-02-22T08:02:11-05:00
New Revision: ad7214f23dc3a088d73eb3974b146a0bb09d6ffd
URL: https://github.com/llvm/llvm-project/commit/ad7214f23dc3a088d73eb3974b146a0bb09d6ffd
DIFF: https://github.com/llvm/llvm-project/commit/ad7214f23dc3a088d73eb3974b146a0bb09d6ffd.diff
LOG: [x86] add load folding restriction to pushAddIntoCmovOfConsts()
With only a load-fold opportunity, the diffs look neutral. If there's a
load-and-store (RMW) fold opportunity, as shown in the test based on #53862,
then without this restriction we end up with an extra instruction.
Fixes #53862
Differential Revision: https://reviews.llvm.org/D120281
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/add-cmov.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c372919f44f70..9666d71288a34 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -52606,7 +52606,8 @@ static SDValue combineAddOfPMADDWD(SelectionDAG &DAG, SDValue N0, SDValue N1,
/// Try to fold those constants into an 'add' instruction to reduce instruction
/// count. We do this with CMOV rather the generic 'select' because there are
/// earlier folds that may be used to turn select-of-constants into logic hacks.
-static SDValue pushAddIntoCmovOfConsts(SDNode *N, SelectionDAG &DAG) {
+static SDValue pushAddIntoCmovOfConsts(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
// If an operand is zero, add-of-0 gets simplified away, so that's clearly
// better because we eliminate 1-2 instructions. This transform is still
// an improvement without zero operands because we trade 2 move constants and
@@ -52631,6 +52632,11 @@ static SDValue pushAddIntoCmovOfConsts(SDNode *N, SelectionDAG &DAG) {
if (!isSuitableCmov(Cmov))
return SDValue();
+ // Don't remove a load folding opportunity for the add. That would neutralize
+ // any improvements from removing constant materializations.
+ if (X86::mayFoldLoad(OtherOp, Subtarget))
+ return SDValue();
+
EVT VT = N->getValueType(0);
SDLoc DL(N);
SDValue FalseOp = Cmov.getOperand(0);
@@ -52673,7 +52679,7 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
SDValue Op1 = N->getOperand(1);
SDLoc DL(N);
- if (SDValue Select = pushAddIntoCmovOfConsts(N, DAG))
+ if (SDValue Select = pushAddIntoCmovOfConsts(N, DAG, Subtarget))
return Select;
if (SDValue MAdd = matchPMADDWD(DAG, Op0, Op1, DL, VT, Subtarget))
diff --git a/llvm/test/CodeGen/X86/add-cmov.ll b/llvm/test/CodeGen/X86/add-cmov.ll
index a47cad269da96..492feff344152 100644
--- a/llvm/test/CodeGen/X86/add-cmov.ll
+++ b/llvm/test/CodeGen/X86/add-cmov.ll
@@ -477,12 +477,11 @@ define void @complex_lea_alt8(i1 %b, i16* readnone %ptr, i64 %idx) {
define i32 @loadfold_select_const_arms(i32* %x, i1 %y) {
; CHECK-LABEL: loadfold_select_const_arms:
; CHECK: # %bb.0:
-; CHECK-NEXT: movl (%rdi), %eax
-; CHECK-NEXT: leal -10(%rax), %ecx
-; CHECK-NEXT: addl $10, %eax
; CHECK-NEXT: testb $1, %sil
-; CHECK-NEXT: cmovel %ecx, %eax
-; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
+; CHECK-NEXT: movl $10, %ecx
+; CHECK-NEXT: movl $-10, %eax
+; CHECK-NEXT: cmovnel %ecx, %eax
+; CHECK-NEXT: addl (%rdi), %eax
; CHECK-NEXT: retq
%cond = select i1 %y, i32 10, i32 -10
%t0 = load i32, i32* %x, align 4
@@ -522,12 +521,11 @@ define void @rmw_add_select_const_arm(i32* %x, i1 %y, i32 %z) {
define void @rmw_select_const_arms(i32* %x, i1 %y) {
; CHECK-LABEL: rmw_select_const_arms:
; CHECK: # %bb.0:
-; CHECK-NEXT: movl (%rdi), %eax
-; CHECK-NEXT: leal -10(%rax), %ecx
-; CHECK-NEXT: addl $10, %eax
; CHECK-NEXT: testb $1, %sil
-; CHECK-NEXT: cmovel %ecx, %eax
-; CHECK-NEXT: movl %eax, (%rdi)
+; CHECK-NEXT: movl $10, %eax
+; CHECK-NEXT: movl $-10, %ecx
+; CHECK-NEXT: cmovnel %eax, %ecx
+; CHECK-NEXT: addl %ecx, (%rdi)
; CHECK-NEXT: retq
%cond = select i1 %y, i32 10, i32 -10
%t0 = load i32, i32* %x, align 4
@@ -557,13 +555,12 @@ define i32 @rmw_select_const_arms_extra_load_use(i32* %x, i1 %y) {
define i32 @rmw_select_const_arms_extra_add_use(i32* %x, i1 %y) {
; CHECK-LABEL: rmw_select_const_arms_extra_add_use:
; CHECK: # %bb.0:
-; CHECK-NEXT: movl (%rdi), %eax
-; CHECK-NEXT: leal -10(%rax), %ecx
-; CHECK-NEXT: addl $10, %eax
; CHECK-NEXT: testb $1, %sil
-; CHECK-NEXT: cmovel %ecx, %eax
+; CHECK-NEXT: movl $10, %ecx
+; CHECK-NEXT: movl $-10, %eax
+; CHECK-NEXT: cmovnel %ecx, %eax
+; CHECK-NEXT: addl (%rdi), %eax
; CHECK-NEXT: movl %eax, (%rdi)
-; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-NEXT: retq
%cond = select i1 %y, i32 10, i32 -10
%t0 = load i32, i32* %x, align 4
More information about the llvm-commits mailing list