[llvm] 943cc71 - [X86][APX] Convert store(cmov(load(x), y), x) to cstore(y, x) (#118946)
Author: Phoebe Wang
Date: 2024-12-07T11:38:43+08:00
New Revision: 943cc71a6cfc52a88a413ab093c2d1e3f12d55e0
URL: https://github.com/llvm/llvm-project/commit/943cc71a6cfc52a88a413ab093c2d1e3f12d55e0
DIFF: https://github.com/llvm/llvm-project/commit/943cc71a6cfc52a88a413ab093c2d1e3f12d55e0.diff
LOG: [X86][APX] Convert store(cmov(load(x), y), x) to cstore(y, x) (#118946)
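With APX CF ("conditionally faulting") instructions available, a value that is
loaded from and conditionally written back to the same simple (non-volatile,
non-atomic) address no longer needs a load, a register cmov and an
unconditional store: the whole sequence can become a single conditional store.
A minimal sketch of the shape this combine matches (the function name is
illustrative, not part of the patch):

define void @smin_store(ptr %p, i32 %v) {
  %old = load i32, ptr %p, align 4        ; load(x)
  %cc = icmp slt i32 %v, %old
  %min = select i1 %cc, i32 %v, i32 %old  ; becomes cmov(load(x), y) during isel
  store i32 %min, ptr %p, align 4         ; store back to the same x
  ret void
}

With -mattr=+cf this should now select to roughly "cmpl (%rdi), %esi" followed
by "cfcmovll %esi, (%rdi)", so memory is only written when the condition
actually holds.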
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/apx/cfcmov.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 5582dbc3e092d7..fb8e791d8ec981 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -52820,6 +52820,44 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
     }
   }
 
+  // Convert store(cmov(load(p), x, CC), p) to cstore(x, p, CC)
+  //         store(cmov(x, load(p), CC), p) to cstore(x, p, InvertCC)
+  if ((VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) &&
+      Subtarget.hasCF() && St->isSimple()) {
+    SDValue Cmov;
+    if (StoredVal.getOpcode() == X86ISD::CMOV)
+      Cmov = StoredVal;
+    else if (StoredVal.getOpcode() == ISD::TRUNCATE &&
+             StoredVal.getOperand(0).getOpcode() == X86ISD::CMOV)
+      Cmov = StoredVal.getOperand(0);
+    else
+      return SDValue();
+
+    auto *Ld = dyn_cast<LoadSDNode>(St->getChain());
+    if (!Ld || !Ld->isSimple() || Ld->getBasePtr() != St->getBasePtr())
+      return SDValue();
+
+    bool InvertCC = false;
+    SDValue V = SDValue(Ld, 0);
+    if (V == Cmov.getOperand(1))
+      InvertCC = true;
+    else if (V != Cmov.getOperand(0))
+      return SDValue();
+
+    SDVTList Tys = DAG.getVTList(MVT::Other);
+    SDValue CC = Cmov.getOperand(2);
+    SDValue Src = DAG.getAnyExtOrTrunc(Cmov.getOperand(!InvertCC), dl, VT);
+    if (InvertCC)
+      CC = DAG.getTargetConstant(
+          GetOppositeBranchCondition(
+              (X86::CondCode)Cmov.getConstantOperandVal(2)),
+          dl, MVT::i8);
+    SDValue Ops[] = {St->getChain(), Src, St->getBasePtr(), CC,
+                     Cmov.getOperand(3)};
+    return DAG.getMemIntrinsicNode(X86ISD::CSTORE, dl, Tys, Ops, VT,
+                                   St->getMemOperand());
+  }
+
   // Turn load->store of MMX types into GPR load/stores. This avoids clobbering
   // the FP state in cases where an emms may be missing.
   // A preferable solution to the general problem is to figure out the right
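Note the InvertCC path above: when the loaded value is the operand the CMOV
selects while the condition holds, writing it back would be a no-op, so the
combine instead stores the other operand under the opposite condition. A
hypothetical example of that shape (again, names are illustrative):

define void @store_if_nonzero(ptr %p, i32 %v) {
  %old = load i32, ptr %p, align 4
  %cc = icmp eq i32 %v, 0
  %r = select i1 %cc, i32 %old, i32 %v    ; load(p) is the taken ("true") operand
  store i32 %r, ptr %p, align 4
  ret void
}

Here store(cmov(v, load(p), E), p) becomes cstore(v, p, NE), i.e. roughly
"testl %esi, %esi" followed by "cfcmovnel %esi, (%rdi)".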
diff --git a/llvm/test/CodeGen/X86/apx/cfcmov.ll b/llvm/test/CodeGen/X86/apx/cfcmov.ll
index f643120c9b50ff..37ba3d451c2b16 100644
--- a/llvm/test/CodeGen/X86/apx/cfcmov.ll
+++ b/llvm/test/CodeGen/X86/apx/cfcmov.ll
@@ -93,3 +93,91 @@ define i64 @cfcmov64rr_inv(i64 %0) {
   %3 = select i1 %2, i64 0, i64 %0
   ret i64 %3
 }
+
+define void @cfcmov16mr(ptr %p, i16 %0) {
+; CHECK-LABEL: cfcmov16mr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movzwl (%rdi), %eax
+; CHECK-NEXT:    cmpw %ax, %si
+; CHECK-NEXT:    cfcmovlew %si, (%rdi)
+; CHECK-NEXT:    retq
+  %2 = load i16, ptr %p, align 2
+  %3 = icmp sgt i16 %0, %2
+  %4 = select i1 %3, i16 %2, i16 %0
+  store i16 %4, ptr %p, align 2
+  ret void
+}
+
+define void @cfcmov32mr(ptr %p, i32 %0) {
+; CHECK-LABEL: cfcmov32mr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cmpl (%rdi), %esi
+; CHECK-NEXT:    cfcmovgl %esi, (%rdi)
+; CHECK-NEXT:    retq
+  %2 = load i32, ptr %p, align 4
+  %3 = call i32 @llvm.smax.i32(i32 %0, i32 %2)
+  store i32 %3, ptr %p, align 4
+  ret void
+}
+
+define void @cfcmov64mr(ptr %p, i64 %0) {
+; CHECK-LABEL: cfcmov64mr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cmpq (%rdi), %rsi
+; CHECK-NEXT:    cfcmovgq %rsi, (%rdi)
+; CHECK-NEXT:    retq
+  %2 = load i64, ptr %p, align 8
+  %3 = icmp sgt i64 %0, %2
+  %4 = select i1 %3, i64 %0, i64 %2
+  store i64 %4, ptr %p, align 8
+  ret void
+}
+
+define void @volatileload(ptr %p, i32 %0) {
+; CHECK-LABEL: volatileload:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl (%rdi), %eax
+; CHECK-NEXT:    cmpl %eax, %esi
+; CHECK-NEXT:    cmovbl %esi, %eax
+; CHECK-NEXT:    movl %eax, (%rdi)
+; CHECK-NEXT:    retq
+  %2 = load volatile i32, ptr %p, align 4
+  %3 = call i32 @llvm.umin.i32(i32 %0, i32 %2)
+  store i32 %3, ptr %p, align 4
+  ret void
+}
+
+define void @atomicstore(ptr %p, i64 %0) {
+; CHECK-LABEL: atomicstore:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq (%rdi), %rax
+; CHECK-NEXT:    cmpq %rax, %rsi
+; CHECK-NEXT:    cmovaq %rsi, %rax
+; CHECK-NEXT:    movq %rax, (%rdi)
+; CHECK-NEXT:    retq
+  %2 = load i64, ptr %p, align 8
+  %3 = icmp ugt i64 %0, %2
+  %4 = select i1 %3, i64 %0, i64 %2
+  store atomic i64 %4, ptr %p unordered, align 8
+  ret void
+}
+
+define void @loadstore_diff_ptr(ptr %p, i32 %0) {
+; CHECK-LABEL: loadstore_diff_ptr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl (%rdi), %eax
+; CHECK-NEXT:    cmpl %eax, %esi
+; CHECK-NEXT:    cmovbel %eax, %esi
+; CHECK-NEXT:    movl %esi, 4(%rdi)
+; CHECK-NEXT:    retq
+  %2 = getelementptr [2 x i32], ptr %p, i32 0, i32 0
+  %3 = load i32, ptr %2, align 4
+  %4 = icmp ule i32 %0, %3
+  %5 = select i1 %4, i32 %3, i32 %0
+  %6 = getelementptr [2 x i32], ptr %p, i32 0, i32 1
+  store i32 %5, ptr %6, align 4
+  ret void
+}
+
+declare i32 @llvm.smax.i32(i32, i32)
+declare i32 @llvm.umin.i32(i32, i32)