[llvm] [X86][APX] Combine (X86Sub 0, AND(X, Y)) to (X86And X, Y) for CLOAD/CSTORE (PR #136429)
Phoebe Wang via llvm-commits
llvm-commits at lists.llvm.org
Sun Apr 20 18:40:40 PDT 2025
https://github.com/phoebewang updated https://github.com/llvm/llvm-project/pull/136429
>From c57ca00863f0d31af049b665d5c1657f922d4e6f Mon Sep 17 00:00:00 2001
From: "Wang, Phoebe" <phoebe.wang at intel.com>
Date: Sat, 19 Apr 2025 20:57:06 +0800
Subject: [PATCH 1/2] [X86][APX] Combine (X86Sub 0, AND(X, Y)) to (X86And X, Y)
for CLOAD/CSTORE
https://godbolt.org/z/TsWochrbf
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 19 +++++++++----
llvm/test/CodeGen/X86/apx/cf.ll | 36 +++++++++++++++++++++++++
2 files changed, 50 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a4381b99dbae0..c47e1ad526ba1 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -57814,16 +57814,25 @@ static SDValue combineX86CloadCstore(SDNode *N, SelectionDAG &DAG) {
if (Sub.getOpcode() != X86ISD::SUB)
return SDValue();
- SDValue SetCC = Sub.getOperand(1);
+ SDValue Op1 = Sub.getOperand(1);
- if (!X86::isZeroNode(Sub.getOperand(0)) || SetCC.getOpcode() != X86ISD::SETCC)
+ if (!X86::isZeroNode(Sub.getOperand(0)))
return SDValue();
+ SDLoc DL(N);
SmallVector<SDValue, 5> Ops(N->op_values());
- Ops[3] = SetCC.getOperand(0);
- Ops[4] = SetCC.getOperand(1);
+ if (Op1.getOpcode() == X86ISD::SETCC) {
+ Ops[3] = Op1.getOperand(0);
+ Ops[4] = Op1.getOperand(1);
+ } else if (Op1.getOpcode() == ISD::AND && Sub.getValue(0).use_empty()) {
+ Ops[4] = DAG.getNode(X86ISD::AND, DL, Sub->getVTList(), Op1.getOperand(0),
+ Op1.getOperand(1))
+ .getValue(1);
+ } else {
+ return SDValue();
+ }
- return DAG.getMemIntrinsicNode(N->getOpcode(), SDLoc(N), N->getVTList(), Ops,
+ return DAG.getMemIntrinsicNode(N->getOpcode(), DL, N->getVTList(), Ops,
cast<MemSDNode>(N)->getMemoryVT(),
cast<MemSDNode>(N)->getMemOperand());
}
diff --git a/llvm/test/CodeGen/X86/apx/cf.ll b/llvm/test/CodeGen/X86/apx/cf.ll
index 8d104e5f3ced2..1e4ac3f419314 100644
--- a/llvm/test/CodeGen/X86/apx/cf.ll
+++ b/llvm/test/CodeGen/X86/apx/cf.ll
@@ -158,3 +158,39 @@ entry:
tail call void @llvm.masked.store.v1i16.p0(<1 x i16> %5, ptr %p, i32 2, <1 x i1> %1)
ret void
}
+
+define void @load_zext(i1 %cond, ptr %b, ptr %p) {
+; CHECK-LABEL: load_zext:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: andb $1, %dil
+; CHECK-NEXT: cfcmovnew (%rsi), %ax
+; CHECK-NEXT: movzwl %ax, %eax
+; CHECK-NEXT: cfcmovnel %eax, (%rdx)
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast i1 %cond to <1 x i1>
+ %1 = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr %b, i32 2, <1 x i1> %0, <1 x i16> poison)
+ %2 = bitcast <1 x i16> %1 to i16
+ %zext = zext i16 %2 to i32
+ %3 = bitcast i32 %zext to <1 x i32>
+ call void @llvm.masked.store.v1i32.p0(<1 x i32> %3, ptr %p, i32 4, <1 x i1> %0)
+ ret void
+}
+
+define void @load_sext(i1 %cond, ptr %b, ptr %p) {
+; CHECK-LABEL: load_sext:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: andb $1, %dil
+; CHECK-NEXT: cfcmovnel (%rsi), %eax
+; CHECK-NEXT: cltq
+; CHECK-NEXT: cfcmovneq %rax, (%rdx)
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast i1 %cond to <1 x i1>
+ %1 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr %b, i32 2, <1 x i1> %0, <1 x i32> poison)
+ %2 = bitcast <1 x i32> %1 to i32
+ %zext = sext i32 %2 to i64
+ %3 = bitcast i64 %zext to <1 x i64>
+ call void @llvm.masked.store.v1i64.p0(<1 x i64> %3, ptr %p, i32 4, <1 x i1> %0)
+ ret void
+}
>From daa43853e85889862fe3d6ab78c2a49b83e9f0f2 Mon Sep 17 00:00:00 2001
From: "Wang, Phoebe" <phoebe.wang at intel.com>
Date: Mon, 21 Apr 2025 09:40:26 +0800
Subject: [PATCH 2/2] Update comments
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 13 +++++++++----
1 file changed, 9 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c47e1ad526ba1..eec596d6d44ab 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -57803,10 +57803,6 @@ static SDValue combineSubSetcc(SDNode *N, SelectionDAG &DAG) {
}
static SDValue combineX86CloadCstore(SDNode *N, SelectionDAG &DAG) {
- // res, flags2 = sub 0, (setcc cc, flag)
- // cload/cstore ..., cond_ne, flag2
- // ->
- // cload/cstore cc, flag
if (N->getConstantOperandVal(3) != X86::COND_NE)
return SDValue();
@@ -57822,9 +57818,18 @@ static SDValue combineX86CloadCstore(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
SmallVector<SDValue, 5> Ops(N->op_values());
if (Op1.getOpcode() == X86ISD::SETCC) {
+ // res, flags2 = sub 0, (setcc cc, flag)
+ // cload/cstore ..., cond_ne, flags2
+ // ->
+ // cload/cstore cc, flag
Ops[3] = Op1.getOperand(0);
Ops[4] = Op1.getOperand(1);
} else if (Op1.getOpcode() == ISD::AND && Sub.getValue(0).use_empty()) {
+ // res, flags2 = sub 0, (and X, Y)
+ // cload/cstore ..., cond_ne, flags2
+ // ->
+ // res, flags2 = and X, Y
+ // cload/cstore ..., cond_ne, flags2
Ops[4] = DAG.getNode(X86ISD::AND, DL, Sub->getVTList(), Op1.getOperand(0),
Op1.getOperand(1))
.getValue(1);
More information about the llvm-commits
mailing list