[llvm] [LoongArch] Convert vector mask to `vXi1` using `[X]VMSKLTZ` (PR #142978)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 5 07:26:00 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-loongarch
Author: hev (heiher)
This patch adds a DAG combine that lowers `BITCAST` nodes converting `vXi1` vector masks to scalar integers via the `[X]VMSKLTZ` instructions. The mask is sign-extended to a legal vector type (pushing the extension through bitwise ops and selects where needed) so that a single mask instruction can extract all the sign bits.
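For illustration, a minimal IR pattern this combine is meant to pick up looks like the sketch below; it mirrors the `xvmsk_sgt_v8i32` case in the patch (the function name here is invented for the example, and the test files in the diff carry the authoritative CHECK lines):

```llvm
; An <8 x i1> compare result bitcast to i8: with this combine the mask is
; sign-extended back to <8 x i32> and a single [x]vmskltz picks up the sign
; bits, instead of going through a generic element-by-element expansion.
define i8 @msk_slt_v8i32(<8 x i32> %a, <8 x i32> %b) {
  %cmp = icmp slt <8 x i32> %a, %b
  %mask = bitcast <8 x i1> %cmp to i8
  ret i8 %mask
}
```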
---
Patch is 37.73 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/142978.diff
3 Files Affected:
- (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+105-3)
- (modified) llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll (+487)
- (modified) llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll (+459)
``````````diff
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index b0e23c297e204..9fe462dea9bbe 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -4331,6 +4331,62 @@ static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+// Helper to peek through bitops/trunc/setcc to determine size of source vector.
+// Allows BITCASTCombine to determine what size vector generated a <X x i1>.
+static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size,
+ unsigned Depth) {
+ // Limit recursion.
+ if (Depth >= SelectionDAG::MaxRecursionDepth)
+ return false;
+ switch (Src.getOpcode()) {
+ case ISD::SETCC:
+ case ISD::TRUNCATE:
+ return Src.getOperand(0).getValueSizeInBits() == Size;
+ case ISD::FREEZE:
+ return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1);
+ case ISD::AND:
+ case ISD::XOR:
+ case ISD::OR:
+ return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1) &&
+ checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1);
+ case ISD::SELECT:
+ case ISD::VSELECT:
+ return Src.getOperand(0).getScalarValueSizeInBits() == 1 &&
+ checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1) &&
+ checkBitcastSrcVectorSize(Src.getOperand(2), Size, Depth + 1);
+ case ISD::BUILD_VECTOR:
+ return ISD::isBuildVectorAllZeros(Src.getNode()) ||
+ ISD::isBuildVectorAllOnes(Src.getNode());
+ }
+ return false;
+}
+
+// Helper to push sign extension of vXi1 SETCC result through bitops.
+static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT,
+ SDValue Src, const SDLoc &DL) {
+ switch (Src.getOpcode()) {
+ case ISD::SETCC:
+ case ISD::FREEZE:
+ case ISD::TRUNCATE:
+ case ISD::BUILD_VECTOR:
+ return DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
+ case ISD::AND:
+ case ISD::XOR:
+ case ISD::OR:
+ return DAG.getNode(
+ Src.getOpcode(), DL, SExtVT,
+ signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(0), DL),
+ signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL));
+ case ISD::SELECT:
+ case ISD::VSELECT:
+ return DAG.getSelect(
+ DL, SExtVT, Src.getOperand(0),
+ signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL),
+ signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(2), DL));
+ }
+ llvm_unreachable("Unexpected node type for vXi1 sign extension");
+}
+
static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const LoongArchSubtarget &Subtarget) {
@@ -4401,10 +4457,56 @@ static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG,
}
}
- if (Opc == ISD::DELETED_NODE)
- return SDValue();
+ // Generate vXi1 using [X]VMSKLTZ
+ if (Opc == ISD::DELETED_NODE) {
+ MVT SExtVT;
+ bool UseLASX = false;
+ bool PropagateSExt = false;
+ switch (SrcVT.getSimpleVT().SimpleTy) {
+ default:
+ return SDValue();
+ case MVT::v2i1:
+ SExtVT = MVT::v2i64;
+ break;
+ case MVT::v4i1:
+ SExtVT = MVT::v4i32;
+ if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
+ SExtVT = MVT::v4i64;
+ UseLASX = true;
+ PropagateSExt = true;
+ }
+ break;
+ case MVT::v8i1:
+ SExtVT = MVT::v8i16;
+ if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
+ SExtVT = MVT::v8i32;
+ UseLASX = true;
+ PropagateSExt = true;
+ }
+ break;
+ case MVT::v16i1:
+ SExtVT = MVT::v16i8;
+ if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
+ SExtVT = MVT::v16i16;
+ UseLASX = true;
+ PropagateSExt = true;
+ }
+ break;
+ case MVT::v32i1:
+ SExtVT = MVT::v32i8;
+ UseLASX = true;
+ break;
+ };
+ if (UseLASX && !Subtarget.has32S())
+ return SDValue();
+ Src = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
+ : DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
+ Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
+ } else {
+ Src = Src.getOperand(0);
+ }
- SDValue V = DAG.getNode(Opc, DL, MVT::i64, Src.getOperand(0));
+ SDValue V = DAG.getNode(Opc, DL, MVT::i64, Src);
EVT T = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
V = DAG.getZExtOrTrunc(V, DL, T);
return DAG.getBitcast(VT, V);
diff --git a/llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll b/llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll
index fb3937c476b03..9f5ca5b318516 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll
@@ -16,6 +16,22 @@ entry:
ret i32 %2
}
+define i32 @xmsk_sgt_allzeros_i8(<32 x i8 > %a) {
+; CHECK-LABEL: xmsk_sgt_allzeros_i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvrepli.b $xr1, 0
+; CHECK-NEXT: xvslt.b $xr0, $xr1, $xr0
+; CHECK-NEXT: xvmskltz.b $xr0, $xr0
+; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; CHECK-NEXT: bstrins.d $a0, $a1, 31, 16
+; CHECK-NEXT: ret
+entry:
+ %1 = icmp sgt <32 x i8> %a, splat (i8 0)
+ %2 = bitcast <32 x i1> %1 to i32
+ ret i32 %2
+}
+
define i32 @xmsk_sgt_allones_i8(<32 x i8 > %a) {
; CHECK-LABEL: xmsk_sgt_allones_i8:
; CHECK: # %bb.0: # %entry
@@ -100,6 +116,21 @@ entry:
ret i4 %2
}
+define i32 @xmsk_sle_allzeros_i8(<32 x i8 > %a) {
+; CHECK-LABEL: xmsk_sle_allzeros_i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvslei.b $xr0, $xr0, 0
+; CHECK-NEXT: xvmskltz.b $xr0, $xr0
+; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; CHECK-NEXT: bstrins.d $a0, $a1, 31, 16
+; CHECK-NEXT: ret
+entry:
+ %1 = icmp sle <32 x i8> %a, splat (i8 0)
+ %2 = bitcast <32 x i1> %1 to i32
+ ret i32 %2
+}
+
define i32 @xmsk_sle_allones_i8(<32 x i8 > %a) {
; CHECK-LABEL: xmsk_sle_allones_i8:
; CHECK: # %bb.0: # %entry
@@ -169,3 +200,459 @@ entry:
%2 = bitcast <32 x i1> %1 to i32
ret i32 %2
}
+
+define i4 @xvmsk_sgt_v4i64(<4 x i64> %a, <4 x i64> %b) {
+; CHECK-LABEL: xvmsk_sgt_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvslt.d $xr0, $xr1, $xr0
+; CHECK-NEXT: xvmskltz.d $xr0, $xr0
+; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; CHECK-NEXT: bstrins.d $a0, $a1, 3, 2
+; CHECK-NEXT: ret
+ %x = icmp sgt <4 x i64> %a, %b
+ %res = bitcast <4 x i1> %x to i4
+ ret i4 %res
+}
+
+define i4 @xvmsk_ogt_v4f64(<4 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: xvmsk_ogt_v4f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvfcmp.clt.d $xr0, $xr1, $xr0
+; CHECK-NEXT: xvmskltz.d $xr0, $xr0
+; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; CHECK-NEXT: bstrins.d $a0, $a1, 3, 2
+; CHECK-NEXT: ret
+ %x = fcmp ogt <4 x double> %a, %b
+ %res = bitcast <4 x i1> %x to i4
+ ret i4 %res
+}
+
+define i8 @xvmsk_sgt_v8i32(<8 x i32> %a, <8 x i32> %b) {
+; CHECK-LABEL: xvmsk_sgt_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvslt.w $xr0, $xr1, $xr0
+; CHECK-NEXT: xvmskltz.w $xr0, $xr0
+; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; CHECK-NEXT: bstrins.d $a0, $a1, 7, 4
+; CHECK-NEXT: ret
+ %x = icmp sgt <8 x i32> %a, %b
+ %res = bitcast <8 x i1> %x to i8
+ ret i8 %res
+}
+
+define i8 @xvmsk_ogt_v8f32(<8 x float> %a, <8 x float> %b) {
+; CHECK-LABEL: xvmsk_ogt_v8f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvfcmp.clt.s $xr0, $xr1, $xr0
+; CHECK-NEXT: xvmskltz.w $xr0, $xr0
+; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; CHECK-NEXT: bstrins.d $a0, $a1, 7, 4
+; CHECK-NEXT: ret
+ %x = fcmp ogt <8 x float> %a, %b
+ %res = bitcast <8 x i1> %x to i8
+ ret i8 %res
+}
+
+define i16 @xvmsk_sgt_v16i16(<16 x i16> %a, <16 x i16> %b) {
+; CHECK-LABEL: xvmsk_sgt_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvslt.h $xr0, $xr1, $xr0
+; CHECK-NEXT: xvmskltz.h $xr0, $xr0
+; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; CHECK-NEXT: bstrins.d $a0, $a1, 15, 8
+; CHECK-NEXT: ret
+ %x = icmp sgt <16 x i16> %a, %b
+ %res = bitcast <16 x i1> %x to i16
+ ret i16 %res
+}
+
+define i32 @xvmsk_sgt_v32i8(<32 x i8> %a, <32 x i8> %b) {
+; CHECK-LABEL: xvmsk_sgt_v32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvslt.b $xr0, $xr1, $xr0
+; CHECK-NEXT: xvmskltz.b $xr0, $xr0
+; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; CHECK-NEXT: bstrins.d $a0, $a1, 31, 16
+; CHECK-NEXT: ret
+ %x = icmp sgt <32 x i8> %a, %b
+ %res = bitcast <32 x i1> %x to i32
+ ret i32 %res
+}
+
+define i4 @xvmsk_sgt_and_sgt_v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i64> %d) {
+; CHECK-LABEL: xvmsk_sgt_and_sgt_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvslt.d $xr2, $xr3, $xr2
+; CHECK-NEXT: xvslt.d $xr0, $xr1, $xr0
+; CHECK-NEXT: xvand.v $xr0, $xr0, $xr2
+; CHECK-NEXT: xvmskltz.d $xr0, $xr0
+; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; CHECK-NEXT: bstrins.d $a0, $a1, 3, 2
+; CHECK-NEXT: ret
+ %x0 = icmp sgt <4 x i64> %a, %b
+ %x1 = icmp sgt <4 x i64> %c, %d
+ %y = and <4 x i1> %x0, %x1
+ %res = bitcast <4 x i1> %y to i4
+ ret i4 %res
+}
+
+define i4 @xvmsk_ogt_and_ogt_v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x double> %d) {
+; CHECK-LABEL: xvmsk_ogt_and_ogt_v4f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvfcmp.clt.d $xr2, $xr3, $xr2
+; CHECK-NEXT: xvfcmp.clt.d $xr0, $xr1, $xr0
+; CHECK-NEXT: xvand.v $xr0, $xr0, $xr2
+; CHECK-NEXT: xvmskltz.d $xr0, $xr0
+; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; CHECK-NEXT: bstrins.d $a0, $a1, 3, 2
+; CHECK-NEXT: ret
+ %x0 = fcmp ogt <4 x double> %a, %b
+ %x1 = fcmp ogt <4 x double> %c, %d
+ %y = and <4 x i1> %x0, %x1
+ %res = bitcast <4 x i1> %y to i4
+ ret i4 %res
+}
+
+define i8 @xvmsk_sgt_and_sgt_v8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d) {
+; CHECK-LABEL: xvmsk_sgt_and_sgt_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvslt.w $xr2, $xr3, $xr2
+; CHECK-NEXT: xvslt.w $xr0, $xr1, $xr0
+; CHECK-NEXT: xvand.v $xr0, $xr0, $xr2
+; CHECK-NEXT: xvmskltz.w $xr0, $xr0
+; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; CHECK-NEXT: bstrins.d $a0, $a1, 7, 4
+; CHECK-NEXT: ret
+ %x0 = icmp sgt <8 x i32> %a, %b
+ %x1 = icmp sgt <8 x i32> %c, %d
+ %y = and <8 x i1> %x0, %x1
+ %res = bitcast <8 x i1> %y to i8
+ ret i8 %res
+}
+
+define i8 @xvmsk_sgt_or_sgt_v8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d) {
+; CHECK-LABEL: xvmsk_sgt_or_sgt_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvslt.w $xr2, $xr3, $xr2
+; CHECK-NEXT: xvslt.w $xr0, $xr1, $xr0
+; CHECK-NEXT: xvor.v $xr0, $xr0, $xr2
+; CHECK-NEXT: xvmskltz.w $xr0, $xr0
+; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; CHECK-NEXT: bstrins.d $a0, $a1, 7, 4
+; CHECK-NEXT: ret
+ %x0 = icmp sgt <8 x i32> %a, %b
+ %x1 = icmp sgt <8 x i32> %c, %d
+ %y = or <8 x i1> %x0, %x1
+ %res = bitcast <8 x i1> %y to i8
+ ret i8 %res
+}
+
+define i8 @xvmsk_sgt_or_slt_and_eq_v8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d, <8 x i32> %e, <8 x i32> %f) {
+; CHECK-LABEL: xvmsk_sgt_or_slt_and_eq_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvslt.w $xr2, $xr2, $xr3
+; CHECK-NEXT: xvslt.w $xr0, $xr1, $xr0
+; CHECK-NEXT: xvor.v $xr0, $xr0, $xr2
+; CHECK-NEXT: xvseq.w $xr1, $xr4, $xr5
+; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvmskltz.w $xr0, $xr0
+; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; CHECK-NEXT: bstrins.d $a0, $a1, 7, 4
+; CHECK-NEXT: ret
+ %x0 = icmp sgt <8 x i32> %a, %b
+ %x1 = icmp slt <8 x i32> %c, %d
+ %x2 = icmp eq <8 x i32> %e, %f
+ %y = or <8 x i1> %x0, %x1
+ %z = and <8 x i1> %y, %x2
+ %res = bitcast <8 x i1> %z to i8
+ ret i8 %res
+}
+
+define i8 @xvmsk_eq_vsel_slt_v8i32(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2) {
+; CHECK-LABEL: xvmsk_eq_vsel_slt_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvseq.w $xr0, $xr0, $xr1
+; CHECK-NEXT: xvslti.w $xr1, $xr2, 0
+; CHECK-NEXT: xvrepli.b $xr2, -1
+; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr2, $xr0
+; CHECK-NEXT: xvmskltz.w $xr0, $xr0
+; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; CHECK-NEXT: bstrins.d $a0, $a1, 7, 4
+; CHECK-NEXT: ret
+ %cmp = icmp eq <8 x i32> %a0, %a1
+ %slt = icmp slt <8 x i32> %a2, zeroinitializer
+ %sel = select <8 x i1> %cmp, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i1> %slt
+ %res = bitcast <8 x i1> %sel to i8
+ ret i8 %res
+}
+
+define i8 @xvmsk_sel_eq_or_eq_or_slt_v8i32(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2, <8 x i32> %a3, i1 %a4) {
+; CHECK-LABEL: xvmsk_sel_eq_or_eq_or_slt_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: andi $a0, $a0, 1
+; CHECK-NEXT: xvseq.w $xr2, $xr0, $xr2
+; CHECK-NEXT: addi.d $a1, $zero, -1
+; CHECK-NEXT: maskeqz $a0, $a1, $a0
+; CHECK-NEXT: xvreplgr2vr.w $xr4, $a0
+; CHECK-NEXT: xvand.v $xr2, $xr2, $xr4
+; CHECK-NEXT: xvseq.w $xr0, $xr0, $xr1
+; CHECK-NEXT: xvslti.w $xr1, $xr3, 0
+; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0
+; CHECK-NEXT: xvor.v $xr0, $xr0, $xr2
+; CHECK-NEXT: xvmskltz.w $xr0, $xr0
+; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; CHECK-NEXT: bstrins.d $a0, $a1, 7, 4
+; CHECK-NEXT: ret
+ %cmp0 = icmp eq <8 x i32> %a0, %a1
+ %cmp1 = icmp eq <8 x i32> %a0, %a2
+ %cmp2 = icmp slt <8 x i32> %a3, zeroinitializer
+ %sel = select i1 %a4, <8 x i1> %cmp1, <8 x i1> zeroinitializer
+ %or0 = or <8 x i1> %cmp2, %cmp0
+ %or1 = or <8 x i1> %or0, %sel
+ %res = bitcast <8 x i1> %or1 to i8
+ ret i8 %res
+}
+
+define i8 @xvmsk_ogt_and_ogt_v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d) {
+; CHECK-LABEL: xvmsk_ogt_and_ogt_v8f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvfcmp.clt.s $xr2, $xr3, $xr2
+; CHECK-NEXT: xvfcmp.clt.s $xr0, $xr1, $xr0
+; CHECK-NEXT: xvand.v $xr0, $xr0, $xr2
+; CHECK-NEXT: xvmskltz.w $xr0, $xr0
+; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; CHECK-NEXT: bstrins.d $a0, $a1, 7, 4
+; CHECK-NEXT: ret
+ %x0 = fcmp ogt <8 x float> %a, %b
+ %x1 = fcmp ogt <8 x float> %c, %d
+ %y = and <8 x i1> %x0, %x1
+ %res = bitcast <8 x i1> %y to i8
+ ret i8 %res
+}
+
+define i8 @xvmsk_sgt_xor_sgt_v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d) {
+; CHECK-LABEL: xvmsk_sgt_xor_sgt_v8f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvfcmp.clt.s $xr2, $xr3, $xr2
+; CHECK-NEXT: xvfcmp.clt.s $xr0, $xr1, $xr0
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr2
+; CHECK-NEXT: xvmskltz.w $xr0, $xr0
+; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; CHECK-NEXT: bstrins.d $a0, $a1, 7, 4
+; CHECK-NEXT: ret
+ %x0 = fcmp ogt <8 x float> %a, %b
+ %x1 = fcmp ogt <8 x float> %c, %d
+ %y = xor <8 x i1> %x0, %x1
+ %res = bitcast <8 x i1> %y to i8
+ ret i8 %res
+}
+
+define i8 @xvmsk_ugt_xor_ueq_and_ogt_v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d, <8 x float> %e, <8 x float> %f) {
+; CHECK-LABEL: xvmsk_ugt_xor_ueq_and_ogt_v8f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvfcmp.cueq.s $xr2, $xr2, $xr3
+; CHECK-NEXT: xvfcmp.cult.s $xr0, $xr1, $xr0
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr2
+; CHECK-NEXT: xvfcmp.clt.s $xr1, $xr5, $xr4
+; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvmskltz.w $xr0, $xr0
+; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; CHECK-NEXT: bstrins.d $a0, $a1, 7, 4
+; CHECK-NEXT: ret
+ %x0 = fcmp ugt <8 x float> %a, %b
+ %x1 = fcmp ueq <8 x float> %c, %d
+ %x2 = fcmp ogt <8 x float> %e, %f
+ %y = xor <8 x i1> %x0, %x1
+ %z = and <8 x i1> %y, %x2
+ %res = bitcast <8 x i1> %z to i8
+ ret i8 %res
+}
+
+define i16 @xvmsk_sgt_and_sgt_v16i16(<16 x i16> %a, <16 x i16> %b, <16 x i16> %c, <16 x i16> %d) {
+; CHECK-LABEL: xvmsk_sgt_and_sgt_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvslt.h $xr2, $xr3, $xr2
+; CHECK-NEXT: xvslt.h $xr0, $xr1, $xr0
+; CHECK-NEXT: xvand.v $xr0, $xr0, $xr2
+; CHECK-NEXT: xvmskltz.h $xr0, $xr0
+; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; CHECK-NEXT: bstrins.d $a0, $a1, 15, 8
+; CHECK-NEXT: ret
+ %x0 = icmp sgt <16 x i16> %a, %b
+ %x1 = icmp sgt <16 x i16> %c, %d
+ %y = and <16 x i1> %x0, %x1
+ %res = bitcast <16 x i1> %y to i16
+ ret i16 %res
+}
+
+define i32 @xvmsk_sgt_and_sgt_v32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8> %c, <32 x i8> %d) {
+; CHECK-LABEL: xvmsk_sgt_and_sgt_v32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvslt.b $xr0, $xr1, $xr0
+; CHECK-NEXT: xvslt.b $xr1, $xr3, $xr2
+; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvmskltz.b $xr0, $xr0
+; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; CHECK-NEXT: bstrins.d $a0, $a1, 31, 16
+; CHECK-NEXT: ret
+ %x0 = icmp sgt <32 x i8> %a, %b
+ %x1 = icmp sgt <32 x i8> %c, %d
+ %y = and <32 x i1> %x0, %x1
+ %res = bitcast <32 x i1> %y to i32
+ ret i32 %res
+}
+
+define i8 @xvmsk_eq_v2i64_concat_undef(<2 x i64> %vec) {
+; CHECK-LABEL: xvmsk_eq_v2i64_concat_undef:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vseqi.d $vr0, $vr0, 0
+; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0
+; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 0
+; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1
+; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0
+; CHECK-NEXT: vpackev.h $vr0, $vr0, $vr1
+; CHECK-NEXT: vslli.h $vr0, $vr0, 15
+; CHECK-NEXT: vsrai.h $vr0, $vr0, 15
+; CHECK-NEXT: vmskltz.h $vr0, $vr0
+; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
+; CHECK-NEXT: ret
+ %tobool = icmp eq <2 x i64> %vec, zeroinitializer
+ %insertvec = shufflevector <2 x i1> %tobool, <2 x i1> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %res = bitcast <8 x i1> %insertvec to i8
+ ret i8 %res
+}
+
+define i8 @xvmsk_ne_v4i32_concat_undef(<4 x i32> %vec) {
+; CHECK-LABEL: xvmsk_ne_v4i32_concat_undef:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.d $sp, $sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: vseqi.w $vr0, $vr0, 0
+; CHECK-NEXT: vrepli.b $vr1, -1
+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 3
+; CHECK-NEXT: st.h $a0, $sp, 6
+; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 2
+; CHECK-NEXT: st.h $a0, $sp, 4
+; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 1
+; CHECK-NEXT: st.h $a0, $sp, 2
+; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 0
+; CHECK-NEXT: st.h $a0, $sp, 0
+; CHECK-NEXT: vld $vr0, $sp, 0
+; CHECK-NEXT: vslli.h $vr0, $vr0, 15
+; CHECK-NEXT: vsrai.h $vr0, $vr0, 15
+; CHECK-NEXT: vmskltz.h $vr0, $vr0
+; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
+; CHECK-NEXT: addi.d $sp, $sp, 16
+; CHECK-NEXT: ret
+ %tobool = icmp ne <4 x i32> %vec, zeroinitializer
+ %insertvec = shufflevector <4 x i1> %tobool, <4 x i1> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+ %res = bitcast <8 x i1> %insertvec to i8
+ ret i8 %res
+}
+
+define i8 @xvmsk_ogt_v4f64_concat_undef(<4 x double> %vec) {
+; CHECK-LABEL: xvmsk_ogt_v4f64_concat_undef:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.d $sp, $sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: xvrepli.b $xr1, 0
+; CHECK-NEXT: xvfcmp.clt.d $xr0, $xr1, $xr0
+; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0
+; CHECK-NEXT: xvpickve2gr.d $a1, $xr0, 1
+; CHECK-NEXT: xvpickve2gr.d $a2, $xr0, 2
+; CHECK-NEXT: xvpickve2gr.d $a3, $xr0, 3
+; CHECK-NEXT: st.h $a3, $sp, 6
+; CHECK-NEXT: st.h $a2, $sp, 4
+; CHECK...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/142978
More information about the llvm-commits mailing list