[llvm] [LoongArch] Lowering v32i8 vector mask generation to `VMSKLTZ` (PR #149953)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 21 19:02:38 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-loongarch
Author: hev (heiher)
Changes
On targets without LASX, lower `v32i8` vector mask generation by splitting the source vector into two `v16i8` halves, applying `VMSKLTZ` to each half, and combining the two 16-bit masks with a shift and an OR.

---
Full diff: https://github.com/llvm/llvm-project/pull/149953.diff
2 Files Affected:
- (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+20-4)
- (modified) llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll (+204)
``````````diff
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index a31fa57fcd8c6..cc8b853432118 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -4700,13 +4700,29 @@ static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG,
UseLASX = true;
break;
};
- if (UseLASX && !(Subtarget.has32S() && Subtarget.hasExtLASX()))
- return SDValue();
Src = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
: DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
- Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
- SDValue V = DAG.getNode(Opc, DL, MVT::i64, Src);
+ SDValue V;
+ if (!Subtarget.has32S() || !Subtarget.hasExtLASX()) {
+ if (Src.getSimpleValueType() == MVT::v32i8) {
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = DAG.SplitVector(Src, DL);
+ Lo = DAG.getNode(LoongArchISD::VMSKLTZ, DL, MVT::i64, Lo);
+ Hi = DAG.getNode(LoongArchISD::VMSKLTZ, DL, MVT::i64, Hi);
+ Hi = DAG.getNode(ISD::SHL, DL, MVT::i64, Hi,
+ DAG.getConstant(16, DL, MVT::i8));
+ V = DAG.getNode(ISD::OR, DL, MVT::i64, Lo, Hi);
+ } else if (UseLASX) {
+ return SDValue();
+ }
+ }
+
+ if (!V) {
+ Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
+ V = DAG.getNode(Opc, DL, MVT::i64, Src);
+ }
+
EVT T = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
V = DAG.getZExtOrTrunc(V, DL, T);
return DAG.getBitcast(VT, V);
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll b/llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll
index ad57bbf9ee5c0..7fa591db5d1fa 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll
@@ -603,3 +603,207 @@ define i4 @vmsk_eq_allzeros_v4i8(<4 x i8> %a) {
%2 = bitcast <4 x i1> %1 to i4
ret i4 %2
}
+
+define i32 @vmsk2_eq_allzeros_i8(<32 x i8> %a) {
+; CHECK-LABEL: vmsk2_eq_allzeros_i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vseqi.b $vr0, $vr0, 0
+; CHECK-NEXT: vmskltz.b $vr0, $vr0
+; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
+; CHECK-NEXT: vseqi.b $vr0, $vr1, 0
+; CHECK-NEXT: vmskltz.b $vr0, $vr0
+; CHECK-NEXT: vpickve2gr.hu $a1, $vr0, 0
+; CHECK-NEXT: slli.d $a1, $a1, 16
+; CHECK-NEXT: or $a0, $a0, $a1
+; CHECK-NEXT: ret
+entry:
+ %1 = icmp eq <32 x i8> %a, splat (i8 0)
+ %2 = bitcast <32 x i1> %1 to i32
+ ret i32 %2
+}
+
+define i32 @vmsk2_sgt_allzeros_i8(<32 x i8> %a) {
+; CHECK-LABEL: vmsk2_sgt_allzeros_i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vrepli.b $vr2, 0
+; CHECK-NEXT: vslt.b $vr0, $vr2, $vr0
+; CHECK-NEXT: vmskltz.b $vr0, $vr0
+; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
+; CHECK-NEXT: vslt.b $vr0, $vr2, $vr1
+; CHECK-NEXT: vmskltz.b $vr0, $vr0
+; CHECK-NEXT: vpickve2gr.hu $a1, $vr0, 0
+; CHECK-NEXT: slli.d $a1, $a1, 16
+; CHECK-NEXT: or $a0, $a0, $a1
+; CHECK-NEXT: ret
+entry:
+ %1 = icmp sgt <32 x i8> %a, splat (i8 0)
+ %2 = bitcast <32 x i1> %1 to i32
+ ret i32 %2
+}
+
+define i32 @vmsk2_sgt_allones_i8(<32 x i8> %a) {
+; CHECK-LABEL: vmsk2_sgt_allones_i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vrepli.b $vr2, -1
+; CHECK-NEXT: vslt.b $vr0, $vr2, $vr0
+; CHECK-NEXT: vmskltz.b $vr0, $vr0
+; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
+; CHECK-NEXT: vslt.b $vr0, $vr2, $vr1
+; CHECK-NEXT: vmskltz.b $vr0, $vr0
+; CHECK-NEXT: vpickve2gr.hu $a1, $vr0, 0
+; CHECK-NEXT: slli.d $a1, $a1, 16
+; CHECK-NEXT: or $a0, $a0, $a1
+; CHECK-NEXT: ret
+entry:
+ %1 = icmp sgt <32 x i8> %a, splat (i8 -1)
+ %2 = bitcast <32 x i1> %1 to i32
+ ret i32 %2
+}
+
+define i32 @vmsk2_sge_allzeros_i8(<32 x i8> %a) {
+; CHECK-LABEL: vmsk2_sge_allzeros_i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vrepli.b $vr2, 0
+; CHECK-NEXT: vsle.b $vr0, $vr2, $vr0
+; CHECK-NEXT: vmskltz.b $vr0, $vr0
+; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
+; CHECK-NEXT: vsle.b $vr0, $vr2, $vr1
+; CHECK-NEXT: vmskltz.b $vr0, $vr0
+; CHECK-NEXT: vpickve2gr.hu $a1, $vr0, 0
+; CHECK-NEXT: slli.d $a1, $a1, 16
+; CHECK-NEXT: or $a0, $a0, $a1
+; CHECK-NEXT: ret
+entry:
+ %1 = icmp sge <32 x i8> %a, splat (i8 0)
+ %2 = bitcast <32 x i1> %1 to i32
+ ret i32 %2
+}
+
+define i32 @vmsk2_slt_allzeros_i8(<32 x i8> %a) {
+; CHECK-LABEL: vmsk2_slt_allzeros_i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmskltz.b $vr0, $vr0
+; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
+; CHECK-NEXT: vmskltz.b $vr0, $vr1
+; CHECK-NEXT: vpickve2gr.hu $a1, $vr0, 0
+; CHECK-NEXT: slli.d $a1, $a1, 16
+; CHECK-NEXT: or $a0, $a0, $a1
+; CHECK-NEXT: ret
+entry:
+ %1 = icmp slt <32 x i8> %a, splat (i8 0)
+ %2 = bitcast <32 x i1> %1 to i32
+ ret i32 %2
+}
+
+define i32 @vmsk2_sle_allzeros_i8(<32 x i8> %a) {
+; CHECK-LABEL: vmsk2_sle_allzeros_i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vslei.b $vr0, $vr0, 0
+; CHECK-NEXT: vmskltz.b $vr0, $vr0
+; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
+; CHECK-NEXT: vslei.b $vr0, $vr1, 0
+; CHECK-NEXT: vmskltz.b $vr0, $vr0
+; CHECK-NEXT: vpickve2gr.hu $a1, $vr0, 0
+; CHECK-NEXT: slli.d $a1, $a1, 16
+; CHECK-NEXT: or $a0, $a0, $a1
+; CHECK-NEXT: ret
+entry:
+ %1 = icmp sle <32 x i8> %a, splat (i8 0)
+ %2 = bitcast <32 x i1> %1 to i32
+ ret i32 %2
+}
+
+define i32 @vmsk2_sle_allones_i8(<32 x i8> %a) {
+; CHECK-LABEL: vmsk2_sle_allones_i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vslei.b $vr0, $vr0, -1
+; CHECK-NEXT: vmskltz.b $vr0, $vr0
+; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
+; CHECK-NEXT: vslei.b $vr0, $vr1, -1
+; CHECK-NEXT: vmskltz.b $vr0, $vr0
+; CHECK-NEXT: vpickve2gr.hu $a1, $vr0, 0
+; CHECK-NEXT: slli.d $a1, $a1, 16
+; CHECK-NEXT: or $a0, $a0, $a1
+; CHECK-NEXT: ret
+entry:
+ %1 = icmp sle <32 x i8> %a, splat (i8 -1)
+ %2 = bitcast <32 x i1> %1 to i32
+ ret i32 %2
+}
+
+define i32 @vmsk2_ne_allzeros_i8(<32 x i8> %a) {
+; CHECK-LABEL: vmsk2_ne_allzeros_i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vseqi.b $vr0, $vr0, 0
+; CHECK-NEXT: vxori.b $vr0, $vr0, 255
+; CHECK-NEXT: vmskltz.b $vr0, $vr0
+; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
+; CHECK-NEXT: vseqi.b $vr0, $vr1, 0
+; CHECK-NEXT: vxori.b $vr0, $vr0, 255
+; CHECK-NEXT: vmskltz.b $vr0, $vr0
+; CHECK-NEXT: vpickve2gr.hu $a1, $vr0, 0
+; CHECK-NEXT: slli.d $a1, $a1, 16
+; CHECK-NEXT: or $a0, $a0, $a1
+; CHECK-NEXT: ret
+entry:
+ %1 = icmp ne <32 x i8> %a, splat (i8 0)
+ %2 = bitcast <32 x i1> %1 to i32
+ ret i32 %2
+}
+
+define i32 @vmsk2_sgt_v32i8(<32 x i8> %a, <32 x i8> %b) {
+; CHECK-LABEL: vmsk2_sgt_v32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vslt.b $vr0, $vr2, $vr0
+; CHECK-NEXT: vmskltz.b $vr0, $vr0
+; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
+; CHECK-NEXT: vslt.b $vr0, $vr3, $vr1
+; CHECK-NEXT: vmskltz.b $vr0, $vr0
+; CHECK-NEXT: vpickve2gr.hu $a1, $vr0, 0
+; CHECK-NEXT: slli.d $a1, $a1, 16
+; CHECK-NEXT: or $a0, $a0, $a1
+; CHECK-NEXT: ret
+ %x = icmp sgt <32 x i8> %a, %b
+ %res = bitcast <32 x i1> %x to i32
+ ret i32 %res
+}
+
+define i32 @vmsk2_sgt_and_sgt_v32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8> %c, <32 x i8> %d) {
+; CHECK-LABEL: vmsk2_sgt_and_sgt_v32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vslt.b $vr0, $vr2, $vr0
+; CHECK-NEXT: vslt.b $vr1, $vr3, $vr1
+; CHECK-NEXT: vslt.b $vr2, $vr6, $vr4
+; CHECK-NEXT: vslt.b $vr3, $vr7, $vr5
+; CHECK-NEXT: vand.v $vr1, $vr1, $vr3
+; CHECK-NEXT: vand.v $vr0, $vr0, $vr2
+; CHECK-NEXT: vmskltz.b $vr0, $vr0
+; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
+; CHECK-NEXT: vmskltz.b $vr0, $vr1
+; CHECK-NEXT: vpickve2gr.hu $a1, $vr0, 0
+; CHECK-NEXT: slli.d $a1, $a1, 16
+; CHECK-NEXT: or $a0, $a0, $a1
+; CHECK-NEXT: ret
+ %x0 = icmp sgt <32 x i8> %a, %b
+ %x1 = icmp sgt <32 x i8> %c, %d
+ %y = and <32 x i1> %x0, %x1
+ %res = bitcast <32 x i1> %y to i32
+ ret i32 %res
+}
+
+define i32 @vmsk2_trunc_i8(<32 x i8> %a) {
+; CHECK-LABEL: vmsk2_trunc_i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vslli.b $vr0, $vr0, 7
+; CHECK-NEXT: vmskltz.b $vr0, $vr0
+; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
+; CHECK-NEXT: vslli.b $vr0, $vr1, 7
+; CHECK-NEXT: vmskltz.b $vr0, $vr0
+; CHECK-NEXT: vpickve2gr.hu $a1, $vr0, 0
+; CHECK-NEXT: slli.d $a1, $a1, 16
+; CHECK-NEXT: or $a0, $a0, $a1
+; CHECK-NEXT: ret
+ %y = trunc <32 x i8> %a to <32 x i1>
+ %res = bitcast <32 x i1> %y to i32
+ ret i32 %res
+}
``````````
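For readers unfamiliar with the instruction, here is a minimal standalone C++ sketch (not LLVM code; the function names `vmskltz_b` and `vmsk2_b` are illustrative only) of the bit-level semantics this lowering implements: each 16-byte half yields a 16-bit sign mask, the same value `vmskltz.b` produces, and the halves are combined as `lo | (hi << 16)`.

```cpp
#include <cstdint>
#include <cstdio>

// Scalar model of vmskltz.b on one 16-byte LSX register:
// bit i of the result is the sign bit (bit 7) of byte i.
static uint32_t vmskltz_b(const int8_t *v) {
  uint32_t mask = 0;
  for (int i = 0; i < 16; ++i)
    mask |= uint32_t(v[i] < 0) << i;
  return mask;
}

// Model of the v32i8 lowering: mask each half, then shift + or.
static uint32_t vmsk2_b(const int8_t *v) {
  uint32_t lo = vmskltz_b(v);
  uint32_t hi = vmskltz_b(v + 16);
  return lo | (hi << 16);
}

int main() {
  int8_t v[32] = {0};
  v[0] = -1;  // sets bit 0 of the low half
  v[31] = -1; // sets bit 15 of the high half, i.e. bit 31 of the result
  std::printf("0x%08x\n", (unsigned)vmsk2_b(v)); // prints 0x80000001
  return 0;
}
```

The high half's mask occupies bits 16 through 31, which is exactly the `slli.d $a1, $a1, 16` / `or $a0, $a0, $a1` sequence in the CHECK lines above.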
https://github.com/llvm/llvm-project/pull/149953