[llvm] de21308 - [LoongArch] Make ISD::VSELECT a legal operation with lsx/lasx
via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 6 00:44:03 PST 2023
Author: wanglei
Date: 2023-12-06T16:43:38+08:00
New Revision: de21308f78f3b0f0910638dbdac90967150d19f0
URL: https://github.com/llvm/llvm-project/commit/de21308f78f3b0f0910638dbdac90967150d19f0
DIFF: https://github.com/llvm/llvm-project/commit/de21308f78f3b0f0910638dbdac90967150d19f0.diff
LOG: [LoongArch] Make ISD::VSELECT a legal operation with lsx/lasx
Added:
llvm/test/CodeGen/LoongArch/lasx/vselect.ll
llvm/test/CodeGen/LoongArch/lsx/vselect.ll
Modified:
llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index d297d115d59cb..400327373ad1c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -246,6 +246,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+
+ setOperationAction(ISD::VSELECT, VT, Legal);
}
for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
@@ -277,6 +279,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+
+ setOperationAction(ISD::VSELECT, VT, Legal);
}
for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
@@ -314,6 +318,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setStackPointerRegisterToSaveRestore(LoongArch::R3);
setBooleanContents(ZeroOrOneBooleanContent);
+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index 240f28b0dc5ae..0bd8db1bfdf05 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1480,6 +1480,14 @@ def : Pat<(f32 (vector_extract v8f32:$xj, i64:$rk)),
def : Pat<(f64 (vector_extract v4f64:$xj, i64:$rk)),
(f64 (EXTRACT_SUBREG (XVREPLVE_D v4f64:$xj, i64:$rk), sub_64))>;
+// vselect
+def : Pat<(v32i8 (vselect LASX256:$xj, LASX256:$xd,
+ (v32i8 (SplatPat_uimm8 uimm8:$imm)))),
+ (XVBITSELI_B LASX256:$xd, LASX256:$xj, uimm8:$imm)>;
+foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in
+ def : Pat<(vt (vselect LASX256:$xa, LASX256:$xk, LASX256:$xj)),
+ (XVBITSEL_V LASX256:$xj, LASX256:$xk, LASX256:$xa)>;
+
} // Predicates = [HasExtLASX]
/// Intrinsic pattern
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index fb4726c530b55..5800ff6f62662 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -1564,6 +1564,14 @@ def : Pat<(f32 (vector_extract v4f32:$vj, i64:$rk)),
def : Pat<(f64 (vector_extract v2f64:$vj, i64:$rk)),
(f64 (EXTRACT_SUBREG (VREPLVE_D v2f64:$vj, i64:$rk), sub_64))>;
+// vselect
+def : Pat<(v16i8 (vselect LSX128:$vj, LSX128:$vd,
+ (v16i8 (SplatPat_uimm8 uimm8:$imm)))),
+ (VBITSELI_B LSX128:$vd, LSX128:$vj, uimm8:$imm)>;
+foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in
+ def : Pat<(vt (vselect LSX128:$va, LSX128:$vk, LSX128:$vj)),
+ (VBITSEL_V LSX128:$vj, LSX128:$vk, LSX128:$va)>;
+
} // Predicates = [HasExtLSX]
/// Intrinsic pattern
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vselect.ll b/llvm/test/CodeGen/LoongArch/lasx/vselect.ll
new file mode 100644
index 0000000000000..78b99701832a1
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/vselect.ll
@@ -0,0 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+define void @select_v32i8_imm(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: select_v32i8_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvrepli.h $xr1, -256
+; CHECK-NEXT: xvbitseli.b $xr0, $xr1, 1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <32 x i8>, ptr %a0
+ %sel = select <32 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <32 x i8> %v0, <32 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ store <32 x i8> %sel, ptr %res
+ ret void
+}
+
+define void @select_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: select_v32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvrepli.h $xr2, -256
+; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <32 x i8>, ptr %a0
+ %v1 = load <32 x i8>, ptr %a1
+ %sel = select <32 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <32 x i8> %v0, <32 x i8> %v1
+ store <32 x i8> %sel, ptr %res
+ ret void
+}
+
+define void @select_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: select_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $a3, -16
+; CHECK-NEXT: xvreplgr2vr.w $xr0, $a3
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvld $xr2, $a2, 0
+; CHECK-NEXT: xvbitsel.v $xr0, $xr2, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i16>, ptr %a0
+ %v1 = load <16 x i16>, ptr %a1
+ %sel = select <16 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <16 x i16> %v0, <16 x i16> %v1
+ store <16 x i16> %sel, ptr %res
+ ret void
+}
+
+define void @select_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: select_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: ori $a1, $zero, 0
+; CHECK-NEXT: lu32i.d $a1, -1
+; CHECK-NEXT: xvreplgr2vr.d $xr2, $a1
+; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <8 x i32>, ptr %a0
+ %v1 = load <8 x i32>, ptr %a1
+ %sel = select <8 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <8 x i32> %v0, <8 x i32> %v1
+ store <8 x i32> %sel, ptr %res
+ ret void
+}
+
+define void @select_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: select_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pcalau12i $a3, %pc_hi20(.LCPI4_0)
+; CHECK-NEXT: addi.d $a3, $a3, %pc_lo12(.LCPI4_0)
+; CHECK-NEXT: xvld $xr0, $a3, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvld $xr2, $a2, 0
+; CHECK-NEXT: xvbitsel.v $xr0, $xr2, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x i64>, ptr %a0
+ %v1 = load <4 x i64>, ptr %a1
+ %sel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i64> %v0, <4 x i64> %v1
+ store <4 x i64> %sel, ptr %res
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vselect.ll b/llvm/test/CodeGen/LoongArch/lsx/vselect.ll
new file mode 100644
index 0000000000000..823bd9baba91f
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/vselect.ll
@@ -0,0 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+define void @select_v16i8_imm(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: select_v16i8_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vrepli.h $vr1, -256
+; CHECK-NEXT: vbitseli.b $vr0, $vr1, 255
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i8>, ptr %a0
+ %sel = select <16 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <16 x i8> %v0, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+ store <16 x i8> %sel, ptr %res
+ ret void
+}
+
+define void @select_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: select_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vrepli.h $vr2, -256
+; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i8>, ptr %a0
+ %v1 = load <16 x i8>, ptr %a1
+ %sel = select <16 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <16 x i8> %v0, <16 x i8> %v1
+ store <16 x i8> %sel, ptr %res
+ ret void
+}
+
+define void @select_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: select_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $a3, -16
+; CHECK-NEXT: vreplgr2vr.w $vr0, $a3
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vld $vr2, $a2, 0
+; CHECK-NEXT: vbitsel.v $vr0, $vr2, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <8 x i16>, ptr %a0
+ %v1 = load <8 x i16>, ptr %a1
+ %sel = select <8 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <8 x i16> %v0, <8 x i16> %v1
+ store <8 x i16> %sel, ptr %res
+ ret void
+}
+
+define void @select_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: select_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: ori $a1, $zero, 0
+; CHECK-NEXT: lu32i.d $a1, -1
+; CHECK-NEXT: vreplgr2vr.d $vr2, $a1
+; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x i32>, ptr %a0
+ %v1 = load <4 x i32>, ptr %a1
+ %sel = select <4 x i1> <i1 false, i1 true, i1 false, i1 true>, <4 x i32> %v0, <4 x i32> %v1
+ store <4 x i32> %sel, ptr %res
+ ret void
+}
+
+define void @select_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: select_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pcalau12i $a3, %pc_hi20(.LCPI4_0)
+; CHECK-NEXT: addi.d $a3, $a3, %pc_lo12(.LCPI4_0)
+; CHECK-NEXT: vld $vr0, $a3, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vld $vr2, $a2, 0
+; CHECK-NEXT: vbitsel.v $vr0, $vr2, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <2 x i64>, ptr %a0
+ %v1 = load <2 x i64>, ptr %a1
+ %sel = select <2 x i1> <i1 true, i1 false>, <2 x i64> %v0, <2 x i64> %v1
+ store <2 x i64> %sel, ptr %res
+ ret void
+}
More information about the llvm-commits
mailing list