[llvm] [llvm][LoongArch] Introduce LASX and LSX conversion intrinsics (PR #157818)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 10 02:20:45 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-loongarch
Author: hev (heiher)
<details>
<summary>Changes</summary>
This patch introduces the LASX and LSX conversion intrinsics:
- <8 x float> @llvm.loongarch.lasx.cast.128.s(<4 x float>)
- <4 x double> @llvm.loongarch.lasx.cast.128.d(<2 x double>)
- <32 x i8> @llvm.loongarch.lasx.cast.128(<16 x i8>)
- <8 x float> @llvm.loongarch.lasx.concat.128.s(<4 x float>, <4 x float>)
- <4 x double> @llvm.loongarch.lasx.concat.128.d(<2 x double>, <2 x double>)
- <32 x i8> @llvm.loongarch.lasx.concat.128(<16 x i8>, <16 x i8>)
- <4 x float> @llvm.loongarch.lasx.extract.128.lo.s(<8 x float>)
- <2 x double> @llvm.loongarch.lasx.extract.128.lo.d(<4 x double>)
- <16 x i8> @llvm.loongarch.lasx.extract.128.lo(<32 x i8>)
- <4 x float> @llvm.loongarch.lasx.extract.128.hi.s(<8 x float>)
- <2 x double> @llvm.loongarch.lasx.extract.128.hi.d(<4 x double>)
- <16 x i8> @llvm.loongarch.lasx.extract.128.hi(<32 x i8>)
- <8 x float> @llvm.loongarch.lasx.insert.128.lo.s(<8 x float>, <4 x float>)
- <4 x double> @llvm.loongarch.lasx.insert.128.lo.d(<4 x double>, <2 x double>)
- <32 x i8> @llvm.loongarch.lasx.insert.128.lo(<32 x i8>, <16 x i8>)
- <8 x float> @llvm.loongarch.lasx.insert.128.hi.s(<8 x float>, <4 x float>)
- <4 x double> @llvm.loongarch.lasx.insert.128.hi.d(<4 x double>, <2 x double>)
- <32 x i8> @llvm.loongarch.lasx.insert.128.hi(<32 x i8>, <16 x i8>)
---
Full diff: https://github.com/llvm/llvm-project/pull/157818.diff
4 Files Affected:
- (modified) llvm/include/llvm/IR/IntrinsicsLoongArch.td (+38)
- (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+5)
- (modified) llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td (+31)
- (added) llvm/test/CodeGen/LoongArch/lasx/intrinsic-conversion.ll (+234)
``````````diff
diff --git a/llvm/include/llvm/IR/IntrinsicsLoongArch.td b/llvm/include/llvm/IR/IntrinsicsLoongArch.td
index 84026aa9d3624..2466aeec4474a 100644
--- a/llvm/include/llvm/IR/IntrinsicsLoongArch.td
+++ b/llvm/include/llvm/IR/IntrinsicsLoongArch.td
@@ -1192,4 +1192,42 @@ def int_loongarch_lasx_xvstelm_w
def int_loongarch_lasx_xvstelm_d
: VecInt<[], [llvm_v4i64_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
[IntrWriteMem, IntrArgMemOnly, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>;
+
+// LASX and LSX conversion
+def int_loongarch_lasx_cast_128_s
+ : VecInt<[llvm_v8f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_cast_128_d
+ : VecInt<[llvm_v4f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_cast_128
+ : VecInt<[llvm_v32i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_concat_128_s
+ : VecInt<[llvm_v8f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_concat_128_d
+ : VecInt<[llvm_v4f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_concat_128
+ : VecInt<[llvm_v32i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_extract_128_lo_s
+ : VecInt<[llvm_v4f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_extract_128_lo_d
+ : VecInt<[llvm_v2f64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_extract_128_lo
+ : VecInt<[llvm_v16i8_ty], [llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_extract_128_hi_s
+ : VecInt<[llvm_v4f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_extract_128_hi_d
+ : VecInt<[llvm_v2f64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_extract_128_hi
+ : VecInt<[llvm_v16i8_ty], [llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_insert_128_lo_s
+ : VecInt<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_insert_128_lo_d
+ : VecInt<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_insert_128_lo
+ : VecInt<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_insert_128_hi_s
+ : VecInt<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_insert_128_hi_d
+ : VecInt<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_insert_128_hi
+ : VecInt<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
} // TargetPrefix = "loongarch"
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 634914d3b3fd0..1ae5fc8bcfdbb 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -6168,6 +6168,11 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
N->getOperand(1),
DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
N->getOperand(2)));
+ case Intrinsic::loongarch_lasx_concat_128_s:
+ case Intrinsic::loongarch_lasx_concat_128_d:
+ case Intrinsic::loongarch_lasx_concat_128:
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, N->getValueType(0),
+ N->getOperand(1), N->getOperand(2));
}
return SDValue();
}
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index a79c01cbe577a..d070717a8c8ea 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -2028,6 +2028,37 @@ defm : subvector_subreg_lowering<LSX128, v2f64, LASX256, v4f64, 2, sub_128>;
defm : subvector_subreg_lowering<LSX128, v8i16, LASX256, v16i16, 8, sub_128>;
defm : subvector_subreg_lowering<LSX128, v16i8, LASX256, v32i8, 16, sub_128>;
+// LASX and LSX conversion
+def : Pat<(int_loongarch_lasx_cast_128_s (v4f32 LSX128:$src)),
+ (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$src, sub_128)>;
+def : Pat<(int_loongarch_lasx_cast_128_d (v2f64 LSX128:$src)),
+ (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$src, sub_128)>;
+def : Pat<(int_loongarch_lasx_cast_128 (v16i8 LSX128:$src)),
+ (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$src, sub_128)>;
+def : Pat<(int_loongarch_lasx_extract_128_lo_s (v8f32 LASX256:$src)),
+ (EXTRACT_SUBREG LASX256:$src, sub_128)>;
+def : Pat<(int_loongarch_lasx_extract_128_lo_d (v4f64 LASX256:$src)),
+ (EXTRACT_SUBREG LASX256:$src, sub_128)>;
+def : Pat<(int_loongarch_lasx_extract_128_lo (v32i8 LASX256:$src)),
+ (EXTRACT_SUBREG LASX256:$src, sub_128)>;
+def : Pat<(int_loongarch_lasx_extract_128_hi_s (v8f32 LASX256:$src)),
+ (EXTRACT_SUBREG (XVPERMI_Q (IMPLICIT_DEF), LASX256:$src, 1), sub_128)>;
+def : Pat<(int_loongarch_lasx_extract_128_hi_d (v4f64 LASX256:$src)),
+ (EXTRACT_SUBREG (XVPERMI_Q (IMPLICIT_DEF), LASX256:$src, 1), sub_128)>;
+def : Pat<(int_loongarch_lasx_extract_128_hi (v32i8 LASX256:$src)),
+ (EXTRACT_SUBREG (XVPERMI_Q (IMPLICIT_DEF), LASX256:$src, 1), sub_128)>;
+def : Pat<(int_loongarch_lasx_insert_128_lo_s (v8f32 LASX256:$src), (v4f32 LSX128:$lo)),
+ (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 48)>;
+def : Pat<(int_loongarch_lasx_insert_128_lo_d (v4f64 LASX256:$src), (v2f64 LSX128:$lo)),
+ (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 48)>;
+def : Pat<(int_loongarch_lasx_insert_128_lo (v32i8 LASX256:$src), (v16i8 LSX128:$lo)),
+ (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 48)>;
+def : Pat<(int_loongarch_lasx_insert_128_hi_s (v8f32 LASX256:$src), (v4f32 LSX128:$lo)),
+ (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 2)>;
+def : Pat<(int_loongarch_lasx_insert_128_hi_d (v4f64 LASX256:$src), (v2f64 LSX128:$lo)),
+ (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 2)>;
+def : Pat<(int_loongarch_lasx_insert_128_hi (v32i8 LASX256:$src), (v16i8 LSX128:$lo)),
+ (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 2)>;
} // Predicates = [HasExtLASX]
/// Intrinsic pattern
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-conversion.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-conversion.ll
new file mode 100644
index 0000000000000..ede421015f626
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-conversion.ll
@@ -0,0 +1,234 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <8 x float> @llvm.loongarch.lasx.cast.128.s(<4 x float>)
+
+define <8 x float> @lasx_cast_128_s(<4 x float> %va) {
+; CHECK-LABEL: lasx_cast_128_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x float> @llvm.loongarch.lasx.cast.128.s(<4 x float> %va)
+ ret <8 x float> %res
+}
+
+declare <4 x double> @llvm.loongarch.lasx.cast.128.d(<2 x double>)
+
+define <4 x double> @lasx_cast_128_d(<2 x double> %va) {
+; CHECK-LABEL: lasx_cast_128_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x double> @llvm.loongarch.lasx.cast.128.d(<2 x double> %va)
+ ret <4 x double> %res
+}
+
+declare <32 x i8> @llvm.loongarch.lasx.cast.128(<16 x i8>)
+
+define <32 x i8> @lasx_cast_128(<16 x i8> %va) {
+; CHECK-LABEL: lasx_cast_128:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT: ret
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.cast.128(<16 x i8> %va)
+ ret <32 x i8> %res
+}
+
+declare <8 x float> @llvm.loongarch.lasx.concat.128.s(<4 x float>, <4 x float>)
+
+define <8 x float> @lasx_concat_128_s(<4 x float> %va, <4 x float> %vb) {
+; CHECK-LABEL: lasx_concat_128_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1
+; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x float> @llvm.loongarch.lasx.concat.128.s(<4 x float> %va, <4 x float> %vb)
+ ret <8 x float> %res
+}
+
+declare <4 x double> @llvm.loongarch.lasx.concat.128.d(<2 x double>, <2 x double>)
+
+define <4 x double> @lasx_concat_128_d(<2 x double> %va, <2 x double> %vb) {
+; CHECK-LABEL: lasx_concat_128_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1
+; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x double> @llvm.loongarch.lasx.concat.128.d(<2 x double> %va, <2 x double> %vb)
+ ret <4 x double> %res
+}
+
+declare <32 x i8> @llvm.loongarch.lasx.concat.128(<16 x i8>, <16 x i8>)
+
+define <32 x i8> @lasx_concat_128(<16 x i8> %va, <16 x i8> %vb) {
+; CHECK-LABEL: lasx_concat_128:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1
+; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: ret
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.concat.128(<16 x i8> %va, <16 x i8> %vb)
+ ret <32 x i8> %res
+}
+
+declare <4 x float> @llvm.loongarch.lasx.extract.128.lo.s(<8 x float>)
+
+define <4 x float> @lasx_extract_128_lo_s(<8 x float> %va) {
+; CHECK-LABEL: lasx_extract_128_lo_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x float> @llvm.loongarch.lasx.extract.128.lo.s(<8 x float> %va)
+ ret <4 x float> %res
+}
+
+declare <2 x double> @llvm.loongarch.lasx.extract.128.lo.d(<4 x double>)
+
+define <2 x double> @lasx_extract_128_lo_d(<4 x double> %va) {
+; CHECK-LABEL: lasx_extract_128_lo_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x double> @llvm.loongarch.lasx.extract.128.lo.d(<4 x double> %va)
+ ret <2 x double> %res
+}
+
+declare <16 x i8> @llvm.loongarch.lasx.extract.128.lo(<32 x i8>)
+
+define <16 x i8> @lasx_extract_128_lo(<32 x i8> %va) {
+; CHECK-LABEL: lasx_extract_128_lo:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0
+; CHECK-NEXT: ret
+entry:
+ %res = call <16 x i8> @llvm.loongarch.lasx.extract.128.lo(<32 x i8> %va)
+ ret <16 x i8> %res
+}
+
+declare <4 x float> @llvm.loongarch.lasx.extract.128.hi.s(<8 x float>)
+
+define <4 x float> @lasx_extract_128_hi_s(<8 x float> %va) {
+; CHECK-LABEL: lasx_extract_128_hi_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvpermi.q $xr0, $xr0, 1
+; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x float> @llvm.loongarch.lasx.extract.128.hi.s(<8 x float> %va)
+ ret <4 x float> %res
+}
+
+declare <2 x double> @llvm.loongarch.lasx.extract.128.hi.d(<4 x double>)
+
+define <2 x double> @lasx_extract_128_hi_d(<4 x double> %va) {
+; CHECK-LABEL: lasx_extract_128_hi_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvpermi.q $xr0, $xr0, 1
+; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x double> @llvm.loongarch.lasx.extract.128.hi.d(<4 x double> %va)
+ ret <2 x double> %res
+}
+
+declare <16 x i8> @llvm.loongarch.lasx.extract.128.hi(<32 x i8>)
+
+define <16 x i8> @lasx_extract_128_hi(<32 x i8> %va) {
+; CHECK-LABEL: lasx_extract_128_hi:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvpermi.q $xr0, $xr0, 1
+; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0
+; CHECK-NEXT: ret
+entry:
+ %res = call <16 x i8> @llvm.loongarch.lasx.extract.128.hi(<32 x i8> %va)
+ ret <16 x i8> %res
+}
+
+declare <8 x float> @llvm.loongarch.lasx.insert.128.lo.s(<8 x float>, <4 x float>)
+
+define <8 x float> @lasx_insert_128_lo_s(<8 x float> %va, <4 x float> %vb) {
+; CHECK-LABEL: lasx_insert_128_lo_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 48
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x float> @llvm.loongarch.lasx.insert.128.lo.s(<8 x float> %va, <4 x float> %vb)
+ ret <8 x float> %res
+}
+
+declare <4 x double> @llvm.loongarch.lasx.insert.128.lo.d(<4 x double>, <2 x double>)
+
+define <4 x double> @lasx_insert_128_lo_d(<4 x double> %va, <2 x double> %vb) {
+; CHECK-LABEL: lasx_insert_128_lo_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 48
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x double> @llvm.loongarch.lasx.insert.128.lo.d(<4 x double> %va, <2 x double> %vb)
+ ret <4 x double> %res
+}
+
+declare <32 x i8> @llvm.loongarch.lasx.insert.128.lo(<32 x i8>, <16 x i8>)
+
+define <32 x i8> @lasx_insert_128_lo(<32 x i8> %va, <16 x i8> %vb) {
+; CHECK-LABEL: lasx_insert_128_lo:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 48
+; CHECK-NEXT: ret
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.insert.128.lo(<32 x i8> %va, <16 x i8> %vb)
+ ret <32 x i8> %res
+}
+
+declare <8 x float> @llvm.loongarch.lasx.insert.128.hi.s(<8 x float>, <4 x float>)
+
+define <8 x float> @lasx_insert_128_hi_s(<8 x float> %va, <4 x float> %vb) {
+; CHECK-LABEL: lasx_insert_128_hi_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x float> @llvm.loongarch.lasx.insert.128.hi.s(<8 x float> %va, <4 x float> %vb)
+ ret <8 x float> %res
+}
+
+declare <4 x double> @llvm.loongarch.lasx.insert.128.hi.d(<4 x double>, <2 x double>)
+
+define <4 x double> @lasx_insert_128_hi_d(<4 x double> %va, <2 x double> %vb) {
+; CHECK-LABEL: lasx_insert_128_hi_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x double> @llvm.loongarch.lasx.insert.128.hi.d(<4 x double> %va, <2 x double> %vb)
+ ret <4 x double> %res
+}
+
+declare <32 x i8> @llvm.loongarch.lasx.insert.128.hi(<32 x i8>, <16 x i8>)
+
+define <32 x i8> @lasx_insert_128_hi(<32 x i8> %va, <16 x i8> %vb) {
+; CHECK-LABEL: lasx_insert_128_hi:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: ret
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.insert.128.hi(<32 x i8> %va, <16 x i8> %vb)
+ ret <32 x i8> %res
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/157818
More information about the llvm-commits
mailing list