[llvm] [LoongArch] use TypeWidenVector for most illegal vector types (PR #126456)

via llvm-commits llvm-commits at lists.llvm.org
Sun Feb 9 18:16:32 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-loongarch

Author: None (tangaac)

<details>
<summary>Changes</summary>

`TypeWidenVector` widens an illegal vector type to a larger vector with more elements of the same element type,
e.g. in lsx
v2i32 -> v4i32
v4i16 -> v8i16
With this we can make good use of the `vilvh` and `vilvl` instructions for vector `sext` and `zext` in a later PR.

The previous action was `TypePromoteInteger`, which replaces each integer element with a wider one while keeping the element count,
e.g. in lsx
v2i32 -> v2i64
v4i16 -> v4i32



---

Patch is 23.96 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/126456.diff


4 Files Affected:

- (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+9) 
- (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.h (+1) 
- (added) llvm/test/CodeGen/LoongArch/lsx/vec-sext.ll (+322) 
- (added) llvm/test/CodeGen/LoongArch/lsx/vec-zext.ll (+301) 


``````````diff
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 2282dc8955613ad..dceb3c682d2df49 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -6570,3 +6570,12 @@ bool LoongArchTargetLowering::shouldAlignPointerArgs(CallInst *CI,
 
   return true;
 }
+
+TargetLoweringBase::LegalizeTypeAction
+LoongArchTargetLowering::getPreferredVectorAction(MVT VT) const {
+  if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
+      VT.getVectorElementType() != MVT::i1)
+    return TypeWidenVector;
+
+  return TargetLoweringBase::getPreferredVectorAction(VT);
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index a215ab523874b26..f8d4cef76b9551c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -281,6 +281,7 @@ class LoongArchTargetLowering : public TargetLowering {
                               Align &PrefAlign) const override;
 
   bool isFPImmVLDILegal(const APFloat &Imm, EVT VT) const;
+  LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;
 
 private:
   /// Target-specific function used to lower LoongArch calling conventions.
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-sext.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-sext.ll
new file mode 100644
index 000000000000000..84e629825d78992
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-sext.ll
@@ -0,0 +1,322 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
+
+
+define void @load_sext_2i8_to_2i64(ptr%ptr, ptr%dst) {
+; CHECK-LABEL: load_sext_2i8_to_2i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ld.h $a0, $a0, 0
+; CHECK-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI0_0)
+; CHECK-NEXT:    vld $vr0, $a2, %pc_lo12(.LCPI0_0)
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $a0, 0
+; CHECK-NEXT:    vshuf.b $vr0, $vr0, $vr1, $vr0
+; CHECK-NEXT:    vslli.d $vr0, $vr0, 56
+; CHECK-NEXT:    vsrai.d $vr0, $vr0, 56
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    ret
+entry:
+  %A = load <2 x i8>, ptr %ptr
+  %B = sext <2 x i8> %A to <2 x i64>
+  store <2 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_4i8_to_4i32(ptr%ptr, ptr%dst) {
+; CHECK-LABEL: load_sext_4i8_to_4i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ld.w $a0, $a0, 0
+; CHECK-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI1_0)
+; CHECK-NEXT:    vld $vr0, $a2, %pc_lo12(.LCPI1_0)
+; CHECK-NEXT:    vinsgr2vr.w $vr1, $a0, 0
+; CHECK-NEXT:    vshuf.b $vr0, $vr0, $vr1, $vr0
+; CHECK-NEXT:    vslli.w $vr0, $vr0, 24
+; CHECK-NEXT:    vsrai.w $vr0, $vr0, 24
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    ret
+entry:
+  %A = load <4 x i8>, ptr %ptr
+  %B = sext <4 x i8> %A to <4 x i32>
+  store <4 x i32> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_8i8_to_8i16(ptr%ptr, ptr%dst) {
+; CHECK-LABEL: load_sext_8i8_to_8i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ld.d $a0, $a0, 0
+; CHECK-NEXT:    vinsgr2vr.d $vr0, $a0, 0
+; CHECK-NEXT:    vilvl.b $vr0, $vr0, $vr0
+; CHECK-NEXT:    vslli.h $vr0, $vr0, 8
+; CHECK-NEXT:    vsrai.h $vr0, $vr0, 8
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    ret
+entry:
+  %A = load <8 x i8>, ptr %ptr
+  %B = sext <8 x i8> %A to <8 x i16>
+  store <8 x i16> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_2i16_to_2i64(ptr%ptr, ptr%dst) {
+; CHECK-LABEL: load_sext_2i16_to_2i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ld.w $a0, $a0, 0
+; CHECK-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI3_0)
+; CHECK-NEXT:    vld $vr0, $a2, %pc_lo12(.LCPI3_0)
+; CHECK-NEXT:    vinsgr2vr.w $vr1, $a0, 0
+; CHECK-NEXT:    vshuf.h $vr0, $vr0, $vr1
+; CHECK-NEXT:    vslli.d $vr0, $vr0, 48
+; CHECK-NEXT:    vsrai.d $vr0, $vr0, 48
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    ret
+entry:
+  %A = load <2 x i16>, ptr %ptr
+  %B = sext <2 x i16> %A to <2 x i64>
+  store <2 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_4i16_to_4i32(ptr%ptr, ptr%dst) {
+; CHECK-LABEL: load_sext_4i16_to_4i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ld.d $a0, $a0, 0
+; CHECK-NEXT:    vinsgr2vr.d $vr0, $a0, 0
+; CHECK-NEXT:    vilvl.h $vr0, $vr0, $vr0
+; CHECK-NEXT:    vslli.w $vr0, $vr0, 16
+; CHECK-NEXT:    vsrai.w $vr0, $vr0, 16
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    ret
+entry:
+  %A = load <4 x i16>, ptr %ptr
+  %B = sext <4 x i16> %A to <4 x i32>
+  store <4 x i32> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_2i32_to_2i64(ptr%ptr, ptr%dst) {
+; CHECK-LABEL: load_sext_2i32_to_2i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ld.d $a0, $a0, 0
+; CHECK-NEXT:    vinsgr2vr.d $vr0, $a0, 0
+; CHECK-NEXT:    vshuf4i.w $vr0, $vr0, 16
+; CHECK-NEXT:    vslli.d $vr0, $vr0, 32
+; CHECK-NEXT:    vsrai.d $vr0, $vr0, 32
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    ret
+entry:
+  %A = load <2 x i32>, ptr %ptr
+  %B = sext <2 x i32> %A to <2 x i64>
+  store <2 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_16i8_to_16i16(ptr%ptr, ptr%dst) {
+; CHECK-LABEL: load_sext_16i8_to_16i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vld $vr0, $a0, 0
+; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI6_0)
+; CHECK-NEXT:    vld $vr1, $a0, %pc_lo12(.LCPI6_0)
+; CHECK-NEXT:    vshuf.b $vr1, $vr0, $vr0, $vr1
+; CHECK-NEXT:    vilvl.b $vr1, $vr1, $vr1
+; CHECK-NEXT:    vslli.h $vr1, $vr1, 8
+; CHECK-NEXT:    vsrai.h $vr1, $vr1, 8
+; CHECK-NEXT:    vilvl.b $vr0, $vr0, $vr0
+; CHECK-NEXT:    vslli.h $vr0, $vr0, 8
+; CHECK-NEXT:    vsrai.h $vr0, $vr0, 8
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    vst $vr1, $a1, 16
+; CHECK-NEXT:    ret
+entry:
+  %A = load <16 x i8>, ptr %ptr
+  %B = sext <16 x i8> %A to <16 x i16>
+  store <16 x i16> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_16i8_to_16i32(ptr%ptr, ptr%dst) {
+; CHECK-LABEL: load_sext_16i8_to_16i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vld $vr0, $a0, 0
+; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI7_0)
+; CHECK-NEXT:    vld $vr1, $a0, %pc_lo12(.LCPI7_0)
+; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI7_1)
+; CHECK-NEXT:    vld $vr2, $a0, %pc_lo12(.LCPI7_1)
+; CHECK-NEXT:    vshuf.b $vr1, $vr0, $vr0, $vr1
+; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI7_2)
+; CHECK-NEXT:    vld $vr3, $a0, %pc_lo12(.LCPI7_2)
+; CHECK-NEXT:    vshuf.b $vr1, $vr0, $vr1, $vr2
+; CHECK-NEXT:    vslli.w $vr1, $vr1, 24
+; CHECK-NEXT:    vsrai.w $vr1, $vr1, 24
+; CHECK-NEXT:    vshuf.b $vr3, $vr0, $vr0, $vr3
+; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI7_3)
+; CHECK-NEXT:    vld $vr4, $a0, %pc_lo12(.LCPI7_3)
+; CHECK-NEXT:    vshuf.b $vr3, $vr0, $vr3, $vr2
+; CHECK-NEXT:    vslli.w $vr3, $vr3, 24
+; CHECK-NEXT:    vsrai.w $vr3, $vr3, 24
+; CHECK-NEXT:    vshuf.b $vr4, $vr0, $vr0, $vr4
+; CHECK-NEXT:    vshuf.b $vr4, $vr0, $vr4, $vr2
+; CHECK-NEXT:    vslli.w $vr4, $vr4, 24
+; CHECK-NEXT:    vsrai.w $vr4, $vr4, 24
+; CHECK-NEXT:    vshuf.b $vr0, $vr0, $vr0, $vr2
+; CHECK-NEXT:    vslli.w $vr0, $vr0, 24
+; CHECK-NEXT:    vsrai.w $vr0, $vr0, 24
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    vst $vr4, $a1, 48
+; CHECK-NEXT:    vst $vr3, $a1, 32
+; CHECK-NEXT:    vst $vr1, $a1, 16
+; CHECK-NEXT:    ret
+entry:
+  %A = load <16 x i8>, ptr %ptr
+  %B = sext <16 x i8> %A to <16 x i32>
+  store <16 x i32> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_16i8_to_16i64(ptr%ptr, ptr%dst) {
+; CHECK-LABEL: load_sext_16i8_to_16i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vld $vr0, $a0, 0
+; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI8_0)
+; CHECK-NEXT:    vld $vr1, $a0, %pc_lo12(.LCPI8_0)
+; CHECK-NEXT:    vshuf4i.b $vr2, $vr0, 14
+; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI8_1)
+; CHECK-NEXT:    vld $vr3, $a0, %pc_lo12(.LCPI8_1)
+; CHECK-NEXT:    vshuf.b $vr2, $vr0, $vr2, $vr1
+; CHECK-NEXT:    vslli.d $vr2, $vr2, 56
+; CHECK-NEXT:    vsrai.d $vr2, $vr2, 56
+; CHECK-NEXT:    vshuf.b $vr3, $vr0, $vr0, $vr3
+; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI8_2)
+; CHECK-NEXT:    vld $vr4, $a0, %pc_lo12(.LCPI8_2)
+; CHECK-NEXT:    vshuf.b $vr3, $vr0, $vr3, $vr1
+; CHECK-NEXT:    vslli.d $vr3, $vr3, 56
+; CHECK-NEXT:    vsrai.d $vr3, $vr3, 56
+; CHECK-NEXT:    vshuf.b $vr4, $vr0, $vr0, $vr4
+; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI8_3)
+; CHECK-NEXT:    vld $vr5, $a0, %pc_lo12(.LCPI8_3)
+; CHECK-NEXT:    vshuf.b $vr4, $vr0, $vr4, $vr1
+; CHECK-NEXT:    vslli.d $vr4, $vr4, 56
+; CHECK-NEXT:    vsrai.d $vr4, $vr4, 56
+; CHECK-NEXT:    vshuf.b $vr5, $vr0, $vr0, $vr5
+; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI8_4)
+; CHECK-NEXT:    vld $vr6, $a0, %pc_lo12(.LCPI8_4)
+; CHECK-NEXT:    vshuf.b $vr5, $vr0, $vr5, $vr1
+; CHECK-NEXT:    vslli.d $vr5, $vr5, 56
+; CHECK-NEXT:    vsrai.d $vr5, $vr5, 56
+; CHECK-NEXT:    vshuf.b $vr6, $vr0, $vr0, $vr6
+; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI8_5)
+; CHECK-NEXT:    vld $vr7, $a0, %pc_lo12(.LCPI8_5)
+; CHECK-NEXT:    vshuf.b $vr6, $vr0, $vr6, $vr1
+; CHECK-NEXT:    vslli.d $vr6, $vr6, 56
+; CHECK-NEXT:    vsrai.d $vr6, $vr6, 56
+; CHECK-NEXT:    vshuf.b $vr7, $vr0, $vr0, $vr7
+; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI8_6)
+; CHECK-NEXT:    vld $vr8, $a0, %pc_lo12(.LCPI8_6)
+; CHECK-NEXT:    vshuf.b $vr7, $vr0, $vr7, $vr1
+; CHECK-NEXT:    vslli.d $vr7, $vr7, 56
+; CHECK-NEXT:    vsrai.d $vr7, $vr7, 56
+; CHECK-NEXT:    vshuf.b $vr8, $vr0, $vr0, $vr8
+; CHECK-NEXT:    vshuf.b $vr8, $vr0, $vr8, $vr1
+; CHECK-NEXT:    vslli.d $vr8, $vr8, 56
+; CHECK-NEXT:    vsrai.d $vr8, $vr8, 56
+; CHECK-NEXT:    vshuf.b $vr0, $vr0, $vr0, $vr1
+; CHECK-NEXT:    vslli.d $vr0, $vr0, 56
+; CHECK-NEXT:    vsrai.d $vr0, $vr0, 56
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    vst $vr8, $a1, 112
+; CHECK-NEXT:    vst $vr7, $a1, 96
+; CHECK-NEXT:    vst $vr6, $a1, 80
+; CHECK-NEXT:    vst $vr5, $a1, 64
+; CHECK-NEXT:    vst $vr4, $a1, 48
+; CHECK-NEXT:    vst $vr3, $a1, 32
+; CHECK-NEXT:    vst $vr2, $a1, 16
+; CHECK-NEXT:    ret
+entry:
+  %A = load <16 x i8>, ptr %ptr
+  %B = sext <16 x i8> %A to <16 x i64>
+  store <16 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_8i16_to_8i32(ptr%ptr, ptr%dst) {
+; CHECK-LABEL: load_sext_8i16_to_8i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vld $vr0, $a0, 0
+; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI9_0)
+; CHECK-NEXT:    vld $vr1, $a0, %pc_lo12(.LCPI9_0)
+; CHECK-NEXT:    vshuf.h $vr1, $vr0, $vr0
+; CHECK-NEXT:    vilvl.h $vr1, $vr1, $vr1
+; CHECK-NEXT:    vslli.w $vr1, $vr1, 16
+; CHECK-NEXT:    vsrai.w $vr1, $vr1, 16
+; CHECK-NEXT:    vilvl.h $vr0, $vr0, $vr0
+; CHECK-NEXT:    vslli.w $vr0, $vr0, 16
+; CHECK-NEXT:    vsrai.w $vr0, $vr0, 16
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    vst $vr1, $a1, 16
+; CHECK-NEXT:    ret
+entry:
+  %A = load <8 x i16>, ptr %ptr
+  %B = sext <8 x i16> %A to <8 x i32>
+  store <8 x i32> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_8i16_to_8i64(ptr%ptr, ptr%dst) {
+; CHECK-LABEL: load_sext_8i16_to_8i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vld $vr0, $a0, 0
+; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI10_0)
+; CHECK-NEXT:    vld $vr1, $a0, %pc_lo12(.LCPI10_0)
+; CHECK-NEXT:    vshuf4i.h $vr2, $vr0, 14
+; CHECK-NEXT:    vori.b $vr3, $vr1, 0
+; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI10_1)
+; CHECK-NEXT:    vld $vr4, $a0, %pc_lo12(.LCPI10_1)
+; CHECK-NEXT:    vshuf.h $vr3, $vr0, $vr2
+; CHECK-NEXT:    vslli.d $vr2, $vr3, 48
+; CHECK-NEXT:    vsrai.d $vr2, $vr2, 48
+; CHECK-NEXT:    vshuf.h $vr4, $vr0, $vr0
+; CHECK-NEXT:    vori.b $vr3, $vr1, 0
+; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI10_2)
+; CHECK-NEXT:    vld $vr5, $a0, %pc_lo12(.LCPI10_2)
+; CHECK-NEXT:    vshuf.h $vr3, $vr0, $vr4
+; CHECK-NEXT:    vslli.d $vr3, $vr3, 48
+; CHECK-NEXT:    vsrai.d $vr3, $vr3, 48
+; CHECK-NEXT:    vshuf.h $vr5, $vr0, $vr0
+; CHECK-NEXT:    vori.b $vr4, $vr1, 0
+; CHECK-NEXT:    vshuf.h $vr4, $vr0, $vr5
+; CHECK-NEXT:    vslli.d $vr4, $vr4, 48
+; CHECK-NEXT:    vsrai.d $vr4, $vr4, 48
+; CHECK-NEXT:    vshuf.h $vr1, $vr0, $vr0
+; CHECK-NEXT:    vslli.d $vr0, $vr1, 48
+; CHECK-NEXT:    vsrai.d $vr0, $vr0, 48
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    vst $vr4, $a1, 48
+; CHECK-NEXT:    vst $vr3, $a1, 32
+; CHECK-NEXT:    vst $vr2, $a1, 16
+; CHECK-NEXT:    ret
+entry:
+  %A = load <8 x i16>, ptr %ptr
+  %B = sext <8 x i16> %A to <8 x i64>
+  store <8 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_4i32_to_4i64(ptr%ptr, ptr%dst) {
+; CHECK-LABEL: load_sext_4i32_to_4i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vld $vr0, $a0, 0
+; CHECK-NEXT:    vshuf4i.w $vr1, $vr0, 14
+; CHECK-NEXT:    vshuf4i.w $vr1, $vr1, 16
+; CHECK-NEXT:    vslli.d $vr1, $vr1, 32
+; CHECK-NEXT:    vsrai.d $vr1, $vr1, 32
+; CHECK-NEXT:    vshuf4i.w $vr0, $vr0, 16
+; CHECK-NEXT:    vslli.d $vr0, $vr0, 32
+; CHECK-NEXT:    vsrai.d $vr0, $vr0, 32
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    vst $vr1, $a1, 16
+; CHECK-NEXT:    ret
+entry:
+  %A = load <4 x i32>, ptr %ptr
+  %B = sext <4 x i32> %A to <4 x i64>
+  store <4 x i64> %B, ptr %dst
+  ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-zext.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-zext.ll
new file mode 100644
index 000000000000000..40f102bed97a557
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-zext.ll
@@ -0,0 +1,301 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
+
+
+define void @load_zext_2i8_to_2i64(ptr%ptr, ptr%dst) {
+; CHECK-LABEL: load_zext_2i8_to_2i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ld.h $a0, $a0, 0
+; CHECK-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI0_0)
+; CHECK-NEXT:    vld $vr0, $a2, %pc_lo12(.LCPI0_0)
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $a0, 0
+; CHECK-NEXT:    vrepli.b $vr2, 0
+; CHECK-NEXT:    vshuf.b $vr0, $vr1, $vr2, $vr0
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    ret
+entry:
+  %A = load <2 x i8>, ptr %ptr
+  %B = zext <2 x i8> %A to <2 x i64>
+  store <2 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_zext_4i8_to_4i32(ptr%ptr, ptr%dst) {
+; CHECK-LABEL: load_zext_4i8_to_4i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ld.w $a0, $a0, 0
+; CHECK-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI1_0)
+; CHECK-NEXT:    vld $vr0, $a2, %pc_lo12(.LCPI1_0)
+; CHECK-NEXT:    vinsgr2vr.w $vr1, $a0, 0
+; CHECK-NEXT:    vrepli.b $vr2, 0
+; CHECK-NEXT:    vshuf.b $vr0, $vr1, $vr2, $vr0
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    ret
+entry:
+  %A = load <4 x i8>, ptr %ptr
+  %B = zext <4 x i8> %A to <4 x i32>
+  store <4 x i32> %B, ptr %dst
+  ret void
+}
+
+define void @load_zext_8i8_to_8i16(ptr%ptr, ptr%dst) {
+; CHECK-LABEL: load_zext_8i8_to_8i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ld.d $a0, $a0, 0
+; CHECK-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI2_0)
+; CHECK-NEXT:    vld $vr0, $a2, %pc_lo12(.LCPI2_0)
+; CHECK-NEXT:    vinsgr2vr.d $vr1, $a0, 0
+; CHECK-NEXT:    vrepli.b $vr2, 0
+; CHECK-NEXT:    vshuf.b $vr0, $vr1, $vr2, $vr0
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    ret
+entry:
+  %A = load <8 x i8>, ptr %ptr
+  %B = zext <8 x i8> %A to <8 x i16>
+  store <8 x i16> %B, ptr %dst
+  ret void
+}
+
+define void @load_zext_2i16_to_2i64(ptr%ptr, ptr%dst) {
+; CHECK-LABEL: load_zext_2i16_to_2i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ld.w $a0, $a0, 0
+; CHECK-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI3_0)
+; CHECK-NEXT:    vld $vr0, $a2, %pc_lo12(.LCPI3_0)
+; CHECK-NEXT:    vinsgr2vr.w $vr1, $a0, 0
+; CHECK-NEXT:    vrepli.b $vr2, 0
+; CHECK-NEXT:    vshuf.h $vr0, $vr1, $vr2
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    ret
+entry:
+  %A = load <2 x i16>, ptr %ptr
+  %B = zext <2 x i16> %A to <2 x i64>
+  store <2 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_zext_4i16_to_4i32(ptr%ptr, ptr%dst) {
+; CHECK-LABEL: load_zext_4i16_to_4i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ld.d $a0, $a0, 0
+; CHECK-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI4_0)
+; CHECK-NEXT:    vld $vr0, $a2, %pc_lo12(.LCPI4_0)
+; CHECK-NEXT:    vinsgr2vr.d $vr1, $a0, 0
+; CHECK-NEXT:    vrepli.b $vr2, 0
+; CHECK-NEXT:    vshuf.h $vr0, $vr1, $vr2
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    ret
+entry:
+  %A = load <4 x i16>, ptr %ptr
+  %B = zext <4 x i16> %A to <4 x i32>
+  store <4 x i32> %B, ptr %dst
+  ret void
+}
+
+define void @load_zext_2i32_to_2i64(ptr%ptr, ptr%dst) {
+; CHECK-LABEL: load_zext_2i32_to_2i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ld.d $a0, $a0, 0
+; CHECK-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI5_0)
+; CHECK-NEXT:    vld $vr0, $a2, %pc_lo12(.LCPI5_0)
+; CHECK-NEXT:    vinsgr2vr.d $vr1, $a0, 0
+; CHECK-NEXT:    vrepli.b $vr2, 0
+; CHECK-NEXT:    vshuf.w $vr0, $vr1, $vr2
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    ret
+entry:
+  %A = load <2 x i32>, ptr %ptr
+  %B = zext <2 x i32> %A to <2 x i64>
+  store <2 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_zext_16i8_to_16i16(ptr%ptr, ptr%dst) {
+; CHECK-LABEL: load_zext_16i8_to_16i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vld $vr0, $a0, 0
+; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI6_0)
+; CHECK-NEXT:    vld $vr1, $a0, %pc_lo12(.LCPI6_0)
+; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI6_1)
+; CHECK-NEXT:    vld $vr2, $a0, %pc_lo12(.LCPI6_1)
+; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI6_2)
+; CHECK-NEXT:    vld $vr3, $a0, %pc_lo12(.LCPI6_2)
+; CHECK-NEXT:    vshuf.b $vr1, $vr0, $vr0, $vr1
+; CHECK-NEXT:    vrepli.b $vr4, 0
+; CHECK-NEXT:    vshuf.b $vr1, $vr4, $vr1, $vr2
+; CHECK-NEXT:    vshuf.b $vr0, $vr0, $vr4, $vr3
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    vst $vr1, $a1, 16
+; CHECK-NEXT:    ret
+entry:
+  %A = load <16 x i8>, ptr %ptr
+  %B = zext <16 x i8> %A to <16 x i16>
+  store <16 x i16> %B, ptr %dst
+  ret void
+}
+
+define void @load_zext_16i8_to_16i32(ptr%ptr, ptr%dst) {
+; CHECK-LABEL: load_zext_16i8_to_16i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vld $vr0, $a0, 0
+; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI7_0)
+; CHECK-NEXT:    vld $vr1, $a0, %pc_lo12(.LCPI7_0)
+; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI7_1)
+; CHECK-NEXT:    vld $vr2, $a0, %pc_lo12(.LCPI7_1)
+; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI7_2)
+; CHECK-NEXT:    vld $vr3, $a0, %pc_lo12(.LCPI7_2)
+; CHECK-NEXT:    vshuf.b $vr1, $vr0, $vr0, $vr1
+; CHECK-NEXT:    vrepli.b $vr4, 0
+; CHECK-NEXT:    vshuf.b $vr1, $vr4, $vr1, $vr2
+; CHECK-NEXT:    vshuf.b $vr3, $vr0, $vr0, $vr3
+; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI7_3)
+; CHECK-NEXT:    vld $vr5, $a0, %pc_lo12(.LCPI7_3)
+; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI7_4)
+; CHECK-NEXT:    vld $vr6, $a0, %pc_lo12(.LCPI7_4)
+; CHECK-NEXT:    vshuf.b $vr3, $vr4, $vr3, $vr2
+; CHECK-NEXT:    vshuf.b $vr5, $vr0, $vr0, $vr5
+; CHECK-NEXT:    vshuf.b $vr2, $vr4, $vr5, $vr2
+; CHECK-NEXT:    vshuf.b $vr0, $vr0, $vr4, $vr6
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    vst $vr2, $a1, 48
+; CHECK-NEXT:    vst $vr3, $a1, 32
+; CHECK-NEXT:    vst $vr1, $a1, 16
+; CHECK-NEXT:    ret
+entry:
+  %A = load <16 x i8>, ptr %ptr
+  %B = zext <16 x i8> %A to <16 x i32>
+  store <16 x i32> %B, ptr %dst
+  ret void
+}
+
+define void @load_zext_16i8_to_16i64(ptr%ptr, ptr%dst) {
+; CHECK-LABEL: load_zext_16i8_to_16i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vld $vr0, $a0, 0
+; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI8_0)
+; CHECK-NEXT:    vld $vr1, $a0, %pc_lo12(.LCPI8_0)
+; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI8_1)
+; CHECK-NEXT:    vld $vr2, $a0, %pc_lo12(.LCPI8_1)
+; CHECK-NEXT:    vshuf4i.b $vr3, $vr0, 14
+; CHECK-NEXT:    vrepli.b $vr4, 0
+; CHECK-NEXT:    vshuf.b $vr3, $vr4, $vr3, $vr1
+; CHECK-NEXT:    vshuf.b $vr2, $vr0, $vr0, $vr2
+; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI8_2)
+; CHECK-NEXT:    vld $vr5, $a0, %pc_lo12(.LCPI8_2)
+; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI8_3)
+; CHECK-NEXT:    vld $vr6, $a0, %pc_lo12(.LCPI8_3)
+; CHECK-NEXT:    vshuf.b $vr2, $vr4, $vr2, $vr1
+; CHECK-NEXT:    vshuf.b $vr5, $vr0, $vr0, $vr5
+; CHECK-NEXT:    vshuf.b $vr5, $vr4, $vr5, $vr1
+; CHECK-NEXT:    vshuf.b $vr6, $vr0, $vr0, $vr6
+; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI8_4)
+; CHECK-NEXT:    vld $vr7, $a0, %pc_lo12(.LCPI8_4)
+; CHECK-NE...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/126456


More information about the llvm-commits mailing list