[llvm-branch-commits] [llvm] [LoongArch] Introduce instruction patterns for vector sign/zero extensions (PR #160810)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Sep 25 21:59:29 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-loongarch
Author: hev (heiher)
Changes:
This patch adds legalization rules and instruction-selection patterns for vector sign- and zero-extension operations on LSX and LASX.
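A minimal sketch of what the new patterns target (a hypothetical function, not one of the PR's test cases): with `+lasx`, a plain vector `sext` like the one below should now select to a single `vext2xv.d.w` instead of the previous unpack-and-shift sequence.

```llvm
; Hypothetical illustration; compile with e.g.
;   llc -mtriple=loongarch64 -mattr=+lasx
define <4 x i64> @sext_v4i32_to_v4i64(<4 x i32> %v) {
  ; Maps onto the new (v4i64 (sext v4i32)) selection pattern.
  %r = sext <4 x i32> %v to <4 x i64>
  ret <4 x i64> %r
}
```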
---
Patch is 155.43 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/160810.diff
10 Files Affected:
- (modified) llvm/lib/Target/LoongArch/LoongArch.td (+1)
- (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+13)
- (modified) llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td (+60)
- (modified) llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td (+26)
- (modified) llvm/test/CodeGen/LoongArch/lasx/vec-sext.ll (+104-795)
- (modified) llvm/test/CodeGen/LoongArch/lasx/vec-zext.ll (+112-935)
- (modified) llvm/test/CodeGen/LoongArch/lsx/vec-sext.ll (+212-274)
- (modified) llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-sign-ext.ll (+14-22)
- (modified) llvm/test/CodeGen/LoongArch/lsx/vec-zext.ll (+221-178)
- (modified) llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll (+22-35)
``````````diff
diff --git a/llvm/lib/Target/LoongArch/LoongArch.td b/llvm/lib/Target/LoongArch/LoongArch.td
index 6497ff999f6fa..62e837aad10b7 100644
--- a/llvm/lib/Target/LoongArch/LoongArch.td
+++ b/llvm/lib/Target/LoongArch/LoongArch.td
@@ -59,6 +59,7 @@ def FeatureExtLSX
: SubtargetFeature<"lsx", "HasExtLSX", "true",
"'LSX' (Loongson SIMD Extension)", [FeatureBasicD]>;
def HasExtLSX : Predicate<"Subtarget->hasExtLSX()">;
+def IsExtLSX : Predicate<"Subtarget->hasExtLSX() && !Subtarget->hasExtLASX()">;
// Loongson Advanced SIMD eXtension (LASX)
def FeatureExtLASX
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 801e557a22520..104b315d9bfcc 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -385,6 +385,10 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
}
+ for (MVT VT : {MVT::v2i64, MVT::v4i32, MVT::v8i16}) {
+ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal);
+ setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
+ }
}
// Set operations for 'LASX' feature.
@@ -446,6 +450,15 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
VT, Expand);
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
}
+ for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16}) {
+ setOperationAction(ISD::SIGN_EXTEND, VT, Legal);
+ setOperationAction(ISD::ZERO_EXTEND, VT, Legal);
+ }
+ for (MVT VT :
+ {MVT::v2i64, MVT::v4i32, MVT::v4i64, MVT::v8i16, MVT::v8i32}) {
+ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal);
+ setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
+ }
}
// Set DAG combine for LA32 and LA64.
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index adfe990ba1234..b3389463b633c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -2063,6 +2063,66 @@ defm : subvector_subreg_lowering<LSX128, v2f64, LASX256, v4f64, 2, sub_128>;
defm : subvector_subreg_lowering<LSX128, v8i16, LASX256, v16i16, 8, sub_128>;
defm : subvector_subreg_lowering<LSX128, v16i8, LASX256, v32i8, 16, sub_128>;
+// Sign extensions
+def : Pat<(v4i64 (sext v4i32:$vj)),
+ (v4i64 (VEXT2XV_D_W (SUBREG_TO_REG (i64 0), v4i32:$vj, sub_128)))>;
+def : Pat<(v8i32 (sext v8i16:$vj)),
+ (v8i32 (VEXT2XV_W_H (SUBREG_TO_REG (i64 0), v8i16:$vj, sub_128)))>;
+def : Pat<(v16i16 (sext v16i8:$vj)),
+ (v16i16 (VEXT2XV_H_B (SUBREG_TO_REG (i64 0), v16i8:$vj, sub_128)))>;
+
+def : Pat<(v2i64 (sext_invec v16i8:$vj)),
+ (v2i64 (EXTRACT_SUBREG (VEXT2XV_D_B (SUBREG_TO_REG (i64 0), v16i8:$vj, sub_128)),
+ sub_128))>;
+def : Pat<(v2i64 (sext_invec v8i16:$vj)),
+ (v2i64 (EXTRACT_SUBREG (VEXT2XV_D_H (SUBREG_TO_REG (i64 0), v8i16:$vj, sub_128)),
+ sub_128))>;
+def : Pat<(v2i64 (sext_invec v4i32:$vj)),
+ (v2i64 (EXTRACT_SUBREG (VEXT2XV_D_W (SUBREG_TO_REG (i64 0), v4i32:$vj, sub_128)),
+ sub_128))>;
+def : Pat<(v4i32 (sext_invec v16i8:$vj)),
+ (v4i32 (EXTRACT_SUBREG (VEXT2XV_W_B (SUBREG_TO_REG (i64 0), v16i8:$vj, sub_128)),
+ sub_128))>;
+def : Pat<(v4i32 (sext_invec v8i16:$vj)),
+ (v4i32 (EXTRACT_SUBREG (VEXT2XV_W_H (SUBREG_TO_REG (i64 0), v8i16:$vj, sub_128)),
+ sub_128))>;
+def : Pat<(v4i64 (sext_invec v32i8:$xj)), (v4i64 (VEXT2XV_D_B v32i8:$xj))>;
+def : Pat<(v4i64 (sext_invec v16i16:$xj)), (v4i64 (VEXT2XV_D_H v16i16:$xj))>;
+def : Pat<(v8i16 (sext_invec v16i8:$vj)),
+ (v8i16 (EXTRACT_SUBREG (VEXT2XV_H_B (SUBREG_TO_REG (i64 0), v16i8:$vj, sub_128)),
+ sub_128))>;
+def : Pat<(v8i32 (sext_invec v32i8:$xj)), (v8i32 (VEXT2XV_W_B v32i8:$xj))>;
+
+// Zero extensions
+def : Pat<(v4i64 (zext v4i32:$vj)),
+ (v4i64 (VEXT2XV_DU_WU (SUBREG_TO_REG (i64 0), v4i32:$vj, sub_128)))>;
+def : Pat<(v8i32 (zext v8i16:$vj)),
+ (v8i32 (VEXT2XV_WU_HU (SUBREG_TO_REG (i64 0), v8i16:$vj, sub_128)))>;
+def : Pat<(v16i16 (zext v16i8:$vj)),
+ (v16i16 (VEXT2XV_HU_BU (SUBREG_TO_REG (i64 0), v16i8:$vj, sub_128)))>;
+
+def : Pat<(v2i64 (zext_invec v16i8:$vj)),
+ (v2i64 (EXTRACT_SUBREG (VEXT2XV_DU_BU (SUBREG_TO_REG (i64 0), v16i8:$vj, sub_128)),
+ sub_128))>;
+def : Pat<(v2i64 (zext_invec v8i16:$vj)),
+ (v2i64 (EXTRACT_SUBREG (VEXT2XV_DU_HU (SUBREG_TO_REG (i64 0), v8i16:$vj, sub_128)),
+ sub_128))>;
+def : Pat<(v2i64 (zext_invec v4i32:$vj)),
+ (v2i64 (EXTRACT_SUBREG (VEXT2XV_DU_WU (SUBREG_TO_REG (i64 0), v4i32:$vj, sub_128)),
+ sub_128))>;
+def : Pat<(v4i32 (zext_invec v16i8:$vj)),
+ (v4i32 (EXTRACT_SUBREG (VEXT2XV_WU_BU (SUBREG_TO_REG (i64 0), v16i8:$vj, sub_128)),
+ sub_128))>;
+def : Pat<(v4i32 (zext_invec v8i16:$vj)),
+ (v4i32 (EXTRACT_SUBREG (VEXT2XV_WU_HU (SUBREG_TO_REG (i64 0), v8i16:$vj, sub_128)),
+ sub_128))>;
+def : Pat<(v4i64 (zext_invec v32i8:$xj)), (v4i64 (VEXT2XV_DU_BU v32i8:$xj))>;
+def : Pat<(v4i64 (zext_invec v16i16:$xj)), (v4i64 (VEXT2XV_DU_HU v16i16:$xj))>;
+def : Pat<(v8i16 (zext_invec v16i8:$vj)),
+ (v8i16 (EXTRACT_SUBREG (VEXT2XV_HU_BU (SUBREG_TO_REG (i64 0), v16i8:$vj, sub_128)),
+ sub_128))>;
+def : Pat<(v8i32 (zext_invec v32i8:$xj)), (v8i32 (VEXT2XV_WU_BU v32i8:$xj))>;
+
} // Predicates = [HasExtLASX]
/// Intrinsic pattern
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index b0eb51a92c6c6..eb1fe93475f50 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -2174,6 +2174,32 @@ def : Pat<(loongarch_vmsknez (v16i8 LSX128:$vj)), (PseudoVMSKNEZ_B LSX128:$vj)>;
} // Predicates = [HasExtLSX]
+let Predicates = [IsExtLSX] in {
+
+// Sign extensions
+def : Pat<(v2i64 (sext_invec v16i8:$vj)),
+ (v2i64 (VSLLWIL_D_W (VSLLWIL_W_H (VSLLWIL_H_B v16i8:$vj, 0), 0), 0))>;
+def : Pat<(v2i64 (sext_invec v8i16:$vj)),
+ (v2i64 (VSLLWIL_D_W (VSLLWIL_W_H v8i16:$vj, 0), 0))>;
+def : Pat<(v2i64 (sext_invec v4i32:$vj)), (v2i64 (VSLLWIL_D_W v4i32:$vj, 0))>;
+def : Pat<(v4i32 (sext_invec v16i8:$vj)),
+ (v4i32 (VSLLWIL_W_H (VSLLWIL_H_B v16i8:$vj, 0), 0))>;
+def : Pat<(v4i32 (sext_invec v8i16:$vj)), (v4i32 (VSLLWIL_W_H v8i16:$vj, 0))>;
+def : Pat<(v8i16 (sext_invec v16i8:$vj)), (v8i16 (VSLLWIL_H_B v16i8:$vj, 0))>;
+
+// Zero extensions
+def : Pat<(v2i64 (zext_invec v16i8:$vj)),
+ (v2i64 (VSLLWIL_DU_WU (VSLLWIL_WU_HU (VSLLWIL_HU_BU v16i8:$vj, 0), 0), 0))>;
+def : Pat<(v2i64 (zext_invec v8i16:$vj)),
+ (v2i64 (VSLLWIL_DU_WU (VSLLWIL_WU_HU v8i16:$vj, 0), 0))>;
+def : Pat<(v2i64 (zext_invec v4i32:$vj)), (v2i64 (VSLLWIL_DU_WU v4i32:$vj, 0))>;
+def : Pat<(v4i32 (zext_invec v16i8:$vj)),
+ (v4i32 (VSLLWIL_WU_HU (VSLLWIL_HU_BU v16i8:$vj, 0), 0))>;
+def : Pat<(v4i32 (zext_invec v8i16:$vj)), (v4i32 (VSLLWIL_WU_HU v8i16:$vj, 0))>;
+def : Pat<(v8i16 (zext_invec v16i8:$vj)), (v8i16 (VSLLWIL_HU_BU v16i8:$vj, 0))>;
+
+} // Predicates = [IsExtLSX]
+
/// Intrinsic pattern
class deriveLSXIntrinsic<string Inst> {
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-sext.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-sext.ll
index 953e6c45608c0..8884aacc16f51 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/vec-sext.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/vec-sext.ll
@@ -7,11 +7,7 @@ define void @load_sext_2i8_to_2i64(ptr %ptr, ptr %dst) {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: ld.h $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0
-; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr0
-; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0
-; CHECK-NEXT: vilvl.w $vr0, $vr0, $vr0
-; CHECK-NEXT: vslli.d $vr0, $vr0, 56
-; CHECK-NEXT: vsrai.d $vr0, $vr0, 56
+; CHECK-NEXT: vext2xv.d.b $xr0, $xr0
; CHECK-NEXT: vst $vr0, $a1, 0
; CHECK-NEXT: ret
entry:
@@ -26,10 +22,7 @@ define void @load_sext_2i16_to_2i64(ptr %ptr, ptr %dst) {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: ld.w $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
-; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0
-; CHECK-NEXT: vilvl.w $vr0, $vr0, $vr0
-; CHECK-NEXT: vslli.d $vr0, $vr0, 48
-; CHECK-NEXT: vsrai.d $vr0, $vr0, 48
+; CHECK-NEXT: vext2xv.d.h $xr0, $xr0
; CHECK-NEXT: vst $vr0, $a1, 0
; CHECK-NEXT: ret
entry:
@@ -45,9 +38,8 @@ define void @load_sext_2i32_to_2i64(ptr %ptr, ptr %dst) {
; LA32-NEXT: ld.w $a2, $a0, 0
; LA32-NEXT: ld.w $a0, $a0, 4
; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 2
-; LA32-NEXT: vslli.d $vr0, $vr0, 32
-; LA32-NEXT: vsrai.d $vr0, $vr0, 32
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vext2xv.d.w $xr0, $xr0
; LA32-NEXT: vst $vr0, $a1, 0
; LA32-NEXT: ret
;
@@ -55,9 +47,7 @@ define void @load_sext_2i32_to_2i64(ptr %ptr, ptr %dst) {
; LA64: # %bb.0: # %entry
; LA64-NEXT: ld.d $a0, $a0, 0
; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT: vshuf4i.w $vr0, $vr0, 16
-; LA64-NEXT: vslli.d $vr0, $vr0, 32
-; LA64-NEXT: vsrai.d $vr0, $vr0, 32
+; LA64-NEXT: vext2xv.d.w $xr0, $xr0
; LA64-NEXT: vst $vr0, $a1, 0
; LA64-NEXT: ret
entry:
@@ -72,10 +62,7 @@ define void @load_sext_4i8_to_4i32(ptr %ptr, ptr %dst) {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: ld.w $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
-; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr0
-; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0
-; CHECK-NEXT: vslli.w $vr0, $vr0, 24
-; CHECK-NEXT: vsrai.w $vr0, $vr0, 24
+; CHECK-NEXT: vext2xv.w.b $xr0, $xr0
; CHECK-NEXT: vst $vr0, $a1, 0
; CHECK-NEXT: ret
entry:
@@ -89,13 +76,8 @@ define void @load_sext_4i8_to_4i64(ptr %ptr, ptr %dst) {
; CHECK-LABEL: load_sext_4i8_to_4i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: ld.w $a0, $a0, 0
-; CHECK-NEXT: pcalau12i $a2, %pc_hi20(.LCPI4_0)
-; CHECK-NEXT: xvld $xr0, $a2, %pc_lo12(.LCPI4_0)
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 0
-; CHECK-NEXT: xvpermi.d $xr1, $xr1, 68
-; CHECK-NEXT: xvshuf.b $xr0, $xr0, $xr1, $xr0
-; CHECK-NEXT: xvslli.d $xr0, $xr0, 56
-; CHECK-NEXT: xvsrai.d $xr0, $xr0, 56
+; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
+; CHECK-NEXT: vext2xv.d.b $xr0, $xr0
; CHECK-NEXT: xvst $xr0, $a1, 0
; CHECK-NEXT: ret
entry:
@@ -112,9 +94,7 @@ define void @load_sext_4i16_to_4i32(ptr %ptr, ptr %dst) {
; LA32-NEXT: ld.w $a0, $a0, 4
; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
-; LA32-NEXT: vilvl.h $vr0, $vr0, $vr0
-; LA32-NEXT: vslli.w $vr0, $vr0, 16
-; LA32-NEXT: vsrai.w $vr0, $vr0, 16
+; LA32-NEXT: vext2xv.w.h $xr0, $xr0
; LA32-NEXT: vst $vr0, $a1, 0
; LA32-NEXT: ret
;
@@ -122,9 +102,7 @@ define void @load_sext_4i16_to_4i32(ptr %ptr, ptr %dst) {
; LA64: # %bb.0: # %entry
; LA64-NEXT: ld.d $a0, $a0, 0
; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT: vilvl.h $vr0, $vr0, $vr0
-; LA64-NEXT: vslli.w $vr0, $vr0, 16
-; LA64-NEXT: vsrai.w $vr0, $vr0, 16
+; LA64-NEXT: vext2xv.w.h $xr0, $xr0
; LA64-NEXT: vst $vr0, $a1, 0
; LA64-NEXT: ret
entry:
@@ -139,27 +117,17 @@ define void @load_sext_4i16_to_4i64(ptr %ptr, ptr %dst) {
; LA32: # %bb.0: # %entry
; LA32-NEXT: ld.w $a2, $a0, 0
; LA32-NEXT: ld.w $a0, $a0, 4
-; LA32-NEXT: pcalau12i $a3, %pc_hi20(.LCPI6_0)
-; LA32-NEXT: xvld $xr0, $a3, %pc_lo12(.LCPI6_0)
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 1
-; LA32-NEXT: xvpermi.d $xr1, $xr1, 68
-; LA32-NEXT: xvshuf.h $xr0, $xr0, $xr1
-; LA32-NEXT: xvslli.d $xr0, $xr0, 48
-; LA32-NEXT: xvsrai.d $xr0, $xr0, 48
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vext2xv.d.h $xr0, $xr0
; LA32-NEXT: xvst $xr0, $a1, 0
; LA32-NEXT: ret
;
; LA64-LABEL: load_sext_4i16_to_4i64:
; LA64: # %bb.0: # %entry
; LA64-NEXT: ld.d $a0, $a0, 0
-; LA64-NEXT: pcalau12i $a2, %pc_hi20(.LCPI6_0)
-; LA64-NEXT: xvld $xr0, $a2, %pc_lo12(.LCPI6_0)
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
-; LA64-NEXT: xvpermi.d $xr1, $xr1, 68
-; LA64-NEXT: xvshuf.h $xr0, $xr0, $xr1
-; LA64-NEXT: xvslli.d $xr0, $xr0, 48
-; LA64-NEXT: xvsrai.d $xr0, $xr0, 48
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vext2xv.d.h $xr0, $xr0
; LA64-NEXT: xvst $xr0, $a1, 0
; LA64-NEXT: ret
entry:
@@ -170,43 +138,12 @@ entry:
}
define void @load_sext_4i32_to_4i64(ptr %ptr, ptr %dst) {
-; LA32-LABEL: load_sext_4i32_to_4i64:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: vld $vr0, $a0, 0
-; LA32-NEXT: vextrins.w $vr1, $vr0, 2
-; LA32-NEXT: vpickve2gr.w $a0, $vr0, 2
-; LA32-NEXT: srai.w $a0, $a0, 31
-; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 1
-; LA32-NEXT: vextrins.w $vr1, $vr0, 35
-; LA32-NEXT: vpickve2gr.w $a0, $vr0, 3
-; LA32-NEXT: srai.w $a0, $a0, 31
-; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 3
-; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
-; LA32-NEXT: srai.w $a0, $a0, 31
-; LA32-NEXT: vori.b $vr2, $vr0, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 1
-; LA32-NEXT: vextrins.w $vr2, $vr0, 33
-; LA32-NEXT: vpickve2gr.w $a0, $vr0, 1
-; LA32-NEXT: srai.w $a0, $a0, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 3
-; LA32-NEXT: xvpermi.q $xr2, $xr1, 2
-; LA32-NEXT: xvst $xr2, $a1, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: load_sext_4i32_to_4i64:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: vld $vr0, $a0, 0
-; LA64-NEXT: vpickve2gr.w $a0, $vr0, 2
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
-; LA64-NEXT: vpickve2gr.w $a0, $vr0, 3
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1
-; LA64-NEXT: vpickve2gr.w $a0, $vr0, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
-; LA64-NEXT: vpickve2gr.w $a0, $vr0, 1
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
-; LA64-NEXT: xvpermi.q $xr2, $xr1, 2
-; LA64-NEXT: xvst $xr2, $a1, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: load_sext_4i32_to_4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a0, 0
+; CHECK-NEXT: vext2xv.d.w $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: ret
entry:
%A = load <4 x i32>, ptr %ptr
%B = sext <4 x i32> %A to <4 x i64>
@@ -221,9 +158,7 @@ define void @load_sext_8i8_to_8i16(ptr %ptr, ptr %dst) {
; LA32-NEXT: ld.w $a0, $a0, 4
; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
-; LA32-NEXT: vilvl.b $vr0, $vr0, $vr0
-; LA32-NEXT: vslli.h $vr0, $vr0, 8
-; LA32-NEXT: vsrai.h $vr0, $vr0, 8
+; LA32-NEXT: vext2xv.h.b $xr0, $xr0
; LA32-NEXT: vst $vr0, $a1, 0
; LA32-NEXT: ret
;
@@ -231,9 +166,7 @@ define void @load_sext_8i8_to_8i16(ptr %ptr, ptr %dst) {
; LA64: # %bb.0: # %entry
; LA64-NEXT: ld.d $a0, $a0, 0
; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT: vilvl.b $vr0, $vr0, $vr0
-; LA64-NEXT: vslli.h $vr0, $vr0, 8
-; LA64-NEXT: vsrai.h $vr0, $vr0, 8
+; LA64-NEXT: vext2xv.h.b $xr0, $xr0
; LA64-NEXT: vst $vr0, $a1, 0
; LA64-NEXT: ret
entry:
@@ -248,27 +181,17 @@ define void @load_sext_8i8_to_8i32(ptr %ptr, ptr %dst) {
; LA32: # %bb.0: # %entry
; LA32-NEXT: ld.w $a2, $a0, 0
; LA32-NEXT: ld.w $a0, $a0, 4
-; LA32-NEXT: pcalau12i $a3, %pc_hi20(.LCPI9_0)
-; LA32-NEXT: xvld $xr0, $a3, %pc_lo12(.LCPI9_0)
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 1
-; LA32-NEXT: xvpermi.d $xr1, $xr1, 68
-; LA32-NEXT: xvshuf.b $xr0, $xr0, $xr1, $xr0
-; LA32-NEXT: xvslli.w $xr0, $xr0, 24
-; LA32-NEXT: xvsrai.w $xr0, $xr0, 24
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vext2xv.w.b $xr0, $xr0
; LA32-NEXT: xvst $xr0, $a1, 0
; LA32-NEXT: ret
;
; LA64-LABEL: load_sext_8i8_to_8i32:
; LA64: # %bb.0: # %entry
; LA64-NEXT: ld.d $a0, $a0, 0
-; LA64-NEXT: pcalau12i $a2, %pc_hi20(.LCPI9_0)
-; LA64-NEXT: xvld $xr0, $a2, %pc_lo12(.LCPI9_0)
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
-; LA64-NEXT: xvpermi.d $xr1, $xr1, 68
-; LA64-NEXT: xvshuf.b $xr0, $xr0, $xr1, $xr0
-; LA64-NEXT: xvslli.w $xr0, $xr0, 24
-; LA64-NEXT: xvsrai.w $xr0, $xr0, 24
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vext2xv.w.b $xr0, $xr0
; LA64-NEXT: xvst $xr0, $a1, 0
; LA64-NEXT: ret
entry:
@@ -282,21 +205,13 @@ define void @load_sext_8i8_to_8i64(ptr %ptr, ptr %dst) {
; LA32-LABEL: load_sext_8i8_to_8i64:
; LA32: # %bb.0: # %entry
; LA32-NEXT: ld.w $a2, $a0, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
; LA32-NEXT: ld.w $a0, $a0, 4
-; LA32-NEXT: xvpermi.d $xr1, $xr0, 68
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vext2xv.d.b $xr1, $xr0
; LA32-NEXT: # kill: def $vr0 killed $vr0 killed $xr0
-; LA32-NEXT: pcalau12i $a2, %pc_hi20(.LCPI10_0)
-; LA32-NEXT: xvld $xr2, $a2, %pc_lo12(.LCPI10_0)
; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
; LA32-NEXT: vreplvei.w $vr0, $vr0, 1
-; LA32-NEXT: xvpermi.d $xr0, $xr0, 68
-; LA32-NEXT: xvshuf.b $xr0, $xr0, $xr0, $xr2
-; LA32-NEXT: xvslli.d $xr0, $xr0, 56
-; LA32-NEXT: xvsrai.d $xr0, $xr0, 56
-; LA32-NEXT: xvshuf.b $xr1, $xr0, $xr1, $xr2
-; LA32-NEXT: xvslli.d $xr1, $xr1, 56
-; LA32-NEXT: xvsrai.d $xr1, $xr1, 56
+; LA32-NEXT: vext2xv.d.b $xr0, $xr0
; LA32-NEXT: xvst $xr1, $a1, 0
; LA32-NEXT: xvst $xr0, $a1, 32
; LA32-NEXT: ret
@@ -304,20 +219,12 @@ define void @load_sext_8i8_to_8i64(ptr %ptr, ptr %dst) {
; LA64-LABEL: load_sext_8i8_to_8i64:
; LA64: # %bb.0: # %entry
; LA64-NEXT: ld.d $a0, $a0, 0
-; LA64-NEXT: pcalau12i $a2, %pc_hi20(.LCPI10_0)
-; LA64-NEXT: xvld $xr0, $a2, %pc_lo12(.LCPI10_0)
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
-; LA64-NEXT: vsrli.d $vr2, $vr1, 32
-; LA64-NEXT: xvpermi.d $xr2, $xr2, 68
-; LA64-NEXT: xvshuf.b $xr2, $xr0, $xr2, $xr0
-; LA64-NEXT: xvslli.d $xr2, $xr2, 56
-; LA64-NEXT: xvsrai.d $xr2, $xr2, 56
-; LA64-NEXT: xvpermi.d $xr1, $xr1, 68
-; LA64-NEXT: xvshuf.b $xr0, $xr0, $xr1, $xr0
-; LA64-NEXT: xvslli.d $xr0, $xr0, 56
-; LA64-NEXT: xvsrai.d $xr0, $xr0, 56
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vsrli.d $vr1, $vr0, 32
+; LA64-NEXT: vext2xv.d.b $xr1, $xr1
+; LA64-NEXT: vext2xv.d.b $xr0, $xr0
; LA64-NEXT: xvst $xr0, $a1, 0
-; LA64-NEXT: xvst $xr2, $a1, 32
+; LA64-NEXT: xvst $xr1, $a1, 32
; LA64-NEXT: ret
entry:
%A = load <8 x i8>, ptr %ptr
@@ -330,32 +237,8 @@ define void @load_sext_8i16_to_8i32(ptr %ptr, ptr %dst) {
; CHECK-LABEL: load_sext_8i16_to_8i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 4
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 0
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 5
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 1
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 6
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 2
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 7
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 3
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 0
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 1
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 2
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 2
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 3
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 3
-; CHECK-NEXT: xvpermi.q $xr2, $xr1, 2
-; CHECK-NEXT: xvst $xr2, $a1, 0
+; CHECK-NEXT: vext2xv.w.h $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a1, 0
; CHECK-NEXT: ret
entry:
%A = load <8 ...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/160810
More information about the llvm-branch-commits mailing list