[llvm] [LLVM][SVE] Improve code generation for vector.insert into poison. (PR #105665)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 22 07:15:18 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-aarch64
Author: Paul Walker (paulwalker-arm)
Changes
---
Patch is 29.37 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/105665.diff
6 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+4)
- (modified) llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td (+20-4)
- (modified) llvm/lib/Target/AArch64/SVEInstrFormats.td (+30-9)
- (modified) llvm/test/CodeGen/AArch64/sve-bitcast.ll (+46-88)
- (modified) llvm/test/CodeGen/AArch64/sve-extract-fixed-from-scalable-vector.ll (+5-19)
- (modified) llvm/test/CodeGen/AArch64/sve-insert-scalable-vector.ll (+20-88)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index e98b430e62389b..c614daaf4c6a9c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -14867,6 +14867,10 @@ SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
}
+ // We can select these directly.
+ if (isTypeLegal(InVT) && Vec0.isUndef())
+ return Op;
+
// Ensure the subvector is half the size of the main vector.
if (VT.getVectorElementCount() != (InVT.getVectorElementCount() * 2))
return SDValue();
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 3f4651ea9c2b68..d7e58eb800eea0 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1938,19 +1938,35 @@ let Predicates = [HasSVEorSME] in {
def : Pat<(nxv2bf16 (extract_subvector nxv8bf16:$Zs, (i64 6))),
(UUNPKHI_ZZ_D (UUNPKHI_ZZ_S ZPR:$Zs))>;
+ // Insert subvectors into FP SVE vectors.
+ foreach VT = [nxv4f16, nxv4f32, nxv4bf16] in
+ foreach idx = [0, 2] in
+ def : Pat<(VT (vector_insert_subvec undef, SVEType<VT>.HalfLength:$src, (i64 idx))),
+ (UZP1_ZZZ_S $src, $src)>;
+
+ foreach VT = [nxv8f16, nxv8bf16] in {
+ foreach idx = [0, 4] in
+ def : Pat<(VT (vector_insert_subvec undef, SVEType<VT>.HalfLength:$src, (i64 idx))),
+ (UZP1_ZZZ_H $src, $src)>;
+
+ foreach idx = [0, 2, 4, 6] in
+ def : Pat<(VT (vector_insert_subvec undef, SVEType<VT>.QuarterLength:$src, (i64 idx))),
+ (UZP1_ZZZ_H (UZP1_ZZZ_H $src, $src), (UZP1_ZZZ_H $src, $src))>;
+ }
+
// extract/insert 64-bit fixed length vector from/into a scalable vector
foreach VT = [v8i8, v4i16, v2i32, v1i64, v4f16, v2f32, v1f64, v4bf16] in {
- def : Pat<(VT (vector_extract_subvec SVEContainerVT<VT>.Value:$Zs, (i64 0))),
+ def : Pat<(VT (vector_extract_subvec NEONType<VT>.SVEContainer:$Zs, (i64 0))),
(EXTRACT_SUBREG ZPR:$Zs, dsub)>;
- def : Pat<(SVEContainerVT<VT>.Value (vector_insert_subvec undef, (VT V64:$src), (i64 0))),
+ def : Pat<(NEONType<VT>.SVEContainer (vector_insert_subvec undef, (VT V64:$src), (i64 0))),
(INSERT_SUBREG (IMPLICIT_DEF), $src, dsub)>;
}
// extract/insert 128-bit fixed length vector from/into a scalable vector
foreach VT = [v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64, v8bf16] in {
- def : Pat<(VT (vector_extract_subvec SVEContainerVT<VT>.Value:$Zs, (i64 0))),
+ def : Pat<(VT (vector_extract_subvec NEONType<VT>.SVEContainer:$Zs, (i64 0))),
(EXTRACT_SUBREG ZPR:$Zs, zsub)>;
- def : Pat<(SVEContainerVT<VT>.Value (vector_insert_subvec undef, (VT V128:$src), (i64 0))),
+ def : Pat<(NEONType<VT>.SVEContainer (vector_insert_subvec undef, (VT V128:$src), (i64 0))),
(INSERT_SUBREG (IMPLICIT_DEF), $src, zsub)>;
}
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 107bc79f70dbcb..4f0cf69f05f194 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -10,11 +10,10 @@
//
//===----------------------------------------------------------------------===//
-// Helper class to find the largest legal scalable vector type that can hold VT.
-// Non-matches return VT, which often means VT is the container type.
-class SVEContainerVT<ValueType VT> {
- ValueType Value = !cond(
- // fixed length vectors
+// Helper class to hold conversions of legal fixed-length vector types.
+class NEONType<ValueType VT> {
+ // The largest legal scalable vector type that can hold VT.
+ ValueType SVEContainer = !cond(
!eq(VT, v8i8): nxv16i8,
!eq(VT, v16i8): nxv16i8,
!eq(VT, v4i16): nxv8i16,
@@ -31,13 +30,35 @@ class SVEContainerVT<ValueType VT> {
!eq(VT, v2f64): nxv2f64,
!eq(VT, v4bf16): nxv8bf16,
!eq(VT, v8bf16): nxv8bf16,
- // unpacked scalable vectors
+ true : untyped);
+}
+
+// Helper class to hold conversions of legal scalable vector types.
+class SVEType<ValueType VT> {
+ // The largest legal scalable vector type that can hold VT.
+  // Non-matches return VT because only packed types remain.
+ ValueType Packed = !cond(
!eq(VT, nxv2f16): nxv8f16,
!eq(VT, nxv4f16): nxv8f16,
!eq(VT, nxv2f32): nxv4f32,
!eq(VT, nxv2bf16): nxv8bf16,
!eq(VT, nxv4bf16): nxv8bf16,
true : VT);
+
+ // The legal scalable vector that is half the length of VT.
+ ValueType HalfLength = !cond(
+ !eq(VT, nxv8f16): nxv4f16,
+ !eq(VT, nxv4f16): nxv2f16,
+ !eq(VT, nxv4f32): nxv2f32,
+ !eq(VT, nxv8bf16): nxv4bf16,
+ !eq(VT, nxv4bf16): nxv2bf16,
+ true : untyped);
+
+  // The legal scalable vector that is a quarter the length of VT.
+ ValueType QuarterLength = !cond(
+ !eq(VT, nxv8f16): nxv2f16,
+ !eq(VT, nxv8bf16): nxv2bf16,
+ true : untyped);
}
def SDT_AArch64Setcc : SDTypeProfile<1, 4, [
@@ -2959,10 +2980,10 @@ multiclass sve_fp_2op_p_zd<bits<7> opc, string asm,
def NAME : sve_fp_2op_p_zd<opc, asm, i_zprtype, o_zprtype, Sz>,
SVEPseudo2Instr<NAME, 1>;
// convert vt1 to a packed type for the intrinsic patterns
- defvar packedvt1 = SVEContainerVT<vt1>.Value;
+ defvar packedvt1 = SVEType<vt1>.Packed;
// convert vt3 to a packed type for the intrinsic patterns
- defvar packedvt3 = SVEContainerVT<vt3>.Value;
+ defvar packedvt3 = SVEType<vt3>.Packed;
def : SVE_3_Op_Pat<packedvt1, int_op, packedvt1, vt2, packedvt3, !cast<Instruction>(NAME)>;
def : SVE_1_Op_Passthru_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME)>;
@@ -2982,7 +3003,7 @@ multiclass sve_fp_2op_p_zdr<bits<7> opc, string asm,
SVEPseudo2Instr<NAME, 1>;
// convert vt1 to a packed type for the intrinsic patterns
- defvar packedvt1 = SVEContainerVT<vt1>.Value;
+ defvar packedvt1 = SVEType<vt1>.Packed;
def : SVE_3_Op_Pat<packedvt1, int_op, packedvt1, vt2, vt3, !cast<Instruction>(NAME)>;
def : SVE_1_Op_Passthru_Round_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME)>;
diff --git a/llvm/test/CodeGen/AArch64/sve-bitcast.ll b/llvm/test/CodeGen/AArch64/sve-bitcast.ll
index 95f43ba5126323..e3b961237018b9 100644
--- a/llvm/test/CodeGen/AArch64/sve-bitcast.ll
+++ b/llvm/test/CodeGen/AArch64/sve-bitcast.ll
@@ -1763,18 +1763,13 @@ define <vscale x 1 x i64> @bitcast_nxv4f16_to_nxv1i64(<vscale x 4 x half> %v) #0
; CHECK_BE-LABEL: bitcast_nxv4f16_to_nxv1i64:
; CHECK_BE: // %bb.0:
; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK_BE-NEXT: addvl sp, sp, #-3
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK_BE-NEXT: ptrue p0.h
-; CHECK_BE-NEXT: ptrue p1.s
-; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp]
-; CHECK_BE-NEXT: ld1w { z0.s }, p1/z, [sp]
; CHECK_BE-NEXT: ptrue p1.d
-; CHECK_BE-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp, #1, mul vl]
-; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp, #1, mul vl]
-; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp, #2, mul vl]
-; CHECK_BE-NEXT: ld1d { z0.d }, p1/z, [sp, #2, mul vl]
-; CHECK_BE-NEXT: addvl sp, sp, #3
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp]
+; CHECK_BE-NEXT: ld1d { z0.d }, p1/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 4 x half> %v to <vscale x 1 x i64>
@@ -1790,17 +1785,13 @@ define <vscale x 1 x i64> @bitcast_nxv2f32_to_nxv1i64(<vscale x 2 x float> %v) #
; CHECK_BE-LABEL: bitcast_nxv2f32_to_nxv1i64:
; CHECK_BE: // %bb.0:
; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK_BE-NEXT: addvl sp, sp, #-3
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK_BE-NEXT: ptrue p0.s
; CHECK_BE-NEXT: ptrue p1.d
; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp]
; CHECK_BE-NEXT: ld1d { z0.d }, p1/z, [sp]
-; CHECK_BE-NEXT: uzp1 z0.s, z0.s, z0.s
-; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp, #1, mul vl]
-; CHECK_BE-NEXT: ld1w { z0.s }, p0/z, [sp, #1, mul vl]
-; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp, #2, mul vl]
-; CHECK_BE-NEXT: ld1d { z0.d }, p1/z, [sp, #2, mul vl]
-; CHECK_BE-NEXT: addvl sp, sp, #3
+; CHECK_BE-NEXT: addvl sp, sp, #1
; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 2 x float> %v to <vscale x 1 x i64>
@@ -1835,18 +1826,13 @@ define <vscale x 1 x i64> @bitcast_nxv4bf16_to_nxv1i64(<vscale x 4 x bfloat> %v)
; CHECK_BE-LABEL: bitcast_nxv4bf16_to_nxv1i64:
; CHECK_BE: // %bb.0:
; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK_BE-NEXT: addvl sp, sp, #-3
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK_BE-NEXT: ptrue p0.h
-; CHECK_BE-NEXT: ptrue p1.s
-; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp]
-; CHECK_BE-NEXT: ld1w { z0.s }, p1/z, [sp]
; CHECK_BE-NEXT: ptrue p1.d
-; CHECK_BE-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp, #1, mul vl]
-; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp, #1, mul vl]
-; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp, #2, mul vl]
-; CHECK_BE-NEXT: ld1d { z0.d }, p1/z, [sp, #2, mul vl]
-; CHECK_BE-NEXT: addvl sp, sp, #3
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp]
+; CHECK_BE-NEXT: ld1d { z0.d }, p1/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 4 x bfloat> %v to <vscale x 1 x i64>
@@ -2302,18 +2288,13 @@ define <vscale x 1 x double> @bitcast_nxv4f16_to_nxv1f64(<vscale x 4 x half> %v)
; CHECK_BE-LABEL: bitcast_nxv4f16_to_nxv1f64:
; CHECK_BE: // %bb.0:
; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK_BE-NEXT: addvl sp, sp, #-3
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK_BE-NEXT: ptrue p0.h
-; CHECK_BE-NEXT: ptrue p1.s
-; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp]
-; CHECK_BE-NEXT: ld1w { z0.s }, p1/z, [sp]
; CHECK_BE-NEXT: ptrue p1.d
-; CHECK_BE-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp, #1, mul vl]
-; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp, #1, mul vl]
-; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp, #2, mul vl]
-; CHECK_BE-NEXT: ld1d { z0.d }, p1/z, [sp, #2, mul vl]
-; CHECK_BE-NEXT: addvl sp, sp, #3
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp]
+; CHECK_BE-NEXT: ld1d { z0.d }, p1/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 4 x half> %v to <vscale x 1 x double>
@@ -2329,17 +2310,13 @@ define <vscale x 1 x double> @bitcast_nxv2f32_to_nxv1f64(<vscale x 2 x float> %v
; CHECK_BE-LABEL: bitcast_nxv2f32_to_nxv1f64:
; CHECK_BE: // %bb.0:
; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK_BE-NEXT: addvl sp, sp, #-3
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK_BE-NEXT: ptrue p0.s
; CHECK_BE-NEXT: ptrue p1.d
; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp]
; CHECK_BE-NEXT: ld1d { z0.d }, p1/z, [sp]
-; CHECK_BE-NEXT: uzp1 z0.s, z0.s, z0.s
-; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp, #1, mul vl]
-; CHECK_BE-NEXT: ld1w { z0.s }, p0/z, [sp, #1, mul vl]
-; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp, #2, mul vl]
-; CHECK_BE-NEXT: ld1d { z0.d }, p1/z, [sp, #2, mul vl]
-; CHECK_BE-NEXT: addvl sp, sp, #3
+; CHECK_BE-NEXT: addvl sp, sp, #1
; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 2 x float> %v to <vscale x 1 x double>
@@ -2355,18 +2332,13 @@ define <vscale x 1 x double> @bitcast_nxv4bf16_to_nxv1f64(<vscale x 4 x bfloat>
; CHECK_BE-LABEL: bitcast_nxv4bf16_to_nxv1f64:
; CHECK_BE: // %bb.0:
; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK_BE-NEXT: addvl sp, sp, #-3
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK_BE-NEXT: ptrue p0.h
-; CHECK_BE-NEXT: ptrue p1.s
-; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp]
-; CHECK_BE-NEXT: ld1w { z0.s }, p1/z, [sp]
; CHECK_BE-NEXT: ptrue p1.d
-; CHECK_BE-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp, #1, mul vl]
-; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp, #1, mul vl]
-; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp, #2, mul vl]
-; CHECK_BE-NEXT: ld1d { z0.d }, p1/z, [sp, #2, mul vl]
-; CHECK_BE-NEXT: addvl sp, sp, #3
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp]
+; CHECK_BE-NEXT: ld1d { z0.d }, p1/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 4 x bfloat> %v to <vscale x 1 x double>
@@ -2811,28 +2783,21 @@ define <vscale x 1 x i32> @bitcast_nxv2i16_to_nxv1i32(<vscale x 2 x i16> %v) #0
define <vscale x 1 x i32> @bitcast_nxv2f16_to_nxv1i32(<vscale x 2 x half> %v) #0 {
; CHECK-LABEL: bitcast_nxv2f16_to_nxv1i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: addvl sp, sp, #-1
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: st1h { z0.d }, p0, [sp]
-; CHECK-NEXT: ld1w { z0.s }, p1/z, [sp]
-; CHECK-NEXT: addvl sp, sp, #1
-; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: ret
;
; CHECK_BE-LABEL: bitcast_nxv2f16_to_nxv1i32:
; CHECK_BE: // %bb.0:
; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK_BE-NEXT: addvl sp, sp, #-2
-; CHECK_BE-NEXT: ptrue p0.d
-; CHECK_BE-NEXT: ptrue p1.h
-; CHECK_BE-NEXT: st1h { z0.d }, p0, [sp]
-; CHECK_BE-NEXT: ptrue p0.s
-; CHECK_BE-NEXT: ld1h { z0.h }, p1/z, [sp]
-; CHECK_BE-NEXT: st1h { z0.h }, p1, [sp, #1, mul vl]
-; CHECK_BE-NEXT: ld1w { z0.s }, p0/z, [sp, #1, mul vl]
-; CHECK_BE-NEXT: addvl sp, sp, #2
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: uzp1 z0.h, z0.h, z0.h
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: ptrue p1.s
+; CHECK_BE-NEXT: uzp1 z0.h, z0.h, z0.h
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp]
+; CHECK_BE-NEXT: ld1w { z0.s }, p1/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 2 x half> %v to <vscale x 1 x i32>
@@ -2844,28 +2809,21 @@ define <vscale x 1 x i32> @bitcast_nxv2f16_to_nxv1i32(<vscale x 2 x half> %v) #0
define <vscale x 1 x i32> @bitcast_nxv2bf16_to_nxv1i32(<vscale x 2 x bfloat> %v) #0 {
; CHECK-LABEL: bitcast_nxv2bf16_to_nxv1i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: addvl sp, sp, #-1
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: st1h { z0.d }, p0, [sp]
-; CHECK-NEXT: ld1w { z0.s }, p1/z, [sp]
-; CHECK-NEXT: addvl sp, sp, #1
-; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: ret
;
; CHECK_BE-LABEL: bitcast_nxv2bf16_to_nxv1i32:
; CHECK_BE: // %bb.0:
; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK_BE-NEXT: addvl sp, sp, #-2
-; CHECK_BE-NEXT: ptrue p0.d
-; CHECK_BE-NEXT: ptrue p1.h
-; CHECK_BE-NEXT: st1h { z0.d }, p0, [sp]
-; CHECK_BE-NEXT: ptrue p0.s
-; CHECK_BE-NEXT: ld1h { z0.h }, p1/z, [sp]
-; CHECK_BE-NEXT: st1h { z0.h }, p1, [sp, #1, mul vl]
-; CHECK_BE-NEXT: ld1w { z0.s }, p0/z, [sp, #1, mul vl]
-; CHECK_BE-NEXT: addvl sp, sp, #2
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: uzp1 z0.h, z0.h, z0.h
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: ptrue p1.s
+; CHECK_BE-NEXT: uzp1 z0.h, z0.h, z0.h
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp]
+; CHECK_BE-NEXT: ld1w { z0.s }, p1/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 2 x bfloat> %v to <vscale x 1 x i32>
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-fixed-from-scalable-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-fixed-from-scalable-vector.ll
index 641050ae69d9b7..5b7522856e2daf 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-fixed-from-scalable-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-fixed-from-scalable-vector.ll
@@ -296,15 +296,9 @@ define <4 x i64> @extract_v4i64_nxv8i64_0(<vscale x 8 x i64> %arg) {
define <4 x half> @extract_v4f16_nxv2f16_0(<vscale x 2 x half> %arg) {
; CHECK-LABEL: extract_v4f16_nxv2f16_0:
; CHECK: // %bb.0:
-; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: addvl sp, sp, #-1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
-; CHECK-NEXT: .cfi_offset w29, -16
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: st1h { z0.d }, p0, [sp]
-; CHECK-NEXT: ldr d0, [sp]
-; CHECK-NEXT: addvl sp, sp, #1
-; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%ext = call <4 x half> @llvm.vector.extract.v4f16.nxv2f16(<vscale x 2 x half> %arg, i64 0)
ret <4 x half> %ext
@@ -313,18 +307,10 @@ define <4 x half> @extract_v4f16_nxv2f16_0(<vscale x 2 x half> %arg) {
define <4 x half> @extract_v4f16_nxv2f16_4(<vscale x 2 x half> %arg) {
; CHECK-LABEL: extract_v4f16_nxv2f16_4:
; CHECK: // %bb.0:
-; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: addvl sp, sp, #-1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
-; CHECK-NEXT: .cfi_offset w29, -16
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: ptrue p1.h
-; CHECK-NEXT: st1h { z0.d }, p0, [sp]
-; CHECK-NEXT: ld1h { z0.h }, p1/z, [sp]
+; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
-; CHECK-NEXT: addvl sp, sp, #1
-; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%ext = call <4 x half> @llvm.vector.extract.v4f16.nxv2f16(<vscale x 2 x half> %arg, i64 4)
ret <4 x half> %ext
diff --git a/llvm/test/CodeGen/AArch64/sve-insert-scalable-vector.ll b/llvm/test/CodeGen/AArch64/sve-insert-scalable-vector.ll
index 484bed2b84d34e..d2215fa9075fde 100644
--- a/llvm/test/CodeGen/AArch64/sve-insert-scalable-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-insert-scalable-vector.ll
@@ -8,8 +8,7 @@ target triple = "aarch64-unknown-linux-gnu"
define <vscale x 4 x half> @insert_into_poison_nxv4f16_nxv2f16_0(<vscale x 2 x half> %a) #0 {
; CHECK-LABEL: insert_into_poison_nxv4f16_nxv2f16_0:
; CHECK: // %bb.0:
-; CHECK-NEXT: uunpkhi z1.d, z0.s
-; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
+; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: ret
%res = call <vscale x 4 x half> @llvm.vector.insert.nxv4f16.nxv2f16(<vscale x 4 x half> poison, <vscale x 2 x half> %a, i64 0)
ret <vscale x 4 x half> %res
@@ -18,8 +17,7 @@ define <vscale x 4 x half> @insert_into_poison_nxv4f16_nxv2f16_0(<vscale x 2 x h
define <vscale x 4 x half> @insert_into_poison_nxv4f16_nxv2f16_2(<vscale x 2 x half> %a) #0 {
; CHECK-LABEL: insert_into_poison_nxv4f16_nxv2f16_2:
; CHECK: // %bb.0:
-; CHECK-NEXT: uunpklo z1.d, z0.s
-; CHECK-NEXT: uzp1 z0.s, z1.s, z0.s
+; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: ret
%res = call <vscale x 4 x half> @llvm.vector.insert.nxv4f16.nxv2f16(<vscale x 4 x half> poison, <vscale x 2 x half> %a, i64 2)
ret <vscale x 4 x half> %res
@@ -28,16 +26,8 @@ define <vscale x 4 x half> @insert_into_poison_nxv4f16_nxv2f16_2(<vscale x 2 x h
define <vscale x 8 x half> @insert_in...
[truncated]
``````````
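
For readers skimming the truncated diff, here is a minimal IR sketch of the pattern the patch targets, adapted from the `sve-insert-scalable-vector.ll` test above (the test's `#0` attribute enabling SVE is omitted here):

```llvm
; Insert an unpacked <vscale x 2 x half> subvector into a poison <vscale x 4 x half>.
define <vscale x 4 x half> @insert_into_poison_nxv4f16_nxv2f16_0(<vscale x 2 x half> %a) {
  %res = call <vscale x 4 x half> @llvm.vector.insert.nxv4f16.nxv2f16(<vscale x 4 x half> poison, <vscale x 2 x half> %a, i64 0)
  ret <vscale x 4 x half> %res
}

declare <vscale x 4 x half> @llvm.vector.insert.nxv4f16.nxv2f16(<vscale x 4 x half>, <vscale x 2 x half>, i64)
```

Per the updated CHECK lines above, this now selects to a single `uzp1 z0.s, z0.s, z0.s` instead of the previous `uunpkhi`/`uzp1` pair: LowerINSERT_SUBVECTOR returns the operation unchanged when the subvector type is legal and the destination is undef/poison, and the new UZP1 patterns match it directly.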
https://github.com/llvm/llvm-project/pull/105665