[llvm-branch-commits] [llvm] [AArch64][ISel] Select constructive SVE2 ext instruction (PR #151730)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Aug 1 09:57:05 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Gaëtan Bossu (gbossu)
<details>
<summary>Changes</summary>
👉 This is a chained PR. Predecessor is https://github.com/llvm/llvm-project/pull/151729
This adds patterns for selecting EXT_ZZI_B.
They are tested for fixed vectors using extract shuffles, and for
scalable vectors using llvm.vector.splice intrinsics.
We will get better codegen when enabling subreg liveness. Without it,
any use of a zpr2 tuple is always considered to use both zpr registers
of the pair.
---
Patch is 174.25 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/151730.diff
14 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td (+16)
- (modified) llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll (+5-5)
- (modified) llvm/test/CodeGen/AArch64/sve-fixed-length-partial-reduce.ll (+42-49)
- (modified) llvm/test/CodeGen/AArch64/sve-pr92779.ll (+8-9)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll (+12-12)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll (+16-16)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll (+16-16)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll (+252-252)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll (+1226-600)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll (+300-324)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll (+11-11)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reductions.ll (+47-49)
- (added) llvm/test/CodeGen/AArch64/sve-vector-splice.ll (+253)
- (modified) llvm/test/CodeGen/AArch64/sve2-fixed-length-extract-subvector.ll (+32-47)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 0c4b4f4c3ed88..201dd93302d7a 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -4069,6 +4069,22 @@ let Predicates = [HasSVE2_or_SME] in {
let AddedComplexity = 2 in {
def : Pat<(nxv16i8 (AArch64ext nxv16i8:$zn1, nxv16i8:$zn2, (i32 imm0_255:$imm))),
(EXT_ZZI_B (REG_SEQUENCE ZPR2, $zn1, zsub0, $zn2, zsub1), imm0_255:$imm)>;
+
+ foreach VT = [nxv16i8] in
+ def : Pat<(VT (vector_splice VT:$Z1, VT:$Z2, (i64 (sve_ext_imm_0_255 i32:$index)))),
+ (EXT_ZZI_B (REG_SEQUENCE ZPR2, $Z1, zsub0, $Z2, zsub1), imm0_255:$index)>;
+
+ foreach VT = [nxv8i16, nxv8f16, nxv8bf16] in
+ def : Pat<(VT (vector_splice VT:$Z1, VT:$Z2, (i64 (sve_ext_imm_0_127 i32:$index)))),
+ (EXT_ZZI_B (REG_SEQUENCE ZPR2, $Z1, zsub0, $Z2, zsub1), imm0_255:$index)>;
+
+ foreach VT = [nxv4i32, nxv4f16, nxv4f32, nxv4bf16] in
+ def : Pat<(VT (vector_splice VT:$Z1, VT:$Z2, (i64 (sve_ext_imm_0_63 i32:$index)))),
+ (EXT_ZZI_B (REG_SEQUENCE ZPR2, $Z1, zsub0, $Z2, zsub1), imm0_255:$index)>;
+
+ foreach VT = [nxv2i64, nxv2f16, nxv2f32, nxv2f64, nxv2bf16] in
+ def : Pat<(VT (vector_splice VT:$Z1, VT:$Z2, (i64 (sve_ext_imm_0_31 i32:$index)))),
+ (EXT_ZZI_B (REG_SEQUENCE ZPR2, $Z1, zsub0, $Z2, zsub1), imm0_255:$index)>;
}
} // End HasSVE2_or_SME
diff --git a/llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll b/llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll
index 50975d16c7e9e..13bec605839a9 100644
--- a/llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll
+++ b/llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll
@@ -192,7 +192,7 @@ define void @test_fixed_extract(i64 %i, i64 %n) #0 {
; CHECK-SVE2p1-NEXT: mov z1.s, p0/z, #1 // =0x1
; CHECK-SVE2p1-NEXT: fmov s0, w8
; CHECK-SVE2p1-NEXT: mov v0.s[1], v1.s[1]
-; CHECK-SVE2p1-NEXT: ext z1.b, z1.b, z0.b, #8
+; CHECK-SVE2p1-NEXT: ext z1.b, { z1.b, z2.b }, #8
; CHECK-SVE2p1-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SVE2p1-NEXT: // kill: def $d1 killed $d1 killed $z1
; CHECK-SVE2p1-NEXT: b use
@@ -202,12 +202,12 @@ define void @test_fixed_extract(i64 %i, i64 %n) #0 {
; CHECK-SME2-NEXT: whilelo p0.s, x0, x1
; CHECK-SME2-NEXT: cset w8, mi
; CHECK-SME2-NEXT: mov z1.s, p0/z, #1 // =0x1
-; CHECK-SME2-NEXT: fmov s2, w8
+; CHECK-SME2-NEXT: fmov s3, w8
; CHECK-SME2-NEXT: mov z0.s, z1.s[1]
-; CHECK-SME2-NEXT: zip1 z0.s, z2.s, z0.s
-; CHECK-SME2-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-SME2-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-SME2-NEXT: ext z1.b, { z1.b, z2.b }, #8
; CHECK-SME2-NEXT: // kill: def $d1 killed $d1 killed $z1
+; CHECK-SME2-NEXT: zip1 z0.s, z3.s, z0.s
+; CHECK-SME2-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-SME2-NEXT: b use
%r = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 %i, i64 %n)
%v0 = call <2 x i1> @llvm.vector.extract.v2i1.nxv4i1.i64(<vscale x 4 x i1> %r, i64 0)
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-partial-reduce.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-partial-reduce.ll
index 33d5ac4cd299e..3e8b3a40467dd 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-partial-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-partial-reduce.ll
@@ -109,14 +109,13 @@ define <16 x i16> @two_way_i8_i16_vl256(ptr %accptr, ptr %uptr, ptr %sptr) vscal
; SME-LABEL: two_way_i8_i16_vl256:
; SME: // %bb.0:
; SME-NEXT: ldr z0, [x0]
-; SME-NEXT: ldr z1, [x1]
-; SME-NEXT: ldr z2, [x2]
-; SME-NEXT: umlalb z0.h, z2.b, z1.b
-; SME-NEXT: umlalt z0.h, z2.b, z1.b
-; SME-NEXT: mov z1.d, z0.d
-; SME-NEXT: ext z1.b, z1.b, z0.b, #16
-; SME-NEXT: // kill: def $q0 killed $q0 killed $z0
-; SME-NEXT: // kill: def $q1 killed $q1 killed $z1
+; SME-NEXT: ldr z2, [x1]
+; SME-NEXT: ldr z3, [x2]
+; SME-NEXT: umlalb z0.h, z3.b, z2.b
+; SME-NEXT: umlalt z0.h, z3.b, z2.b
+; SME-NEXT: ext z2.b, { z0.b, z1.b }, #16
+; SME-NEXT: // kill: def $q0 killed $q0 killed $z0_z1
+; SME-NEXT: mov z1.d, z2.d
; SME-NEXT: ret
%acc = load <16 x i16>, ptr %accptr
%u = load <32 x i8>, ptr %uptr
@@ -232,14 +231,13 @@ define <8 x i32> @two_way_i16_i32_vl256(ptr %accptr, ptr %uptr, ptr %sptr) vscal
; SME-LABEL: two_way_i16_i32_vl256:
; SME: // %bb.0:
; SME-NEXT: ldr z0, [x0]
-; SME-NEXT: ldr z1, [x1]
-; SME-NEXT: ldr z2, [x2]
-; SME-NEXT: umlalb z0.s, z2.h, z1.h
-; SME-NEXT: umlalt z0.s, z2.h, z1.h
-; SME-NEXT: mov z1.d, z0.d
-; SME-NEXT: ext z1.b, z1.b, z0.b, #16
-; SME-NEXT: // kill: def $q0 killed $q0 killed $z0
-; SME-NEXT: // kill: def $q1 killed $q1 killed $z1
+; SME-NEXT: ldr z2, [x1]
+; SME-NEXT: ldr z3, [x2]
+; SME-NEXT: umlalb z0.s, z3.h, z2.h
+; SME-NEXT: umlalt z0.s, z3.h, z2.h
+; SME-NEXT: ext z2.b, { z0.b, z1.b }, #16
+; SME-NEXT: // kill: def $q0 killed $q0 killed $z0_z1
+; SME-NEXT: mov z1.d, z2.d
; SME-NEXT: ret
%acc = load <8 x i32>, ptr %accptr
%u = load <16 x i16>, ptr %uptr
@@ -355,14 +353,13 @@ define <4 x i64> @two_way_i32_i64_vl256(ptr %accptr, ptr %uptr, ptr %sptr) vscal
; SME-LABEL: two_way_i32_i64_vl256:
; SME: // %bb.0:
; SME-NEXT: ldr z0, [x0]
-; SME-NEXT: ldr z1, [x1]
-; SME-NEXT: ldr z2, [x2]
-; SME-NEXT: umlalb z0.d, z2.s, z1.s
-; SME-NEXT: umlalt z0.d, z2.s, z1.s
-; SME-NEXT: mov z1.d, z0.d
-; SME-NEXT: ext z1.b, z1.b, z0.b, #16
-; SME-NEXT: // kill: def $q0 killed $q0 killed $z0
-; SME-NEXT: // kill: def $q1 killed $q1 killed $z1
+; SME-NEXT: ldr z2, [x1]
+; SME-NEXT: ldr z3, [x2]
+; SME-NEXT: umlalb z0.d, z3.s, z2.s
+; SME-NEXT: umlalt z0.d, z3.s, z2.s
+; SME-NEXT: ext z2.b, { z0.b, z1.b }, #16
+; SME-NEXT: // kill: def $q0 killed $q0 killed $z0_z1
+; SME-NEXT: mov z1.d, z2.d
; SME-NEXT: ret
%acc = load <4 x i64>, ptr %accptr
%u = load <8 x i32>, ptr %uptr
@@ -644,13 +641,12 @@ define <8 x i32> @four_way_i8_i32_vl256(ptr %accptr, ptr %uptr, ptr %sptr) vscal
; SME-LABEL: four_way_i8_i32_vl256:
; SME: // %bb.0:
; SME-NEXT: ldr z0, [x0]
-; SME-NEXT: ldr z1, [x1]
-; SME-NEXT: ldr z2, [x2]
-; SME-NEXT: udot z0.s, z2.b, z1.b
-; SME-NEXT: mov z1.d, z0.d
-; SME-NEXT: ext z1.b, z1.b, z0.b, #16
-; SME-NEXT: // kill: def $q0 killed $q0 killed $z0
-; SME-NEXT: // kill: def $q1 killed $q1 killed $z1
+; SME-NEXT: ldr z2, [x1]
+; SME-NEXT: ldr z3, [x2]
+; SME-NEXT: udot z0.s, z3.b, z2.b
+; SME-NEXT: ext z2.b, { z0.b, z1.b }, #16
+; SME-NEXT: // kill: def $q0 killed $q0 killed $z0_z1
+; SME-NEXT: mov z1.d, z2.d
; SME-NEXT: ret
%acc = load <8 x i32>, ptr %accptr
%u = load <32 x i8>, ptr %uptr
@@ -689,13 +685,12 @@ define <8 x i32> @four_way_i8_i32_vl256_usdot(ptr %accptr, ptr %uptr, ptr %sptr)
; SME-LABEL: four_way_i8_i32_vl256_usdot:
; SME: // %bb.0:
; SME-NEXT: ldr z0, [x0]
-; SME-NEXT: ldr z1, [x1]
-; SME-NEXT: ldr z2, [x2]
-; SME-NEXT: usdot z0.s, z1.b, z2.b
-; SME-NEXT: mov z1.d, z0.d
-; SME-NEXT: ext z1.b, z1.b, z0.b, #16
-; SME-NEXT: // kill: def $q0 killed $q0 killed $z0
-; SME-NEXT: // kill: def $q1 killed $q1 killed $z1
+; SME-NEXT: ldr z2, [x1]
+; SME-NEXT: ldr z3, [x2]
+; SME-NEXT: usdot z0.s, z2.b, z3.b
+; SME-NEXT: ext z2.b, { z0.b, z1.b }, #16
+; SME-NEXT: // kill: def $q0 killed $q0 killed $z0_z1
+; SME-NEXT: mov z1.d, z2.d
; SME-NEXT: ret
%acc = load <8 x i32>, ptr %accptr
%u = load <32 x i8>, ptr %uptr
@@ -822,13 +817,12 @@ define <4 x i64> @four_way_i16_i64_vl256(ptr %accptr, ptr %uptr, ptr %sptr) vsca
; SME-LABEL: four_way_i16_i64_vl256:
; SME: // %bb.0:
; SME-NEXT: ldr z0, [x0]
-; SME-NEXT: ldr z1, [x1]
-; SME-NEXT: ldr z2, [x2]
-; SME-NEXT: udot z0.d, z2.h, z1.h
-; SME-NEXT: mov z1.d, z0.d
-; SME-NEXT: ext z1.b, z1.b, z0.b, #16
-; SME-NEXT: // kill: def $q0 killed $q0 killed $z0
-; SME-NEXT: // kill: def $q1 killed $q1 killed $z1
+; SME-NEXT: ldr z2, [x1]
+; SME-NEXT: ldr z3, [x2]
+; SME-NEXT: udot z0.d, z3.h, z2.h
+; SME-NEXT: ext z2.b, { z0.b, z1.b }, #16
+; SME-NEXT: // kill: def $q0 killed $q0 killed $z0_z1
+; SME-NEXT: mov z1.d, z2.d
; SME-NEXT: ret
%acc = load <4 x i64>, ptr %accptr
%u = load <16 x i16>, ptr %uptr
@@ -999,10 +993,9 @@ define <4 x i64> @four_way_i8_i64_vl256(ptr %accptr, ptr %uptr, ptr %sptr) vscal
; SME-NEXT: ldr z0, [x0]
; SME-NEXT: uaddwb z0.d, z0.d, z2.s
; SME-NEXT: uaddwt z0.d, z0.d, z2.s
-; SME-NEXT: mov z1.d, z0.d
-; SME-NEXT: ext z1.b, z1.b, z0.b, #16
-; SME-NEXT: // kill: def $q0 killed $q0 killed $z0
-; SME-NEXT: // kill: def $q1 killed $q1 killed $z1
+; SME-NEXT: ext z2.b, { z0.b, z1.b }, #16
+; SME-NEXT: // kill: def $q0 killed $q0 killed $z0_z1
+; SME-NEXT: mov z1.d, z2.d
; SME-NEXT: ret
%acc = load <4 x i64>, ptr %accptr
%u = load <32 x i8>, ptr %uptr
diff --git a/llvm/test/CodeGen/AArch64/sve-pr92779.ll b/llvm/test/CodeGen/AArch64/sve-pr92779.ll
index 3f34d79b3bb49..427d3903cf2e9 100644
--- a/llvm/test/CodeGen/AArch64/sve-pr92779.ll
+++ b/llvm/test/CodeGen/AArch64/sve-pr92779.ll
@@ -5,16 +5,15 @@ define void @main(ptr %0) {
; CHECK-LABEL: main:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v0.2d, #0000000000000000
-; CHECK-NEXT: movi v1.2d, #0000000000000000
; CHECK-NEXT: ptrue p0.d, vl1
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: uzp1 v0.2s, v1.2s, v0.2s
-; CHECK-NEXT: neg v0.2s, v0.2s
-; CHECK-NEXT: smov x8, v0.s[0]
-; CHECK-NEXT: smov x9, v0.s[1]
-; CHECK-NEXT: mov z1.d, p0/m, x8
-; CHECK-NEXT: mov z1.d, p0/m, x9
-; CHECK-NEXT: str z1, [x0]
+; CHECK-NEXT: ext z2.b, { z0.b, z1.b }, #8
+; CHECK-NEXT: uzp1 v2.2s, v0.2s, v2.2s
+; CHECK-NEXT: neg v2.2s, v2.2s
+; CHECK-NEXT: smov x8, v2.s[0]
+; CHECK-NEXT: smov x9, v2.s[1]
+; CHECK-NEXT: mov z0.d, p0/m, x8
+; CHECK-NEXT: mov z0.d, p0/m, x9
+; CHECK-NEXT: str z0, [x0]
; CHECK-NEXT: ret
"entry":
%1 = bitcast <vscale x 2 x i64> zeroinitializer to <vscale x 4 x i32>
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
index 4d524bc848de6..6fe6b8a1c48d0 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
@@ -228,25 +228,25 @@ define <4 x i256> @load_sext_v4i32i256(ptr %ap) {
; CHECK-LABEL: load_sext_v4i32i256:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
-; CHECK-NEXT: sunpklo z1.d, z0.s
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT: sunpklo z2.d, z0.s
+; CHECK-NEXT: ext z0.b, { z0.b, z1.b }, #8
; CHECK-NEXT: sunpklo z0.d, z0.s
-; CHECK-NEXT: fmov x9, d1
-; CHECK-NEXT: mov z1.d, z1.d[1]
-; CHECK-NEXT: fmov x11, d0
-; CHECK-NEXT: mov z0.d, z0.d[1]
+; CHECK-NEXT: fmov x9, d2
+; CHECK-NEXT: mov z2.d, z2.d[1]
; CHECK-NEXT: asr x10, x9, #63
+; CHECK-NEXT: fmov x11, d2
; CHECK-NEXT: stp x9, x10, [x8]
-; CHECK-NEXT: fmov x9, d1
+; CHECK-NEXT: fmov x9, d0
+; CHECK-NEXT: mov z0.d, z0.d[1]
; CHECK-NEXT: asr x12, x11, #63
; CHECK-NEXT: stp x10, x10, [x8, #16]
-; CHECK-NEXT: stp x11, x12, [x8, #64]
+; CHECK-NEXT: stp x11, x12, [x8, #32]
; CHECK-NEXT: fmov x11, d0
; CHECK-NEXT: asr x10, x9, #63
-; CHECK-NEXT: stp x12, x12, [x8, #80]
-; CHECK-NEXT: stp x10, x10, [x8, #48]
+; CHECK-NEXT: stp x12, x12, [x8, #48]
+; CHECK-NEXT: stp x10, x10, [x8, #80]
; CHECK-NEXT: asr x12, x11, #63
-; CHECK-NEXT: stp x9, x10, [x8, #32]
+; CHECK-NEXT: stp x9, x10, [x8, #64]
; CHECK-NEXT: stp x12, x12, [x8, #112]
; CHECK-NEXT: stp x11, x12, [x8, #96]
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll
index 35dd827bbabc5..7ef35f153f029 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
@@ -78,8 +78,8 @@ define <4 x i8> @extract_subvector_v8i8(<8 x i8> %op) {
define <8 x i8> @extract_subvector_v16i8(<16 x i8> %op) {
; CHECK-LABEL: extract_subvector_v16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0_z1
+; CHECK-NEXT: ext z0.b, { z0.b, z1.b }, #8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
@@ -119,7 +119,7 @@ define <2 x i16> @extract_subvector_v4i16(<4 x i16> %op) {
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: uunpklo z0.s, z0.h
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT: ext z0.b, { z0.b, z1.b }, #8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
@@ -138,8 +138,8 @@ define <2 x i16> @extract_subvector_v4i16(<4 x i16> %op) {
define <4 x i16> @extract_subvector_v8i16(<8 x i16> %op) {
; CHECK-LABEL: extract_subvector_v8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0_z1
+; CHECK-NEXT: ext z0.b, { z0.b, z1.b }, #8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
@@ -198,8 +198,8 @@ define <1 x i32> @extract_subvector_v2i32(<2 x i32> %op) {
define <2 x i32> @extract_subvector_v4i32(<4 x i32> %op) {
; CHECK-LABEL: extract_subvector_v4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0_z1
+; CHECK-NEXT: ext z0.b, { z0.b, z1.b }, #8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
@@ -237,8 +237,8 @@ define void @extract_subvector_v8i32(ptr %a, ptr %b) {
define <1 x i64> @extract_subvector_v2i64(<2 x i64> %op) {
; CHECK-LABEL: extract_subvector_v2i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0_z1
+; CHECK-NEXT: ext z0.b, { z0.b, z1.b }, #8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
@@ -297,8 +297,8 @@ define <2 x half> @extract_subvector_v4f16(<4 x half> %op) {
define <4 x half> @extract_subvector_v8f16(<8 x half> %op) {
; CHECK-LABEL: extract_subvector_v8f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0_z1
+; CHECK-NEXT: ext z0.b, { z0.b, z1.b }, #8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
@@ -357,8 +357,8 @@ define <1 x float> @extract_subvector_v2f32(<2 x float> %op) {
define <2 x float> @extract_subvector_v4f32(<4 x float> %op) {
; CHECK-LABEL: extract_subvector_v4f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0_z1
+; CHECK-NEXT: ext z0.b, { z0.b, z1.b }, #8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
@@ -396,8 +396,8 @@ define void @extract_subvector_v8f32(ptr %a, ptr %b) {
define <1 x double> @extract_subvector_v2f64(<2 x double> %op) {
; CHECK-LABEL: extract_subvector_v2f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0_z1
+; CHECK-NEXT: ext z0.b, { z0.b, z1.b }, #8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll
index e3d0a72c74b87..bc9b0373d8e49 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
@@ -74,14 +74,14 @@ define void @fcvt_v4f16_to_v4f32(<4 x half> %a, ptr %b) {
define void @fcvt_v8f16_to_v8f32(<8 x half> %a, ptr %b) {
; CHECK-LABEL: fcvt_v8f16_to_v8f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: uunpklo z1.s, z0.h
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0_z1
+; CHECK-NEXT: ext z2.b, { z0.b, z1.b }, #8
; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
; CHECK-NEXT: uunpklo z0.s, z0.h
-; CHECK-NEXT: fcvt z1.s, p0/m, z1.h
+; CHECK-NEXT: uunpklo z1.s, z2.h
; CHECK-NEXT: fcvt z0.s, p0/m, z0.h
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: fcvt z1.s, p0/m, z1.h
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fcvt_v8f16_to_v8f32:
@@ -122,21 +122,21 @@ define void @fcvt_v8f16_to_v8f32(<8 x half> %a, ptr %b) {
define void @fcvt_v16f16_to_v16f32(<16 x half> %a, ptr %b) {
; CHECK-LABEL: fcvt_v16f16_to_v16f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: uunpklo z2.s, z1.h
-; CHECK-NEXT: uunpklo z3.s, z0.h
+; CHECK-NEXT: mov z3.d, z0.d
+; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1_z2
+; CHECK-NEXT: ext z0.b, { z1.b, z2.b }, #8
; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
; CHECK-NEXT: uunpklo z1.s, z1.h
+; CHECK-NEXT: ext z5.b, { z3.b, z4.b }, #8
; CHECK-NEXT: uunpklo z0.s, z0.h
-; CHECK-NEXT: fcvt z2.s, p0/m, z2.h
-; CHECK-NEXT: fcvt z3.s, p0/m, z3.h
+; CHECK-NEXT: uunpklo z2.s, z3.h
; CHECK-NEXT: fcvt z1.s, p0/m, z1.h
+; CHECK-NEXT: uunpklo z3.s, z5.h
; CHECK-NEXT: fcvt z0.s, p0/m, z0.h
-; CHECK-NEXT: stp q3, q0, [x0]
-; CHECK-NEXT: stp q2, q1, [x0, #32]
+; CHECK-NEXT: fcvt z2.s, p0/m, z2.h
+; CHECK-NEXT: fcvt z3.s, p0/m, z3.h
+; CHECK-NEXT: stp q1, q0, [x0, #32]
+; CHECK-NEXT: stp q2, q3, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fcvt_v16f16_to_v16f32:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
index ae7c676172867..0e34b2cd09fe1 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
@@ -58,21 +58,21 @@ define <8 x i8> @sdiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: s...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/151730
More information about the llvm-branch-commits
mailing list