[llvm-branch-commits] [llvm] [AArch64] Enable subreg liveness tracking for streaming functions. (PR #174189)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Jan 2 00:50:02 PST 2026
llvmbot wrote:
@llvm/pr-subscribers-backend-aarch64
Author: Sander de Smalen (sdesmalen-arm)
Changes:
Most of the benefit of subreg liveness tracking is expected to come from streaming SME2 functions, where it enables use of the strided and contiguous forms of the multi-vector LD1 instructions; see #123081 for details.
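For context, here is a minimal sketch of the effective behaviour introduced by the AArch64Subtarget.cpp hunk in the diff below; the helper name and parameters are illustrative only and not part of the patch:

```c++
// Illustrative sketch (hypothetical helper, not from the patch): how the
// subtarget now chooses whether to enable subreg liveness tracking.
static bool decideSubregLiveness(bool IsStreaming, bool TrackingOptionValue) {
  // Streaming functions opt in unconditionally so the register allocator can
  // form SME2's strided/contiguous multi-vector loads and stores.
  if (IsStreaming)
    return true;
  // Non-streaming functions keep following the existing command-line option,
  // as before this patch.
  return TrackingOptionValue;
}
```

The test updates add `-enable-subreg-liveness` to the RUN lines so the non-streaming configurations are checked with the same tracking enabled, which lines up with most of the diff churn being removed `// kill:` annotations.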
---
Patch is 963.67 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/174189.diff
100 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64Subtarget.cpp (+11-1)
- (modified) llvm/test/CodeGen/AArch64/active_lane_mask.ll (+2-15)
- (modified) llvm/test/CodeGen/AArch64/fp8-sve-cvtn.ll (+3-9)
- (modified) llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll (+3-9)
- (modified) llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll (+11-53)
- (modified) llvm/test/CodeGen/AArch64/sme-avoid-coalescing-locally-streaming.ll (-3)
- (modified) llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll (-1)
- (modified) llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll (-1)
- (modified) llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll (+10-40)
- (modified) llvm/test/CodeGen/AArch64/sme-streaming-body.ll (-2)
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll (-24)
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-bfmul.ll (-18)
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-bfscale.ll (-18)
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtn.ll (-4)
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-faminmax.ll (+78-78)
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-fclamp.ll (-18)
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-fmlas.ll (+2-98)
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-fscale.ll (-54)
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-luti4-lane-x4.ll (+2-2)
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-luti4.ll (+2-6)
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll (+182-218)
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll (+182-218)
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-mlall.ll (+92-152)
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-mlals.ll (+1-192)
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4-fp8.ll (-8)
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4a_2x1.ll (-64)
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4a_2x2.ll (-82)
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-rshl.ll (+104-104)
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-sclamp.ll (-24)
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-select-sme-tileslice.ll (-2)
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-sqdmulh.ll (+52-52)
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-sub.ll (-60)
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-tmop.ll (-26)
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-uclamp.ll (-24)
- (modified) llvm/test/CodeGen/AArch64/sme2p2-intrinsics-fmul.ll (-54)
- (modified) llvm/test/CodeGen/AArch64/sve-bf16-reductions.ll (+2-4)
- (modified) llvm/test/CodeGen/AArch64/sve-fixed-length-partial-reduce.ll (-38)
- (modified) llvm/test/CodeGen/AArch64/sve-fmsub.ll (+2-50)
- (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll (+2-5)
- (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-sqdec.ll (+2-26)
- (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-sqinc.ll (+2-26)
- (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll (+2-121)
- (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll (+2-65)
- (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll (+2-83)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll (+4-16)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-to-int.ll (+3-13)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-int-to-fp.ll (+3-13)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll (+3-37)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll (+9-69)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll (+3-5)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll (+3-23)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll (+3-39)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll (+3-20)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll (+3-29)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-vector-elt.ll (+3-18)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll (+10-10)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll (+3-135)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll (+3-22)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll (+7-14)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll (+3-63)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll (+3-87)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll (+3-23)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll (+8-32)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll (+4-144)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll (+3-27)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll (+81-141)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll (+15-39)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll (+3-75)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll (+3-99)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mul.ll (+3-6)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll (+6-126)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll (+3-38)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll (+78-132)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll (+3-36)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll (+7-97)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll (+12-52)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll (+7-7)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll (+3-5)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll (+3-5)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-log-reduce.ll (+3-30)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll (+3-4)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll (+3-4)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll (+26-26)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reductions.ll (+12-16)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reshuffle.ll (+2-3)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll (+3-37)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll (+3-23)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll (+8-12)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll (+4-4)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll (+355-365)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-vector-shuffle.ll (+6-33)
- (modified) llvm/test/CodeGen/AArch64/sve-vector-deinterleave.ll (+30-61)
- (modified) llvm/test/CodeGen/AArch64/sve-vector-interleave.ll (+5-64)
- (modified) llvm/test/CodeGen/AArch64/sve2-intrinsics-luti.ll (+6-6)
- (modified) llvm/test/CodeGen/AArch64/sve2p1-dots-partial-reduction.ll (+2-10)
- (modified) llvm/test/CodeGen/AArch64/sve2p1-intrinsics-multivec-stores.ll (+4-157)
- (modified) llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx4.ll (+56-56)
- (modified) llvm/test/CodeGen/AArch64/sve2p1-intrinsics-stores.ll (+51-51)
- (modified) llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpx4.ll (+10-10)
- (modified) llvm/test/CodeGen/AArch64/sve2p1-intrinsics-while-pp.ll (+4-36)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index 1737a0c1529b4..194d1d94e0b25 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -417,7 +417,17 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
if (ReservedRegNames.count("X29") || ReservedRegNames.count("FP"))
ReserveXRegisterForRA.set(29);
- EnableSubregLiveness = EnableSubregLivenessTracking.getValue();
+ // To benefit from SME2's strided-register multi-vector load/store
+ // instructions we'll need to enable subreg liveness. Our longer
+ // term aim is to make this the default, regardless of streaming
+ // mode, but there are still some outstanding issues, see:
+ // https://github.com/llvm/llvm-project/pull/174188
+ // and:
+ // https://github.com/llvm/llvm-project/pull/168353
+ if (IsStreaming)
+ EnableSubregLiveness = true;
+ else
+ EnableSubregLiveness = EnableSubregLivenessTracking.getValue();
}
const CallLowering *AArch64Subtarget::getCallLowering() const {
diff --git a/llvm/test/CodeGen/AArch64/active_lane_mask.ll b/llvm/test/CodeGen/AArch64/active_lane_mask.ll
index b77e90f6fdc45..05d083a654cf6 100644
--- a/llvm/test/CodeGen/AArch64/active_lane_mask.ll
+++ b/llvm/test/CodeGen/AArch64/active_lane_mask.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SVE
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,CHECK-STREAMING
+; RUN: llc -enable-subreg-liveness -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SVE
+; RUN: llc -enable-subreg-liveness -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,CHECK-STREAMING
; == Scalable ==
@@ -209,7 +209,6 @@ define <16 x i1> @lane_mask_v16i1_i32(i32 %index, i32 %TC) {
; CHECK: // %bb.0:
; CHECK-NEXT: whilelo p0.b, w0, w1
; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
%active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %TC)
ret <16 x i1> %active.lane.mask
@@ -220,7 +219,6 @@ define <8 x i1> @lane_mask_v8i1_i32(i32 %index, i32 %TC) {
; CHECK: // %bb.0:
; CHECK-NEXT: whilelo p0.b, w0, w1
; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %TC)
ret <8 x i1> %active.lane.mask
@@ -231,7 +229,6 @@ define <4 x i1> @lane_mask_v4i1_i32(i32 %index, i32 %TC) {
; CHECK: // %bb.0:
; CHECK-NEXT: whilelo p0.h, w0, w1
; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %TC)
ret <4 x i1> %active.lane.mask
@@ -242,7 +239,6 @@ define <2 x i1> @lane_mask_v2i1_i32(i32 %index, i32 %TC) {
; CHECK: // %bb.0:
; CHECK-NEXT: whilelo p0.s, w0, w1
; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%active.lane.mask = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 %index, i32 %TC)
ret <2 x i1> %active.lane.mask
@@ -253,7 +249,6 @@ define <16 x i1> @lane_mask_v16i1_i64(i64 %index, i64 %TC) {
; CHECK: // %bb.0:
; CHECK-NEXT: whilelo p0.b, x0, x1
; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
%active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 %index, i64 %TC)
ret <16 x i1> %active.lane.mask
@@ -264,7 +259,6 @@ define <8 x i1> @lane_mask_v8i1_i64(i64 %index, i64 %TC) {
; CHECK: // %bb.0:
; CHECK-NEXT: whilelo p0.b, x0, x1
; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 %index, i64 %TC)
ret <8 x i1> %active.lane.mask
@@ -275,7 +269,6 @@ define <4 x i1> @lane_mask_v4i1_i64(i64 %index, i64 %TC) {
; CHECK: // %bb.0:
; CHECK-NEXT: whilelo p0.h, x0, x1
; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 %index, i64 %TC)
ret <4 x i1> %active.lane.mask
@@ -286,7 +279,6 @@ define <2 x i1> @lane_mask_v2i1_i64(i64 %index, i64 %TC) {
; CHECK: // %bb.0:
; CHECK-NEXT: whilelo p0.s, x0, x1
; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%active.lane.mask = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 %index, i64 %TC)
ret <2 x i1> %active.lane.mask
@@ -313,7 +305,6 @@ define <16 x i1> @lane_mask_v16i1_i8(i8 %index, i8 %TC) {
; CHECK-STREAMING-NEXT: mov z1.b, w1
; CHECK-STREAMING-NEXT: cmphi p0.b, p0/z, z1.b, z0.b
; CHECK-STREAMING-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-STREAMING-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-STREAMING-NEXT: ret
%active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i8(i8 %index, i8 %TC)
ret <16 x i1> %active.lane.mask
@@ -340,7 +331,6 @@ define <8 x i1> @lane_mask_v8i1_i8(i8 %index, i8 %TC) {
; CHECK-STREAMING-NEXT: mov z1.b, w1
; CHECK-STREAMING-NEXT: cmphi p0.b, p0/z, z1.b, z0.b
; CHECK-STREAMING-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-STREAMING-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-STREAMING-NEXT: ret
%active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i8(i8 %index, i8 %TC)
ret <8 x i1> %active.lane.mask
@@ -372,7 +362,6 @@ define <4 x i1> @lane_mask_v4i1_i8(i8 %index, i8 %TC) {
; CHECK-STREAMING-NEXT: and z1.h, z1.h, #0xff
; CHECK-STREAMING-NEXT: cmphi p0.h, p0/z, z1.h, z0.h
; CHECK-STREAMING-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-STREAMING-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-STREAMING-NEXT: ret
%active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i8(i8 %index, i8 %TC)
ret <4 x i1> %active.lane.mask
@@ -402,7 +391,6 @@ define <2 x i1> @lane_mask_v2i1_i8(i8 %index, i8 %TC) {
; CHECK-STREAMING-NEXT: umin z0.s, z0.s, #255
; CHECK-STREAMING-NEXT: cmphi p0.s, p0/z, z1.s, z0.s
; CHECK-STREAMING-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-STREAMING-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-STREAMING-NEXT: ret
%active.lane.mask = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i8(i8 %index, i8 %TC)
ret <2 x i1> %active.lane.mask
@@ -465,7 +453,6 @@ define <8 x i1> @lane_mask_v8i1_imm3() {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b, vl3
; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
entry:
%active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 0, i64 3)
diff --git a/llvm/test/CodeGen/AArch64/fp8-sve-cvtn.ll b/llvm/test/CodeGen/AArch64/fp8-sve-cvtn.ll
index e42f2b1cfba48..b735a52e49d63 100644
--- a/llvm/test/CodeGen/AArch64/fp8-sve-cvtn.ll
+++ b/llvm/test/CodeGen/AArch64/fp8-sve-cvtn.ll
@@ -1,15 +1,13 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mattr=+sve2,+fp8 < %s | FileCheck %s
-; RUN: llc -mattr=+sve,+sme2,+fp8 < %s | FileCheck %s
-; RUN: llc -mattr=+sme2,+fp8 --force-streaming < %s | FileCheck %s
+; RUN: llc -mattr=+sve2,+fp8 -enable-subreg-liveness=true < %s | FileCheck %s
+; RUN: llc -mattr=+sve,+sme2,+fp8 -enable-subreg-liveness=true < %s | FileCheck %s
+; RUN: llc -mattr=+sme2,+fp8 --force-streaming -enable-subreg-liveness=true < %s | FileCheck %s
target triple = "aarch64-linux"
define <vscale x 16 x i8> @cvtn_bf16(<vscale x 8 x bfloat> %s1, <vscale x 8 x bfloat> %s2) {
; CHECK-LABEL: cvtn_bf16:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: bfcvtn z0.b, { z0.h, z1.h }
; CHECK-NEXT: ret
%r = call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvtn.nxv8bf16(<vscale x 8 x bfloat> %s1, <vscale x 8 x bfloat> %s2)
@@ -19,8 +17,6 @@ define <vscale x 16 x i8> @cvtn_bf16(<vscale x 8 x bfloat> %s1, <vscale x 8 x bf
define <vscale x 16 x i8> @cvtn_f16(<vscale x 8 x half> %s1, <vscale x 8 x half> %s2) {
; CHECK-LABEL: cvtn_f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: fcvtn z0.b, { z0.h, z1.h }
; CHECK-NEXT: ret
%r = call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvtn.nxv8f16(<vscale x 8 x half> %s1, <vscale x 8 x half> %s2)
@@ -30,8 +26,6 @@ define <vscale x 16 x i8> @cvtn_f16(<vscale x 8 x half> %s1, <vscale x 8 x half>
define <vscale x 16 x i8> @cvtnb_f32(<vscale x 4 x float> %s1, <vscale x 4 x float> %s2) {
; CHECK-LABEL: cvtnb_f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: fcvtnb z0.b, { z0.s, z1.s }
; CHECK-NEXT: ret
%r = call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvtnb.nxv4f32(<vscale x 4 x float> %s1, <vscale x 4 x float> %s2)
diff --git a/llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll b/llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll
index e2c861b40e706..aa0b934151fef 100644
--- a/llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll
+++ b/llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -mattr=+sve < %s | FileCheck %s -check-prefix CHECK-SVE
-; RUN: llc -mattr=+sve2p1 < %s | FileCheck %s -check-prefix CHECK-SVE2p1-SME2 -check-prefix CHECK-SVE2p1
-; RUN: llc -mattr=+sve -mattr=+sme2 -force-streaming < %s | FileCheck %s -check-prefix CHECK-SVE2p1-SME2 -check-prefix CHECK-SME2
+; RUN: llc -enable-subreg-liveness -mattr=+sve < %s | FileCheck %s -check-prefix CHECK-SVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve2p1 < %s | FileCheck %s -check-prefix CHECK-SVE2p1-SME2 -check-prefix CHECK-SVE2p1
+; RUN: llc -enable-subreg-liveness -mattr=+sve -mattr=+sme2 -force-streaming < %s | FileCheck %s -check-prefix CHECK-SVE2p1-SME2 -check-prefix CHECK-SME2
target triple = "aarch64-linux"
; Test combining of getActiveLaneMask with a pair of extract_vector operations.
@@ -181,8 +181,6 @@ define void @test_fixed_extract(i64 %i, i64 %n) #0 {
; CHECK-SVE-NEXT: fmov s0, w8
; CHECK-SVE-NEXT: mov v0.s[1], v1.s[1]
; CHECK-SVE-NEXT: ext z1.b, z1.b, z1.b, #8
-; CHECK-SVE-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-SVE-NEXT: // kill: def $d1 killed $d1 killed $z1
; CHECK-SVE-NEXT: b use
;
; CHECK-SVE2p1-LABEL: test_fixed_extract:
@@ -193,8 +191,6 @@ define void @test_fixed_extract(i64 %i, i64 %n) #0 {
; CHECK-SVE2p1-NEXT: fmov s0, w8
; CHECK-SVE2p1-NEXT: mov v0.s[1], v1.s[1]
; CHECK-SVE2p1-NEXT: ext z1.b, z1.b, z1.b, #8
-; CHECK-SVE2p1-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-SVE2p1-NEXT: // kill: def $d1 killed $d1 killed $z1
; CHECK-SVE2p1-NEXT: b use
;
; CHECK-SME2-LABEL: test_fixed_extract:
@@ -205,9 +201,7 @@ define void @test_fixed_extract(i64 %i, i64 %n) #0 {
; CHECK-SME2-NEXT: fmov s2, w8
; CHECK-SME2-NEXT: mov z0.s, z1.s[1]
; CHECK-SME2-NEXT: ext z1.b, z1.b, z1.b, #8
-; CHECK-SME2-NEXT: // kill: def $d1 killed $d1 killed $z1
; CHECK-SME2-NEXT: zip1 z0.s, z2.s, z0.s
-; CHECK-SME2-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-SME2-NEXT: b use
%r = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 %i, i64 %n)
%v0 = call <2 x i1> @llvm.vector.extract.v2i1.nxv4i1.i64(<vscale x 4 x i1> %r, i64 0)
diff --git a/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll b/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll
index ca16df3c09ade..49a0086a7be54 100644
--- a/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll
+++ b/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK,NONSTREAMING
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,STREAMING
+; RUN: llc -enable-subreg-liveness -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK,NONSTREAMING
+; RUN: llc -enable-subreg-liveness -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,STREAMING
; WITH VSCALE RANGE
@@ -145,7 +145,6 @@ define i32 @ctz_nxv2i1(<vscale x 2 x i1> %a) {
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: brkb p0.b, p1/z, p0.b
; CHECK-NEXT: cntp x0, p0, p0.d
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
%res = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1(<vscale x 2 x i1> %a, i1 0)
ret i32 %res
@@ -157,7 +156,6 @@ define i32 @ctz_nxv2i1_poison(<vscale x 2 x i1> %a) {
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: brkb p0.b, p1/z, p0.b
; CHECK-NEXT: cntp x0, p0, p0.d
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
%res = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1(<vscale x 2 x i1> %a, i1 1)
ret i32 %res
@@ -179,10 +177,8 @@ define i32 @add_i32_ctz_nxv2i1_poison(<vscale x 2 x i1> %a, i32 %b) {
; CHECK-LABEL: add_i32_ctz_nxv2i1_poison:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.d
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: brkb p0.b, p1/z, p0.b
; CHECK-NEXT: incp x0, p0.d
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
%res = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1(<vscale x 2 x i1> %a, i1 1)
%trunc = trunc i64 %res to i32
@@ -196,7 +192,6 @@ define i32 @ctz_nxv4i1(<vscale x 4 x i1> %a) {
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: brkb p0.b, p1/z, p0.b
; CHECK-NEXT: cntp x0, p0, p0.s
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
%res = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1(<vscale x 4 x i1> %a, i1 0)
ret i32 %res
@@ -208,7 +203,6 @@ define i32 @ctz_nxv4i1_poison(<vscale x 4 x i1> %a) {
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: brkb p0.b, p1/z, p0.b
; CHECK-NEXT: cntp x0, p0, p0.s
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
%res = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1(<vscale x 4 x i1> %a, i1 1)
ret i32 %res
@@ -230,10 +224,8 @@ define i32 @add_i32_ctz_nxv4i1_poison(<vscale x 4 x i1> %a, i32 %b) {
; CHECK-LABEL: add_i32_ctz_nxv4i1_poison:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: brkb p0.b, p1/z, p0.b
; CHECK-NEXT: incp x0, p0.s
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
%res = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> %a, i1 1)
%trunc = trunc i64 %res to i32
@@ -247,7 +239,6 @@ define i32 @ctz_nxv8i1(<vscale x 8 x i1> %a) {
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: brkb p0.b, p1/z, p0.b
; CHECK-NEXT: cntp x0, p0, p0.h
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
%res = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1(<vscale x 8 x i1> %a, i1 0)
ret i32 %res
@@ -259,7 +250,6 @@ define i32 @ctz_nxv8i1_poison(<vscale x 8 x i1> %a) {
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: brkb p0.b, p1/z, p0.b
; CHECK-NEXT: cntp x0, p0, p0.h
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
%res = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1(<vscale x 8 x i1> %a, i1 1)
ret i32 %res
@@ -281,10 +271,8 @@ define i32 @add_i32_ctz_nxv8i1_poison(<vscale x 8 x i1> %a, i32 %b) {
; CHECK-LABEL: add_i32_ctz_nxv8i1_poison:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.h
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: brkb p0.b, p1/z, p0.b
; CHECK-NEXT: incp x0, p0.h
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
%res = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1(<vscale x 8 x i1> %a, i1 1)
%trunc = trunc i64 %res to i32
@@ -298,7 +286,6 @@ define i32 @ctz_nxv16i1(<vscale x 16 x i1> %a) {
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: brkb p0.b, p1/z, p0.b
; CHECK-NEXT: cntp x0, p0, p0.b
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
%res = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> %a, i1 0)
ret i32 %res
@@ -310,7 +297,6 @@ define i32 @ctz_nxv16i1_poison(<vscale x 16 x i1> %a) {
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: brkb p0.b, p1/z, p0.b
; CHECK-NEXT: cntp x0, p0, p0.b
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
%res = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> %a, i1 1)
ret i32 %res
@@ -323,7 +309,6 @@ define i32 @ctz_and_nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vsca
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: brkb p0.b, p1/z, p0.b
; CHECK-NEXT: cntp x0, p0, p0.b
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
%cmp = icmp ne <vscale x 16 x i8> %a, %b
%select = select <vscale x 16 x i1> %pg, <vscale x 16 x i1> %cmp, <vscale x 16 x i1> zeroinitializer
@@ -348,10 +333,8 @@ define i32 @add_i32_ctz_nxv16i1_poison(<vscale x 16 x i1> %a, i32 %b) {
; CHECK-LABEL: add_i32_ctz_nxv16i1_poison:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.b
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: brkb p0.b, p1/z, p0.b
; CHECK-NEXT: incp x0, p0.b
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
%res = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> %a, i1 1)
%trunc = trunc i64 %res to i32
@@ -370,20 +353,17 @@ define i32 @ctz_v16i1(<16 x i1> %a) {
; NONSTREAMING-NEXT: cmpne p0.b, p0/z, z0.b, #0
; NONSTREAMING-NEXT: brkb p0.b, p1/z, p0.b
; NONSTREAMING-NEXT: cntp x0, p0, p0.b
-; NONSTREAMING-NEXT: // kill: def $w0 killed $w0 killed $x0
; NONSTREAMING-NEXT: ret
;
; STREAMING-LABEL: ctz_v16i1:
; STREAMING: // %bb.0:
-; STREAMING-NEXT: // kill: def $q0 killed $q0 def $z0
-; STREAMING-NEXT: ptrue p0.b, vl16
; STREAMING-NEXT: lsl z0.b, z0.b, #7
+; STREAMING-NEXT: ptrue p0.b, vl16
; STREAMING-NEXT: ptrue p1.b
; STREAMING-NEXT: asr z0.b, z0.b, #7
; STREAMING-NEXT: cmpne p0.b, p0/z, z0.b, #0
; STREAMING-NEXT: brkb p0.b, p1/z, p0.b
; STREAMING-NEXT: cntp x0, p0, p0.b
-; STREAMING-NEXT: // kill: def $w0 killed $w0 killed $x0
; STREAMING-NEXT: ret
%res = call i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1> %a, i1 0)
ret i32 %res
@@ -398,20 +378,17 @@ define i32 @ctz_v16i1_poison(<16 x i1> %a) {
; NONSTREAMING-NEXT: cmpne p0.b, p0/z, z0.b, #0
; NONSTREAMING-NEXT: brkb p0.b, p1/z, p0.b
; NONSTREAMING-NEXT: cntp x0, p0, p0.b
-; NONSTREAMING-NEXT: // kill: def $w0 killed $w0 killed $x0
; NONSTREAMING-NEXT: ret
;
; STREAMING-LABEL: ctz_v16i1_poison:
; STREAMING: // %bb.0:
-; STREAMING-NEXT: // kill: def $q0 killed $q0 def $z0
-; STREAMING-NEXT: ptrue p0.b, vl16
; STREAMING-NEXT: lsl z0.b, z0.b, #7
+; STREAMING-NEXT: ptrue p0.b, vl16
; STREAMING-NEXT: ptrue p1.b
; STREAMING-NEXT: asr z0.b, z0.b, #7
; STREAMING-NEXT: cmpne p0.b, p...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/174189
More information about the llvm-branch-commits mailing list