[llvm] caabf2a - [AArch64] Regenerate some test checks. NFC
David Green via llvm-commits
llvm-commits@lists.llvm.org
Wed Sep 8 03:09:39 PDT 2021
Author: David Green
Date: 2021-09-08T11:08:32+01:00
New Revision: caabf2a445bd7d00844cd623db5348423d458dd7
URL: https://github.com/llvm/llvm-project/commit/caabf2a445bd7d00844cd623db5348423d458dd7
DIFF: https://github.com/llvm/llvm-project/commit/caabf2a445bd7d00844cd623db5348423d458dd7.diff
LOG: [AArch64] Regenerate some test checks. NFC
This patch just reruns the update_llc_test_checks script on the AArch64
tests that claim to have been updated by the script, cleaning up the
generated check lines.
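For reference, a minimal sketch of how such checks are regenerated with the
update_llc_test_checks.py utility (the build directory and the test chosen
below are illustrative; any of the modified tests can be passed, and the
script reads each file's RUN lines to determine the CHECK prefixes):

    # Regenerate the CHECK lines of one affected test against a locally built llc.
    llvm/utils/update_llc_test_checks.py \
        --llc-binary=build/bin/llc \
        llvm/test/CodeGen/AArch64/strqu.ll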
Added:
Modified:
llvm/test/CodeGen/AArch64/aarch64-dup-ext-crash.ll
llvm/test/CodeGen/AArch64/f16-imm.ll
llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll
llvm/test/CodeGen/AArch64/load-combine-big-endian.ll
llvm/test/CodeGen/AArch64/machine-outliner-thunk.ll
llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-sve.ll
llvm/test/CodeGen/AArch64/pow.75.ll
llvm/test/CodeGen/AArch64/shift-mod.ll
llvm/test/CodeGen/AArch64/stack-guard-remat-bitcast.ll
llvm/test/CodeGen/AArch64/strqu.ll
llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll
llvm/test/CodeGen/AArch64/sve-select.ll
llvm/test/CodeGen/AArch64/sve-unary-movprfx.ll
llvm/test/CodeGen/AArch64/sve2-intrinsics-int-arith-imm.ll
llvm/test/CodeGen/AArch64/sve2-unary-movprfx.ll
llvm/test/CodeGen/AArch64/unwind-preserved.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AArch64/aarch64-dup-ext-crash.ll b/llvm/test/CodeGen/AArch64/aarch64-dup-ext-crash.ll
index 51f91aa1b940..ec118a50d56d 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-dup-ext-crash.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-dup-ext-crash.ll
@@ -9,6 +9,16 @@ target triple = "aarch64-unknown-linux-gnu"
; here, only that this case no longer causes said crash.
define dso_local i32 @dupext_crashtest(i32 %e) local_unnamed_addr {
; CHECK-LABEL: dupext_crashtest:
+; CHECK: // %bb.0: // %for.body.lr.ph
+; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: dup v0.2s, w8
+; CHECK-NEXT: .LBB0_1: // %vector.body
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ldr d1, [x8]
+; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s
+; CHECK-NEXT: xtn v1.2s, v1.2d
+; CHECK-NEXT: str d1, [x8]
+; CHECK-NEXT: b .LBB0_1
for.body.lr.ph:
%conv314 = zext i32 %e to i64
br label %vector.memcheck
diff --git a/llvm/test/CodeGen/AArch64/f16-imm.ll b/llvm/test/CodeGen/AArch64/f16-imm.ll
index b49262e6e946..f6a6347f78bc 100644
--- a/llvm/test/CodeGen/AArch64/f16-imm.ll
+++ b/llvm/test/CodeGen/AArch64/f16-imm.ll
@@ -11,7 +11,7 @@ define half @Const0() {
;
; CHECK-ZCZ-LABEL: Const0:
; CHECK-ZCZ: // %bb.0: // %entry
-; CHECK-ZCZ-NEXT: movi d0, #0
+; CHECK-ZCZ-NEXT: movi d0, #0000000000000000
; CHECK-ZCZ-NEXT: ret
;
; CHECK-NOFP16-LABEL: Const0:
diff --git a/llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll b/llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll
index 90a5e2453a77..c92ea2fcfe6a 100644
--- a/llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll
+++ b/llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll
@@ -10,8 +10,7 @@ declare half @llvm.fma.f16(half, half, half) #1
define dso_local <4 x half> @t_vfma_lane_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfma_lane_f16:
-; CHECK-NEXT: .cfi_startproc
-; CHECK-NEXT: // %bb.0: // %entry
+; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: fmla v0.4h, v1.4h, v2.h[0]
; CHECK-NEXT: ret
@@ -23,8 +22,7 @@ entry:
define dso_local <8 x half> @t_vfmaq_lane_f16(<8 x half> %a, <8 x half> %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmaq_lane_f16:
-; CHECK-NEXT: .cfi_startproc
-; CHECK-NEXT: // %bb.0: // %entry
+; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: fmla v0.8h, v1.8h, v2.h[0]
; CHECK-NEXT: ret
@@ -36,8 +34,7 @@ entry:
define dso_local <4 x half> @t_vfma_laneq_f16(<4 x half> %a, <4 x half> %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfma_laneq_f16:
-; CHECK-NEXT: .cfi_startproc
-; CHECK-NEXT: // %bb.0: // %entry
+; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmla v0.4h, v1.4h, v2.h[0]
; CHECK-NEXT: ret
entry:
@@ -48,8 +45,7 @@ entry:
define dso_local <8 x half> @t_vfmaq_laneq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmaq_laneq_f16:
-; CHECK-NEXT: .cfi_startproc
-; CHECK-NEXT: // %bb.0: // %entry
+; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmla v0.8h, v1.8h, v2.h[0]
; CHECK-NEXT: ret
entry:
@@ -60,8 +56,7 @@ entry:
define dso_local <4 x half> @t_vfma_n_f16(<4 x half> %a, <4 x half> %b, half %c) {
; CHECK-LABEL: t_vfma_n_f16:
-; CHECK-NEXT: .cfi_startproc
-; CHECK-NEXT: // %bb.0: // %entry
+; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $h2 killed $h2 def $q2
; CHECK-NEXT: fmla v0.4h, v1.4h, v2.h[0]
; CHECK-NEXT: ret
@@ -74,8 +69,7 @@ entry:
define dso_local <8 x half> @t_vfmaq_n_f16(<8 x half> %a, <8 x half> %b, half %c) {
; CHECK-LABEL: t_vfmaq_n_f16:
-; CHECK-NEXT: .cfi_startproc
-; CHECK-NEXT: // %bb.0: // %entry
+; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $h2 killed $h2 def $q2
; CHECK-NEXT: fmla v0.8h, v1.8h, v2.h[0]
; CHECK-NEXT: ret
@@ -88,8 +82,7 @@ entry:
define dso_local half @t_vfmah_lane_f16(half %a, half %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmah_lane_f16:
-; CHECK-NEXT: .cfi_startproc
-; CHECK-NEXT: // %bb.0: // %entry
+; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: fmla h0, h1, v2.h[0]
; CHECK-NEXT: ret
@@ -101,8 +94,7 @@ entry:
define dso_local half @t_vfmah_laneq_f16(half %a, half %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmah_laneq_f16:
-; CHECK-NEXT: .cfi_startproc
-; CHECK-NEXT: // %bb.0: // %entry
+; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmla h0, h1, v2.h[0]
; CHECK-NEXT: ret
entry:
@@ -113,8 +105,7 @@ entry:
define dso_local <4 x half> @t_vfms_lane_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfms_lane_f16:
-; CHECK-NEXT: .cfi_startproc
-; CHECK-NEXT: // %bb.0: // %entry
+; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: fmls v0.4h, v1.4h, v2.h[0]
; CHECK-NEXT: ret
@@ -127,8 +118,7 @@ entry:
define dso_local <8 x half> @t_vfmsq_lane_f16(<8 x half> %a, <8 x half> %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmsq_lane_f16:
-; CHECK-NEXT: .cfi_startproc
-; CHECK-NEXT: // %bb.0: // %entry
+; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: fmls v0.8h, v1.8h, v2.h[0]
; CHECK-NEXT: ret
@@ -141,8 +131,7 @@ entry:
define dso_local <4 x half> @t_vfms_laneq_f16(<4 x half> %a, <4 x half> %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfms_laneq_f16:
-; CHECK-NEXT: .cfi_startproc
-; CHECK-NEXT: // %bb.0: // %entry
+; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmls v0.4h, v1.4h, v2.h[0]
; CHECK-NEXT: ret
entry:
@@ -154,8 +143,7 @@ entry:
define dso_local <8 x half> @t_vfmsq_laneq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmsq_laneq_f16:
-; CHECK-NEXT: .cfi_startproc
-; CHECK-NEXT: // %bb.0: // %entry
+; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmls v0.8h, v1.8h, v2.h[0]
; CHECK-NEXT: ret
entry:
@@ -167,8 +155,7 @@ entry:
define dso_local <4 x half> @t_vfms_n_f16(<4 x half> %a, <4 x half> %b, half %c) {
; CHECK-LABEL: t_vfms_n_f16:
-; CHECK-NEXT: .cfi_startproc
-; CHECK-NEXT: // %bb.0: // %entry
+; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $h2 killed $h2 def $q2
; CHECK-NEXT: fmls v0.4h, v1.4h, v2.h[0]
; CHECK-NEXT: ret
@@ -182,8 +169,7 @@ entry:
define dso_local <8 x half> @t_vfmsq_n_f16(<8 x half> %a, <8 x half> %b, half %c) {
; CHECK-LABEL: t_vfmsq_n_f16:
-; CHECK-NEXT: .cfi_startproc
-; CHECK-NEXT: // %bb.0: // %entry
+; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $h2 killed $h2 def $q2
; CHECK-NEXT: fmls v0.8h, v1.8h, v2.h[0]
; CHECK-NEXT: ret
@@ -197,8 +183,7 @@ entry:
define dso_local half @t_vfmsh_lane_f16(half %a, half %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmsh_lane_f16:
-; CHECK-NEXT: .cfi_startproc
-; CHECK-NEXT: // %bb.0: // %entry
+; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: fmls h0, h1, v2.h[0]
; CHECK-NEXT: ret
@@ -211,8 +196,7 @@ entry:
define dso_local half @t_vfmsh_laneq_f16(half %a, half %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmsh_laneq_f16:
-; CHECK-NEXT: .cfi_startproc
-; CHECK-NEXT: // %bb.0: // %entry
+; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmls h0, h1, v2.h[0]
; CHECK-NEXT: ret
entry:
@@ -224,8 +208,7 @@ entry:
define dso_local <4 x half> @t_vmul_laneq_f16(<4 x half> %a, <8 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmul_laneq_f16:
-; CHECK-NEXT: .cfi_startproc
-; CHECK-NEXT: // %bb.0: // %entry
+; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmul v0.4h, v0.4h, v1.h[0]
; CHECK-NEXT: ret
entry:
@@ -236,8 +219,7 @@ entry:
define dso_local <8 x half> @t_vmulq_laneq_f16(<8 x half> %a, <8 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulq_laneq_f16:
-; CHECK-NEXT: .cfi_startproc
-; CHECK-NEXT: // %bb.0: // %entry
+; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmul v0.8h, v0.8h, v1.h[0]
; CHECK-NEXT: ret
entry:
@@ -248,8 +230,7 @@ entry:
define dso_local half @t_vmulh_lane_f16(half %a, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vmulh_lane_f16:
-; CHECK-NEXT: .cfi_startproc
-; CHECK-NEXT: // %bb.0: // %entry
+; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: fmul h0, h0, v1.h[0]
; CHECK-NEXT: ret
@@ -261,8 +242,7 @@ entry:
define dso_local half @t_vmulh_laneq_f16(half %a, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vmulh_laneq_f16:
-; CHECK-NEXT: .cfi_startproc
-; CHECK-NEXT: // %bb.0: // %entry
+; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmul h0, h0, v1.h[0]
; CHECK-NEXT: ret
entry:
@@ -273,8 +253,7 @@ entry:
define dso_local half @t_vmulx_f16(half %a, half %b) {
; CHECK-LABEL: t_vmulx_f16:
-; CHECK-NEXT: .cfi_startproc
-; CHECK-NEXT: // %bb.0: // %entry
+; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmulx h0, h0, h1
; CHECK-NEXT: ret
entry:
@@ -284,8 +263,7 @@ entry:
define dso_local half @t_vmulxh_lane_f16(half %a, <4 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulxh_lane_f16:
-; CHECK-NEXT: .cfi_startproc
-; CHECK-NEXT: // %bb.0: // %entry
+; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: fmulx h0, h0, v1.h[3]
; CHECK-NEXT: ret
@@ -297,8 +275,7 @@ entry:
define dso_local <4 x half> @t_vmulx_lane_f16(<4 x half> %a, <4 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulx_lane_f16:
-; CHECK-NEXT: .cfi_startproc
-; CHECK-NEXT: // %bb.0: // %entry
+; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: fmulx v0.4h, v0.4h, v1.h[0]
; CHECK-NEXT: ret
@@ -310,8 +287,7 @@ entry:
define dso_local <8 x half> @t_vmulxq_lane_f16(<8 x half> %a, <4 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulxq_lane_f16:
-; CHECK-NEXT: .cfi_startproc
-; CHECK-NEXT: // %bb.0: // %entry
+; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: fmulx v0.8h, v0.8h, v1.h[0]
; CHECK-NEXT: ret
@@ -323,8 +299,7 @@ entry:
define dso_local <4 x half> @t_vmulx_laneq_f16(<4 x half> %a, <8 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulx_laneq_f16:
-; CHECK-NEXT: .cfi_startproc
-; CHECK-NEXT: // %bb.0: // %entry
+; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmulx v0.4h, v0.4h, v1.h[0]
; CHECK-NEXT: ret
entry:
@@ -335,8 +310,7 @@ entry:
define dso_local <8 x half> @t_vmulxq_laneq_f16(<8 x half> %a, <8 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulxq_laneq_f16:
-; CHECK-NEXT: .cfi_startproc
-; CHECK-NEXT: // %bb.0: // %entry
+; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmulx v0.8h, v0.8h, v1.h[0]
; CHECK-NEXT: ret
entry:
@@ -347,8 +321,7 @@ entry:
define dso_local half @t_vmulxh_laneq_f16(half %a, <8 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulxh_laneq_f16:
-; CHECK-NEXT: .cfi_startproc
-; CHECK-NEXT: // %bb.0: // %entry
+; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmulx h0, h0, v1.h[7]
; CHECK-NEXT: ret
entry:
@@ -359,8 +332,7 @@ entry:
define dso_local <4 x half> @t_vmulx_n_f16(<4 x half> %a, half %c) {
; CHECK-LABEL: t_vmulx_n_f16:
-; CHECK-NEXT: .cfi_startproc
-; CHECK-NEXT: // %bb.0: // %entry
+; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $h1 killed $h1 def $q1
; CHECK-NEXT: dup v1.4h, v1.h[0]
; CHECK-NEXT: fmulx v0.4h, v0.4h, v1.4h
@@ -374,8 +346,7 @@ entry:
define dso_local <8 x half> @t_vmulxq_n_f16(<8 x half> %a, half %c) {
; CHECK-LABEL: t_vmulxq_n_f16:
-; CHECK-NEXT: .cfi_startproc
-; CHECK-NEXT: // %bb.0: // %entry
+; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $h1 killed $h1 def $q1
; CHECK-NEXT: dup v1.8h, v1.h[0]
; CHECK-NEXT: fmulx v0.8h, v0.8h, v1.8h
@@ -389,8 +360,7 @@ entry:
define dso_local half @t_vfmah_lane3_f16(half %a, half %b, <4 x half> %c) {
; CHECK-LABEL: t_vfmah_lane3_f16:
-; CHECK-NEXT: .cfi_startproc
-; CHECK-NEXT: // %bb.0: // %entry
+; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: fmla h0, h1, v2.h[3]
; CHECK-NEXT: ret
@@ -402,8 +372,7 @@ entry:
define dso_local half @t_vfmah_laneq7_f16(half %a, half %b, <8 x half> %c) {
; CHECK-LABEL: t_vfmah_laneq7_f16:
-; CHECK-NEXT: .cfi_startproc
-; CHECK-NEXT: // %bb.0: // %entry
+; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmla h0, h1, v2.h[7]
; CHECK-NEXT: ret
entry:
@@ -414,8 +383,7 @@ entry:
define dso_local half @t_vfmsh_lane3_f16(half %a, half %b, <4 x half> %c) {
; CHECK-LABEL: t_vfmsh_lane3_f16:
-; CHECK-NEXT: .cfi_startproc
-; CHECK-NEXT: // %bb.0: // %entry
+; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: fmls h0, h1, v2.h[3]
; CHECK-NEXT: ret
@@ -428,8 +396,7 @@ entry:
define dso_local half @t_vfmsh_laneq7_f16(half %a, half %b, <8 x half> %c) {
; CHECK-LABEL: t_vfmsh_laneq7_f16:
-; CHECK-NEXT: .cfi_startproc
-; CHECK-NEXT: // %bb.0: // %entry
+; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmls h0, h1, v2.h[7]
; CHECK-NEXT: ret
entry:
@@ -441,8 +408,7 @@ entry:
define dso_local half @t_fadd_vfmah_f16(half %a, half %b, <4 x half> %c, <4 x half> %d) {
; CHECK-LABEL: t_fadd_vfmah_f16:
-; CHECK-NEXT: .cfi_startproc
-; CHECK-NEXT: // %bb.0: // %entry
+; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fadd v2.4h, v2.4h, v3.4h
; CHECK-NEXT: fmla h0, h1, v2.h[3]
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/load-combine-big-endian.ll b/llvm/test/CodeGen/AArch64/load-combine-big-endian.ll
index 19de95198c19..14a0162d5269 100644
--- a/llvm/test/CodeGen/AArch64/load-combine-big-endian.ll
+++ b/llvm/test/CodeGen/AArch64/load-combine-big-endian.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=arm64eb-unknown | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64_be-unknown | FileCheck %s
; i8* p; // p is 4 byte aligned
; ((i32) p[0] << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
@@ -207,7 +207,6 @@ define i32 @load_i32_by_i8_nonzero_offset(i32* %arg) {
; CHECK-NEXT: ldur w8, [x0, #1]
; CHECK-NEXT: rev w0, w8
; CHECK-NEXT: ret
-
%tmp = bitcast i32* %arg to i8*
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
%tmp2 = load i8, i8* %tmp1, align 4
@@ -238,7 +237,6 @@ define i32 @load_i32_by_i8_neg_offset(i32* %arg) {
; CHECK-NEXT: ldur w8, [x0, #-4]
; CHECK-NEXT: rev w0, w8
; CHECK-NEXT: ret
-
%tmp = bitcast i32* %arg to i8*
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -4
%tmp2 = load i8, i8* %tmp1, align 4
@@ -268,7 +266,6 @@ define i32 @load_i32_by_i8_nonzero_offset_bswap(i32* %arg) {
; CHECK: // %bb.0:
; CHECK-NEXT: ldur w0, [x0, #1]
; CHECK-NEXT: ret
-
%tmp = bitcast i32* %arg to i8*
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 4
%tmp2 = load i8, i8* %tmp1, align 1
@@ -298,7 +295,6 @@ define i32 @load_i32_by_i8_neg_offset_bswap(i32* %arg) {
; CHECK: // %bb.0:
; CHECK-NEXT: ldur w0, [x0, #-4]
; CHECK-NEXT: ret
-
%tmp = bitcast i32* %arg to i8*
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -1
%tmp2 = load i8, i8* %tmp1, align 1
@@ -449,7 +445,6 @@ define i32 @zext_load_i32_by_i8(i32* %arg) {
; CHECK-NEXT: lsl w8, w8, #16
; CHECK-NEXT: rev w0, w8
; CHECK-NEXT: ret
-
%tmp = bitcast i32* %arg to i8*
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
%tmp2 = load i8, i8* %tmp1, align 2
@@ -472,7 +467,6 @@ define i32 @zext_load_i32_by_i8_shl_8(i32* %arg) {
; CHECK-NEXT: lsl w0, w8, #8
; CHECK-NEXT: bfi w0, w9, #16, #8
; CHECK-NEXT: ret
-
%tmp = bitcast i32* %arg to i8*
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
%tmp2 = load i8, i8* %tmp1, align 2
@@ -496,7 +490,6 @@ define i32 @zext_load_i32_by_i8_shl_16(i32* %arg) {
; CHECK-NEXT: lsl w0, w8, #16
; CHECK-NEXT: bfi w0, w9, #24, #8
; CHECK-NEXT: ret
-
%tmp = bitcast i32* %arg to i8*
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
%tmp2 = load i8, i8* %tmp1, align 2
@@ -516,7 +509,6 @@ define i32 @zext_load_i32_by_i8_bswap(i32* %arg) {
; CHECK: // %bb.0:
; CHECK-NEXT: ldrh w0, [x0]
; CHECK-NEXT: ret
-
%tmp = bitcast i32* %arg to i8*
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
%tmp2 = load i8, i8* %tmp1, align 1
@@ -539,7 +531,6 @@ define i32 @zext_load_i32_by_i8_bswap_shl_8(i32* %arg) {
; CHECK-NEXT: lsl w0, w8, #8
; CHECK-NEXT: bfi w0, w9, #16, #8
; CHECK-NEXT: ret
-
%tmp = bitcast i32* %arg to i8*
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
%tmp2 = load i8, i8* %tmp1, align 1
@@ -563,7 +554,6 @@ define i32 @zext_load_i32_by_i8_bswap_shl_16(i32* %arg) {
; CHECK-NEXT: lsl w0, w8, #16
; CHECK-NEXT: bfi w0, w9, #24, #8
; CHECK-NEXT: ret
-
%tmp = bitcast i32* %arg to i8*
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
%tmp2 = load i8, i8* %tmp1, align 1
@@ -590,7 +580,6 @@ define i16 @load_i16_from_nonzero_offset(i8* %p) {
; CHECK-NEXT: ldrb w0, [x0, #2]
; CHECK-NEXT: bfi w0, w8, #8, #24
; CHECK-NEXT: ret
-
%p1.i16 = bitcast i8* %p to i16*
%p2.i8 = getelementptr i8, i8* %p, i64 2
%v1 = load i16, i16* %p1.i16
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-thunk.ll b/llvm/test/CodeGen/AArch64/machine-outliner-thunk.ll
index eae121ff17f7..425d616d8866 100644
--- a/llvm/test/CodeGen/AArch64/machine-outliner-thunk.ll
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-thunk.ll
@@ -12,7 +12,7 @@ define i32 @a() {
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: bl [[OUTLINED_DIRECT:OUTLINED_FUNCTION_[0-9]+]]
+; CHECK-NEXT: bl OUTLINED_FUNCTION_1
; CHECK-NEXT: add w0, w0, #8
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
@@ -28,7 +28,7 @@ define i32 @b() {
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: bl [[OUTLINED_DIRECT]]
+; CHECK-NEXT: bl OUTLINED_FUNCTION_1
; CHECK-NEXT: add w0, w0, #88
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
@@ -44,7 +44,7 @@ define hidden i32 @c(i32 (i32, i32, i32, i32)* %fptr) {
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: bl [[OUTLINED_INDIRECT:OUTLINED_FUNCTION_[0-9]+]]
+; CHECK-NEXT: bl OUTLINED_FUNCTION_0
; CHECK-NEXT: add w0, w0, #8
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
@@ -60,7 +60,7 @@ define hidden i32 @d(i32 (i32, i32, i32, i32)* %fptr) {
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: bl [[OUTLINED_INDIRECT]]
+; CHECK-NEXT: bl OUTLINED_FUNCTION_0
; CHECK-NEXT: add w0, w0, #88
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
@@ -70,8 +70,8 @@ entry:
ret i32 %add
}
-; CHECK: [[OUTLINED_INDIRECT]]:
-; CHECK-SAME: // @[[OUTLINED_INDIRECT]] Thunk
+; CHECK: OUTLINED_FUNCTION_0:
+; CHECK-SAME: // @OUTLINED_FUNCTION_0 Thunk
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, x0
; CHECK-NEXT: mov w0, #1
@@ -80,8 +80,8 @@ entry:
; CHECK-NEXT: mov w3, #4
; CHECK-NEXT: br x8
-; CHECK: [[OUTLINED_DIRECT]]:
-; CHECK-SAME: // @[[OUTLINED_DIRECT]] Thunk
+; CHECK: OUTLINED_FUNCTION_1:
+; CHECK-SAME: // @OUTLINED_FUNCTION_1 Thunk
; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, #1
; CHECK-NEXT: mov w1, #2
diff --git a/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-sve.ll b/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-sve.ll
index 2bcb4b849923..372ae7ad74bc 100644
--- a/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-sve.ll
+++ b/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-sve.ll
@@ -50,22 +50,24 @@ define <vscale x 16 x i1> @reverse_nxv16i1(<vscale x 16 x i1> %a) #0 {
; Verify splitvec type legalisation works as expected.
define <vscale x 32 x i1> @reverse_nxv32i1(<vscale x 32 x i1> %a) #0 {
-; CHECK-LABEL: reverse_nxv32i1:
+; CHECK-SELDAG-LABEL: reverse_nxv32i1:
; CHECK-SELDAG: // %bb.0:
; CHECK-SELDAG-NEXT: rev p2.b, p1.b
; CHECK-SELDAG-NEXT: rev p1.b, p0.b
; CHECK-SELDAG-NEXT: mov p0.b, p2.b
; CHECK-SELDAG-NEXT: ret
+;
+; CHECK-FASTISEL-LABEL: reverse_nxv32i1:
; CHECK-FASTISEL: // %bb.0:
-; CHECK-FASTISEL-NEXT: str x29, [sp, #-16]
-; CHECK-FASTISEL-NEXT: addvl sp, sp, #-1
-; CHECK-FASTISEL-NEXT: str p1, [sp, #7, mul vl]
-; CHECK-FASTISEL-NEXT: mov p1.b, p0.b
-; CHECK-FASTISEL-NEXT: ldr p0, [sp, #7, mul vl]
-; CHECK-FASTISEL-NEXT: rev p0.b, p0.b
-; CHECK-FASTISEL-NEXT: rev p1.b, p1.b
-; CHECK-FASTISEL-NEXT: addvl sp, sp, #1
-; CHECK-FASTISEL-NEXT: ldr x29, [sp], #16
+; CHECK-FASTISEL-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-FASTISEL-NEXT: addvl sp, sp, #-1
+; CHECK-FASTISEL-NEXT: str p1, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-FASTISEL-NEXT: mov p1.b, p0.b
+; CHECK-FASTISEL-NEXT: ldr p0, [sp, #7, mul vl] // 2-byte Folded Reload
+; CHECK-FASTISEL-NEXT: rev p0.b, p0.b
+; CHECK-FASTISEL-NEXT: rev p1.b, p1.b
+; CHECK-FASTISEL-NEXT: addvl sp, sp, #1
+; CHECK-FASTISEL-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-FASTISEL-NEXT: ret
%res = call <vscale x 32 x i1> @llvm.experimental.vector.reverse.nxv32i1(<vscale x 32 x i1> %a)
@@ -158,22 +160,24 @@ define <vscale x 2 x i8> @reverse_nxv2i8(<vscale x 2 x i8> %a) #0 {
; Verify splitvec type legalisation works as expected.
define <vscale x 8 x i32> @reverse_nxv8i32(<vscale x 8 x i32> %a) #0 {
-; CHECK-LABEL: reverse_nxv8i32:
+; CHECK-SELDAG-LABEL: reverse_nxv8i32:
; CHECK-SELDAG: // %bb.0:
; CHECK-SELDAG-NEXT: rev z2.s, z1.s
; CHECK-SELDAG-NEXT: rev z1.s, z0.s
; CHECK-SELDAG-NEXT: mov z0.d, z2.d
; CHECK-SELDAG-NEXT: ret
+;
+; CHECK-FASTISEL-LABEL: reverse_nxv8i32:
; CHECK-FASTISEL: // %bb.0:
-; CHECK-FASTISEL-NEXT: str x29, [sp, #-16]
-; CHECK-FASTISEL-NEXT: addvl sp, sp, #-1
-; CHECK-FASTISEL-NEXT: str z1, [sp]
-; CHECK-FASTISEL-NEXT: mov z1.d, z0.d
-; CHECK-FASTISEL-NEXT: ldr z0, [sp]
-; CHECK-FASTISEL-NEXT: rev z0.s, z0.s
-; CHECK-FASTISEL-NEXT: rev z1.s, z1.s
-; CHECK-FASTISEL-NEXT: addvl sp, sp, #1
-; CHECK-FASTISEL-NEXT: ldr x29, [sp], #16
+; CHECK-FASTISEL-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-FASTISEL-NEXT: addvl sp, sp, #-1
+; CHECK-FASTISEL-NEXT: str z1, [sp] // 16-byte Folded Spill
+; CHECK-FASTISEL-NEXT: mov z1.d, z0.d
+; CHECK-FASTISEL-NEXT: ldr z0, [sp] // 16-byte Folded Reload
+; CHECK-FASTISEL-NEXT: rev z0.s, z0.s
+; CHECK-FASTISEL-NEXT: rev z1.s, z1.s
+; CHECK-FASTISEL-NEXT: addvl sp, sp, #1
+; CHECK-FASTISEL-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-FASTISEL-NEXT: ret
%res = call <vscale x 8 x i32> @llvm.experimental.vector.reverse.nxv8i32(<vscale x 8 x i32> %a)
@@ -182,7 +186,7 @@ define <vscale x 8 x i32> @reverse_nxv8i32(<vscale x 8 x i32> %a) #0 {
; Verify splitvec type legalisation works as expected.
define <vscale x 16 x float> @reverse_nxv16f32(<vscale x 16 x float> %a) #0 {
-; CHECK-LABEL: reverse_nxv16f32:
+; CHECK-SELDAG-LABEL: reverse_nxv16f32:
; CHECK-SELDAG: // %bb.0:
; CHECK-SELDAG-NEXT: rev z5.s, z3.s
; CHECK-SELDAG-NEXT: rev z4.s, z2.s
@@ -191,21 +195,23 @@ define <vscale x 16 x float> @reverse_nxv16f32(<vscale x 16 x float> %a) #0 {
; CHECK-SELDAG-NEXT: mov z0.d, z5.d
; CHECK-SELDAG-NEXT: mov z1.d, z4.d
; CHECK-SELDAG-NEXT: ret
+;
+; CHECK-FASTISEL-LABEL: reverse_nxv16f32:
; CHECK-FASTISEL: // %bb.0:
-; CHECK-FASTISEL-NEXT: str x29, [sp, #-16]
-; CHECK-FASTISEL-NEXT: addvl sp, sp, #-2
-; CHECK-FASTISEL-NEXT: str z3, [sp, #1, mul vl]
-; CHECK-FASTISEL-NEXT: str z2, [sp]
-; CHECK-FASTISEL-NEXT: mov z2.d, z1.d
-; CHECK-FASTISEL-NEXT: ldr z1, [sp]
-; CHECK-FASTISEL-NEXT: mov z3.d, z0.d
-; CHECK-FASTISEL-NEXT: ldr z0, [sp, #1, mul vl]
-; CHECK-FASTISEL-NEXT: rev z0.s, z0.s
-; CHECK-FASTISEL-NEXT: rev z1.s, z1.s
-; CHECK-FASTISEL-NEXT: rev z2.s, z2.s
-; CHECK-FASTISEL-NEXT: rev z3.s, z3.s
-; CHECK-FASTISEL-NEXT: addvl sp, sp, #2
-; CHECK-FASTISEL-NEXT: ldr x29, [sp], #16
+; CHECK-FASTISEL-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-FASTISEL-NEXT: addvl sp, sp, #-2
+; CHECK-FASTISEL-NEXT: str z3, [sp, #1, mul vl] // 16-byte Folded Spill
+; CHECK-FASTISEL-NEXT: str z2, [sp] // 16-byte Folded Spill
+; CHECK-FASTISEL-NEXT: mov z2.d, z1.d
+; CHECK-FASTISEL-NEXT: ldr z1, [sp] // 16-byte Folded Reload
+; CHECK-FASTISEL-NEXT: mov z3.d, z0.d
+; CHECK-FASTISEL-NEXT: ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload
+; CHECK-FASTISEL-NEXT: rev z0.s, z0.s
+; CHECK-FASTISEL-NEXT: rev z1.s, z1.s
+; CHECK-FASTISEL-NEXT: rev z2.s, z2.s
+; CHECK-FASTISEL-NEXT: rev z3.s, z3.s
+; CHECK-FASTISEL-NEXT: addvl sp, sp, #2
+; CHECK-FASTISEL-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-FASTISEL-NEXT: ret
%res = call <vscale x 16 x float> @llvm.experimental.vector.reverse.nxv16f32(<vscale x 16 x float> %a)
diff --git a/llvm/test/CodeGen/AArch64/pow.75.ll b/llvm/test/CodeGen/AArch64/pow.75.ll
index 4ceb7d9e23e1..4d760e13468b 100644
--- a/llvm/test/CodeGen/AArch64/pow.75.ll
+++ b/llvm/test/CodeGen/AArch64/pow.75.ll
@@ -1,4 +1,3 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-- -debug 2>&1 | FileCheck %s
; REQUIRES: asserts
diff --git a/llvm/test/CodeGen/AArch64/shift-mod.ll b/llvm/test/CodeGen/AArch64/shift-mod.ll
index eee65fa23d4e..39aa7fa89cf8 100644
--- a/llvm/test/CodeGen/AArch64/shift-mod.ll
+++ b/llvm/test/CodeGen/AArch64/shift-mod.ll
@@ -18,8 +18,8 @@ define i32 @test1(i32 %x, i64 %y) {
define i64 @test2(i32 %x, i64 %y) {
; CHECK-LABEL: test2:
; CHECK: // %bb.0:
-; CHECK-NEXT: neg w[[REG:[0-9]+]], w0
-; CHECK-NEXT: asr x0, x1, x[[REG]]
+; CHECK-NEXT: neg w8, w0
+; CHECK-NEXT: asr x0, x1, x8
; CHECK-NEXT: ret
%sub9 = sub nsw i32 64, %x
%sh_prom12.i = zext i32 %sub9 to i64
diff --git a/llvm/test/CodeGen/AArch64/stack-guard-remat-bitcast.ll b/llvm/test/CodeGen/AArch64/stack-guard-remat-bitcast.ll
index 527f6fa30960..f0636b6e53f6 100644
--- a/llvm/test/CodeGen/AArch64/stack-guard-remat-bitcast.ll
+++ b/llvm/test/CodeGen/AArch64/stack-guard-remat-bitcast.ll
@@ -5,6 +5,7 @@
; PR20558
+; Load the stack guard for the second time, just in case the previous value gets spilled.
define i32 @test_stack_guard_remat2() ssp {
; CHECK-LABEL: test_stack_guard_remat2:
; CHECK: ; %bb.0: ; %entry
@@ -17,7 +18,6 @@ define i32 @test_stack_guard_remat2() ssp {
; CHECK-NEXT: Lloh0:
; CHECK-NEXT: adrp x8, ___stack_chk_guard@GOTPAGE
; CHECK-NEXT: Lloh1:
-; Load the stack guard for the second time, just in case the previous value gets spilled.
; CHECK-NEXT: adrp x9, ___stack_chk_guard@GOTPAGE
; CHECK-NEXT: Lloh2:
; CHECK-NEXT: ldr x8, [x8, ___stack_chk_guard@GOTPAGEOFF]
diff --git a/llvm/test/CodeGen/AArch64/strqu.ll b/llvm/test/CodeGen/AArch64/strqu.ll
index f34624973272..ea4d5906bd8a 100644
--- a/llvm/test/CodeGen/AArch64/strqu.ll
+++ b/llvm/test/CodeGen/AArch64/strqu.ll
@@ -1,39 +1,23 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-linux-gnu | FileCheck --check-prefixes=CHECK,NOSPLIT %s
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-linux-gnu | FileCheck --check-prefixes=CHECK,NOSPLIT %s
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-linux-gnu -mcpu=exynos-m3 | FileCheck --check-prefixes=CHECK,NOSPLIT %s
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-linux-gnu -mcpu=exynos-m3 | FileCheck --check-prefixes=CHECK,NOSPLIT %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-linux-gnu | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-linux-gnu | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-linux-gnu -mcpu=exynos-m3 | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-linux-gnu -mcpu=exynos-m3 | FileCheck %s
define void @test_split_f(<4 x float> %val, <4 x float>* %addr) {
-; NOSPLIT-LABEL: test_split_f:
-; NOSPLIT: // %bb.0:
-; NOSPLIT-NEXT: str q0, [x0]
-; NOSPLIT-NEXT: ret
-;
-; SPLIT-LABEL: test_split_f:
-; SPLIT: // %bb.0:
-; SPLIT-NEXT: rev64 v0.4s, v0.4s
-; SPLIT-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; SPLIT-NEXT: st1 { v0.2s }, [x0]
-; SPLIT-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; SPLIT-NEXT: add x8, x0, #8 // =8
-; SPLIT-NEXT: st1 { v0.2s }, [x8]
-; SPLIT-NEXT: ret
+; CHECK-LABEL: test_split_f:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str q0, [x0]
+; CHECK-NEXT: ret
store <4 x float> %val, <4 x float>* %addr, align 8
ret void
}
define void @test_split_d(<2 x double> %val, <2 x double>* %addr) {
-; NOSPLIT-LABEL: test_split_d:
-; NOSPLIT: // %bb.0:
-; NOSPLIT-NEXT: str q0, [x0]
-; NOSPLIT-NEXT: ret
-;
-; SPLIT-LABEL: test_split_d:
-; SPLIT: // %bb.0:
-; SPLIT-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; SPLIT-NEXT: st1 { v0.2d }, [x0]
-; SPLIT-NEXT: ret
+; CHECK-LABEL: test_split_d:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str q0, [x0]
+; CHECK-NEXT: ret
store <2 x double> %val, <2 x double>* %addr, align 8
ret void
}
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll
index 3ae7de7637da..1a071d78ff04 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll
@@ -6,7 +6,7 @@
define <vscale x 16 x i8> @add_i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: add_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: add z0.b, z0.b, #127
+; CHECK-NEXT: add z0.b, z0.b, #127 // =0x7f
; CHECK-NEXT: ret
%pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%elt = insertelement <vscale x 16 x i8> undef, i8 127, i32 0
@@ -20,7 +20,7 @@ define <vscale x 16 x i8> @add_i8(<vscale x 16 x i8> %a) {
define <vscale x 8 x i16> @add_i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: add_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: add z0.h, z0.h, #127
+; CHECK-NEXT: add z0.h, z0.h, #127 // =0x7f
; CHECK-NEXT: ret
%pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%elt = insertelement <vscale x 8 x i16> undef, i16 127, i32 0
@@ -50,7 +50,7 @@ define <vscale x 8 x i16> @add_i16_out_of_range(<vscale x 8 x i16> %a) {
define <vscale x 4 x i32> @add_i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: add_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add z0.s, z0.s, #127
+; CHECK-NEXT: add z0.s, z0.s, #127 // =0x7f
; CHECK-NEXT: ret
%pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%elt = insertelement <vscale x 4 x i32> undef, i32 127, i32 0
@@ -80,7 +80,7 @@ define <vscale x 4 x i32> @add_i32_out_of_range(<vscale x 4 x i32> %a) {
define <vscale x 2 x i64> @add_i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: add_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: add z0.d, z0.d, #127
+; CHECK-NEXT: add z0.d, z0.d, #127 // =0x7f
; CHECK-NEXT: ret
%pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%elt = insertelement <vscale x 2 x i64> undef, i64 127, i64 0
@@ -112,7 +112,7 @@ define <vscale x 2 x i64> @add_i64_out_of_range(<vscale x 2 x i64> %a) {
define <vscale x 16 x i8> @sub_i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: sub_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub z0.b, z0.b, #127
+; CHECK-NEXT: sub z0.b, z0.b, #127 // =0x7f
; CHECK-NEXT: ret
%pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%elt = insertelement <vscale x 16 x i8> undef, i8 127, i32 0
@@ -126,7 +126,7 @@ define <vscale x 16 x i8> @sub_i8(<vscale x 16 x i8> %a) {
define <vscale x 8 x i16> @sub_i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: sub_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub z0.h, z0.h, #127
+; CHECK-NEXT: sub z0.h, z0.h, #127 // =0x7f
; CHECK-NEXT: ret
%pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%elt = insertelement <vscale x 8 x i16> undef, i16 127, i32 0
@@ -156,7 +156,7 @@ define <vscale x 8 x i16> @sub_i16_out_of_range(<vscale x 8 x i16> %a) {
define <vscale x 4 x i32> @sub_i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sub_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub z0.s, z0.s, #127
+; CHECK-NEXT: sub z0.s, z0.s, #127 // =0x7f
; CHECK-NEXT: ret
%pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%elt = insertelement <vscale x 4 x i32> undef, i32 127, i32 0
@@ -186,7 +186,7 @@ define <vscale x 4 x i32> @sub_i32_out_of_range(<vscale x 4 x i32> %a) {
define <vscale x 2 x i64> @sub_i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: sub_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub z0.d, z0.d, #127
+; CHECK-NEXT: sub z0.d, z0.d, #127 // =0x7f
; CHECK-NEXT: ret
%pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%elt = insertelement <vscale x 2 x i64> undef, i64 127, i64 0
@@ -216,8 +216,9 @@ define <vscale x 2 x i64> @sub_i64_out_of_range(<vscale x 2 x i64> %a) {
; As sub_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @sub_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: sub_i32_ptrue_all_b:
-; CHECK: sub z0.s, z0.s, #1
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub z0.s, z0.s, #1 // =0x1
+; CHECK-NEXT: ret
%pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
%b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
@@ -230,8 +231,9 @@ define <vscale x 4 x i32> @sub_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; As sub_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @sub_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: sub_i32_ptrue_all_h:
-; CHECK: sub z0.s, z0.s, #1
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub z0.s, z0.s, #1 // =0x1
+; CHECK-NEXT: ret
%pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
%pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
@@ -246,10 +248,11 @@ define <vscale x 4 x i32> @sub_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; thus inactive lanes are important and the immediate form cannot be used.
define <vscale x 4 x i32> @sub_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: sub_i32_ptrue_all_d:
-; CHECK-DAG: ptrue [[PG:p[0-9]+]].d
-; CHECK-DAG: mov [[DUP:z[0-9]+]].s, #1
-; CHECK-DAG: sub z0.s, [[PG]]/m, z0.s, [[DUP]].s
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov z1.s, #1 // =0x1
+; CHECK-NEXT: sub z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
%pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
%pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
@@ -372,8 +375,9 @@ define <vscale x 2 x i64> @smax_i64_out_of_range(<vscale x 2 x i64> %a) {
; As smax_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @smax_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: smax_i32_ptrue_all_b:
-; CHECK: smax z0.s, z0.s, #1
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: smax z0.s, z0.s, #1
+; CHECK-NEXT: ret
%pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
%b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
@@ -386,8 +390,9 @@ define <vscale x 4 x i32> @smax_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; As smax_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @smax_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: smax_i32_ptrue_all_h:
-; CHECK: smax z0.s, z0.s, #1
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: smax z0.s, z0.s, #1
+; CHECK-NEXT: ret
%pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
%pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
@@ -402,10 +407,11 @@ define <vscale x 4 x i32> @smax_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; thus inactive lanes are important and the immediate form cannot be used.
define <vscale x 4 x i32> @smax_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: smax_i32_ptrue_all_d:
-; CHECK-DAG: ptrue [[PG:p[0-9]+]].d
-; CHECK-DAG: mov [[DUP:z[0-9]+]].s, #1
-; CHECK-DAG: smax z0.s, [[PG]]/m, z0.s, [[DUP]].s
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov z1.s, #1 // =0x1
+; CHECK-NEXT: smax z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
%pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
%pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
@@ -528,8 +534,9 @@ define <vscale x 2 x i64> @smin_i64_out_of_range(<vscale x 2 x i64> %a) {
; As smin_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @smin_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: smin_i32_ptrue_all_b:
-; CHECK: smin z0.s, z0.s, #1
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: smin z0.s, z0.s, #1
+; CHECK-NEXT: ret
%pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
%b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
@@ -542,8 +549,9 @@ define <vscale x 4 x i32> @smin_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; As smin_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @smin_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: smin_i32_ptrue_all_h:
-; CHECK: smin z0.s, z0.s, #1
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: smin z0.s, z0.s, #1
+; CHECK-NEXT: ret
%pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
%pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
@@ -558,10 +566,11 @@ define <vscale x 4 x i32> @smin_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; thus inactive lanes are important and the immediate form cannot be used.
define <vscale x 4 x i32> @smin_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: smin_i32_ptrue_all_d:
-; CHECK-DAG: ptrue [[PG:p[0-9]+]].d
-; CHECK-DAG: mov [[DUP:z[0-9]+]].s, #1
-; CHECK-DAG: smin z0.s, [[PG]]/m, z0.s, [[DUP]].s
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov z1.s, #1 // =0x1
+; CHECK-NEXT: smin z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
%pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
%pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
@@ -684,8 +693,9 @@ define <vscale x 2 x i64> @umax_i64_out_of_range(<vscale x 2 x i64> %a) {
; As umax_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @umax_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: umax_i32_ptrue_all_b:
-; CHECK: umax z0.s, z0.s, #1
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: umax z0.s, z0.s, #1
+; CHECK-NEXT: ret
%pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
%b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
@@ -698,8 +708,9 @@ define <vscale x 4 x i32> @umax_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; As umax_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @umax_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: umax_i32_ptrue_all_h:
-; CHECK: umax z0.s, z0.s, #1
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: umax z0.s, z0.s, #1
+; CHECK-NEXT: ret
%pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
%pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
@@ -714,10 +725,11 @@ define <vscale x 4 x i32> @umax_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; thus inactive lanes are important and the immediate form cannot be used.
define <vscale x 4 x i32> @umax_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: umax_i32_ptrue_all_d:
-; CHECK-DAG: ptrue [[PG:p[0-9]+]].d
-; CHECK-DAG: mov [[DUP:z[0-9]+]].s, #1
-; CHECK-DAG: umax z0.s, [[PG]]/m, z0.s, [[DUP]].s
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov z1.s, #1 // =0x1
+; CHECK-NEXT: umax z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
%pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
%pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
@@ -840,8 +852,9 @@ define <vscale x 2 x i64> @umin_i64_out_of_range(<vscale x 2 x i64> %a) {
; As umin_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @umin_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: umin_i32_ptrue_all_b:
-; CHECK: umin z0.s, z0.s, #1
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: umin z0.s, z0.s, #1
+; CHECK-NEXT: ret
%pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
%b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
@@ -854,8 +867,9 @@ define <vscale x 4 x i32> @umin_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; As umin_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @umin_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: umin_i32_ptrue_all_h:
-; CHECK: umin z0.s, z0.s, #1
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: umin z0.s, z0.s, #1
+; CHECK-NEXT: ret
%pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
%pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
@@ -870,10 +884,11 @@ define <vscale x 4 x i32> @umin_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; thus inactive lanes are important and the immediate form cannot be used.
define <vscale x 4 x i32> @umin_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: umin_i32_ptrue_all_d:
-; CHECK-DAG: ptrue [[PG:p[0-9]+]].d
-; CHECK-DAG: mov [[DUP:z[0-9]+]].s, #1
-; CHECK-DAG: umin z0.s, [[PG]]/m, z0.s, [[DUP]].s
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov z1.s, #1 // =0x1
+; CHECK-NEXT: umin z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
%pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
%pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
@@ -1769,8 +1784,9 @@ define <vscale x 2 x i64> @lsr_i64_too_small(<vscale x 2 x i1> %pg, <vscale x 2
; As lsr_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @lsr_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: lsr_i32_ptrue_all_b:
-; CHECK: lsr z0.s, z0.s, #1
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: lsr z0.s, z0.s, #1
+; CHECK-NEXT: ret
%pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
%b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
@@ -1783,8 +1799,9 @@ define <vscale x 4 x i32> @lsr_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; As lsr_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @lsr_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: lsr_i32_ptrue_all_h:
-; CHECK: lsr z0.s, z0.s, #1
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: lsr z0.s, z0.s, #1
+; CHECK-NEXT: ret
%pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
%pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
@@ -1799,9 +1816,10 @@ define <vscale x 4 x i32> @lsr_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; thus inactive lanes are important and the immediate form cannot be used.
define <vscale x 4 x i32> @lsr_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: lsr_i32_ptrue_all_d:
-; CHECK-DAG: ptrue [[PG:p[0-9]+]].d
-; CHECK-DAG: lsr z0.s, [[PG]]/m, z0.s, #1
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: lsr z0.s, p0/m, z0.s, #1
+; CHECK-NEXT: ret
%pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
%pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
@@ -1819,8 +1837,9 @@ define <vscale x 4 x i32> @lsr_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
; As mul_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @mul_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: mul_i32_ptrue_all_b:
-; CHECK: mul z0.s, z0.s, #1
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: mul z0.s, z0.s, #1
+; CHECK-NEXT: ret
%pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
%b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
@@ -1833,8 +1852,9 @@ define <vscale x 4 x i32> @mul_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; As mul_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @mul_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: mul_i32_ptrue_all_h:
-; CHECK: mul z0.s, z0.s, #1
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: mul z0.s, z0.s, #1
+; CHECK-NEXT: ret
%pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
%pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
@@ -1849,10 +1869,11 @@ define <vscale x 4 x i32> @mul_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; thus inactive lanes are important and the immediate form cannot be used.
define <vscale x 4 x i32> @mul_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: mul_i32_ptrue_all_d:
-; CHECK-DAG: ptrue [[PG:p[0-9]+]].d
-; CHECK-DAG: mov [[DUP:z[0-9]+]].s, #1
-; CHECK-DAG: mul z0.s, [[PG]]/m, z0.s, [[DUP]].s
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov z1.s, #1 // =0x1
+; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
%pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
%pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
diff --git a/llvm/test/CodeGen/AArch64/sve-select.ll b/llvm/test/CodeGen/AArch64/sve-select.ll
index 55f5f33d4d48..9f00eb6790ff 100644
--- a/llvm/test/CodeGen/AArch64/sve-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-select.ll
@@ -135,41 +135,37 @@ define <vscale x 2 x i1> @select_nxv2i1(i1 %cond, <vscale x 2 x i1> %a, <vscal
; Integer vector select
-define <vscale x 16 x i8> @sel_nxv16i8(<vscale x 16 x i1> %p,
- <vscale x 16 x i8> %dst,
- <vscale x 16 x i8> %a) {
+define <vscale x 16 x i8> @sel_nxv16i8(<vscale x 16 x i1> %p, <vscale x 16 x i8> %dst, <vscale x 16 x i8> %a) {
; CHECK-LABEL: sel_nxv16i8:
-; CHECK: mov z0.b, p0/m, z1.b
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.b, p0/m, z1.b
; CHECK-NEXT: ret
%sel = select <vscale x 16 x i1> %p, <vscale x 16 x i8> %a, <vscale x 16 x i8> %dst
ret <vscale x 16 x i8> %sel
}
-define <vscale x 8 x i16> @sel_nxv8i16(<vscale x 8 x i1> %p,
- <vscale x 8 x i16> %dst,
- <vscale x 8 x i16> %a) {
+define <vscale x 8 x i16> @sel_nxv8i16(<vscale x 8 x i1> %p, <vscale x 8 x i16> %dst, <vscale x 8 x i16> %a) {
; CHECK-LABEL: sel_nxv8i16:
-; CHECK: mov z0.h, p0/m, z1.h
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.h, p0/m, z1.h
; CHECK-NEXT: ret
%sel = select <vscale x 8 x i1> %p, <vscale x 8 x i16> %a, <vscale x 8 x i16> %dst
ret <vscale x 8 x i16> %sel
}
-define <vscale x 4 x i32> @sel_nxv4i32(<vscale x 4 x i1> %p,
- <vscale x 4 x i32> %dst,
- <vscale x 4 x i32> %a) {
+define <vscale x 4 x i32> @sel_nxv4i32(<vscale x 4 x i1> %p, <vscale x 4 x i32> %dst, <vscale x 4 x i32> %a) {
; CHECK-LABEL: sel_nxv4i32:
-; CHECK: mov z0.s, p0/m, z1.s
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.s, p0/m, z1.s
; CHECK-NEXT: ret
%sel = select <vscale x 4 x i1> %p, <vscale x 4 x i32> %a, <vscale x 4 x i32> %dst
ret <vscale x 4 x i32> %sel
}
-define <vscale x 2 x i64> @sel_nxv2i64(<vscale x 2 x i1> %p,
- <vscale x 2 x i64> %dst,
- <vscale x 2 x i64> %a) {
+define <vscale x 2 x i64> @sel_nxv2i64(<vscale x 2 x i1> %p, <vscale x 2 x i64> %dst, <vscale x 2 x i64> %a) {
; CHECK-LABEL: sel_nxv2i64:
-; CHECK: mov z0.d, p0/m, z1.d
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.d, p0/m, z1.d
; CHECK-NEXT: ret
%sel = select <vscale x 2 x i1> %p, <vscale x 2 x i64> %a, <vscale x 2 x i64> %dst
ret <vscale x 2 x i64> %sel
@@ -177,41 +173,37 @@ define <vscale x 2 x i64> @sel_nxv2i64(<vscale x 2 x i1> %p,
; Floating point vector select
-define <vscale x 8 x half> @sel_nxv8f16(<vscale x 8 x i1> %p,
- <vscale x 8 x half> %dst,
- <vscale x 8 x half> %a) {
+define <vscale x 8 x half> @sel_nxv8f16(<vscale x 8 x i1> %p, <vscale x 8 x half> %dst, <vscale x 8 x half> %a) {
; CHECK-LABEL: sel_nxv8f16:
-; CHECK: mov z0.h, p0/m, z1.h
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.h, p0/m, z1.h
; CHECK-NEXT: ret
%sel = select <vscale x 8 x i1> %p, <vscale x 8 x half> %a, <vscale x 8 x half> %dst
ret <vscale x 8 x half> %sel
}
-define <vscale x 4 x float> @sel_nxv4f32(<vscale x 4 x i1> %p,
- <vscale x 4 x float> %dst,
- <vscale x 4 x float> %a) {
+define <vscale x 4 x float> @sel_nxv4f32(<vscale x 4 x i1> %p, <vscale x 4 x float> %dst, <vscale x 4 x float> %a) {
; CHECK-LABEL: sel_nxv4f32:
-; CHECK: mov z0.s, p0/m, z1.s
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.s, p0/m, z1.s
; CHECK-NEXT: ret
%sel = select <vscale x 4 x i1> %p, <vscale x 4 x float> %a, <vscale x 4 x float> %dst
ret <vscale x 4 x float> %sel
}
-define <vscale x 2 x float> @sel_nxv2f32(<vscale x 2 x i1> %p,
- <vscale x 2 x float> %dst,
- <vscale x 2 x float> %a) {
+define <vscale x 2 x float> @sel_nxv2f32(<vscale x 2 x i1> %p, <vscale x 2 x float> %dst, <vscale x 2 x float> %a) {
; CHECK-LABEL: sel_nxv2f32:
-; CHECK: mov z0.d, p0/m, z1.d
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.d, p0/m, z1.d
; CHECK-NEXT: ret
%sel = select <vscale x 2 x i1> %p, <vscale x 2 x float> %a, <vscale x 2 x float> %dst
ret <vscale x 2 x float> %sel
}
-define <vscale x 2 x double> @sel_nxv8f64(<vscale x 2 x i1> %p,
- <vscale x 2 x double> %dst,
- <vscale x 2 x double> %a) {
+define <vscale x 2 x double> @sel_nxv8f64(<vscale x 2 x i1> %p, <vscale x 2 x double> %dst, <vscale x 2 x double> %a) {
; CHECK-LABEL: sel_nxv8f64:
-; CHECK: mov z0.d, p0/m, z1.d
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.d, p0/m, z1.d
; CHECK-NEXT: ret
%sel = select <vscale x 2 x i1> %p, <vscale x 2 x double> %a, <vscale x 2 x double> %dst
ret <vscale x 2 x double> %sel
@@ -220,13 +212,13 @@ define <vscale x 2 x double> @sel_nxv8f64(<vscale x 2 x i1> %p,
; Check icmp+select
define <vscale x 2 x half> @icmp_select_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, i64 %x0) {
-; CHECK-LABEL: icmp_select_nxv2f16
+; CHECK-LABEL: icmp_select_nxv2f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x0, #0
-; CHECK-NEXT: cset w8, eq
-; CHECK-NEXT: sbfx x8, x8, #0, #1
-; CHECK-NEXT: whilelo p0.d, xzr, x8
-; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: cmp x0, #0
+; CHECK-NEXT: cset w8, eq
+; CHECK-NEXT: sbfx x8, x8, #0, #1
+; CHECK-NEXT: whilelo p0.d, xzr, x8
+; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT: ret
%mask = icmp eq i64 %x0, 0
%sel = select i1 %mask, <vscale x 2 x half> %a, <vscale x 2 x half> %b
@@ -234,13 +226,13 @@ define <vscale x 2 x half> @icmp_select_nxv2f16(<vscale x 2 x half> %a, <vscale
}
define <vscale x 2 x float> @icmp_select_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b, i64 %x0) {
-; CHECK-LABEL: icmp_select_nxv2f32
+; CHECK-LABEL: icmp_select_nxv2f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x0, #0
-; CHECK-NEXT: cset w8, eq
-; CHECK-NEXT: sbfx x8, x8, #0, #1
-; CHECK-NEXT: whilelo p0.d, xzr, x8
-; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: cmp x0, #0
+; CHECK-NEXT: cset w8, eq
+; CHECK-NEXT: sbfx x8, x8, #0, #1
+; CHECK-NEXT: whilelo p0.d, xzr, x8
+; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT: ret
%mask = icmp eq i64 %x0, 0
%sel = select i1 %mask, <vscale x 2 x float> %a, <vscale x 2 x float> %b
@@ -248,13 +240,13 @@ define <vscale x 2 x float> @icmp_select_nxv2f32(<vscale x 2 x float> %a, <vscal
}
define <vscale x 2 x double> @icmp_select_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, i64 %x0) {
-; CHECK-LABEL: icmp_select_nxv2f64
+; CHECK-LABEL: icmp_select_nxv2f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x0, #0
-; CHECK-NEXT: cset w8, eq
-; CHECK-NEXT: sbfx x8, x8, #0, #1
-; CHECK-NEXT: whilelo p0.d, xzr, x8
-; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: cmp x0, #0
+; CHECK-NEXT: cset w8, eq
+; CHECK-NEXT: sbfx x8, x8, #0, #1
+; CHECK-NEXT: whilelo p0.d, xzr, x8
+; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT: ret
%mask = icmp eq i64 %x0, 0
%sel = select i1 %mask, <vscale x 2 x double> %a, <vscale x 2 x double> %b
@@ -262,13 +254,13 @@ define <vscale x 2 x double> @icmp_select_nxv2f64(<vscale x 2 x double> %a, <vsc
}
define <vscale x 4 x half> @icmp_select_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, i64 %x0) {
-; CHECK-LABEL: icmp_select_nxv4f16
+; CHECK-LABEL: icmp_select_nxv4f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x0, #0
-; CHECK-NEXT: cset w8, eq
-; CHECK-NEXT: sbfx x8, x8, #0, #1
-; CHECK-NEXT: whilelo p0.s, xzr, x8
-; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: cmp x0, #0
+; CHECK-NEXT: cset w8, eq
+; CHECK-NEXT: sbfx x8, x8, #0, #1
+; CHECK-NEXT: whilelo p0.s, xzr, x8
+; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT: ret
%mask = icmp eq i64 %x0, 0
%sel = select i1 %mask, <vscale x 4 x half> %a, <vscale x 4 x half> %b
@@ -276,13 +268,13 @@ define <vscale x 4 x half> @icmp_select_nxv4f16(<vscale x 4 x half> %a, <vscale
}
define <vscale x 4 x float> @icmp_select_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, i64 %x0) {
-; CHECK-LABEL: icmp_select_nxv4f32
+; CHECK-LABEL: icmp_select_nxv4f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x0, #0
-; CHECK-NEXT: cset w8, eq
-; CHECK-NEXT: sbfx x8, x8, #0, #1
-; CHECK-NEXT: whilelo p0.s, xzr, x8
-; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: cmp x0, #0
+; CHECK-NEXT: cset w8, eq
+; CHECK-NEXT: sbfx x8, x8, #0, #1
+; CHECK-NEXT: whilelo p0.s, xzr, x8
+; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT: ret
%mask = icmp eq i64 %x0, 0
%sel = select i1 %mask, <vscale x 4 x float> %a, <vscale x 4 x float> %b
@@ -290,13 +282,13 @@ define <vscale x 4 x float> @icmp_select_nxv4f32(<vscale x 4 x float> %a, <vscal
}
define <vscale x 8 x half> @icmp_select_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, i64 %x0) {
-; CHECK-LABEL: icmp_select_nxv8f16
+; CHECK-LABEL: icmp_select_nxv8f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x0, #0
-; CHECK-NEXT: cset w8, eq
-; CHECK-NEXT: sbfx x8, x8, #0, #1
-; CHECK-NEXT: whilelo p0.h, xzr, x8
-; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: cmp x0, #0
+; CHECK-NEXT: cset w8, eq
+; CHECK-NEXT: sbfx x8, x8, #0, #1
+; CHECK-NEXT: whilelo p0.h, xzr, x8
+; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT: ret
%mask = icmp eq i64 %x0, 0
%sel = select i1 %mask, <vscale x 8 x half> %a, <vscale x 8 x half> %b
@@ -304,13 +296,13 @@ define <vscale x 8 x half> @icmp_select_nxv8f16(<vscale x 8 x half> %a, <vscale
}
define <vscale x 2 x i64> @icmp_select_nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i64 %x0) {
-; CHECK-LABEL: icmp_select_nxv2i64
+; CHECK-LABEL: icmp_select_nxv2i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x0, #0
-; CHECK-NEXT: cset w8, eq
-; CHECK-NEXT: sbfx x8, x8, #0, #1
-; CHECK-NEXT: whilelo p0.d, xzr, x8
-; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: cmp x0, #0
+; CHECK-NEXT: cset w8, eq
+; CHECK-NEXT: sbfx x8, x8, #0, #1
+; CHECK-NEXT: whilelo p0.d, xzr, x8
+; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT: ret
%mask = icmp eq i64 %x0, 0
%sel = select i1 %mask, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b
@@ -318,13 +310,13 @@ define <vscale x 2 x i64> @icmp_select_nxv2i64(<vscale x 2 x i64> %a, <vscale x
}
define <vscale x 4 x i32> @icmp_select_nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i64 %x0) {
-; CHECK-LABEL: icmp_select_nxv4i32
+; CHECK-LABEL: icmp_select_nxv4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x0, #0
-; CHECK-NEXT: cset w8, eq
-; CHECK-NEXT: sbfx x8, x8, #0, #1
-; CHECK-NEXT: whilelo p0.s, xzr, x8
-; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: cmp x0, #0
+; CHECK-NEXT: cset w8, eq
+; CHECK-NEXT: sbfx x8, x8, #0, #1
+; CHECK-NEXT: whilelo p0.s, xzr, x8
+; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT: ret
%mask = icmp eq i64 %x0, 0
%sel = select i1 %mask, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b
@@ -332,13 +324,13 @@ define <vscale x 4 x i32> @icmp_select_nxv4i32(<vscale x 4 x i32> %a, <vscale x
}
define <vscale x 8 x i16> @icmp_select_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i64 %x0) {
-; CHECK-LABEL: icmp_select_nxv8i16
+; CHECK-LABEL: icmp_select_nxv8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x0, #0
-; CHECK-NEXT: cset w8, eq
-; CHECK-NEXT: sbfx x8, x8, #0, #1
-; CHECK-NEXT: whilelo p0.h, xzr, x8
-; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: cmp x0, #0
+; CHECK-NEXT: cset w8, eq
+; CHECK-NEXT: sbfx x8, x8, #0, #1
+; CHECK-NEXT: whilelo p0.h, xzr, x8
+; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT: ret
%mask = icmp eq i64 %x0, 0
%sel = select i1 %mask, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b
@@ -346,13 +338,13 @@ define <vscale x 8 x i16> @icmp_select_nxv8i16(<vscale x 8 x i16> %a, <vscale x
}
define <vscale x 16 x i8> @icmp_select_nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i64 %x0) {
-; CHECK-LABEL: icmp_select_nxv16i8
+; CHECK-LABEL: icmp_select_nxv16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x0, #0
-; CHECK-NEXT: cset w8, eq
-; CHECK-NEXT: sbfx x8, x8, #0, #1
-; CHECK-NEXT: whilelo p0.b, xzr, x8
-; CHECK-NEXT: sel z0.b, p0, z0.b, z1.b
+; CHECK-NEXT: cmp x0, #0
+; CHECK-NEXT: cset w8, eq
+; CHECK-NEXT: sbfx x8, x8, #0, #1
+; CHECK-NEXT: whilelo p0.b, xzr, x8
+; CHECK-NEXT: sel z0.b, p0, z0.b, z1.b
; CHECK-NEXT: ret
%mask = icmp eq i64 %x0, 0
%sel = select i1 %mask, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b
@@ -360,52 +352,52 @@ define <vscale x 16 x i8> @icmp_select_nxv16i8(<vscale x 16 x i8> %a, <vscale x
}
define <vscale x 2 x i1> @icmp_select_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b, i64 %x0) {
-; CHECK-LABEL: icmp_select_nxv2i1
+; CHECK-LABEL: icmp_select_nxv2i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x0, #0
-; CHECK-NEXT: cset w8, eq
-; CHECK-NEXT: sbfx x8, x8, #0, #1
-; CHECK-NEXT: whilelo p2.d, xzr, x8
-; CHECK-NEXT: sel p0.b, p2, p0.b, p1.b
+; CHECK-NEXT: cmp x0, #0
+; CHECK-NEXT: cset w8, eq
+; CHECK-NEXT: sbfx x8, x8, #0, #1
+; CHECK-NEXT: whilelo p2.d, xzr, x8
+; CHECK-NEXT: sel p0.b, p2, p0.b, p1.b
; CHECK-NEXT: ret
%mask = icmp eq i64 %x0, 0
%sel = select i1 %mask, <vscale x 2 x i1> %a, <vscale x 2 x i1> %b
ret <vscale x 2 x i1> %sel
}
define <vscale x 4 x i1> @icmp_select_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b, i64 %x0) {
-; CHECK-LABEL: icmp_select_nxv4i1
+; CHECK-LABEL: icmp_select_nxv4i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x0, #0
-; CHECK-NEXT: cset w8, eq
-; CHECK-NEXT: sbfx x8, x8, #0, #1
-; CHECK-NEXT: whilelo p2.s, xzr, x8
-; CHECK-NEXT: sel p0.b, p2, p0.b, p1.b
+; CHECK-NEXT: cmp x0, #0
+; CHECK-NEXT: cset w8, eq
+; CHECK-NEXT: sbfx x8, x8, #0, #1
+; CHECK-NEXT: whilelo p2.s, xzr, x8
+; CHECK-NEXT: sel p0.b, p2, p0.b, p1.b
; CHECK-NEXT: ret
%mask = icmp eq i64 %x0, 0
%sel = select i1 %mask, <vscale x 4 x i1> %a, <vscale x 4 x i1> %b
ret <vscale x 4 x i1> %sel
}
define <vscale x 8 x i1> @icmp_select_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b, i64 %x0) {
-; CHECK-LABEL: icmp_select_nxv8i1
+; CHECK-LABEL: icmp_select_nxv8i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x0, #0
-; CHECK-NEXT: cset w8, eq
-; CHECK-NEXT: sbfx x8, x8, #0, #1
-; CHECK-NEXT: whilelo p2.h, xzr, x8
-; CHECK-NEXT: sel p0.b, p2, p0.b, p1.b
+; CHECK-NEXT: cmp x0, #0
+; CHECK-NEXT: cset w8, eq
+; CHECK-NEXT: sbfx x8, x8, #0, #1
+; CHECK-NEXT: whilelo p2.h, xzr, x8
+; CHECK-NEXT: sel p0.b, p2, p0.b, p1.b
; CHECK-NEXT: ret
%mask = icmp eq i64 %x0, 0
%sel = select i1 %mask, <vscale x 8 x i1> %a, <vscale x 8 x i1> %b
ret <vscale x 8 x i1> %sel
}
define <vscale x 16 x i1> @icmp_select_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b, i64 %x0) {
-; CHECK-LABEL: icmp_select_nxv16i1
+; CHECK-LABEL: icmp_select_nxv16i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x0, #0
-; CHECK-NEXT: cset w8, eq
-; CHECK-NEXT: sbfx x8, x8, #0, #1
-; CHECK-NEXT: whilelo p2.b, xzr, x8
-; CHECK-NEXT: sel p0.b, p2, p0.b, p1.b
+; CHECK-NEXT: cmp x0, #0
+; CHECK-NEXT: cset w8, eq
+; CHECK-NEXT: sbfx x8, x8, #0, #1
+; CHECK-NEXT: whilelo p2.b, xzr, x8
+; CHECK-NEXT: sel p0.b, p2, p0.b, p1.b
; CHECK-NEXT: ret
%mask = icmp eq i64 %x0, 0
%sel = select i1 %mask, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b
diff --git a/llvm/test/CodeGen/AArch64/sve-unary-movprfx.ll b/llvm/test/CodeGen/AArch64/sve-unary-movprfx.ll
index a29aebfb001b..1a1d561ebcec 100644
--- a/llvm/test/CodeGen/AArch64/sve-unary-movprfx.ll
+++ b/llvm/test/CodeGen/AArch64/sve-unary-movprfx.ll
@@ -233,7 +233,7 @@ define <vscale x 2 x i64> @abs_i64_active(<vscale x 2 x i64> %a, <vscale x 2 x i
define <vscale x 2 x i64> @abs_i64_not_active(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i1> %pg) #0 {
; CHECK-LABEL: abs_i64_not_active:
; CHECK: // %bb.0:
-; CHECK: abs z0.d, p0/m, z1.d
+; CHECK-NEXT: abs z0.d, p0/m, z1.d
; CHECK-NEXT: ret
%ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.abs.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
ret <vscale x 2 x i64> %ret
@@ -424,7 +424,7 @@ define <vscale x 2 x i64> @cls_i64_active(<vscale x 2 x i64> %a, <vscale x 2 x i
define <vscale x 2 x i64> @cls_i64_not_active(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i1> %pg) #0 {
; CHECK-LABEL: cls_i64_not_active:
; CHECK: // %bb.0:
-; CHECK: cls z0.d, p0/m, z1.d
+; CHECK-NEXT: cls z0.d, p0/m, z1.d
; CHECK-NEXT: ret
%ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cls.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
ret <vscale x 2 x i64> %ret
@@ -598,7 +598,7 @@ define <vscale x 2 x double> @fabs_f64_active(<vscale x 2 x double> %a, <vscale
define <vscale x 2 x double> @fabs_f64_not_active(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x i1> %pg) #0 {
; CHECK-LABEL: fabs_f64_not_active:
; CHECK: // %bb.0:
-; CHECK: fabs z0.d, p0/m, z1.d
+; CHECK-NEXT: fabs z0.d, p0/m, z1.d
; CHECK-NEXT: ret
%ret = tail call <vscale x 2 x double> @llvm.aarch64.sve.fabs.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b)
ret <vscale x 2 x double> %ret
@@ -772,7 +772,7 @@ define <vscale x 2 x i64> @sxtb_i64_active(<vscale x 2 x i64> %a, <vscale x 2 x
define <vscale x 2 x i64> @sxtb_i64_not_active(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i1> %pg) #0 {
; CHECK-LABEL: sxtb_i64_not_active:
; CHECK: // %bb.0:
-; CHECK: sxtb z0.d, p0/m, z1.d
+; CHECK-NEXT: sxtb z0.d, p0/m, z1.d
; CHECK-NEXT: ret
%ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sxtb.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
ret <vscale x 2 x i64> %ret
@@ -888,7 +888,7 @@ define <vscale x 2 x i64> @sxth_i64_active(<vscale x 2 x i64> %a, <vscale x 2 x
define <vscale x 2 x i64> @sxth_i64_not_active(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i1> %pg) #0 {
; CHECK-LABEL: sxth_i64_not_active:
; CHECK: // %bb.0:
-; CHECK: sxth z0.d, p0/m, z1.d
+; CHECK-NEXT: sxth z0.d, p0/m, z1.d
; CHECK-NEXT: ret
%ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sxth.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
ret <vscale x 2 x i64> %ret
@@ -946,7 +946,7 @@ define <vscale x 2 x i64> @sxtw_i64_active(<vscale x 2 x i64> %a, <vscale x 2 x
define <vscale x 2 x i64> @sxtw_i64_not_active(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i1> %pg) #0 {
; CHECK-LABEL: sxtw_i64_not_active:
; CHECK: // %bb.0:
-; CHECK: sxtw z0.d, p0/m, z1.d
+; CHECK-NEXT: sxtw z0.d, p0/m, z1.d
; CHECK-NEXT: ret
%ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
ret <vscale x 2 x i64> %ret
diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-int-arith-imm.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-int-arith-imm.ll
index c755af0286e0..6a8f53d7f31a 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-int-arith-imm.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-int-arith-imm.ll
@@ -404,8 +404,9 @@ define <vscale x 2 x i64> @uqsub_d_highimm(<vscale x 2 x i64> %a) {
; As uqsub_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @uqsub_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: uqsub_i32_ptrue_all_b:
-; CHECK: uqsub z0.s, z0.s, #1
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: uqsub z0.s, z0.s, #1 // =0x1
+; CHECK-NEXT: ret
%pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
%b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
@@ -418,8 +419,9 @@ define <vscale x 4 x i32> @uqsub_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; As uqsub_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @uqsub_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: uqsub_i32_ptrue_all_h:
-; CHECK: uqsub z0.s, z0.s, #1
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: uqsub z0.s, z0.s, #1 // =0x1
+; CHECK-NEXT: ret
%pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
%pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
@@ -434,10 +436,11 @@ define <vscale x 4 x i32> @uqsub_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; thus inactive lanes are important and the immediate form cannot be used.
define <vscale x 4 x i32> @uqsub_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: uqsub_i32_ptrue_all_d:
-; CHECK-DAG: ptrue [[PG:p[0-9]+]].d
-; CHECK-DAG: mov [[DUP:z[0-9]+]].s, #1
-; CHECK-DAG: uqsub z0.s, [[PG]]/m, z0.s, [[DUP]].s
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov z1.s, #1 // =0x1
+; CHECK-NEXT: uqsub z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
%pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
%pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
diff --git a/llvm/test/CodeGen/AArch64/sve2-unary-movprfx.ll b/llvm/test/CodeGen/AArch64/sve2-unary-movprfx.ll
index 83b240791edd..b82b656f710f 100644
--- a/llvm/test/CodeGen/AArch64/sve2-unary-movprfx.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-unary-movprfx.ll
@@ -192,7 +192,7 @@ define <vscale x 2 x i64> @sqabs_i64_active(<vscale x 2 x i64> %a, <vscale x 2 x
define <vscale x 2 x i64> @sqabs_i64_not_active(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i1> %pg) #0 {
; CHECK-LABEL: sqabs_i64_not_active:
; CHECK: // %bb.0:
-; CHECK: sqabs z0.d, p0/m, z1.d
+; CHECK-NEXT: sqabs z0.d, p0/m, z1.d
; CHECK-NEXT: ret
%ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqabs.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
ret <vscale x 2 x i64> %ret
diff --git a/llvm/test/CodeGen/AArch64/unwind-preserved.ll b/llvm/test/CodeGen/AArch64/unwind-preserved.ll
index 94af07474873..3e177043321e 100644
--- a/llvm/test/CodeGen/AArch64/unwind-preserved.ll
+++ b/llvm/test/CodeGen/AArch64/unwind-preserved.ll
@@ -365,7 +365,7 @@ define aarch64_vector_pcs <4 x i32> @invoke_callee_may_throw_neon(<4 x i32> %v)
; GISEL-NEXT: bl may_throw_neon
; GISEL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
; GISEL-NEXT: .Ltmp4:
-; GISEL-NEXT: b .LBB1_1
+; GISEL-NEXT: b .LBB1_1
; GISEL-NEXT: .LBB1_1: // %.Lcontinue
; GISEL-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; GISEL-NEXT: ldp x29, x30, [sp, #288] // 16-byte Folded Reload