[llvm] cab01a8 - [AArch64] Additional testing for i128 and non-temporal loads/stores under BE. NFC
David Green via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 26 03:01:54 PDT 2023
Author: David Green
Date: 2023-09-26T11:01:48+01:00
New Revision: cab01a8b4904013a5c66077eb8cbb7880d20cabe
URL: https://github.com/llvm/llvm-project/commit/cab01a8b4904013a5c66077eb8cbb7880d20cabe
DIFF: https://github.com/llvm/llvm-project/commit/cab01a8b4904013a5c66077eb8cbb7880d20cabe.diff
LOG: [AArch64] Additional testing for i128 and non-temporal loads/stores under BE. NFC
Added:
Modified:
llvm/test/CodeGen/AArch64/i128_volatile_load_store.ll
llvm/test/CodeGen/AArch64/nontemporal.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AArch64/i128_volatile_load_store.ll b/llvm/test/CodeGen/AArch64/i128_volatile_load_store.ll
index 73708e3fd8c44bf..c3a03b2cb35426b 100644
--- a/llvm/test/CodeGen/AArch64/i128_volatile_load_store.ll
+++ b/llvm/test/CodeGen/AArch64/i128_volatile_load_store.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s
+; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LE
+; RUN: llc -mtriple=aarch64_be %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-BE
@x = common dso_local global i128 0
@y = common dso_local global i128 0
@@ -116,3 +117,53 @@ define void @test7() {
store volatile i128 %tmp, ptr getelementptr (i8, ptr @y, i64 503)
ret void
}
+
+define i128 @load_nonvol(i32, i32, ptr %p) {
+; CHECK-LABEL: load_nonvol:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldp x0, x1, [x2]
+; CHECK-NEXT: ret
+entry:
+ %l = load i128, ptr %p, align 16
+ ret i128 %l
+}
+
+define i128 @load_vol(i32, i32, ptr %p) {
+; CHECK-LE-LABEL: load_vol:
+; CHECK-LE: // %bb.0: // %entry
+; CHECK-LE-NEXT: ldp x0, x1, [x2]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: load_vol:
+; CHECK-BE: // %bb.0: // %entry
+; CHECK-BE-NEXT: ldp x1, x0, [x2]
+; CHECK-BE-NEXT: ret
+entry:
+ %l = load volatile i128, ptr %p, align 16
+ ret i128 %l
+}
+
+define void @store_nonvol(i128 %a, ptr %p) {
+; CHECK-LABEL: store_nonvol:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: stp x0, x1, [x2]
+; CHECK-NEXT: ret
+entry:
+ store i128 %a, ptr %p, align 16
+ ret void
+}
+
+define void @loadstore_vol(i128 %a, ptr %p) {
+; CHECK-LE-LABEL: loadstore_vol:
+; CHECK-LE: // %bb.0: // %entry
+; CHECK-LE-NEXT: stp x0, x1, [x2]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: loadstore_vol:
+; CHECK-BE: // %bb.0: // %entry
+; CHECK-BE-NEXT: stp x1, x0, [x2]
+; CHECK-BE-NEXT: ret
+entry:
+ store volatile i128 %a, ptr %p, align 16
+ ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/nontemporal.ll b/llvm/test/CodeGen/AArch64/nontemporal.ll
index 4fcb5c0342e525b..fe19ca7e2cc43d1 100644
--- a/llvm/test/CodeGen/AArch64/nontemporal.ll
+++ b/llvm/test/CodeGen/AArch64/nontemporal.ll
@@ -1,287 +1,424 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple aarch64-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple aarch64 | FileCheck %s --check-prefixes=CHECK-LE
+; RUN: llc < %s -mtriple aarch64_be | FileCheck %s --check-prefixes=CHECK-BE
define void @test_stnp_v4i64(ptr %p, <4 x i64> %v) #0 {
-; CHECK-LABEL: test_stnp_v4i64:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: stnp q0, q1, [x0]
-; CHECK-NEXT: ret
+; CHECK-LE-LABEL: test_stnp_v4i64:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: stnp q0, q1, [x0]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: test_stnp_v4i64:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
+; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-BE-NEXT: stnp q0, q1, [x0]
+; CHECK-BE-NEXT: ret
store <4 x i64> %v, ptr %p, align 1, !nontemporal !0
ret void
}
define void @test_stnp_v4i32(ptr %p, <4 x i32> %v) #0 {
-; CHECK-LABEL: test_stnp_v4i32:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: mov d1, v0[1]
-; CHECK-NEXT: stnp d0, d1, [x0]
-; CHECK-NEXT: ret
+; CHECK-LE-LABEL: test_stnp_v4i32:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: mov d1, v0.d[1]
+; CHECK-LE-NEXT: stnp d0, d1, [x0]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: test_stnp_v4i32:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: str q0, [x0]
+; CHECK-BE-NEXT: ret
store <4 x i32> %v, ptr %p, align 1, !nontemporal !0
ret void
}
define void @test_stnp_v8i16(ptr %p, <8 x i16> %v) #0 {
-; CHECK-LABEL: test_stnp_v8i16:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: mov d1, v0[1]
-; CHECK-NEXT: stnp d0, d1, [x0]
-; CHECK-NEXT: ret
+; CHECK-LE-LABEL: test_stnp_v8i16:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: mov d1, v0.d[1]
+; CHECK-LE-NEXT: stnp d0, d1, [x0]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: test_stnp_v8i16:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: str q0, [x0]
+; CHECK-BE-NEXT: ret
store <8 x i16> %v, ptr %p, align 1, !nontemporal !0
ret void
}
define void @test_stnp_v16i8(ptr %p, <16 x i8> %v) #0 {
-; CHECK-LABEL: test_stnp_v16i8:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: mov d1, v0[1]
-; CHECK-NEXT: stnp d0, d1, [x0]
-; CHECK-NEXT: ret
+; CHECK-LE-LABEL: test_stnp_v16i8:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: mov d1, v0.d[1]
+; CHECK-LE-NEXT: stnp d0, d1, [x0]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: test_stnp_v16i8:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: str q0, [x0]
+; CHECK-BE-NEXT: ret
store <16 x i8> %v, ptr %p, align 1, !nontemporal !0
ret void
}
define void @test_stnp_v2i32(ptr %p, <2 x i32> %v) #0 {
-; CHECK-LABEL: test_stnp_v2i32:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov s1, v0[1]
-; CHECK-NEXT: stnp s0, s1, [x0]
-; CHECK-NEXT: ret
+; CHECK-LE-LABEL: test_stnp_v2i32:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-LE-NEXT: mov s1, v0.s[1]
+; CHECK-LE-NEXT: stnp s0, s1, [x0]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: test_stnp_v2i32:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: str d0, [x0]
+; CHECK-BE-NEXT: ret
store <2 x i32> %v, ptr %p, align 1, !nontemporal !0
ret void
}
define void @test_stnp_v4i16(ptr %p, <4 x i16> %v) #0 {
-; CHECK-LABEL: test_stnp_v4i16:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov s1, v0[1]
-; CHECK-NEXT: stnp s0, s1, [x0]
-; CHECK-NEXT: ret
+; CHECK-LE-LABEL: test_stnp_v4i16:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-LE-NEXT: mov s1, v0.s[1]
+; CHECK-LE-NEXT: stnp s0, s1, [x0]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: test_stnp_v4i16:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: str d0, [x0]
+; CHECK-BE-NEXT: ret
store <4 x i16> %v, ptr %p, align 1, !nontemporal !0
ret void
}
define void @test_stnp_v8i8(ptr %p, <8 x i8> %v) #0 {
-; CHECK-LABEL: test_stnp_v8i8:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov s1, v0[1]
-; CHECK-NEXT: stnp s0, s1, [x0]
-; CHECK-NEXT: ret
+; CHECK-LE-LABEL: test_stnp_v8i8:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-LE-NEXT: mov s1, v0.s[1]
+; CHECK-LE-NEXT: stnp s0, s1, [x0]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: test_stnp_v8i8:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: str d0, [x0]
+; CHECK-BE-NEXT: ret
store <8 x i8> %v, ptr %p, align 1, !nontemporal !0
ret void
}
define void @test_stnp_v2f64(ptr %p, <2 x double> %v) #0 {
-; CHECK-LABEL: test_stnp_v2f64:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: mov d1, v0[1]
-; CHECK-NEXT: stnp d0, d1, [x0]
-; CHECK-NEXT: ret
+; CHECK-LE-LABEL: test_stnp_v2f64:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: mov d1, v0.d[1]
+; CHECK-LE-NEXT: stnp d0, d1, [x0]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: test_stnp_v2f64:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: str q0, [x0]
+; CHECK-BE-NEXT: ret
store <2 x double> %v, ptr %p, align 1, !nontemporal !0
ret void
}
define void @test_stnp_v4f32(ptr %p, <4 x float> %v) #0 {
-; CHECK-LABEL: test_stnp_v4f32:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: mov d1, v0[1]
-; CHECK-NEXT: stnp d0, d1, [x0]
-; CHECK-NEXT: ret
+; CHECK-LE-LABEL: test_stnp_v4f32:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: mov d1, v0.d[1]
+; CHECK-LE-NEXT: stnp d0, d1, [x0]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: test_stnp_v4f32:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: str q0, [x0]
+; CHECK-BE-NEXT: ret
store <4 x float> %v, ptr %p, align 1, !nontemporal !0
ret void
}
define void @test_stnp_v2f32(ptr %p, <2 x float> %v) #0 {
-; CHECK-LABEL: test_stnp_v2f32:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov s1, v0[1]
-; CHECK-NEXT: stnp s0, s1, [x0]
-; CHECK-NEXT: ret
+; CHECK-LE-LABEL: test_stnp_v2f32:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-LE-NEXT: mov s1, v0.s[1]
+; CHECK-LE-NEXT: stnp s0, s1, [x0]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: test_stnp_v2f32:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: str d0, [x0]
+; CHECK-BE-NEXT: ret
store <2 x float> %v, ptr %p, align 1, !nontemporal !0
ret void
}
define void @test_stnp_v1f64(ptr %p, <1 x double> %v) #0 {
-; CHECK-LABEL: test_stnp_v1f64:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov s1, v0[1]
-; CHECK-NEXT: stnp s0, s1, [x0]
-; CHECK-NEXT: ret
+; CHECK-LE-LABEL: test_stnp_v1f64:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-LE-NEXT: mov s1, v0.s[1]
+; CHECK-LE-NEXT: stnp s0, s1, [x0]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: test_stnp_v1f64:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: str d0, [x0]
+; CHECK-BE-NEXT: ret
store <1 x double> %v, ptr %p, align 1, !nontemporal !0
ret void
}
define void @test_stnp_v1i64(ptr %p, <1 x i64> %v) #0 {
-; CHECK-LABEL: test_stnp_v1i64:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov s1, v0[1]
-; CHECK-NEXT: stnp s0, s1, [x0]
-; CHECK-NEXT: ret
+; CHECK-LE-LABEL: test_stnp_v1i64:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-LE-NEXT: mov s1, v0.s[1]
+; CHECK-LE-NEXT: stnp s0, s1, [x0]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: test_stnp_v1i64:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: str d0, [x0]
+; CHECK-BE-NEXT: ret
store <1 x i64> %v, ptr %p, align 1, !nontemporal !0
ret void
}
define void @test_stnp_i64(ptr %p, i64 %v) #0 {
-; CHECK-LABEL: test_stnp_i64:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: lsr x8, x1, #32
-; CHECK-NEXT: stnp w1, w8, [x0]
-; CHECK-NEXT: ret
+; CHECK-LE-LABEL: test_stnp_i64:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: lsr x8, x1, #32
+; CHECK-LE-NEXT: stnp w1, w8, [x0]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: test_stnp_i64:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: str x1, [x0]
+; CHECK-BE-NEXT: ret
store i64 %v, ptr %p, align 1, !nontemporal !0
ret void
}
define void @test_stnp_v2f64_offset(ptr %p, <2 x double> %v) #0 {
-; CHECK-LABEL: test_stnp_v2f64_offset:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: mov d1, v0[1]
-; CHECK-NEXT: stnp d0, d1, [x0, #16]
-; CHECK-NEXT: ret
+; CHECK-LE-LABEL: test_stnp_v2f64_offset:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: mov d1, v0.d[1]
+; CHECK-LE-NEXT: stnp d0, d1, [x0, #16]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: test_stnp_v2f64_offset:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: str q0, [x0, #16]
+; CHECK-BE-NEXT: ret
%tmp0 = getelementptr <2 x double>, ptr %p, i32 1
store <2 x double> %v, ptr %tmp0, align 1, !nontemporal !0
ret void
}
define void @test_stnp_v2f64_offset_neg(ptr %p, <2 x double> %v) #0 {
-; CHECK-LABEL: test_stnp_v2f64_offset_neg:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: mov d1, v0[1]
-; CHECK-NEXT: stnp d0, d1, [x0, #-16]
-; CHECK-NEXT: ret
+; CHECK-LE-LABEL: test_stnp_v2f64_offset_neg:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: mov d1, v0.d[1]
+; CHECK-LE-NEXT: stnp d0, d1, [x0, #-16]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: test_stnp_v2f64_offset_neg:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: stur q0, [x0, #-16]
+; CHECK-BE-NEXT: ret
%tmp0 = getelementptr <2 x double>, ptr %p, i32 -1
store <2 x double> %v, ptr %tmp0, align 1, !nontemporal !0
ret void
}
define void @test_stnp_v2f32_offset(ptr %p, <2 x float> %v) #0 {
-; CHECK-LABEL: test_stnp_v2f32_offset:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov s1, v0[1]
-; CHECK-NEXT: stnp s0, s1, [x0, #8]
-; CHECK-NEXT: ret
+; CHECK-LE-LABEL: test_stnp_v2f32_offset:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-LE-NEXT: mov s1, v0.s[1]
+; CHECK-LE-NEXT: stnp s0, s1, [x0, #8]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: test_stnp_v2f32_offset:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: str d0, [x0, #8]
+; CHECK-BE-NEXT: ret
%tmp0 = getelementptr <2 x float>, ptr %p, i32 1
store <2 x float> %v, ptr %tmp0, align 1, !nontemporal !0
ret void
}
define void @test_stnp_v2f32_offset_neg(ptr %p, <2 x float> %v) #0 {
-; CHECK-LABEL: test_stnp_v2f32_offset_neg:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov s1, v0[1]
-; CHECK-NEXT: stnp s0, s1, [x0, #-8]
-; CHECK-NEXT: ret
+; CHECK-LE-LABEL: test_stnp_v2f32_offset_neg:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-LE-NEXT: mov s1, v0.s[1]
+; CHECK-LE-NEXT: stnp s0, s1, [x0, #-8]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: test_stnp_v2f32_offset_neg:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: stur d0, [x0, #-8]
+; CHECK-BE-NEXT: ret
%tmp0 = getelementptr <2 x float>, ptr %p, i32 -1
store <2 x float> %v, ptr %tmp0, align 1, !nontemporal !0
ret void
}
define void @test_stnp_i64_offset(ptr %p, i64 %v) #0 {
-; CHECK-LABEL: test_stnp_i64_offset:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: lsr x8, x1, #32
-; CHECK-NEXT: stnp w1, w8, [x0, #8]
-; CHECK-NEXT: ret
+; CHECK-LE-LABEL: test_stnp_i64_offset:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: lsr x8, x1, #32
+; CHECK-LE-NEXT: stnp w1, w8, [x0, #8]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: test_stnp_i64_offset:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: str x1, [x0, #8]
+; CHECK-BE-NEXT: ret
%tmp0 = getelementptr i64, ptr %p, i32 1
store i64 %v, ptr %tmp0, align 1, !nontemporal !0
ret void
}
define void @test_stnp_i64_offset_neg(ptr %p, i64 %v) #0 {
-; CHECK-LABEL: test_stnp_i64_offset_neg:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: lsr x8, x1, #32
-; CHECK-NEXT: stnp w1, w8, [x0, #-8]
-; CHECK-NEXT: ret
+; CHECK-LE-LABEL: test_stnp_i64_offset_neg:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: lsr x8, x1, #32
+; CHECK-LE-NEXT: stnp w1, w8, [x0, #-8]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: test_stnp_i64_offset_neg:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: stur x1, [x0, #-8]
+; CHECK-BE-NEXT: ret
%tmp0 = getelementptr i64, ptr %p, i32 -1
store i64 %v, ptr %tmp0, align 1, !nontemporal !0
ret void
}
define void @test_stnp_v4f32_invalid_offset_4(ptr %p, <4 x float> %v) #0 {
-; CHECK-LABEL: test_stnp_v4f32_invalid_offset_4:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: mov d1, v0[1]
-; CHECK-NEXT: add x8, x0, #4
-; CHECK-NEXT: stnp d0, d1, [x8]
-; CHECK-NEXT: ret
+; CHECK-LE-LABEL: test_stnp_v4f32_invalid_offset_4:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: mov d1, v0.d[1]
+; CHECK-LE-NEXT: add x8, x0, #4
+; CHECK-LE-NEXT: stnp d0, d1, [x8]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: test_stnp_v4f32_invalid_offset_4:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: stur q0, [x0, #4]
+; CHECK-BE-NEXT: ret
%tmp0 = getelementptr i8, ptr %p, i32 4
store <4 x float> %v, ptr %tmp0, align 1, !nontemporal !0
ret void
}
define void @test_stnp_v4f32_invalid_offset_neg_4(ptr %p, <4 x float> %v) #0 {
-; CHECK-LABEL: test_stnp_v4f32_invalid_offset_neg_4:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: mov d1, v0[1]
-; CHECK-NEXT: sub x8, x0, #4
-; CHECK-NEXT: stnp d0, d1, [x8]
-; CHECK-NEXT: ret
+; CHECK-LE-LABEL: test_stnp_v4f32_invalid_offset_neg_4:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: mov d1, v0.d[1]
+; CHECK-LE-NEXT: sub x8, x0, #4
+; CHECK-LE-NEXT: stnp d0, d1, [x8]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: test_stnp_v4f32_invalid_offset_neg_4:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: stur q0, [x0, #-4]
+; CHECK-BE-NEXT: ret
%tmp0 = getelementptr i8, ptr %p, i32 -4
store <4 x float> %v, ptr %tmp0, align 1, !nontemporal !0
ret void
}
define void @test_stnp_v4f32_invalid_offset_512(ptr %p, <4 x float> %v) #0 {
-; CHECK-LABEL: test_stnp_v4f32_invalid_offset_512:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: mov d1, v0[1]
-; CHECK-NEXT: add x8, x0, #512
-; CHECK-NEXT: stnp d0, d1, [x8]
-; CHECK-NEXT: ret
+; CHECK-LE-LABEL: test_stnp_v4f32_invalid_offset_512:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: mov d1, v0.d[1]
+; CHECK-LE-NEXT: add x8, x0, #512
+; CHECK-LE-NEXT: stnp d0, d1, [x8]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: test_stnp_v4f32_invalid_offset_512:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: str q0, [x0, #512]
+; CHECK-BE-NEXT: ret
%tmp0 = getelementptr i8, ptr %p, i32 512
store <4 x float> %v, ptr %tmp0, align 1, !nontemporal !0
ret void
}
define void @test_stnp_v4f32_offset_504(ptr %p, <4 x float> %v) #0 {
-; CHECK-LABEL: test_stnp_v4f32_offset_504:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: mov d1, v0[1]
-; CHECK-NEXT: stnp d0, d1, [x0, #504]
-; CHECK-NEXT: ret
+; CHECK-LE-LABEL: test_stnp_v4f32_offset_504:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: mov d1, v0.d[1]
+; CHECK-LE-NEXT: stnp d0, d1, [x0, #504]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: test_stnp_v4f32_offset_504:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: add x8, x0, #504
+; CHECK-BE-NEXT: str q0, [x8]
+; CHECK-BE-NEXT: ret
%tmp0 = getelementptr i8, ptr %p, i32 504
store <4 x float> %v, ptr %tmp0, align 1, !nontemporal !0
ret void
}
define void @test_stnp_v4f32_invalid_offset_508(ptr %p, <4 x float> %v) #0 {
-; CHECK-LABEL: test_stnp_v4f32_invalid_offset_508:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: mov d1, v0[1]
-; CHECK-NEXT: add x8, x0, #508
-; CHECK-NEXT: stnp d0, d1, [x8]
-; CHECK-NEXT: ret
+; CHECK-LE-LABEL: test_stnp_v4f32_invalid_offset_508:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: mov d1, v0.d[1]
+; CHECK-LE-NEXT: add x8, x0, #508
+; CHECK-LE-NEXT: stnp d0, d1, [x8]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: test_stnp_v4f32_invalid_offset_508:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: add x8, x0, #508
+; CHECK-BE-NEXT: str q0, [x8]
+; CHECK-BE-NEXT: ret
%tmp0 = getelementptr i8, ptr %p, i32 508
store <4 x float> %v, ptr %tmp0, align 1, !nontemporal !0
ret void
}
define void @test_stnp_v4f32_invalid_offset_neg_520(ptr %p, <4 x float> %v) #0 {
-; CHECK-LABEL: test_stnp_v4f32_invalid_offset_neg_520:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: mov d1, v0[1]
-; CHECK-NEXT: sub x8, x0, #520
-; CHECK-NEXT: stnp d0, d1, [x8]
-; CHECK-NEXT: ret
+; CHECK-LE-LABEL: test_stnp_v4f32_invalid_offset_neg_520:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: mov d1, v0.d[1]
+; CHECK-LE-NEXT: sub x8, x0, #520
+; CHECK-LE-NEXT: stnp d0, d1, [x8]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: test_stnp_v4f32_invalid_offset_neg_520:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: sub x8, x0, #520
+; CHECK-BE-NEXT: str q0, [x8]
+; CHECK-BE-NEXT: ret
%tmp0 = getelementptr i8, ptr %p, i32 -520
store <4 x float> %v, ptr %tmp0, align 1, !nontemporal !0
ret void
}
define void @test_stnp_v4f32_offset_neg_512(ptr %p, <4 x float> %v) #0 {
-; CHECK-LABEL: test_stnp_v4f32_offset_neg_512:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: mov d1, v0[1]
-; CHECK-NEXT: stnp d0, d1, [x0, #-512]
-; CHECK-NEXT: ret
+; CHECK-LE-LABEL: test_stnp_v4f32_offset_neg_512:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: mov d1, v0.d[1]
+; CHECK-LE-NEXT: stnp d0, d1, [x0, #-512]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: test_stnp_v4f32_offset_neg_512:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: sub x8, x0, #512
+; CHECK-BE-NEXT: str q0, [x8]
+; CHECK-BE-NEXT: ret
%tmp0 = getelementptr i8, ptr %p, i32 -512
store <4 x float> %v, ptr %tmp0, align 1, !nontemporal !0
ret void
@@ -289,50 +426,71 @@ define void @test_stnp_v4f32_offset_neg_512(ptr %p, <4 x float> %v) #0 {
define void @test_stnp_v2f32_invalid_offset_256(ptr %p, <2 x float> %v) #0 {
-; CHECK-LABEL: test_stnp_v2f32_invalid_offset_256:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov s1, v0[1]
-; CHECK-NEXT: add x8, x0, #256
-; CHECK-NEXT: stnp s0, s1, [x8]
-; CHECK-NEXT: ret
+; CHECK-LE-LABEL: test_stnp_v2f32_invalid_offset_256:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-LE-NEXT: mov s1, v0.s[1]
+; CHECK-LE-NEXT: add x8, x0, #256
+; CHECK-LE-NEXT: stnp s0, s1, [x8]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: test_stnp_v2f32_invalid_offset_256:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: str d0, [x0, #256]
+; CHECK-BE-NEXT: ret
%tmp0 = getelementptr i8, ptr %p, i32 256
store <2 x float> %v, ptr %tmp0, align 1, !nontemporal !0
ret void
}
define void @test_stnp_v2f32_offset_252(ptr %p, <2 x float> %v) #0 {
-; CHECK-LABEL: test_stnp_v2f32_offset_252:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov s1, v0[1]
-; CHECK-NEXT: stnp s0, s1, [x0, #252]
-; CHECK-NEXT: ret
+; CHECK-LE-LABEL: test_stnp_v2f32_offset_252:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-LE-NEXT: mov s1, v0.s[1]
+; CHECK-LE-NEXT: stnp s0, s1, [x0, #252]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: test_stnp_v2f32_offset_252:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: stur d0, [x0, #252]
+; CHECK-BE-NEXT: ret
%tmp0 = getelementptr i8, ptr %p, i32 252
store <2 x float> %v, ptr %tmp0, align 1, !nontemporal !0
ret void
}
define void @test_stnp_v2f32_invalid_offset_neg_260(ptr %p, <2 x float> %v) #0 {
-; CHECK-LABEL: test_stnp_v2f32_invalid_offset_neg_260:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov s1, v0[1]
-; CHECK-NEXT: sub x8, x0, #260
-; CHECK-NEXT: stnp s0, s1, [x8]
-; CHECK-NEXT: ret
+; CHECK-LE-LABEL: test_stnp_v2f32_invalid_offset_neg_260:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-LE-NEXT: mov s1, v0.s[1]
+; CHECK-LE-NEXT: sub x8, x0, #260
+; CHECK-LE-NEXT: stnp s0, s1, [x8]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: test_stnp_v2f32_invalid_offset_neg_260:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: sub x8, x0, #260
+; CHECK-BE-NEXT: str d0, [x8]
+; CHECK-BE-NEXT: ret
%tmp0 = getelementptr i8, ptr %p, i32 -260
store <2 x float> %v, ptr %tmp0, align 1, !nontemporal !0
ret void
}
define void @test_stnp_v2f32_offset_neg_256(ptr %p, <2 x float> %v) #0 {
-; CHECK-LABEL: test_stnp_v2f32_offset_neg_256:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov s1, v0[1]
-; CHECK-NEXT: stnp s0, s1, [x0, #-256]
-; CHECK-NEXT: ret
+; CHECK-LE-LABEL: test_stnp_v2f32_offset_neg_256:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-LE-NEXT: mov s1, v0.s[1]
+; CHECK-LE-NEXT: stnp s0, s1, [x0, #-256]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: test_stnp_v2f32_offset_neg_256:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: stur d0, [x0, #-256]
+; CHECK-BE-NEXT: ret
%tmp0 = getelementptr i8, ptr %p, i32 -256
store <2 x float> %v, ptr %tmp0, align 1, !nontemporal !0
ret void
@@ -341,17 +499,28 @@ define void @test_stnp_v2f32_offset_neg_256(ptr %p, <2 x float> %v) #0 {
declare void @dummy(ptr)
define void @test_stnp_v4f32_offset_alloca(<4 x float> %v) #0 {
-; CHECK-LABEL: test_stnp_v4f32_offset_alloca:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: sub sp, sp, #32
-; CHECK-NEXT: mov d1, v0[1]
-; CHECK-NEXT: mov x0, sp
-; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
-; CHECK-NEXT: stnp d0, d1, [sp]
-; CHECK-NEXT: bl _dummy
-; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #32
-; CHECK-NEXT: ret
+; CHECK-LE-LABEL: test_stnp_v4f32_offset_alloca:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: sub sp, sp, #32
+; CHECK-LE-NEXT: mov d1, v0.d[1]
+; CHECK-LE-NEXT: mov x0, sp
+; CHECK-LE-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-LE-NEXT: stnp d0, d1, [sp]
+; CHECK-LE-NEXT: bl dummy
+; CHECK-LE-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-LE-NEXT: add sp, sp, #32
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: test_stnp_v4f32_offset_alloca:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: sub sp, sp, #32
+; CHECK-BE-NEXT: mov x0, sp
+; CHECK-BE-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-BE-NEXT: str q0, [sp]
+; CHECK-BE-NEXT: bl dummy
+; CHECK-BE-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-BE-NEXT: add sp, sp, #32
+; CHECK-BE-NEXT: ret
%tmp0 = alloca <4 x float>
store <4 x float> %v, ptr %tmp0, align 1, !nontemporal !0
call void @dummy(ptr %tmp0)
@@ -359,17 +528,28 @@ define void @test_stnp_v4f32_offset_alloca(<4 x float> %v) #0 {
}
define void @test_stnp_v4f32_offset_alloca_2(<4 x float> %v) #0 {
-; CHECK-LABEL: test_stnp_v4f32_offset_alloca_2:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: sub sp, sp, #48
-; CHECK-NEXT: mov d1, v0[1]
-; CHECK-NEXT: mov x0, sp
-; CHECK-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill
-; CHECK-NEXT: stnp d0, d1, [sp, #16]
-; CHECK-NEXT: bl _dummy
-; CHECK-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #48
-; CHECK-NEXT: ret
+; CHECK-LE-LABEL: test_stnp_v4f32_offset_alloca_2:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: sub sp, sp, #48
+; CHECK-LE-NEXT: mov d1, v0.d[1]
+; CHECK-LE-NEXT: mov x0, sp
+; CHECK-LE-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-LE-NEXT: stnp d0, d1, [sp, #16]
+; CHECK-LE-NEXT: bl dummy
+; CHECK-LE-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-LE-NEXT: add sp, sp, #48
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: test_stnp_v4f32_offset_alloca_2:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: sub sp, sp, #48
+; CHECK-BE-NEXT: mov x0, sp
+; CHECK-BE-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-BE-NEXT: str q0, [sp, #16]
+; CHECK-BE-NEXT: bl dummy
+; CHECK-BE-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-BE-NEXT: add sp, sp, #48
+; CHECK-BE-NEXT: ret
%tmp0 = alloca <4 x float>, i32 2
%tmp1 = getelementptr <4 x float>, ptr %tmp0, i32 1
store <4 x float> %v, ptr %tmp1, align 1, !nontemporal !0
@@ -378,122 +558,239 @@ define void @test_stnp_v4f32_offset_alloca_2(<4 x float> %v) #0 {
}
define void @test_stnp_v32i8(<32 x i8> %v, ptr %ptr) {
-; CHECK-LABEL: test_stnp_v32i8:
-; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: stnp q0, q1, [x0]
-; CHECK-NEXT: ret
-
+; CHECK-LE-LABEL: test_stnp_v32i8:
+; CHECK-LE: // %bb.0: // %entry
+; CHECK-LE-NEXT: stnp q0, q1, [x0]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: test_stnp_v32i8:
+; CHECK-BE: // %bb.0: // %entry
+; CHECK-BE-NEXT: rev64 v1.16b, v1.16b
+; CHECK-BE-NEXT: rev64 v0.16b, v0.16b
+; CHECK-BE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
+; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-BE-NEXT: stnp q0, q1, [x0]
+; CHECK-BE-NEXT: ret
entry:
store <32 x i8> %v, ptr %ptr, align 4, !nontemporal !0
ret void
}
define void @test_stnp_v32i16(<32 x i16> %v, ptr %ptr) {
-; CHECK-LABEL: test_stnp_v32i16:
-; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: stnp q2, q3, [x0, #32]
-; CHECK-NEXT: stnp q0, q1, [x0]
-; CHECK-NEXT: ret
-
+; CHECK-LE-LABEL: test_stnp_v32i16:
+; CHECK-LE: // %bb.0: // %entry
+; CHECK-LE-NEXT: stnp q2, q3, [x0, #32]
+; CHECK-LE-NEXT: stnp q0, q1, [x0]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: test_stnp_v32i16:
+; CHECK-BE: // %bb.0: // %entry
+; CHECK-BE-NEXT: rev64 v3.8h, v3.8h
+; CHECK-BE-NEXT: rev64 v2.8h, v2.8h
+; CHECK-BE-NEXT: rev64 v1.8h, v1.8h
+; CHECK-BE-NEXT: rev64 v0.8h, v0.8h
+; CHECK-BE-NEXT: ext v3.16b, v3.16b, v3.16b, #8
+; CHECK-BE-NEXT: ext v2.16b, v2.16b, v2.16b, #8
+; CHECK-BE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
+; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-BE-NEXT: stnp q2, q3, [x0, #32]
+; CHECK-BE-NEXT: stnp q0, q1, [x0]
+; CHECK-BE-NEXT: ret
entry:
store <32 x i16> %v, ptr %ptr, align 4, !nontemporal !0
ret void
}
define void @test_stnp_v32f16(<32 x half> %v, ptr %ptr) {
-; CHECK-LABEL: test_stnp_v32f16:
-; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: stnp q2, q3, [x0, #32]
-; CHECK-NEXT: stnp q0, q1, [x0]
-; CHECK-NEXT: ret
-
+; CHECK-LE-LABEL: test_stnp_v32f16:
+; CHECK-LE: // %bb.0: // %entry
+; CHECK-LE-NEXT: stnp q2, q3, [x0, #32]
+; CHECK-LE-NEXT: stnp q0, q1, [x0]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: test_stnp_v32f16:
+; CHECK-BE: // %bb.0: // %entry
+; CHECK-BE-NEXT: rev64 v3.8h, v3.8h
+; CHECK-BE-NEXT: rev64 v2.8h, v2.8h
+; CHECK-BE-NEXT: rev64 v1.8h, v1.8h
+; CHECK-BE-NEXT: rev64 v0.8h, v0.8h
+; CHECK-BE-NEXT: ext v3.16b, v3.16b, v3.16b, #8
+; CHECK-BE-NEXT: ext v2.16b, v2.16b, v2.16b, #8
+; CHECK-BE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
+; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-BE-NEXT: stnp q2, q3, [x0, #32]
+; CHECK-BE-NEXT: stnp q0, q1, [x0]
+; CHECK-BE-NEXT: ret
entry:
store <32 x half> %v, ptr %ptr, align 4, !nontemporal !0
ret void
}
define void @test_stnp_v16i32(<16 x i32> %v, ptr %ptr) {
-; CHECK-LABEL: test_stnp_v16i32:
-; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: stnp q2, q3, [x0, #32]
-; CHECK-NEXT: stnp q0, q1, [x0]
-; CHECK-NEXT: ret
-
+; CHECK-LE-LABEL: test_stnp_v16i32:
+; CHECK-LE: // %bb.0: // %entry
+; CHECK-LE-NEXT: stnp q2, q3, [x0, #32]
+; CHECK-LE-NEXT: stnp q0, q1, [x0]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: test_stnp_v16i32:
+; CHECK-BE: // %bb.0: // %entry
+; CHECK-BE-NEXT: rev64 v3.4s, v3.4s
+; CHECK-BE-NEXT: rev64 v2.4s, v2.4s
+; CHECK-BE-NEXT: rev64 v1.4s, v1.4s
+; CHECK-BE-NEXT: rev64 v0.4s, v0.4s
+; CHECK-BE-NEXT: ext v3.16b, v3.16b, v3.16b, #8
+; CHECK-BE-NEXT: ext v2.16b, v2.16b, v2.16b, #8
+; CHECK-BE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
+; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-BE-NEXT: stnp q2, q3, [x0, #32]
+; CHECK-BE-NEXT: stnp q0, q1, [x0]
+; CHECK-BE-NEXT: ret
entry:
store <16 x i32> %v, ptr %ptr, align 4, !nontemporal !0
ret void
}
define void @test_stnp_v16f32(<16 x float> %v, ptr %ptr) {
-; CHECK-LABEL: test_stnp_v16f32:
-; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: stnp q2, q3, [x0, #32]
-; CHECK-NEXT: stnp q0, q1, [x0]
-; CHECK-NEXT: ret
-
+; CHECK-LE-LABEL: test_stnp_v16f32:
+; CHECK-LE: // %bb.0: // %entry
+; CHECK-LE-NEXT: stnp q2, q3, [x0, #32]
+; CHECK-LE-NEXT: stnp q0, q1, [x0]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: test_stnp_v16f32:
+; CHECK-BE: // %bb.0: // %entry
+; CHECK-BE-NEXT: rev64 v3.4s, v3.4s
+; CHECK-BE-NEXT: rev64 v2.4s, v2.4s
+; CHECK-BE-NEXT: rev64 v1.4s, v1.4s
+; CHECK-BE-NEXT: rev64 v0.4s, v0.4s
+; CHECK-BE-NEXT: ext v3.16b, v3.16b, v3.16b, #8
+; CHECK-BE-NEXT: ext v2.16b, v2.16b, v2.16b, #8
+; CHECK-BE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
+; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-BE-NEXT: stnp q2, q3, [x0, #32]
+; CHECK-BE-NEXT: stnp q0, q1, [x0]
+; CHECK-BE-NEXT: ret
entry:
store <16 x float> %v, ptr %ptr, align 4, !nontemporal !0
ret void
}
define void @test_stnp_v17f32(<17 x float> %v, ptr %ptr) {
-; CHECK-LABEL: test_stnp_v17f32:
-; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: ; kill: def $s4 killed $s4 def $q4
-; CHECK-NEXT: ; kill: def $s0 killed $s0 def $q0
-; CHECK-NEXT: ldr s16, [sp, #16]
-; CHECK-NEXT: ; kill: def $s5 killed $s5 def $q5
-; CHECK-NEXT: ; kill: def $s1 killed $s1 def $q1
-; CHECK-NEXT: add x8, sp, #20
-; CHECK-NEXT: ; kill: def $s6 killed $s6 def $q6
-; CHECK-NEXT: ; kill: def $s2 killed $s2 def $q2
-; CHECK-NEXT: ; kill: def $s7 killed $s7 def $q7
-; CHECK-NEXT: ; kill: def $s3 killed $s3 def $q3
-; CHECK-NEXT: mov.s v4[1], v5[0]
-; CHECK-NEXT: mov.s v0[1], v1[0]
-; CHECK-NEXT: ldr s5, [sp]
-; CHECK-NEXT: ld1.s { v16 }[1], [x8]
-; CHECK-NEXT: add x8, sp, #4
-; CHECK-NEXT: ld1.s { v5 }[1], [x8]
-; CHECK-NEXT: add x8, sp, #24
-; CHECK-NEXT: mov.s v4[2], v6[0]
-; CHECK-NEXT: ld1.s { v16 }[2], [x8]
-; CHECK-NEXT: mov.s v0[2], v2[0]
-; CHECK-NEXT: add x8, sp, #8
-; CHECK-NEXT: ld1.s { v5 }[2], [x8]
-; CHECK-NEXT: add x8, sp, #28
-; CHECK-NEXT: ld1.s { v16 }[3], [x8]
-; CHECK-NEXT: add x8, sp, #12
-; CHECK-NEXT: mov.s v4[3], v7[0]
-; CHECK-NEXT: mov.s v0[3], v3[0]
-; CHECK-NEXT: ld1.s { v5 }[3], [x8]
-; CHECK-NEXT: mov d1, v16[1]
-; CHECK-NEXT: mov d2, v5[1]
-; CHECK-NEXT: mov d3, v4[1]
-; CHECK-NEXT: mov d6, v0[1]
-; CHECK-NEXT: stnp d16, d1, [x0, #48]
-; CHECK-NEXT: ldr s1, [sp, #32]
-; CHECK-NEXT: stnp d5, d2, [x0, #32]
-; CHECK-NEXT: stnp d4, d3, [x0, #16]
-; CHECK-NEXT: stnp d0, d6, [x0]
-; CHECK-NEXT: str s1, [x0, #64]
-; CHECK-NEXT: ret
-
+; CHECK-LE-LABEL: test_stnp_v17f32:
+; CHECK-LE: // %bb.0: // %entry
+; CHECK-LE-NEXT: // kill: def $s4 killed $s4 def $q4
+; CHECK-LE-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-LE-NEXT: ldr s16, [sp, #32]
+; CHECK-LE-NEXT: // kill: def $s5 killed $s5 def $q5
+; CHECK-LE-NEXT: // kill: def $s1 killed $s1 def $q1
+; CHECK-LE-NEXT: add x8, sp, #40
+; CHECK-LE-NEXT: // kill: def $s6 killed $s6 def $q6
+; CHECK-LE-NEXT: // kill: def $s2 killed $s2 def $q2
+; CHECK-LE-NEXT: // kill: def $s7 killed $s7 def $q7
+; CHECK-LE-NEXT: // kill: def $s3 killed $s3 def $q3
+; CHECK-LE-NEXT: mov v4.s[1], v5.s[0]
+; CHECK-LE-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-LE-NEXT: ldr s5, [sp]
+; CHECK-LE-NEXT: ld1 { v16.s }[1], [x8]
+; CHECK-LE-NEXT: add x8, sp, #8
+; CHECK-LE-NEXT: ld1 { v5.s }[1], [x8]
+; CHECK-LE-NEXT: add x8, sp, #48
+; CHECK-LE-NEXT: mov v4.s[2], v6.s[0]
+; CHECK-LE-NEXT: ld1 { v16.s }[2], [x8]
+; CHECK-LE-NEXT: mov v0.s[2], v2.s[0]
+; CHECK-LE-NEXT: add x8, sp, #16
+; CHECK-LE-NEXT: ld1 { v5.s }[2], [x8]
+; CHECK-LE-NEXT: add x8, sp, #56
+; CHECK-LE-NEXT: ld1 { v16.s }[3], [x8]
+; CHECK-LE-NEXT: add x8, sp, #24
+; CHECK-LE-NEXT: mov v4.s[3], v7.s[0]
+; CHECK-LE-NEXT: mov v0.s[3], v3.s[0]
+; CHECK-LE-NEXT: ld1 { v5.s }[3], [x8]
+; CHECK-LE-NEXT: mov d1, v16.d[1]
+; CHECK-LE-NEXT: mov d2, v5.d[1]
+; CHECK-LE-NEXT: mov d3, v4.d[1]
+; CHECK-LE-NEXT: mov d6, v0.d[1]
+; CHECK-LE-NEXT: stnp d16, d1, [x0, #48]
+; CHECK-LE-NEXT: ldr s1, [sp, #64]
+; CHECK-LE-NEXT: stnp d5, d2, [x0, #32]
+; CHECK-LE-NEXT: stnp d4, d3, [x0, #16]
+; CHECK-LE-NEXT: stnp d0, d6, [x0]
+; CHECK-LE-NEXT: str s1, [x0, #64]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: test_stnp_v17f32:
+; CHECK-BE: // %bb.0: // %entry
+; CHECK-BE-NEXT: // kill: def $s4 killed $s4 def $q4
+; CHECK-BE-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-BE-NEXT: ldr s16, [sp, #36]
+; CHECK-BE-NEXT: // kill: def $s5 killed $s5 def $q5
+; CHECK-BE-NEXT: // kill: def $s1 killed $s1 def $q1
+; CHECK-BE-NEXT: ldr s17, [sp, #4]
+; CHECK-BE-NEXT: add x8, sp, #44
+; CHECK-BE-NEXT: mov v4.s[1], v5.s[0]
+; CHECK-BE-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-BE-NEXT: // kill: def $s6 killed $s6 def $q6
+; CHECK-BE-NEXT: // kill: def $s2 killed $s2 def $q2
+; CHECK-BE-NEXT: // kill: def $s7 killed $s7 def $q7
+; CHECK-BE-NEXT: // kill: def $s3 killed $s3 def $q3
+; CHECK-BE-NEXT: ldr s1, [sp, #68]
+; CHECK-BE-NEXT: ld1 { v16.s }[1], [x8]
+; CHECK-BE-NEXT: add x8, sp, #12
+; CHECK-BE-NEXT: ld1 { v17.s }[1], [x8]
+; CHECK-BE-NEXT: add x8, sp, #52
+; CHECK-BE-NEXT: str s1, [x0, #64]
+; CHECK-BE-NEXT: ld1 { v16.s }[2], [x8]
+; CHECK-BE-NEXT: add x8, sp, #20
+; CHECK-BE-NEXT: mov v4.s[2], v6.s[0]
+; CHECK-BE-NEXT: mov v0.s[2], v2.s[0]
+; CHECK-BE-NEXT: ld1 { v17.s }[2], [x8]
+; CHECK-BE-NEXT: add x8, sp, #60
+; CHECK-BE-NEXT: ld1 { v16.s }[3], [x8]
+; CHECK-BE-NEXT: add x8, sp, #28
+; CHECK-BE-NEXT: ld1 { v17.s }[3], [x8]
+; CHECK-BE-NEXT: mov v4.s[3], v7.s[0]
+; CHECK-BE-NEXT: add x8, x0, #48
+; CHECK-BE-NEXT: mov v0.s[3], v3.s[0]
+; CHECK-BE-NEXT: st1 { v16.4s }, [x8]
+; CHECK-BE-NEXT: add x8, x0, #32
+; CHECK-BE-NEXT: st1 { v17.4s }, [x8]
+; CHECK-BE-NEXT: add x8, x0, #16
+; CHECK-BE-NEXT: st1 { v4.4s }, [x8]
+; CHECK-BE-NEXT: st1 { v0.4s }, [x0]
+; CHECK-BE-NEXT: ret
entry:
store <17 x float> %v, ptr %ptr, align 4, !nontemporal !0
ret void
}
define void @test_stnp_v16i32_invalid_offset(<16 x i32> %v, ptr %ptr) {
-; CHECK-LABEL: test_stnp_v16i32_invalid_offset:
-; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: mov w8, #32032 ; =0x7d20
-; CHECK-NEXT: mov w9, #32000 ; =0x7d00
-; CHECK-NEXT: add x8, x0, x8
-; CHECK-NEXT: add x9, x0, x9
-; CHECK-NEXT: stnp q2, q3, [x8]
-; CHECK-NEXT: stnp q0, q1, [x9]
-; CHECK-NEXT: ret
-
+; CHECK-LE-LABEL: test_stnp_v16i32_invalid_offset:
+; CHECK-LE: // %bb.0: // %entry
+; CHECK-LE-NEXT: mov w8, #32032 // =0x7d20
+; CHECK-LE-NEXT: mov w9, #32000 // =0x7d00
+; CHECK-LE-NEXT: add x8, x0, x8
+; CHECK-LE-NEXT: add x9, x0, x9
+; CHECK-LE-NEXT: stnp q2, q3, [x8]
+; CHECK-LE-NEXT: stnp q0, q1, [x9]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: test_stnp_v16i32_invalid_offset:
+; CHECK-BE: // %bb.0: // %entry
+; CHECK-BE-NEXT: rev64 v3.4s, v3.4s
+; CHECK-BE-NEXT: rev64 v2.4s, v2.4s
+; CHECK-BE-NEXT: mov w8, #32032 // =0x7d20
+; CHECK-BE-NEXT: rev64 v1.4s, v1.4s
+; CHECK-BE-NEXT: rev64 v0.4s, v0.4s
+; CHECK-BE-NEXT: mov w9, #32000 // =0x7d00
+; CHECK-BE-NEXT: add x8, x0, x8
+; CHECK-BE-NEXT: add x9, x0, x9
+; CHECK-BE-NEXT: ext v3.16b, v3.16b, v3.16b, #8
+; CHECK-BE-NEXT: ext v2.16b, v2.16b, v2.16b, #8
+; CHECK-BE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
+; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-BE-NEXT: stnp q2, q3, [x8]
+; CHECK-BE-NEXT: stnp q0, q1, [x9]
+; CHECK-BE-NEXT: ret
entry:
%gep = getelementptr <16 x i32>, ptr %ptr, i32 500
store <16 x i32> %v, ptr %gep, align 4, !nontemporal !0
@@ -501,28 +798,58 @@ entry:
}
define void @test_stnp_v16f64(<16 x double> %v, ptr %ptr) {
-; CHECK-LABEL: test_stnp_v16f64:
-; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: stnp q6, q7, [x0, #96]
-; CHECK-NEXT: stnp q4, q5, [x0, #64]
-; CHECK-NEXT: stnp q2, q3, [x0, #32]
-; CHECK-NEXT: stnp q0, q1, [x0]
-; CHECK-NEXT: ret
-
+; CHECK-LE-LABEL: test_stnp_v16f64:
+; CHECK-LE: // %bb.0: // %entry
+; CHECK-LE-NEXT: stnp q6, q7, [x0, #96]
+; CHECK-LE-NEXT: stnp q4, q5, [x0, #64]
+; CHECK-LE-NEXT: stnp q2, q3, [x0, #32]
+; CHECK-LE-NEXT: stnp q0, q1, [x0]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: test_stnp_v16f64:
+; CHECK-BE: // %bb.0: // %entry
+; CHECK-BE-NEXT: ext v7.16b, v7.16b, v7.16b, #8
+; CHECK-BE-NEXT: ext v6.16b, v6.16b, v6.16b, #8
+; CHECK-BE-NEXT: ext v5.16b, v5.16b, v5.16b, #8
+; CHECK-BE-NEXT: ext v4.16b, v4.16b, v4.16b, #8
+; CHECK-BE-NEXT: ext v3.16b, v3.16b, v3.16b, #8
+; CHECK-BE-NEXT: ext v2.16b, v2.16b, v2.16b, #8
+; CHECK-BE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
+; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-BE-NEXT: stnp q6, q7, [x0, #96]
+; CHECK-BE-NEXT: stnp q4, q5, [x0, #64]
+; CHECK-BE-NEXT: stnp q2, q3, [x0, #32]
+; CHECK-BE-NEXT: stnp q0, q1, [x0]
+; CHECK-BE-NEXT: ret
entry:
store <16 x double> %v, ptr %ptr, align 4, !nontemporal !0
ret void
}
define void @test_stnp_v16i64(<16 x i64> %v, ptr %ptr) {
-; CHECK-LABEL: test_stnp_v16i64:
-; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: stnp q6, q7, [x0, #96]
-; CHECK-NEXT: stnp q4, q5, [x0, #64]
-; CHECK-NEXT: stnp q2, q3, [x0, #32]
-; CHECK-NEXT: stnp q0, q1, [x0]
-; CHECK-NEXT: ret
-
+; CHECK-LE-LABEL: test_stnp_v16i64:
+; CHECK-LE: // %bb.0: // %entry
+; CHECK-LE-NEXT: stnp q6, q7, [x0, #96]
+; CHECK-LE-NEXT: stnp q4, q5, [x0, #64]
+; CHECK-LE-NEXT: stnp q2, q3, [x0, #32]
+; CHECK-LE-NEXT: stnp q0, q1, [x0]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: test_stnp_v16i64:
+; CHECK-BE: // %bb.0: // %entry
+; CHECK-BE-NEXT: ext v7.16b, v7.16b, v7.16b, #8
+; CHECK-BE-NEXT: ext v6.16b, v6.16b, v6.16b, #8
+; CHECK-BE-NEXT: ext v5.16b, v5.16b, v5.16b, #8
+; CHECK-BE-NEXT: ext v4.16b, v4.16b, v4.16b, #8
+; CHECK-BE-NEXT: ext v3.16b, v3.16b, v3.16b, #8
+; CHECK-BE-NEXT: ext v2.16b, v2.16b, v2.16b, #8
+; CHECK-BE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
+; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-BE-NEXT: stnp q6, q7, [x0, #96]
+; CHECK-BE-NEXT: stnp q4, q5, [x0, #64]
+; CHECK-BE-NEXT: stnp q2, q3, [x0, #32]
+; CHECK-BE-NEXT: stnp q0, q1, [x0]
+; CHECK-BE-NEXT: ret
entry:
store <16 x i64> %v, ptr %ptr, align 4, !nontemporal !0
ret void
More information about the llvm-commits
mailing list