[llvm] cab01a8 - [AArch64] Additional testing for i128 and non-temporal loads/stores under BE. NFC

David Green via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 26 03:01:54 PDT 2023


Author: David Green
Date: 2023-09-26T11:01:48+01:00
New Revision: cab01a8b4904013a5c66077eb8cbb7880d20cabe

URL: https://github.com/llvm/llvm-project/commit/cab01a8b4904013a5c66077eb8cbb7880d20cabe
DIFF: https://github.com/llvm/llvm-project/commit/cab01a8b4904013a5c66077eb8cbb7880d20cabe.diff

LOG: [AArch64] Additional testing for i128 and non-temporal loads/stores under BE. NFC

Added: 
    

Modified: 
    llvm/test/CodeGen/AArch64/i128_volatile_load_store.ll
    llvm/test/CodeGen/AArch64/nontemporal.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AArch64/i128_volatile_load_store.ll b/llvm/test/CodeGen/AArch64/i128_volatile_load_store.ll
index 73708e3fd8c44bf..c3a03b2cb35426b 100644
--- a/llvm/test/CodeGen/AArch64/i128_volatile_load_store.ll
+++ b/llvm/test/CodeGen/AArch64/i128_volatile_load_store.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s
+; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LE
+; RUN: llc -mtriple=aarch64_be %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-BE
 
 @x = common dso_local global i128 0
 @y = common dso_local global i128 0
@@ -116,3 +117,53 @@ define void @test7() {
   store volatile i128 %tmp, ptr getelementptr (i8, ptr @y, i64 503)
   ret void
 }
+
+define i128 @load_nonvol(i32, i32, ptr %p) {
+; CHECK-LABEL: load_nonvol:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldp x0, x1, [x2]
+; CHECK-NEXT:    ret
+entry:
+  %l = load i128, ptr %p, align 16
+  ret i128 %l
+}
+
+define i128 @load_vol(i32, i32, ptr %p) {
+; CHECK-LE-LABEL: load_vol:
+; CHECK-LE:       // %bb.0: // %entry
+; CHECK-LE-NEXT:    ldp x0, x1, [x2]
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: load_vol:
+; CHECK-BE:       // %bb.0: // %entry
+; CHECK-BE-NEXT:    ldp x1, x0, [x2]
+; CHECK-BE-NEXT:    ret
+entry:
+  %l = load volatile i128, ptr %p, align 16
+  ret i128 %l
+}
+
+define void @store_nonvol(i128 %a, ptr %p) {
+; CHECK-LABEL: store_nonvol:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    stp x0, x1, [x2]
+; CHECK-NEXT:    ret
+entry:
+  store i128 %a, ptr %p, align 16
+  ret void
+}
+
+define void @loadstore_vol(i128 %a, ptr %p) {
+; CHECK-LE-LABEL: loadstore_vol:
+; CHECK-LE:       // %bb.0: // %entry
+; CHECK-LE-NEXT:    stp x0, x1, [x2]
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: loadstore_vol:
+; CHECK-BE:       // %bb.0: // %entry
+; CHECK-BE-NEXT:    stp x1, x0, [x2]
+; CHECK-BE-NEXT:    ret
+entry:
+  store volatile i128 %a, ptr %p, align 16
+  ret void
+}

diff  --git a/llvm/test/CodeGen/AArch64/nontemporal.ll b/llvm/test/CodeGen/AArch64/nontemporal.ll
index 4fcb5c0342e525b..fe19ca7e2cc43d1 100644
--- a/llvm/test/CodeGen/AArch64/nontemporal.ll
+++ b/llvm/test/CodeGen/AArch64/nontemporal.ll
@@ -1,287 +1,424 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple aarch64-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple aarch64 | FileCheck %s --check-prefixes=CHECK-LE
+; RUN: llc < %s -mtriple aarch64_be | FileCheck %s --check-prefixes=CHECK-BE
 
 define void @test_stnp_v4i64(ptr %p, <4 x i64> %v) #0 {
-; CHECK-LABEL: test_stnp_v4i64:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    stnp q0, q1, [x0]
-; CHECK-NEXT:    ret
+; CHECK-LE-LABEL: test_stnp_v4i64:
+; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    stnp q0, q1, [x0]
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: test_stnp_v4i64:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
+; CHECK-BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-BE-NEXT:    stnp q0, q1, [x0]
+; CHECK-BE-NEXT:    ret
   store <4 x i64> %v, ptr %p, align 1, !nontemporal !0
   ret void
 }
 
 define void @test_stnp_v4i32(ptr %p, <4 x i32> %v) #0 {
-; CHECK-LABEL: test_stnp_v4i32:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    mov d1, v0[1]
-; CHECK-NEXT:    stnp d0, d1, [x0]
-; CHECK-NEXT:    ret
+; CHECK-LE-LABEL: test_stnp_v4i32:
+; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    mov d1, v0.d[1]
+; CHECK-LE-NEXT:    stnp d0, d1, [x0]
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: test_stnp_v4i32:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    str q0, [x0]
+; CHECK-BE-NEXT:    ret
   store <4 x i32> %v, ptr %p, align 1, !nontemporal !0
   ret void
 }
 
 define void @test_stnp_v8i16(ptr %p, <8 x i16> %v) #0 {
-; CHECK-LABEL: test_stnp_v8i16:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    mov d1, v0[1]
-; CHECK-NEXT:    stnp d0, d1, [x0]
-; CHECK-NEXT:    ret
+; CHECK-LE-LABEL: test_stnp_v8i16:
+; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    mov d1, v0.d[1]
+; CHECK-LE-NEXT:    stnp d0, d1, [x0]
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: test_stnp_v8i16:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    str q0, [x0]
+; CHECK-BE-NEXT:    ret
   store <8 x i16> %v, ptr %p, align 1, !nontemporal !0
   ret void
 }
 
 define void @test_stnp_v16i8(ptr %p, <16 x i8> %v) #0 {
-; CHECK-LABEL: test_stnp_v16i8:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    mov d1, v0[1]
-; CHECK-NEXT:    stnp d0, d1, [x0]
-; CHECK-NEXT:    ret
+; CHECK-LE-LABEL: test_stnp_v16i8:
+; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    mov d1, v0.d[1]
+; CHECK-LE-NEXT:    stnp d0, d1, [x0]
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: test_stnp_v16i8:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    str q0, [x0]
+; CHECK-BE-NEXT:    ret
   store <16 x i8> %v, ptr %p, align 1, !nontemporal !0
   ret void
 }
 
 define void @test_stnp_v2i32(ptr %p, <2 x i32> %v) #0 {
-; CHECK-LABEL: test_stnp_v2i32:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    mov s1, v0[1]
-; CHECK-NEXT:    stnp s0, s1, [x0]
-; CHECK-NEXT:    ret
+; CHECK-LE-LABEL: test_stnp_v2i32:
+; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-LE-NEXT:    mov s1, v0.s[1]
+; CHECK-LE-NEXT:    stnp s0, s1, [x0]
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: test_stnp_v2i32:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    str d0, [x0]
+; CHECK-BE-NEXT:    ret
   store <2 x i32> %v, ptr %p, align 1, !nontemporal !0
   ret void
 }
 
 define void @test_stnp_v4i16(ptr %p, <4 x i16> %v) #0 {
-; CHECK-LABEL: test_stnp_v4i16:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    mov s1, v0[1]
-; CHECK-NEXT:    stnp s0, s1, [x0]
-; CHECK-NEXT:    ret
+; CHECK-LE-LABEL: test_stnp_v4i16:
+; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-LE-NEXT:    mov s1, v0.s[1]
+; CHECK-LE-NEXT:    stnp s0, s1, [x0]
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: test_stnp_v4i16:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    str d0, [x0]
+; CHECK-BE-NEXT:    ret
   store <4 x i16> %v, ptr %p, align 1, !nontemporal !0
   ret void
 }
 
 define void @test_stnp_v8i8(ptr %p, <8 x i8> %v) #0 {
-; CHECK-LABEL: test_stnp_v8i8:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    mov s1, v0[1]
-; CHECK-NEXT:    stnp s0, s1, [x0]
-; CHECK-NEXT:    ret
+; CHECK-LE-LABEL: test_stnp_v8i8:
+; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-LE-NEXT:    mov s1, v0.s[1]
+; CHECK-LE-NEXT:    stnp s0, s1, [x0]
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: test_stnp_v8i8:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    str d0, [x0]
+; CHECK-BE-NEXT:    ret
   store <8 x i8> %v, ptr %p, align 1, !nontemporal !0
   ret void
 }
 
 define void @test_stnp_v2f64(ptr %p, <2 x double> %v) #0 {
-; CHECK-LABEL: test_stnp_v2f64:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    mov d1, v0[1]
-; CHECK-NEXT:    stnp d0, d1, [x0]
-; CHECK-NEXT:    ret
+; CHECK-LE-LABEL: test_stnp_v2f64:
+; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    mov d1, v0.d[1]
+; CHECK-LE-NEXT:    stnp d0, d1, [x0]
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: test_stnp_v2f64:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    str q0, [x0]
+; CHECK-BE-NEXT:    ret
   store <2 x double> %v, ptr %p, align 1, !nontemporal !0
   ret void
 }
 
 define void @test_stnp_v4f32(ptr %p, <4 x float> %v) #0 {
-; CHECK-LABEL: test_stnp_v4f32:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    mov d1, v0[1]
-; CHECK-NEXT:    stnp d0, d1, [x0]
-; CHECK-NEXT:    ret
+; CHECK-LE-LABEL: test_stnp_v4f32:
+; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    mov d1, v0.d[1]
+; CHECK-LE-NEXT:    stnp d0, d1, [x0]
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: test_stnp_v4f32:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    str q0, [x0]
+; CHECK-BE-NEXT:    ret
   store <4 x float> %v, ptr %p, align 1, !nontemporal !0
   ret void
 }
 
 define void @test_stnp_v2f32(ptr %p, <2 x float> %v) #0 {
-; CHECK-LABEL: test_stnp_v2f32:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    mov s1, v0[1]
-; CHECK-NEXT:    stnp s0, s1, [x0]
-; CHECK-NEXT:    ret
+; CHECK-LE-LABEL: test_stnp_v2f32:
+; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-LE-NEXT:    mov s1, v0.s[1]
+; CHECK-LE-NEXT:    stnp s0, s1, [x0]
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: test_stnp_v2f32:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    str d0, [x0]
+; CHECK-BE-NEXT:    ret
   store <2 x float> %v, ptr %p, align 1, !nontemporal !0
   ret void
 }
 
 define void @test_stnp_v1f64(ptr %p, <1 x double> %v) #0 {
-; CHECK-LABEL: test_stnp_v1f64:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    mov s1, v0[1]
-; CHECK-NEXT:    stnp s0, s1, [x0]
-; CHECK-NEXT:    ret
+; CHECK-LE-LABEL: test_stnp_v1f64:
+; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-LE-NEXT:    mov s1, v0.s[1]
+; CHECK-LE-NEXT:    stnp s0, s1, [x0]
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: test_stnp_v1f64:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    str d0, [x0]
+; CHECK-BE-NEXT:    ret
   store <1 x double> %v, ptr %p, align 1, !nontemporal !0
   ret void
 }
 
 define void @test_stnp_v1i64(ptr %p, <1 x i64> %v) #0 {
-; CHECK-LABEL: test_stnp_v1i64:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    mov s1, v0[1]
-; CHECK-NEXT:    stnp s0, s1, [x0]
-; CHECK-NEXT:    ret
+; CHECK-LE-LABEL: test_stnp_v1i64:
+; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-LE-NEXT:    mov s1, v0.s[1]
+; CHECK-LE-NEXT:    stnp s0, s1, [x0]
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: test_stnp_v1i64:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    str d0, [x0]
+; CHECK-BE-NEXT:    ret
   store <1 x i64> %v, ptr %p, align 1, !nontemporal !0
   ret void
 }
 
 define void @test_stnp_i64(ptr %p, i64 %v) #0 {
-; CHECK-LABEL: test_stnp_i64:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    lsr x8, x1, #32
-; CHECK-NEXT:    stnp w1, w8, [x0]
-; CHECK-NEXT:    ret
+; CHECK-LE-LABEL: test_stnp_i64:
+; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    lsr x8, x1, #32
+; CHECK-LE-NEXT:    stnp w1, w8, [x0]
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: test_stnp_i64:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    str x1, [x0]
+; CHECK-BE-NEXT:    ret
   store i64 %v, ptr %p, align 1, !nontemporal !0
   ret void
 }
 
 
 define void @test_stnp_v2f64_offset(ptr %p, <2 x double> %v) #0 {
-; CHECK-LABEL: test_stnp_v2f64_offset:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    mov d1, v0[1]
-; CHECK-NEXT:    stnp d0, d1, [x0, #16]
-; CHECK-NEXT:    ret
+; CHECK-LE-LABEL: test_stnp_v2f64_offset:
+; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    mov d1, v0.d[1]
+; CHECK-LE-NEXT:    stnp d0, d1, [x0, #16]
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: test_stnp_v2f64_offset:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    str q0, [x0, #16]
+; CHECK-BE-NEXT:    ret
   %tmp0 = getelementptr <2 x double>, ptr %p, i32 1
   store <2 x double> %v, ptr %tmp0, align 1, !nontemporal !0
   ret void
 }
 
 define void @test_stnp_v2f64_offset_neg(ptr %p, <2 x double> %v) #0 {
-; CHECK-LABEL: test_stnp_v2f64_offset_neg:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    mov d1, v0[1]
-; CHECK-NEXT:    stnp d0, d1, [x0, #-16]
-; CHECK-NEXT:    ret
+; CHECK-LE-LABEL: test_stnp_v2f64_offset_neg:
+; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    mov d1, v0.d[1]
+; CHECK-LE-NEXT:    stnp d0, d1, [x0, #-16]
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: test_stnp_v2f64_offset_neg:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    stur q0, [x0, #-16]
+; CHECK-BE-NEXT:    ret
   %tmp0 = getelementptr <2 x double>, ptr %p, i32 -1
   store <2 x double> %v, ptr %tmp0, align 1, !nontemporal !0
   ret void
 }
 
 define void @test_stnp_v2f32_offset(ptr %p, <2 x float> %v) #0 {
-; CHECK-LABEL: test_stnp_v2f32_offset:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    mov s1, v0[1]
-; CHECK-NEXT:    stnp s0, s1, [x0, #8]
-; CHECK-NEXT:    ret
+; CHECK-LE-LABEL: test_stnp_v2f32_offset:
+; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-LE-NEXT:    mov s1, v0.s[1]
+; CHECK-LE-NEXT:    stnp s0, s1, [x0, #8]
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: test_stnp_v2f32_offset:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    str d0, [x0, #8]
+; CHECK-BE-NEXT:    ret
   %tmp0 = getelementptr <2 x float>, ptr %p, i32 1
   store <2 x float> %v, ptr %tmp0, align 1, !nontemporal !0
   ret void
 }
 
 define void @test_stnp_v2f32_offset_neg(ptr %p, <2 x float> %v) #0 {
-; CHECK-LABEL: test_stnp_v2f32_offset_neg:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    mov s1, v0[1]
-; CHECK-NEXT:    stnp s0, s1, [x0, #-8]
-; CHECK-NEXT:    ret
+; CHECK-LE-LABEL: test_stnp_v2f32_offset_neg:
+; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-LE-NEXT:    mov s1, v0.s[1]
+; CHECK-LE-NEXT:    stnp s0, s1, [x0, #-8]
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: test_stnp_v2f32_offset_neg:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    stur d0, [x0, #-8]
+; CHECK-BE-NEXT:    ret
   %tmp0 = getelementptr <2 x float>, ptr %p, i32 -1
   store <2 x float> %v, ptr %tmp0, align 1, !nontemporal !0
   ret void
 }
 
 define void @test_stnp_i64_offset(ptr %p, i64 %v) #0 {
-; CHECK-LABEL: test_stnp_i64_offset:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    lsr x8, x1, #32
-; CHECK-NEXT:    stnp w1, w8, [x0, #8]
-; CHECK-NEXT:    ret
+; CHECK-LE-LABEL: test_stnp_i64_offset:
+; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    lsr x8, x1, #32
+; CHECK-LE-NEXT:    stnp w1, w8, [x0, #8]
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: test_stnp_i64_offset:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    str x1, [x0, #8]
+; CHECK-BE-NEXT:    ret
   %tmp0 = getelementptr i64, ptr %p, i32 1
   store i64 %v, ptr %tmp0, align 1, !nontemporal !0
   ret void
 }
 
 define void @test_stnp_i64_offset_neg(ptr %p, i64 %v) #0 {
-; CHECK-LABEL: test_stnp_i64_offset_neg:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    lsr x8, x1, #32
-; CHECK-NEXT:    stnp w1, w8, [x0, #-8]
-; CHECK-NEXT:    ret
+; CHECK-LE-LABEL: test_stnp_i64_offset_neg:
+; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    lsr x8, x1, #32
+; CHECK-LE-NEXT:    stnp w1, w8, [x0, #-8]
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: test_stnp_i64_offset_neg:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    stur x1, [x0, #-8]
+; CHECK-BE-NEXT:    ret
   %tmp0 = getelementptr i64, ptr %p, i32 -1
   store i64 %v, ptr %tmp0, align 1, !nontemporal !0
   ret void
 }
 
 define void @test_stnp_v4f32_invalid_offset_4(ptr %p, <4 x float> %v) #0 {
-; CHECK-LABEL: test_stnp_v4f32_invalid_offset_4:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    mov d1, v0[1]
-; CHECK-NEXT:    add x8, x0, #4
-; CHECK-NEXT:    stnp d0, d1, [x8]
-; CHECK-NEXT:    ret
+; CHECK-LE-LABEL: test_stnp_v4f32_invalid_offset_4:
+; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    mov d1, v0.d[1]
+; CHECK-LE-NEXT:    add x8, x0, #4
+; CHECK-LE-NEXT:    stnp d0, d1, [x8]
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: test_stnp_v4f32_invalid_offset_4:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    stur q0, [x0, #4]
+; CHECK-BE-NEXT:    ret
   %tmp0 = getelementptr i8, ptr %p, i32 4
   store <4 x float> %v, ptr %tmp0, align 1, !nontemporal !0
   ret void
 }
 
 define void @test_stnp_v4f32_invalid_offset_neg_4(ptr %p, <4 x float> %v) #0 {
-; CHECK-LABEL: test_stnp_v4f32_invalid_offset_neg_4:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    mov d1, v0[1]
-; CHECK-NEXT:    sub x8, x0, #4
-; CHECK-NEXT:    stnp d0, d1, [x8]
-; CHECK-NEXT:    ret
+; CHECK-LE-LABEL: test_stnp_v4f32_invalid_offset_neg_4:
+; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    mov d1, v0.d[1]
+; CHECK-LE-NEXT:    sub x8, x0, #4
+; CHECK-LE-NEXT:    stnp d0, d1, [x8]
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: test_stnp_v4f32_invalid_offset_neg_4:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    stur q0, [x0, #-4]
+; CHECK-BE-NEXT:    ret
   %tmp0 = getelementptr i8, ptr %p, i32 -4
   store <4 x float> %v, ptr %tmp0, align 1, !nontemporal !0
   ret void
 }
 
 define void @test_stnp_v4f32_invalid_offset_512(ptr %p, <4 x float> %v) #0 {
-; CHECK-LABEL: test_stnp_v4f32_invalid_offset_512:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    mov d1, v0[1]
-; CHECK-NEXT:    add x8, x0, #512
-; CHECK-NEXT:    stnp d0, d1, [x8]
-; CHECK-NEXT:    ret
+; CHECK-LE-LABEL: test_stnp_v4f32_invalid_offset_512:
+; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    mov d1, v0.d[1]
+; CHECK-LE-NEXT:    add x8, x0, #512
+; CHECK-LE-NEXT:    stnp d0, d1, [x8]
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: test_stnp_v4f32_invalid_offset_512:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    str q0, [x0, #512]
+; CHECK-BE-NEXT:    ret
   %tmp0 = getelementptr i8, ptr %p, i32 512
   store <4 x float> %v, ptr %tmp0, align 1, !nontemporal !0
   ret void
 }
 
 define void @test_stnp_v4f32_offset_504(ptr %p, <4 x float> %v) #0 {
-; CHECK-LABEL: test_stnp_v4f32_offset_504:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    mov d1, v0[1]
-; CHECK-NEXT:    stnp d0, d1, [x0, #504]
-; CHECK-NEXT:    ret
+; CHECK-LE-LABEL: test_stnp_v4f32_offset_504:
+; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    mov d1, v0.d[1]
+; CHECK-LE-NEXT:    stnp d0, d1, [x0, #504]
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: test_stnp_v4f32_offset_504:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    add x8, x0, #504
+; CHECK-BE-NEXT:    str q0, [x8]
+; CHECK-BE-NEXT:    ret
   %tmp0 = getelementptr i8, ptr %p, i32 504
   store <4 x float> %v, ptr %tmp0, align 1, !nontemporal !0
   ret void
 }
 
 define void @test_stnp_v4f32_invalid_offset_508(ptr %p, <4 x float> %v) #0 {
-; CHECK-LABEL: test_stnp_v4f32_invalid_offset_508:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    mov d1, v0[1]
-; CHECK-NEXT:    add x8, x0, #508
-; CHECK-NEXT:    stnp d0, d1, [x8]
-; CHECK-NEXT:    ret
+; CHECK-LE-LABEL: test_stnp_v4f32_invalid_offset_508:
+; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    mov d1, v0.d[1]
+; CHECK-LE-NEXT:    add x8, x0, #508
+; CHECK-LE-NEXT:    stnp d0, d1, [x8]
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: test_stnp_v4f32_invalid_offset_508:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    add x8, x0, #508
+; CHECK-BE-NEXT:    str q0, [x8]
+; CHECK-BE-NEXT:    ret
   %tmp0 = getelementptr i8, ptr %p, i32 508
   store <4 x float> %v, ptr %tmp0, align 1, !nontemporal !0
   ret void
 }
 
 define void @test_stnp_v4f32_invalid_offset_neg_520(ptr %p, <4 x float> %v) #0 {
-; CHECK-LABEL: test_stnp_v4f32_invalid_offset_neg_520:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    mov d1, v0[1]
-; CHECK-NEXT:    sub x8, x0, #520
-; CHECK-NEXT:    stnp d0, d1, [x8]
-; CHECK-NEXT:    ret
+; CHECK-LE-LABEL: test_stnp_v4f32_invalid_offset_neg_520:
+; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    mov d1, v0.d[1]
+; CHECK-LE-NEXT:    sub x8, x0, #520
+; CHECK-LE-NEXT:    stnp d0, d1, [x8]
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: test_stnp_v4f32_invalid_offset_neg_520:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    sub x8, x0, #520
+; CHECK-BE-NEXT:    str q0, [x8]
+; CHECK-BE-NEXT:    ret
   %tmp0 = getelementptr i8, ptr %p, i32 -520
   store <4 x float> %v, ptr %tmp0, align 1, !nontemporal !0
   ret void
 }
 
 define void @test_stnp_v4f32_offset_neg_512(ptr %p, <4 x float> %v) #0 {
-; CHECK-LABEL: test_stnp_v4f32_offset_neg_512:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    mov d1, v0[1]
-; CHECK-NEXT:    stnp d0, d1, [x0, #-512]
-; CHECK-NEXT:    ret
+; CHECK-LE-LABEL: test_stnp_v4f32_offset_neg_512:
+; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    mov d1, v0.d[1]
+; CHECK-LE-NEXT:    stnp d0, d1, [x0, #-512]
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: test_stnp_v4f32_offset_neg_512:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    sub x8, x0, #512
+; CHECK-BE-NEXT:    str q0, [x8]
+; CHECK-BE-NEXT:    ret
   %tmp0 = getelementptr i8, ptr %p, i32 -512
   store <4 x float> %v, ptr %tmp0, align 1, !nontemporal !0
   ret void
@@ -289,50 +426,71 @@ define void @test_stnp_v4f32_offset_neg_512(ptr %p, <4 x float> %v) #0 {
 
 
 define void @test_stnp_v2f32_invalid_offset_256(ptr %p, <2 x float> %v) #0 {
-; CHECK-LABEL: test_stnp_v2f32_invalid_offset_256:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    mov s1, v0[1]
-; CHECK-NEXT:    add x8, x0, #256
-; CHECK-NEXT:    stnp s0, s1, [x8]
-; CHECK-NEXT:    ret
+; CHECK-LE-LABEL: test_stnp_v2f32_invalid_offset_256:
+; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-LE-NEXT:    mov s1, v0.s[1]
+; CHECK-LE-NEXT:    add x8, x0, #256
+; CHECK-LE-NEXT:    stnp s0, s1, [x8]
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: test_stnp_v2f32_invalid_offset_256:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    str d0, [x0, #256]
+; CHECK-BE-NEXT:    ret
   %tmp0 = getelementptr i8, ptr %p, i32 256
   store <2 x float> %v, ptr %tmp0, align 1, !nontemporal !0
   ret void
 }
 
 define void @test_stnp_v2f32_offset_252(ptr %p, <2 x float> %v) #0 {
-; CHECK-LABEL: test_stnp_v2f32_offset_252:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    mov s1, v0[1]
-; CHECK-NEXT:    stnp s0, s1, [x0, #252]
-; CHECK-NEXT:    ret
+; CHECK-LE-LABEL: test_stnp_v2f32_offset_252:
+; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-LE-NEXT:    mov s1, v0.s[1]
+; CHECK-LE-NEXT:    stnp s0, s1, [x0, #252]
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: test_stnp_v2f32_offset_252:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    stur d0, [x0, #252]
+; CHECK-BE-NEXT:    ret
   %tmp0 = getelementptr i8, ptr %p, i32 252
   store <2 x float> %v, ptr %tmp0, align 1, !nontemporal !0
   ret void
 }
 
 define void @test_stnp_v2f32_invalid_offset_neg_260(ptr %p, <2 x float> %v) #0 {
-; CHECK-LABEL: test_stnp_v2f32_invalid_offset_neg_260:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    mov s1, v0[1]
-; CHECK-NEXT:    sub x8, x0, #260
-; CHECK-NEXT:    stnp s0, s1, [x8]
-; CHECK-NEXT:    ret
+; CHECK-LE-LABEL: test_stnp_v2f32_invalid_offset_neg_260:
+; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-LE-NEXT:    mov s1, v0.s[1]
+; CHECK-LE-NEXT:    sub x8, x0, #260
+; CHECK-LE-NEXT:    stnp s0, s1, [x8]
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: test_stnp_v2f32_invalid_offset_neg_260:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    sub x8, x0, #260
+; CHECK-BE-NEXT:    str d0, [x8]
+; CHECK-BE-NEXT:    ret
   %tmp0 = getelementptr i8, ptr %p, i32 -260
   store <2 x float> %v, ptr %tmp0, align 1, !nontemporal !0
   ret void
 }
 
 define void @test_stnp_v2f32_offset_neg_256(ptr %p, <2 x float> %v) #0 {
-; CHECK-LABEL: test_stnp_v2f32_offset_neg_256:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    mov s1, v0[1]
-; CHECK-NEXT:    stnp s0, s1, [x0, #-256]
-; CHECK-NEXT:    ret
+; CHECK-LE-LABEL: test_stnp_v2f32_offset_neg_256:
+; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-LE-NEXT:    mov s1, v0.s[1]
+; CHECK-LE-NEXT:    stnp s0, s1, [x0, #-256]
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: test_stnp_v2f32_offset_neg_256:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    stur d0, [x0, #-256]
+; CHECK-BE-NEXT:    ret
   %tmp0 = getelementptr i8, ptr %p, i32 -256
   store <2 x float> %v, ptr %tmp0, align 1, !nontemporal !0
   ret void
@@ -341,17 +499,28 @@ define void @test_stnp_v2f32_offset_neg_256(ptr %p, <2 x float> %v) #0 {
 declare void @dummy(ptr)
 
 define void @test_stnp_v4f32_offset_alloca(<4 x float> %v) #0 {
-; CHECK-LABEL: test_stnp_v4f32_offset_alloca:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    sub sp, sp, #32
-; CHECK-NEXT:    mov d1, v0[1]
-; CHECK-NEXT:    mov x0, sp
-; CHECK-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
-; CHECK-NEXT:    stnp d0, d1, [sp]
-; CHECK-NEXT:    bl _dummy
-; CHECK-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
-; CHECK-NEXT:    add sp, sp, #32
-; CHECK-NEXT:    ret
+; CHECK-LE-LABEL: test_stnp_v4f32_offset_alloca:
+; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    sub sp, sp, #32
+; CHECK-LE-NEXT:    mov d1, v0.d[1]
+; CHECK-LE-NEXT:    mov x0, sp
+; CHECK-LE-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-LE-NEXT:    stnp d0, d1, [sp]
+; CHECK-LE-NEXT:    bl dummy
+; CHECK-LE-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-LE-NEXT:    add sp, sp, #32
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: test_stnp_v4f32_offset_alloca:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    sub sp, sp, #32
+; CHECK-BE-NEXT:    mov x0, sp
+; CHECK-BE-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-BE-NEXT:    str q0, [sp]
+; CHECK-BE-NEXT:    bl dummy
+; CHECK-BE-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-BE-NEXT:    add sp, sp, #32
+; CHECK-BE-NEXT:    ret
   %tmp0 = alloca <4 x float>
   store <4 x float> %v, ptr %tmp0, align 1, !nontemporal !0
   call void @dummy(ptr %tmp0)
@@ -359,17 +528,28 @@ define void @test_stnp_v4f32_offset_alloca(<4 x float> %v) #0 {
 }
 
 define void @test_stnp_v4f32_offset_alloca_2(<4 x float> %v) #0 {
-; CHECK-LABEL: test_stnp_v4f32_offset_alloca_2:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    sub sp, sp, #48
-; CHECK-NEXT:    mov d1, v0[1]
-; CHECK-NEXT:    mov x0, sp
-; CHECK-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
-; CHECK-NEXT:    stnp d0, d1, [sp, #16]
-; CHECK-NEXT:    bl _dummy
-; CHECK-NEXT:    ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
-; CHECK-NEXT:    add sp, sp, #48
-; CHECK-NEXT:    ret
+; CHECK-LE-LABEL: test_stnp_v4f32_offset_alloca_2:
+; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    sub sp, sp, #48
+; CHECK-LE-NEXT:    mov d1, v0.d[1]
+; CHECK-LE-NEXT:    mov x0, sp
+; CHECK-LE-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-LE-NEXT:    stnp d0, d1, [sp, #16]
+; CHECK-LE-NEXT:    bl dummy
+; CHECK-LE-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-LE-NEXT:    add sp, sp, #48
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: test_stnp_v4f32_offset_alloca_2:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    sub sp, sp, #48
+; CHECK-BE-NEXT:    mov x0, sp
+; CHECK-BE-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-BE-NEXT:    str q0, [sp, #16]
+; CHECK-BE-NEXT:    bl dummy
+; CHECK-BE-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-BE-NEXT:    add sp, sp, #48
+; CHECK-BE-NEXT:    ret
   %tmp0 = alloca <4 x float>, i32 2
   %tmp1 = getelementptr <4 x float>, ptr %tmp0, i32 1
   store <4 x float> %v, ptr %tmp1, align 1, !nontemporal !0
@@ -378,122 +558,239 @@ define void @test_stnp_v4f32_offset_alloca_2(<4 x float> %v) #0 {
 }
 
 define void @test_stnp_v32i8(<32 x i8> %v, ptr %ptr) {
-; CHECK-LABEL: test_stnp_v32i8:
-; CHECK:       ; %bb.0: ; %entry
-; CHECK-NEXT:    stnp q0, q1, [x0]
-; CHECK-NEXT:    ret
-
+; CHECK-LE-LABEL: test_stnp_v32i8:
+; CHECK-LE:       // %bb.0: // %entry
+; CHECK-LE-NEXT:    stnp q0, q1, [x0]
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: test_stnp_v32i8:
+; CHECK-BE:       // %bb.0: // %entry
+; CHECK-BE-NEXT:    rev64 v1.16b, v1.16b
+; CHECK-BE-NEXT:    rev64 v0.16b, v0.16b
+; CHECK-BE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
+; CHECK-BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-BE-NEXT:    stnp q0, q1, [x0]
+; CHECK-BE-NEXT:    ret
 entry:
   store <32 x i8> %v, ptr %ptr, align 4, !nontemporal !0
   ret void
 }
 
 define void @test_stnp_v32i16(<32 x i16> %v, ptr %ptr) {
-; CHECK-LABEL: test_stnp_v32i16:
-; CHECK:       ; %bb.0: ; %entry
-; CHECK-NEXT:    stnp q2, q3, [x0, #32]
-; CHECK-NEXT:    stnp q0, q1, [x0]
-; CHECK-NEXT:    ret
-
+; CHECK-LE-LABEL: test_stnp_v32i16:
+; CHECK-LE:       // %bb.0: // %entry
+; CHECK-LE-NEXT:    stnp q2, q3, [x0, #32]
+; CHECK-LE-NEXT:    stnp q0, q1, [x0]
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: test_stnp_v32i16:
+; CHECK-BE:       // %bb.0: // %entry
+; CHECK-BE-NEXT:    rev64 v3.8h, v3.8h
+; CHECK-BE-NEXT:    rev64 v2.8h, v2.8h
+; CHECK-BE-NEXT:    rev64 v1.8h, v1.8h
+; CHECK-BE-NEXT:    rev64 v0.8h, v0.8h
+; CHECK-BE-NEXT:    ext v3.16b, v3.16b, v3.16b, #8
+; CHECK-BE-NEXT:    ext v2.16b, v2.16b, v2.16b, #8
+; CHECK-BE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
+; CHECK-BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-BE-NEXT:    stnp q2, q3, [x0, #32]
+; CHECK-BE-NEXT:    stnp q0, q1, [x0]
+; CHECK-BE-NEXT:    ret
 entry:
   store <32 x i16> %v, ptr %ptr, align 4, !nontemporal !0
   ret void
 }
 
 define void @test_stnp_v32f16(<32 x half> %v, ptr %ptr) {
-; CHECK-LABEL: test_stnp_v32f16:
-; CHECK:       ; %bb.0: ; %entry
-; CHECK-NEXT:    stnp q2, q3, [x0, #32]
-; CHECK-NEXT:    stnp q0, q1, [x0]
-; CHECK-NEXT:    ret
-
+; CHECK-LE-LABEL: test_stnp_v32f16:
+; CHECK-LE:       // %bb.0: // %entry
+; CHECK-LE-NEXT:    stnp q2, q3, [x0, #32]
+; CHECK-LE-NEXT:    stnp q0, q1, [x0]
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: test_stnp_v32f16:
+; CHECK-BE:       // %bb.0: // %entry
+; CHECK-BE-NEXT:    rev64 v3.8h, v3.8h
+; CHECK-BE-NEXT:    rev64 v2.8h, v2.8h
+; CHECK-BE-NEXT:    rev64 v1.8h, v1.8h
+; CHECK-BE-NEXT:    rev64 v0.8h, v0.8h
+; CHECK-BE-NEXT:    ext v3.16b, v3.16b, v3.16b, #8
+; CHECK-BE-NEXT:    ext v2.16b, v2.16b, v2.16b, #8
+; CHECK-BE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
+; CHECK-BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-BE-NEXT:    stnp q2, q3, [x0, #32]
+; CHECK-BE-NEXT:    stnp q0, q1, [x0]
+; CHECK-BE-NEXT:    ret
 entry:
   store <32 x half> %v, ptr %ptr, align 4, !nontemporal !0
   ret void
 }
 
 define void @test_stnp_v16i32(<16 x i32> %v, ptr %ptr) {
-; CHECK-LABEL: test_stnp_v16i32:
-; CHECK:       ; %bb.0: ; %entry
-; CHECK-NEXT:    stnp q2, q3, [x0, #32]
-; CHECK-NEXT:    stnp q0, q1, [x0]
-; CHECK-NEXT:    ret
-
+; CHECK-LE-LABEL: test_stnp_v16i32:
+; CHECK-LE:       // %bb.0: // %entry
+; CHECK-LE-NEXT:    stnp q2, q3, [x0, #32]
+; CHECK-LE-NEXT:    stnp q0, q1, [x0]
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: test_stnp_v16i32:
+; CHECK-BE:       // %bb.0: // %entry
+; CHECK-BE-NEXT:    rev64 v3.4s, v3.4s
+; CHECK-BE-NEXT:    rev64 v2.4s, v2.4s
+; CHECK-BE-NEXT:    rev64 v1.4s, v1.4s
+; CHECK-BE-NEXT:    rev64 v0.4s, v0.4s
+; CHECK-BE-NEXT:    ext v3.16b, v3.16b, v3.16b, #8
+; CHECK-BE-NEXT:    ext v2.16b, v2.16b, v2.16b, #8
+; CHECK-BE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
+; CHECK-BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-BE-NEXT:    stnp q2, q3, [x0, #32]
+; CHECK-BE-NEXT:    stnp q0, q1, [x0]
+; CHECK-BE-NEXT:    ret
 entry:
   store <16 x i32> %v, ptr %ptr, align 4, !nontemporal !0
   ret void
 }
 
 define void @test_stnp_v16f32(<16 x float> %v, ptr %ptr) {
-; CHECK-LABEL: test_stnp_v16f32:
-; CHECK:       ; %bb.0: ; %entry
-; CHECK-NEXT:    stnp q2, q3, [x0, #32]
-; CHECK-NEXT:    stnp q0, q1, [x0]
-; CHECK-NEXT:    ret
-
+; CHECK-LE-LABEL: test_stnp_v16f32:
+; CHECK-LE:       // %bb.0: // %entry
+; CHECK-LE-NEXT:    stnp q2, q3, [x0, #32]
+; CHECK-LE-NEXT:    stnp q0, q1, [x0]
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: test_stnp_v16f32:
+; CHECK-BE:       // %bb.0: // %entry
+; CHECK-BE-NEXT:    rev64 v3.4s, v3.4s
+; CHECK-BE-NEXT:    rev64 v2.4s, v2.4s
+; CHECK-BE-NEXT:    rev64 v1.4s, v1.4s
+; CHECK-BE-NEXT:    rev64 v0.4s, v0.4s
+; CHECK-BE-NEXT:    ext v3.16b, v3.16b, v3.16b, #8
+; CHECK-BE-NEXT:    ext v2.16b, v2.16b, v2.16b, #8
+; CHECK-BE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
+; CHECK-BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-BE-NEXT:    stnp q2, q3, [x0, #32]
+; CHECK-BE-NEXT:    stnp q0, q1, [x0]
+; CHECK-BE-NEXT:    ret
 entry:
   store <16 x float> %v, ptr %ptr, align 4, !nontemporal !0
   ret void
 }
 
 define void @test_stnp_v17f32(<17 x float> %v, ptr %ptr) {
-; CHECK-LABEL: test_stnp_v17f32:
-; CHECK:       ; %bb.0: ; %entry
-; CHECK-NEXT:    ; kill: def $s4 killed $s4 def $q4
-; CHECK-NEXT:    ; kill: def $s0 killed $s0 def $q0
-; CHECK-NEXT:    ldr s16, [sp, #16]
-; CHECK-NEXT:    ; kill: def $s5 killed $s5 def $q5
-; CHECK-NEXT:    ; kill: def $s1 killed $s1 def $q1
-; CHECK-NEXT:    add x8, sp, #20
-; CHECK-NEXT:    ; kill: def $s6 killed $s6 def $q6
-; CHECK-NEXT:    ; kill: def $s2 killed $s2 def $q2
-; CHECK-NEXT:    ; kill: def $s7 killed $s7 def $q7
-; CHECK-NEXT:    ; kill: def $s3 killed $s3 def $q3
-; CHECK-NEXT:    mov.s v4[1], v5[0]
-; CHECK-NEXT:    mov.s v0[1], v1[0]
-; CHECK-NEXT:    ldr s5, [sp]
-; CHECK-NEXT:    ld1.s { v16 }[1], [x8]
-; CHECK-NEXT:    add x8, sp, #4
-; CHECK-NEXT:    ld1.s { v5 }[1], [x8]
-; CHECK-NEXT:    add x8, sp, #24
-; CHECK-NEXT:    mov.s v4[2], v6[0]
-; CHECK-NEXT:    ld1.s { v16 }[2], [x8]
-; CHECK-NEXT:    mov.s v0[2], v2[0]
-; CHECK-NEXT:    add x8, sp, #8
-; CHECK-NEXT:    ld1.s { v5 }[2], [x8]
-; CHECK-NEXT:    add x8, sp, #28
-; CHECK-NEXT:    ld1.s { v16 }[3], [x8]
-; CHECK-NEXT:    add x8, sp, #12
-; CHECK-NEXT:    mov.s v4[3], v7[0]
-; CHECK-NEXT:    mov.s v0[3], v3[0]
-; CHECK-NEXT:    ld1.s { v5 }[3], [x8]
-; CHECK-NEXT:    mov d1, v16[1]
-; CHECK-NEXT:    mov d2, v5[1]
-; CHECK-NEXT:    mov d3, v4[1]
-; CHECK-NEXT:    mov d6, v0[1]
-; CHECK-NEXT:    stnp d16, d1, [x0, #48]
-; CHECK-NEXT:    ldr s1, [sp, #32]
-; CHECK-NEXT:    stnp d5, d2, [x0, #32]
-; CHECK-NEXT:    stnp d4, d3, [x0, #16]
-; CHECK-NEXT:    stnp d0, d6, [x0]
-; CHECK-NEXT:    str s1, [x0, #64]
-; CHECK-NEXT:    ret
-
+; CHECK-LE-LABEL: test_stnp_v17f32:
+; CHECK-LE:       // %bb.0: // %entry
+; CHECK-LE-NEXT:    // kill: def $s4 killed $s4 def $q4
+; CHECK-LE-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-LE-NEXT:    ldr s16, [sp, #32]
+; CHECK-LE-NEXT:    // kill: def $s5 killed $s5 def $q5
+; CHECK-LE-NEXT:    // kill: def $s1 killed $s1 def $q1
+; CHECK-LE-NEXT:    add x8, sp, #40
+; CHECK-LE-NEXT:    // kill: def $s6 killed $s6 def $q6
+; CHECK-LE-NEXT:    // kill: def $s2 killed $s2 def $q2
+; CHECK-LE-NEXT:    // kill: def $s7 killed $s7 def $q7
+; CHECK-LE-NEXT:    // kill: def $s3 killed $s3 def $q3
+; CHECK-LE-NEXT:    mov v4.s[1], v5.s[0]
+; CHECK-LE-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-LE-NEXT:    ldr s5, [sp]
+; CHECK-LE-NEXT:    ld1 { v16.s }[1], [x8]
+; CHECK-LE-NEXT:    add x8, sp, #8
+; CHECK-LE-NEXT:    ld1 { v5.s }[1], [x8]
+; CHECK-LE-NEXT:    add x8, sp, #48
+; CHECK-LE-NEXT:    mov v4.s[2], v6.s[0]
+; CHECK-LE-NEXT:    ld1 { v16.s }[2], [x8]
+; CHECK-LE-NEXT:    mov v0.s[2], v2.s[0]
+; CHECK-LE-NEXT:    add x8, sp, #16
+; CHECK-LE-NEXT:    ld1 { v5.s }[2], [x8]
+; CHECK-LE-NEXT:    add x8, sp, #56
+; CHECK-LE-NEXT:    ld1 { v16.s }[3], [x8]
+; CHECK-LE-NEXT:    add x8, sp, #24
+; CHECK-LE-NEXT:    mov v4.s[3], v7.s[0]
+; CHECK-LE-NEXT:    mov v0.s[3], v3.s[0]
+; CHECK-LE-NEXT:    ld1 { v5.s }[3], [x8]
+; CHECK-LE-NEXT:    mov d1, v16.d[1]
+; CHECK-LE-NEXT:    mov d2, v5.d[1]
+; CHECK-LE-NEXT:    mov d3, v4.d[1]
+; CHECK-LE-NEXT:    mov d6, v0.d[1]
+; CHECK-LE-NEXT:    stnp d16, d1, [x0, #48]
+; CHECK-LE-NEXT:    ldr s1, [sp, #64]
+; CHECK-LE-NEXT:    stnp d5, d2, [x0, #32]
+; CHECK-LE-NEXT:    stnp d4, d3, [x0, #16]
+; CHECK-LE-NEXT:    stnp d0, d6, [x0]
+; CHECK-LE-NEXT:    str s1, [x0, #64]
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: test_stnp_v17f32:
+; CHECK-BE:       // %bb.0: // %entry
+; CHECK-BE-NEXT:    // kill: def $s4 killed $s4 def $q4
+; CHECK-BE-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-BE-NEXT:    ldr s16, [sp, #36]
+; CHECK-BE-NEXT:    // kill: def $s5 killed $s5 def $q5
+; CHECK-BE-NEXT:    // kill: def $s1 killed $s1 def $q1
+; CHECK-BE-NEXT:    ldr s17, [sp, #4]
+; CHECK-BE-NEXT:    add x8, sp, #44
+; CHECK-BE-NEXT:    mov v4.s[1], v5.s[0]
+; CHECK-BE-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-BE-NEXT:    // kill: def $s6 killed $s6 def $q6
+; CHECK-BE-NEXT:    // kill: def $s2 killed $s2 def $q2
+; CHECK-BE-NEXT:    // kill: def $s7 killed $s7 def $q7
+; CHECK-BE-NEXT:    // kill: def $s3 killed $s3 def $q3
+; CHECK-BE-NEXT:    ldr s1, [sp, #68]
+; CHECK-BE-NEXT:    ld1 { v16.s }[1], [x8]
+; CHECK-BE-NEXT:    add x8, sp, #12
+; CHECK-BE-NEXT:    ld1 { v17.s }[1], [x8]
+; CHECK-BE-NEXT:    add x8, sp, #52
+; CHECK-BE-NEXT:    str s1, [x0, #64]
+; CHECK-BE-NEXT:    ld1 { v16.s }[2], [x8]
+; CHECK-BE-NEXT:    add x8, sp, #20
+; CHECK-BE-NEXT:    mov v4.s[2], v6.s[0]
+; CHECK-BE-NEXT:    mov v0.s[2], v2.s[0]
+; CHECK-BE-NEXT:    ld1 { v17.s }[2], [x8]
+; CHECK-BE-NEXT:    add x8, sp, #60
+; CHECK-BE-NEXT:    ld1 { v16.s }[3], [x8]
+; CHECK-BE-NEXT:    add x8, sp, #28
+; CHECK-BE-NEXT:    ld1 { v17.s }[3], [x8]
+; CHECK-BE-NEXT:    mov v4.s[3], v7.s[0]
+; CHECK-BE-NEXT:    add x8, x0, #48
+; CHECK-BE-NEXT:    mov v0.s[3], v3.s[0]
+; CHECK-BE-NEXT:    st1 { v16.4s }, [x8]
+; CHECK-BE-NEXT:    add x8, x0, #32
+; CHECK-BE-NEXT:    st1 { v17.4s }, [x8]
+; CHECK-BE-NEXT:    add x8, x0, #16
+; CHECK-BE-NEXT:    st1 { v4.4s }, [x8]
+; CHECK-BE-NEXT:    st1 { v0.4s }, [x0]
+; CHECK-BE-NEXT:    ret
 entry:
   store <17 x float> %v, ptr %ptr, align 4, !nontemporal !0
   ret void
 }
 define void @test_stnp_v16i32_invalid_offset(<16 x i32> %v, ptr %ptr) {
-; CHECK-LABEL: test_stnp_v16i32_invalid_offset:
-; CHECK:       ; %bb.0: ; %entry
-; CHECK-NEXT:    mov w8, #32032 ; =0x7d20
-; CHECK-NEXT:    mov w9, #32000 ; =0x7d00
-; CHECK-NEXT:    add x8, x0, x8
-; CHECK-NEXT:    add x9, x0, x9
-; CHECK-NEXT:    stnp q2, q3, [x8]
-; CHECK-NEXT:    stnp q0, q1, [x9]
-; CHECK-NEXT:    ret
-
+; CHECK-LE-LABEL: test_stnp_v16i32_invalid_offset:
+; CHECK-LE:       // %bb.0: // %entry
+; CHECK-LE-NEXT:    mov w8, #32032 // =0x7d20
+; CHECK-LE-NEXT:    mov w9, #32000 // =0x7d00
+; CHECK-LE-NEXT:    add x8, x0, x8
+; CHECK-LE-NEXT:    add x9, x0, x9
+; CHECK-LE-NEXT:    stnp q2, q3, [x8]
+; CHECK-LE-NEXT:    stnp q0, q1, [x9]
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: test_stnp_v16i32_invalid_offset:
+; CHECK-BE:       // %bb.0: // %entry
+; CHECK-BE-NEXT:    rev64 v3.4s, v3.4s
+; CHECK-BE-NEXT:    rev64 v2.4s, v2.4s
+; CHECK-BE-NEXT:    mov w8, #32032 // =0x7d20
+; CHECK-BE-NEXT:    rev64 v1.4s, v1.4s
+; CHECK-BE-NEXT:    rev64 v0.4s, v0.4s
+; CHECK-BE-NEXT:    mov w9, #32000 // =0x7d00
+; CHECK-BE-NEXT:    add x8, x0, x8
+; CHECK-BE-NEXT:    add x9, x0, x9
+; CHECK-BE-NEXT:    ext v3.16b, v3.16b, v3.16b, #8
+; CHECK-BE-NEXT:    ext v2.16b, v2.16b, v2.16b, #8
+; CHECK-BE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
+; CHECK-BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-BE-NEXT:    stnp q2, q3, [x8]
+; CHECK-BE-NEXT:    stnp q0, q1, [x9]
+; CHECK-BE-NEXT:    ret
 entry:
   %gep = getelementptr <16 x i32>, ptr %ptr, i32 500
   store <16 x i32> %v, ptr %gep, align 4, !nontemporal !0
@@ -501,28 +798,58 @@ entry:
 }
 
 define void @test_stnp_v16f64(<16 x double> %v, ptr %ptr) {
-; CHECK-LABEL: test_stnp_v16f64:
-; CHECK:       ; %bb.0: ; %entry
-; CHECK-NEXT:    stnp q6, q7, [x0, #96]
-; CHECK-NEXT:    stnp q4, q5, [x0, #64]
-; CHECK-NEXT:    stnp q2, q3, [x0, #32]
-; CHECK-NEXT:    stnp q0, q1, [x0]
-; CHECK-NEXT:    ret
-
+; CHECK-LE-LABEL: test_stnp_v16f64:
+; CHECK-LE:       // %bb.0: // %entry
+; CHECK-LE-NEXT:    stnp q6, q7, [x0, #96]
+; CHECK-LE-NEXT:    stnp q4, q5, [x0, #64]
+; CHECK-LE-NEXT:    stnp q2, q3, [x0, #32]
+; CHECK-LE-NEXT:    stnp q0, q1, [x0]
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: test_stnp_v16f64:
+; CHECK-BE:       // %bb.0: // %entry
+; CHECK-BE-NEXT:    ext v7.16b, v7.16b, v7.16b, #8
+; CHECK-BE-NEXT:    ext v6.16b, v6.16b, v6.16b, #8
+; CHECK-BE-NEXT:    ext v5.16b, v5.16b, v5.16b, #8
+; CHECK-BE-NEXT:    ext v4.16b, v4.16b, v4.16b, #8
+; CHECK-BE-NEXT:    ext v3.16b, v3.16b, v3.16b, #8
+; CHECK-BE-NEXT:    ext v2.16b, v2.16b, v2.16b, #8
+; CHECK-BE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
+; CHECK-BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-BE-NEXT:    stnp q6, q7, [x0, #96]
+; CHECK-BE-NEXT:    stnp q4, q5, [x0, #64]
+; CHECK-BE-NEXT:    stnp q2, q3, [x0, #32]
+; CHECK-BE-NEXT:    stnp q0, q1, [x0]
+; CHECK-BE-NEXT:    ret
 entry:
   store <16 x double> %v, ptr %ptr, align 4, !nontemporal !0
   ret void
 }
 
 define void @test_stnp_v16i64(<16 x i64> %v, ptr %ptr) {
-; CHECK-LABEL: test_stnp_v16i64:
-; CHECK:       ; %bb.0: ; %entry
-; CHECK-NEXT:    stnp q6, q7, [x0, #96]
-; CHECK-NEXT:    stnp q4, q5, [x0, #64]
-; CHECK-NEXT:    stnp q2, q3, [x0, #32]
-; CHECK-NEXT:    stnp q0, q1, [x0]
-; CHECK-NEXT:    ret
-
+; CHECK-LE-LABEL: test_stnp_v16i64:
+; CHECK-LE:       // %bb.0: // %entry
+; CHECK-LE-NEXT:    stnp q6, q7, [x0, #96]
+; CHECK-LE-NEXT:    stnp q4, q5, [x0, #64]
+; CHECK-LE-NEXT:    stnp q2, q3, [x0, #32]
+; CHECK-LE-NEXT:    stnp q0, q1, [x0]
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: test_stnp_v16i64:
+; CHECK-BE:       // %bb.0: // %entry
+; CHECK-BE-NEXT:    ext v7.16b, v7.16b, v7.16b, #8
+; CHECK-BE-NEXT:    ext v6.16b, v6.16b, v6.16b, #8
+; CHECK-BE-NEXT:    ext v5.16b, v5.16b, v5.16b, #8
+; CHECK-BE-NEXT:    ext v4.16b, v4.16b, v4.16b, #8
+; CHECK-BE-NEXT:    ext v3.16b, v3.16b, v3.16b, #8
+; CHECK-BE-NEXT:    ext v2.16b, v2.16b, v2.16b, #8
+; CHECK-BE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
+; CHECK-BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-BE-NEXT:    stnp q6, q7, [x0, #96]
+; CHECK-BE-NEXT:    stnp q4, q5, [x0, #64]
+; CHECK-BE-NEXT:    stnp q2, q3, [x0, #32]
+; CHECK-BE-NEXT:    stnp q0, q1, [x0]
+; CHECK-BE-NEXT:    ret
 entry:
   store <16 x i64> %v, ptr %ptr, align 4, !nontemporal !0
   ret void


        


More information about the llvm-commits mailing list