[llvm] df4878d - [AArch64] Tests for non-temporal loads.

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 15 01:16:14 PDT 2022


Author: Zain Jaffal
Date: 2022-08-15T09:16:02+01:00
New Revision: df4878d28d38b2bf2eb8d50d9cdef93ab79a38d5

URL: https://github.com/llvm/llvm-project/commit/df4878d28d38b2bf2eb8d50d9cdef93ab79a38d5
DIFF: https://github.com/llvm/llvm-project/commit/df4878d28d38b2bf2eb8d50d9cdef93ab79a38d5.diff

LOG: [AArch64] Tests for non-temporal loads.

Add test cases for D131773, covering cases where LDNP could be used as well as
negative tests.

Reviewed By: fhahn

Differential Revision: https://reviews.llvm.org/D131767

Added: 
    llvm/test/CodeGen/AArch64/nontemporal-load.ll

Modified: 
    

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AArch64/nontemporal-load.ll b/llvm/test/CodeGen/AArch64/nontemporal-load.ll
new file mode 100644
index 0000000000000..bc42a168ad73f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/nontemporal-load.ll
@@ -0,0 +1,337 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple aarch64-apple-darwin | FileCheck %s
+
+define <4 x double> @test_ldnp_v4f64(<4 x double>* %A) { ; Nontemporal load of <4 x double> (256 bits); CHECK lines expect one plain ldp of q0/q1.
+; CHECK-LABEL: test_ldnp_v4f64:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldp q0, q1, [x0]
+; CHECK-NEXT:    ret
+  %lv = load <4 x double>, <4 x double>* %A, align 8, !nontemporal !0
+  ret <4 x double> %lv
+}
+
+define <4 x i64> @test_ldnp_v4i64(<4 x i64>* %A) { ; Nontemporal load of <4 x i64> (256 bits); CHECK lines expect one plain ldp of q0/q1.
+; CHECK-LABEL: test_ldnp_v4i64:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldp q0, q1, [x0]
+; CHECK-NEXT:    ret
+  %lv = load <4 x i64>, <4 x i64>* %A, align 8, !nontemporal !0
+  ret <4 x i64> %lv
+}
+
+define <8 x i32> @test_ldnp_v8i32(<8 x i32>* %A) { ; Nontemporal load of <8 x i32> (256 bits); CHECK lines expect one plain ldp of q0/q1.
+; CHECK-LABEL: test_ldnp_v8i32:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldp q0, q1, [x0]
+; CHECK-NEXT:    ret
+  %lv = load <8 x i32>, <8 x i32>* %A, align 8, !nontemporal !0
+  ret <8 x i32> %lv
+}
+
+define <8 x float> @test_ldnp_v8f32(<8 x float>* %A) { ; Nontemporal load of <8 x float> (256 bits); CHECK lines expect one plain ldp of q0/q1.
+; CHECK-LABEL: test_ldnp_v8f32:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldp q0, q1, [x0]
+; CHECK-NEXT:    ret
+  %lv = load <8 x float>, <8 x float>* %A, align 8, !nontemporal !0
+  ret <8 x float> %lv
+}
+
+define <16 x i16> @test_ldnp_v16i16(<16 x i16>* %A) { ; Nontemporal load of <16 x i16> (256 bits); CHECK lines expect one plain ldp of q0/q1.
+; CHECK-LABEL: test_ldnp_v16i16:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldp q0, q1, [x0]
+; CHECK-NEXT:    ret
+  %lv = load <16 x i16>, <16 x i16>* %A, align 8, !nontemporal !0
+  ret <16 x i16> %lv
+}
+
+define <16 x half> @test_ldnp_v16f16(<16 x half>* %A) { ; Nontemporal load of <16 x half> (256 bits); CHECK lines expect one plain ldp of q0/q1.
+; CHECK-LABEL: test_ldnp_v16f16:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldp q0, q1, [x0]
+; CHECK-NEXT:    ret
+  %lv = load <16 x half>, <16 x half>* %A, align 8, !nontemporal !0
+  ret <16 x half> %lv
+}
+
+define <32 x i8> @test_ldnp_v32i8(<32 x i8>* %A) { ; Nontemporal load of <32 x i8> (256 bits); CHECK lines expect one plain ldp of q0/q1.
+; CHECK-LABEL: test_ldnp_v32i8:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldp q0, q1, [x0]
+; CHECK-NEXT:    ret
+  %lv = load <32 x i8>, <32 x i8>* %A, align 8, !nontemporal !0
+  ret <32 x i8> %lv
+}
+
+define <4 x i32> @test_ldnp_v4i32(<4 x i32>* %A) { ; Nontemporal load of <4 x i32> (128 bits); CHECK lines expect a single plain ldr of q0.
+; CHECK-LABEL: test_ldnp_v4i32:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr q0, [x0]
+; CHECK-NEXT:    ret
+  %lv = load <4 x i32>, <4 x i32>* %A, align 8, !nontemporal !0
+  ret <4 x i32> %lv
+}
+
+define <4 x float> @test_ldnp_v4f32(<4 x float>* %A) { ; Nontemporal load of <4 x float> (128 bits); CHECK lines expect a single plain ldr of q0.
+; CHECK-LABEL: test_ldnp_v4f32:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr q0, [x0]
+; CHECK-NEXT:    ret
+  %lv = load <4 x float>, <4 x float>* %A, align 8, !nontemporal !0
+  ret <4 x float> %lv
+}
+
+define <8 x i16> @test_ldnp_v8i16(<8 x i16>* %A) { ; Nontemporal load of <8 x i16> (128 bits); CHECK lines expect a single plain ldr of q0.
+; CHECK-LABEL: test_ldnp_v8i16:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr q0, [x0]
+; CHECK-NEXT:    ret
+  %lv = load <8 x i16>, <8 x i16>* %A, align 8, !nontemporal !0
+  ret <8 x i16> %lv
+}
+
+define <16 x i8> @test_ldnp_v16i8(<16 x i8>* %A) { ; Nontemporal load of <16 x i8> (128 bits); CHECK lines expect a single plain ldr of q0.
+; CHECK-LABEL: test_ldnp_v16i8:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr q0, [x0]
+; CHECK-NEXT:    ret
+  %lv = load <16 x i8>, <16 x i8>* %A, align 8, !nontemporal !0
+  ret <16 x i8> %lv
+}
+define <2 x double> @test_ldnp_v2f64(<2 x double>* %A) { ; Nontemporal load of <2 x double> (128 bits); CHECK lines expect a single plain ldr of q0.
+; CHECK-LABEL: test_ldnp_v2f64:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr q0, [x0]
+; CHECK-NEXT:    ret
+  %lv = load <2 x double>, <2 x double>* %A, align 8, !nontemporal !0
+  ret <2 x double> %lv
+}
+
+define <2 x i32> @test_ldnp_v2i32(<2 x i32>* %A) { ; Nontemporal load of <2 x i32> (64 bits); CHECK lines expect a single plain ldr of d0.
+; CHECK-LABEL: test_ldnp_v2i32:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr d0, [x0]
+; CHECK-NEXT:    ret
+  %lv = load <2 x i32>, <2 x i32>* %A, align 8, !nontemporal !0
+  ret <2 x i32> %lv
+}
+
+define <2 x float> @test_ldnp_v2f32(<2 x float>* %A) { ; Nontemporal load of <2 x float> (64 bits); CHECK lines expect a single plain ldr of d0.
+; CHECK-LABEL: test_ldnp_v2f32:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr d0, [x0]
+; CHECK-NEXT:    ret
+  %lv = load <2 x float>, <2 x float>* %A, align 8, !nontemporal !0
+  ret <2 x float> %lv
+}
+
+define <4 x i16> @test_ldnp_v4i16(<4 x i16>* %A) { ; Nontemporal load of <4 x i16> (64 bits); CHECK lines expect a single plain ldr of d0.
+; CHECK-LABEL: test_ldnp_v4i16:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr d0, [x0]
+; CHECK-NEXT:    ret
+  %lv = load <4 x i16>, <4 x i16>* %A, align 8, !nontemporal !0
+  ret <4 x i16> %lv
+}
+
+define <8 x i8> @test_ldnp_v8i8(<8 x i8>* %A) { ; Nontemporal load of <8 x i8> (64 bits); CHECK lines expect a single plain ldr of d0.
+; CHECK-LABEL: test_ldnp_v8i8:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr d0, [x0]
+; CHECK-NEXT:    ret
+  %lv = load <8 x i8>, <8 x i8>* %A, align 8, !nontemporal !0
+  ret <8 x i8> %lv
+}
+
+define <1 x double> @test_ldnp_v1f64(<1 x double>* %A) { ; Nontemporal load of <1 x double> (64 bits); CHECK lines expect a single plain ldr of d0.
+; CHECK-LABEL: test_ldnp_v1f64:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr d0, [x0]
+; CHECK-NEXT:    ret
+  %lv = load <1 x double>, <1 x double>* %A, align 8, !nontemporal !0
+  ret <1 x double> %lv
+}
+
+define <1 x i64> @test_ldnp_v1i64(<1 x i64>* %A) { ; Nontemporal load of <1 x i64> (64 bits); CHECK lines expect a single plain ldr of d0.
+; CHECK-LABEL: test_ldnp_v1i64:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr d0, [x0]
+; CHECK-NEXT:    ret
+  %lv = load <1 x i64>, <1 x i64>* %A, align 8, !nontemporal !0
+  ret <1 x i64> %lv
+}
+
+define <32 x i16> @test_ldnp_v32i16(<32 x i16>* %A) { ; Nontemporal load of <32 x i16> (512 bits); CHECK lines expect two plain ldp instructions filling q0-q3.
+; CHECK-LABEL: test_ldnp_v32i16:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldp q0, q1, [x0]
+; CHECK-NEXT:    ldp q2, q3, [x0, #32]
+; CHECK-NEXT:    ret
+  %lv = load <32 x i16>, <32 x i16>* %A, align 8, !nontemporal !0
+  ret <32 x i16> %lv
+}
+
+define <32 x half> @test_ldnp_v32f16(<32 x half>* %A) { ; Nontemporal load of <32 x half> (512 bits); CHECK lines expect two plain ldp instructions filling q0-q3.
+; CHECK-LABEL: test_ldnp_v32f16:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldp q0, q1, [x0]
+; CHECK-NEXT:    ldp q2, q3, [x0, #32]
+; CHECK-NEXT:    ret
+  %lv = load <32 x half>, <32 x half>* %A, align 8, !nontemporal !0
+  ret <32 x half> %lv
+}
+
+define <16 x i32> @test_ldnp_v16i32(<16 x i32>* %A) { ; Nontemporal load of <16 x i32> (512 bits); CHECK lines expect two plain ldp instructions filling q0-q3.
+; CHECK-LABEL: test_ldnp_v16i32:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldp q0, q1, [x0]
+; CHECK-NEXT:    ldp q2, q3, [x0, #32]
+; CHECK-NEXT:    ret
+  %lv = load <16 x i32>, <16 x i32>* %A, align 8, !nontemporal !0
+  ret <16 x i32> %lv
+}
+
+define <16 x float> @test_ldnp_v16f32(<16 x float>* %A) { ; Nontemporal load of <16 x float> (512 bits); CHECK lines expect two plain ldp instructions filling q0-q3.
+; CHECK-LABEL: test_ldnp_v16f32:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldp q0, q1, [x0]
+; CHECK-NEXT:    ldp q2, q3, [x0, #32]
+; CHECK-NEXT:    ret
+  %lv = load <16 x float>, <16 x float>* %A, align 8, !nontemporal !0
+  ret <16 x float> %lv
+}
+
+define <17 x float> @test_ldnp_v17f32(<17 x float>* %A) { ; Nontemporal load of an odd-sized <17 x float>; CHECK lines expect two ldp plus ldr s0, with the result stored through x8.
+; CHECK-LABEL: test_ldnp_v17f32:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldp q1, q2, [x0, #32]
+; CHECK-NEXT:    ldp q3, q4, [x0]
+; CHECK-NEXT:    ldr s0, [x0, #64]
+; CHECK-NEXT:    stp q3, q4, [x8]
+; CHECK-NEXT:    stp q1, q2, [x8, #32]
+; CHECK-NEXT:    str s0, [x8, #64]
+; CHECK-NEXT:    ret
+  %lv = load <17 x float>, <17 x float>* %A, align 8, !nontemporal !0
+  ret <17 x float> %lv
+}
+
+define <33 x double> @test_ldnp_v33f64(<33 x double>* %A) { ; Nontemporal load of an odd-sized <33 x double>; CHECK lines expect eight ldp plus ldr d20, with the result stored through x8.
+; CHECK-LABEL: test_ldnp_v33f64:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldp q0, q1, [x0]
+; CHECK-NEXT:    ldp q2, q3, [x0, #32]
+; CHECK-NEXT:    ldp q4, q5, [x0, #64]
+; CHECK-NEXT:    ldp q6, q7, [x0, #96]
+; CHECK-NEXT:    ldp q16, q17, [x0, #128]
+; CHECK-NEXT:    ldp q18, q19, [x0, #160]
+; CHECK-NEXT:    ldp q21, q22, [x0, #224]
+; CHECK-NEXT:    ldp q23, q24, [x0, #192]
+; CHECK-NEXT:    ldr d20, [x0, #256]
+; CHECK-NEXT:    stp q0, q1, [x8]
+; CHECK-NEXT:    stp q2, q3, [x8, #32]
+; CHECK-NEXT:    stp q4, q5, [x8, #64]
+; CHECK-NEXT:    str d20, [x8, #256]
+; CHECK-NEXT:    stp q6, q7, [x8, #96]
+; CHECK-NEXT:    stp q16, q17, [x8, #128]
+; CHECK-NEXT:    stp q18, q19, [x8, #160]
+; CHECK-NEXT:    stp q23, q24, [x8, #192]
+; CHECK-NEXT:    stp q21, q22, [x8, #224]
+; CHECK-NEXT:    ret
+  %lv = load <33 x double>, <33 x double>* %A, align 8, !nontemporal !0
+  ret <33 x double> %lv
+}
+
+define <33 x i8> @test_ldnp_v33i8(<33 x i8>* %A) { ; Nontemporal load of an odd-sized <33 x i8>; CHECK lines expect ldp q1, q0 plus ldrb, with the result stored through x8.
+; CHECK-LABEL: test_ldnp_v33i8:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldp q1, q0, [x0]
+; CHECK-NEXT:    ldrb w9, [x0, #32]
+; CHECK-NEXT:    stp q1, q0, [x8]
+; CHECK-NEXT:    strb w9, [x8, #32]
+; CHECK-NEXT:    ret
+  %lv = load <33 x i8>, <33 x i8>* %A, align 8, !nontemporal !0
+  ret <33 x i8> %lv
+}
+
+define <4 x i65> @test_ldnp_v4i65(<4 x i65>* %A) { ; Nontemporal load of <4 x i65>; CHECK lines expect ordinary ldp/ldr/ldrb loads followed by and/extr/ubfx bit extraction.
+; CHECK-LABEL: test_ldnp_v4i65:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldp x8, x9, [x0, #8]
+; CHECK-NEXT:    ldr d0, [x0]
+; CHECK-NEXT:    ldr x10, [x0, #24]
+; CHECK-NEXT:    and x1, x8, #0x1
+; CHECK-NEXT:    ldrb w11, [x0, #32]
+; CHECK-NEXT:    extr x2, x9, x8, #1
+; CHECK-NEXT:    extr x4, x10, x9, #2
+; CHECK-NEXT:    extr x6, x11, x10, #3
+; CHECK-NEXT:    ubfx x3, x9, #1, #1
+; CHECK-NEXT:    mov.d v0[1], x1
+; CHECK-NEXT:    ubfx x5, x10, #2, #1
+; CHECK-NEXT:    ubfx x7, x11, #3, #1
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+  %lv = load <4 x i65>, <4 x i65>* %A, align 8, !nontemporal !0
+  ret <4 x i65> %lv
+}
+
+define <4 x i63> @test_ldnp_v4i63(<4 x i63>* %A) { ; Nontemporal load of <4 x i63>; CHECK lines expect two ordinary x-register ldp loads followed by extr/and bit extraction.
+; CHECK-LABEL: test_ldnp_v4i63:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldp x8, x9, [x0]
+; CHECK-NEXT:    ldp x10, x11, [x0, #16]
+; CHECK-NEXT:    extr x12, x9, x8, #63
+; CHECK-NEXT:    and x0, x8, #0x7fffffffffffffff
+; CHECK-NEXT:    extr x9, x10, x9, #62
+; CHECK-NEXT:    extr x10, x11, x10, #61
+; CHECK-NEXT:    and x1, x12, #0x7fffffffffffffff
+; CHECK-NEXT:    and x2, x9, #0x7fffffffffffffff
+; CHECK-NEXT:    and x3, x10, #0x7fffffffffffffff
+; CHECK-NEXT:    ret
+  %lv = load <4 x i63>, <4 x i63>* %A, align 8, !nontemporal !0
+  ret <4 x i63> %lv
+}
+
+define <5 x double> @test_ldnp_v5f64(<5 x double>* %A) { ; Nontemporal load of an odd-sized <5 x double>; CHECK lines expect ldp q0, q2 and ldr d4, plus two ext.16b instructions.
+; CHECK-LABEL: test_ldnp_v5f64:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldp q0, q2, [x0]
+; CHECK-NEXT:    ext.16b v1, v0, v0, #8
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q1
+; CHECK-NEXT:    ext.16b v3, v2, v2, #8
+; CHECK-NEXT:    ldr d4, [x0, #32]
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q2
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $q3
+; CHECK-NEXT:    ; kill: def $d4 killed $d4 killed $q4
+; CHECK-NEXT:    ret
+  %lv = load <5 x double>, <5 x double>* %A, align 8, !nontemporal !0
+  ret <5 x double> %lv
+}
+
+define <16 x i64> @test_ldnp_v16i64(<16 x i64>* %A) { ; Nontemporal load of <16 x i64> (1024 bits); CHECK lines expect four plain ldp instructions filling q0-q7.
+; CHECK-LABEL: test_ldnp_v16i64:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldp q0, q1, [x0]
+; CHECK-NEXT:    ldp q2, q3, [x0, #32]
+; CHECK-NEXT:    ldp q4, q5, [x0, #64]
+; CHECK-NEXT:    ldp q6, q7, [x0, #96]
+; CHECK-NEXT:    ret
+  %lv = load <16 x i64>, <16 x i64>* %A, align 8, !nontemporal !0
+  ret <16 x i64> %lv
+}
+
+define <16 x double> @test_ldnp_v16f64(<16 x double>* %A) { ; Nontemporal load of <16 x double> (1024 bits); CHECK lines expect four plain ldp instructions filling q0-q7.
+; CHECK-LABEL: test_ldnp_v16f64:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldp q0, q1, [x0]
+; CHECK-NEXT:    ldp q2, q3, [x0, #32]
+; CHECK-NEXT:    ldp q4, q5, [x0, #64]
+; CHECK-NEXT:    ldp q6, q7, [x0, #96]
+; CHECK-NEXT:    ret
+  %lv = load <16 x double>, <16 x double>* %A, align 8, !nontemporal !0
+  ret <16 x double> %lv
+}
+
+
+!0 = !{i32 1} ; Metadata node referenced by every `!nontemporal !0` load in this file.


        


More information about the llvm-commits mailing list