[llvm] [AArch64] Avoid NEON ORR when NEON and SVE are unavailable (PR #93940)
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Fri May 31 01:58:59 PDT 2024
https://github.com/sdesmalen-arm created https://github.com/llvm/llvm-project/pull/93940
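For streaming-compatible functions compiled with only +sme, NEON is not
available, so we can't use a NEON ORR (aliased as 'mov') to copy
Q-registers; we need to use a spill/fill through the stack instead.
This also fixes the fill, which should use the post-incrementing
(post-indexed) addressing mode, i.e. 'ldr q0, [sp], #16' rather than the
pre-indexed 'ldr q0, [sp, #16]!', so that the value is reloaded from the
slot the spill ('str q1, [sp, #-16]!') just wrote.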
From 4204e6f5d30b390414062c64ee56871172c801aa Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Fri, 31 May 2024 09:48:35 +0100
Subject: [PATCH 1/2] [AArch64] NFC: Use update_llc_test_checks for
arm64-reg-copy-noneon.ll
---
.../CodeGen/AArch64/arm64-reg-copy-noneon.ll | 25 ++++++++++++-------
1 file changed, 16 insertions(+), 9 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/arm64-reg-copy-noneon.ll b/llvm/test/CodeGen/AArch64/arm64-reg-copy-noneon.ll
index 29255ef187c1c..763c0c6a69f5f 100644
--- a/llvm/test/CodeGen/AArch64/arm64-reg-copy-noneon.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-reg-copy-noneon.ll
@@ -1,20 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=-neon < %s | FileCheck %s
define float @copy_FPR32(float %a, float %b) {
-;CHECK-LABEL: copy_FPR32:
-;CHECK: fmov s0, s1
+; CHECK-LABEL: copy_FPR32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov s0, s1
+; CHECK-NEXT: ret
ret float %b;
}
-
+
define double @copy_FPR64(double %a, double %b) {
-;CHECK-LABEL: copy_FPR64:
-;CHECK: fmov d0, d1
+; CHECK-LABEL: copy_FPR64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov d0, d1
+; CHECK-NEXT: ret
ret double %b;
}
-
+
define fp128 @copy_FPR128(fp128 %a, fp128 %b) {
-;CHECK-LABEL: copy_FPR128:
-;CHECK: str q1, [sp, #-16]!
-;CHECK-NEXT: ldr q0, [sp, #16]!
+; CHECK-LABEL: copy_FPR128:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str q1, [sp, #-16]!
+; CHECK-NEXT: ldr q0, [sp, #16]!
+; CHECK-NEXT: ret
ret fp128 %b;
}
From 634bc6c22e150b60d0a1eeff9df75247be00bfe0 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Fri, 31 May 2024 09:45:57 +0100
Subject: [PATCH 2/2] [AArch64] Avoid NEON ORR when NEON and SVE are
unavailable.
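For streaming-compatible functions compiled with only +sme, NEON is not
available, so we can't use a NEON ORR (aliased as 'mov') to copy
Q-registers; we need to use a spill/fill through the stack instead.
This also fixes the fill, which should use the post-incrementing
(post-indexed) addressing mode, i.e. 'ldr q0, [sp], #16' rather than the
pre-indexed 'ldr q0, [sp, #16]!', so that the value is reloaded from the
slot the spill ('str q1, [sp, #-16]!') just wrote.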
---
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 4 ++--
llvm/test/CodeGen/AArch64/arm64-reg-copy-noneon.ll | 2 +-
.../sve-streaming-mode-fixed-length-masked-load.ll | 12 ++++++++----
.../AArch64/sve-streaming-mode-test-register-mov.ll | 3 ++-
4 files changed, 13 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index aa0b7c93f8661..ce4b7e68fd625 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -4659,7 +4659,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addReg(AArch64::Z0 + (DestReg - AArch64::Q0), RegState::Define)
.addReg(AArch64::Z0 + (SrcReg - AArch64::Q0))
.addReg(AArch64::Z0 + (SrcReg - AArch64::Q0));
- else if (Subtarget.hasNEON())
+ else if (Subtarget.isNeonAvailable())
BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
.addReg(SrcReg)
.addReg(SrcReg, getKillRegState(KillSrc));
@@ -4669,7 +4669,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addReg(SrcReg, getKillRegState(KillSrc))
.addReg(AArch64::SP)
.addImm(-16);
- BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
+ BuildMI(MBB, I, DL, get(AArch64::LDRQpost))
.addReg(AArch64::SP, RegState::Define)
.addReg(DestReg, RegState::Define)
.addReg(AArch64::SP)
diff --git a/llvm/test/CodeGen/AArch64/arm64-reg-copy-noneon.ll b/llvm/test/CodeGen/AArch64/arm64-reg-copy-noneon.ll
index 763c0c6a69f5f..69cd295c309d1 100644
--- a/llvm/test/CodeGen/AArch64/arm64-reg-copy-noneon.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-reg-copy-noneon.ll
@@ -21,7 +21,7 @@ define fp128 @copy_FPR128(fp128 %a, fp128 %b) {
; CHECK-LABEL: copy_FPR128:
; CHECK: // %bb.0:
; CHECK-NEXT: str q1, [sp, #-16]!
-; CHECK-NEXT: ldr q0, [sp, #16]!
+; CHECK-NEXT: ldr q0, [sp], #16
; CHECK-NEXT: ret
ret fp128 %b;
}
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll
index be335c697707d..a689a539b0082 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll
@@ -980,7 +980,8 @@ define <32 x i8> @masked_load_v32i8(ptr %src, <32 x i1> %mask) {
; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB3_3
; NONEON-NOSVE-NEXT: b .LBB3_4
; NONEON-NOSVE-NEXT: .LBB3_2:
-; NONEON-NOSVE-NEXT: mov v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: str q1, [sp, #-16]!
+; NONEON-NOSVE-NEXT: ldr q0, [sp], #16
; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB3_4
; NONEON-NOSVE-NEXT: .LBB3_3: // %cond.load1
; NONEON-NOSVE-NEXT: ldrb w10, [x0, #1]
@@ -2095,7 +2096,8 @@ define <16 x half> @masked_load_v16f16(ptr %src, <16 x i1> %mask) {
; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB7_3
; NONEON-NOSVE-NEXT: b .LBB7_4
; NONEON-NOSVE-NEXT: .LBB7_2:
-; NONEON-NOSVE-NEXT: mov v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: str q1, [sp, #-16]!
+; NONEON-NOSVE-NEXT: ldr q0, [sp], #16
; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB7_4
; NONEON-NOSVE-NEXT: .LBB7_3: // %cond.load1
; NONEON-NOSVE-NEXT: ldr h2, [x0, #2]
@@ -2616,7 +2618,8 @@ define <8 x float> @masked_load_v8f32(ptr %src, <8 x i1> %mask) {
; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB10_3
; NONEON-NOSVE-NEXT: b .LBB10_4
; NONEON-NOSVE-NEXT: .LBB10_2:
-; NONEON-NOSVE-NEXT: mov v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: str q1, [sp, #-16]!
+; NONEON-NOSVE-NEXT: ldr q0, [sp], #16
; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB10_4
; NONEON-NOSVE-NEXT: .LBB10_3: // %cond.load1
; NONEON-NOSVE-NEXT: ldr s2, [x0, #4]
@@ -2839,7 +2842,8 @@ define <4 x double> @masked_load_v4f64(ptr %src, <4 x i1> %mask) {
; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB12_3
; NONEON-NOSVE-NEXT: b .LBB12_4
; NONEON-NOSVE-NEXT: .LBB12_2:
-; NONEON-NOSVE-NEXT: mov v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: str q1, [sp, #-16]!
+; NONEON-NOSVE-NEXT: ldr q0, [sp], #16
; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB12_4
; NONEON-NOSVE-NEXT: .LBB12_3: // %cond.load1
; NONEON-NOSVE-NEXT: ldr d2, [x0, #8]
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-test-register-mov.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-test-register-mov.ll
index 67cdde718e391..23adb1a4bc092 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-test-register-mov.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-test-register-mov.ll
@@ -15,7 +15,8 @@ define fp128 @test_streaming_compatible_register_mov(fp128 %q0, fp128 %q1) {
;
; NONEON-NOSVE-LABEL: test_streaming_compatible_register_mov:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: mov v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: str q1, [sp, #-16]!
+; NONEON-NOSVE-NEXT: ldr q0, [sp], #16
; NONEON-NOSVE-NEXT: ret
ret fp128 %q1
}
More information about the llvm-commits
mailing list