[llvm] [AArch64][SME] Fix generating incorrect TBZ when lowering lazy save. (PR #68429)
Amara Emerson via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 6 09:59:10 PDT 2023
https://github.com/aemerson created https://github.com/llvm/llvm-project/pull/68429
After calling arm_sme_state, the -S assembly would show clang generating
a “tbz xN, #0, Lbb”. However, disassembling it showed that it was
actually encoded as “tbz xN, #32, Lbb”. The issue is that for TBZ, if
you want a bit offset <32 you need to use the W variant, since the
instruction overloads the top bit of the immediate.
>From 63553dc0f1481d5c170010744d716f5430c61dbe Mon Sep 17 00:00:00 2001
From: Amara Emerson <amara at apple.com>
Date: Fri, 6 Oct 2023 09:57:06 -0700
Subject: [PATCH] [AArch64][SME] Fix generating incorrect TBZ when lowering
lazy save.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
After calling arm_sme_state, the -S assembly would show clang generating
a “tbz xN, #0, Lbb”. However, disassembling it showed that it was
actually encoded as “tbz xN, #32, Lbb”. The issue is that for TBZ, if
you want a bit offset <32 you need to use the W variant, since the
instruction overloads the top bit of the immediate.
---
.../AArch64/AArch64ExpandPseudoInsts.cpp | 6 ++--
...compatible-to-normal-fn-wihout-sme-attr.ll | 4 +--
.../AArch64/sme-disable-gisel-fisel.ll | 4 +--
.../CodeGen/AArch64/sme-lazy-save-call.ll | 8 ++---
.../sme-streaming-compatible-interface.ll | 30 +++++++++----------
5 files changed, 27 insertions(+), 25 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index cc61373d51d7188..9179583724e92d1 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -1005,10 +1005,12 @@ AArch64ExpandPseudo::expandCondSMToggle(MachineBasicBlock &MBB,
// expected value for the callee (0 for a normal callee and 1 for a streaming
// callee).
auto PStateSM = MI.getOperand(2).getReg();
+ auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
+ unsigned SMReg32 = TRI->getSubReg(PStateSM, AArch64::sub_32);
bool IsStreamingCallee = MI.getOperand(3).getImm();
- unsigned Opc = IsStreamingCallee ? AArch64::TBZX : AArch64::TBNZX;
+ unsigned Opc = IsStreamingCallee ? AArch64::TBZW : AArch64::TBNZW;
MachineInstrBuilder Tbx =
- BuildMI(MBB, MBBI, DL, TII->get(Opc)).addReg(PStateSM).addImm(0);
+ BuildMI(MBB, MBBI, DL, TII->get(Opc)).addReg(SMReg32).addImm(0);
// Split MBB and create two new blocks:
// - MBB now contains all instructions before MSRcond_pstatesvcrImm1.
diff --git a/llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll b/llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll
index cffbadc53552396..3fa1ee5b9b01140 100644
--- a/llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll
+++ b/llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll
@@ -17,12 +17,12 @@ define void @streaming_compatible() #0 {
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
-; CHECK-NEXT: tbz x19, #0, .LBB0_2
+; CHECK-NEXT: tbz w19, #0, .LBB0_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: bl non_streaming
-; CHECK-NEXT: tbz x19, #0, .LBB0_4
+; CHECK-NEXT: tbz w19, #0, .LBB0_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB0_4:
diff --git a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
index a831cee09619c83..885cd7b0b0947da 100644
--- a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
+++ b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
@@ -413,14 +413,14 @@ define float @frem_call_sm_compat(float %a, float %b) "aarch64_pstate_sm_compati
; CHECK-COMMON-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: bl __arm_sme_state
; CHECK-COMMON-NEXT: and x19, x0, #0x1
-; CHECK-COMMON-NEXT: tbz x19, #0, .LBB12_2
+; CHECK-COMMON-NEXT: tbz w19, #0, .LBB12_2
; CHECK-COMMON-NEXT: // %bb.1:
; CHECK-COMMON-NEXT: smstop sm
; CHECK-COMMON-NEXT: .LBB12_2:
; CHECK-COMMON-NEXT: ldp s0, s1, [sp, #8] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: bl fmodf
; CHECK-COMMON-NEXT: str s0, [sp, #12] // 4-byte Folded Spill
-; CHECK-COMMON-NEXT: tbz x19, #0, .LBB12_4
+; CHECK-COMMON-NEXT: tbz w19, #0, .LBB12_4
; CHECK-COMMON-NEXT: // %bb.3:
; CHECK-COMMON-NEXT: smstart sm
; CHECK-COMMON-NEXT: .LBB12_4:
diff --git a/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll b/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
index 7944c7f94c7018b..6757af01278bd9b 100644
--- a/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
+++ b/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
@@ -134,12 +134,12 @@ define void @test_lazy_save_and_conditional_smstart() nounwind "aarch64_pstate_z
; CHECK-NEXT: msr TPIDR2_EL0, x9
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
-; CHECK-NEXT: tbz x19, #0, .LBB3_2
+; CHECK-NEXT: tbz w19, #0, .LBB3_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB3_2:
; CHECK-NEXT: bl private_za_callee
-; CHECK-NEXT: tbz x19, #0, .LBB3_4
+; CHECK-NEXT: tbz w19, #0, .LBB3_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB3_4:
@@ -187,12 +187,12 @@ define void @za_shared_caller_za_preserved_callee() nounwind "aarch64_pstate_za_
; CHECK-NEXT: msr TPIDR2_EL0, x8
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
-; CHECK-NEXT: tbz x19, #0, .LBB4_2
+; CHECK-NEXT: tbz w19, #0, .LBB4_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB4_2:
; CHECK-NEXT: bl private_za_preserved_callee
-; CHECK-NEXT: tbz x19, #0, .LBB4_4
+; CHECK-NEXT: tbz w19, #0, .LBB4_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB4_4:
diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll b/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll
index 011c3a4c25cacb7..1ad6b189d6fa5cc 100644
--- a/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll
+++ b/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll
@@ -43,12 +43,12 @@ define void @streaming_compatible_caller_normal_callee() "aarch64_pstate_sm_comp
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
-; CHECK-NEXT: tbz x19, #0, .LBB1_2
+; CHECK-NEXT: tbz w19, #0, .LBB1_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB1_2:
; CHECK-NEXT: bl normal_callee
-; CHECK-NEXT: tbz x19, #0, .LBB1_4
+; CHECK-NEXT: tbz w19, #0, .LBB1_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB1_4:
@@ -79,12 +79,12 @@ define void @streaming_compatible_caller_streaming_callee() "aarch64_pstate_sm_c
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
-; CHECK-NEXT: tbnz x19, #0, .LBB2_2
+; CHECK-NEXT: tbnz w19, #0, .LBB2_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB2_2:
; CHECK-NEXT: bl streaming_callee
-; CHECK-NEXT: tbnz x19, #0, .LBB2_4
+; CHECK-NEXT: tbnz w19, #0, .LBB2_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB2_4:
@@ -134,7 +134,7 @@ define <2 x double> @streaming_compatible_with_neon_vectors(<2 x double> %arg) "
; CHECK-NEXT: str z0, [sp, #1, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
-; CHECK-NEXT: tbz x19, #0, .LBB4_2
+; CHECK-NEXT: tbz w19, #0, .LBB4_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB4_2:
@@ -143,7 +143,7 @@ define <2 x double> @streaming_compatible_with_neon_vectors(<2 x double> %arg) "
; CHECK-NEXT: bl normal_callee_vec_arg
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT: tbz x19, #0, .LBB4_4
+; CHECK-NEXT: tbz w19, #0, .LBB4_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB4_4:
@@ -204,14 +204,14 @@ define <vscale x 2 x double> @streaming_compatible_with_scalable_vectors(<vscale
; CHECK-NEXT: str z0, [sp, #1, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
-; CHECK-NEXT: tbz x19, #0, .LBB5_2
+; CHECK-NEXT: tbz w19, #0, .LBB5_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB5_2:
; CHECK-NEXT: ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: bl normal_callee_scalable_vec_arg
; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT: tbz x19, #0, .LBB5_4
+; CHECK-NEXT: tbz w19, #0, .LBB5_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB5_4:
@@ -296,14 +296,14 @@ define <vscale x 2 x i1> @streaming_compatible_with_predicate_vectors(<vscale x
; CHECK-NEXT: str p0, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
-; CHECK-NEXT: tbz x19, #0, .LBB6_2
+; CHECK-NEXT: tbz w19, #0, .LBB6_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB6_2:
; CHECK-NEXT: ldr p0, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: bl normal_callee_predicate_vec_arg
; CHECK-NEXT: str p0, [sp, #6, mul vl] // 2-byte Folded Spill
-; CHECK-NEXT: tbz x19, #0, .LBB6_4
+; CHECK-NEXT: tbz w19, #0, .LBB6_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB6_4:
@@ -360,7 +360,7 @@ define i32 @conditional_smstart_unreachable_block() "aarch64_pstate_sm_compatibl
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
-; CHECK-NEXT: tbnz x19, #0, .LBB7_2
+; CHECK-NEXT: tbnz w19, #0, .LBB7_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB7_2:
@@ -381,12 +381,12 @@ define void @conditional_smstart_no_successor_block(i1 %p) "aarch64_pstate_sm_co
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
-; CHECK-NEXT: tbnz x19, #0, .LBB8_3
+; CHECK-NEXT: tbnz w19, #0, .LBB8_3
; CHECK-NEXT: // %bb.2: // %if.then
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB8_3: // %if.then
; CHECK-NEXT: bl streaming_callee
-; CHECK-NEXT: tbnz x19, #0, .LBB8_5
+; CHECK-NEXT: tbnz w19, #0, .LBB8_5
; CHECK-NEXT: // %bb.4: // %if.then
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB8_5: // %if.then
@@ -417,12 +417,12 @@ define void @disable_tailcallopt() "aarch64_pstate_sm_compatible" nounwind {
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
-; CHECK-NEXT: tbz x19, #0, .LBB9_2
+; CHECK-NEXT: tbz w19, #0, .LBB9_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB9_2:
; CHECK-NEXT: bl normal_callee
-; CHECK-NEXT: tbz x19, #0, .LBB9_4
+; CHECK-NEXT: tbz w19, #0, .LBB9_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB9_4:
More information about the llvm-commits
mailing list