[llvm] 3efe832 - [AArch64] Fix chain for calls from agnostic-ZA functions.

Sander de Smalen via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 13 04:07:21 PST 2025


Author: Sander de Smalen
Date: 2025-01-13T12:06:50Z
New Revision: 3efe83291f07dcf2423065e63b826407d1ec2609

URL: https://github.com/llvm/llvm-project/commit/3efe83291f07dcf2423065e63b826407d1ec2609
DIFF: https://github.com/llvm/llvm-project/commit/3efe83291f07dcf2423065e63b826407d1ec2609.diff

LOG: [AArch64] Fix chain for calls from agnostic-ZA functions.

The lowering code passed the wrong chain value (`Chain` instead of `Result`)
to the SME state restore. As a result, the 'smstart' that follows a call from
a streaming agnostic-ZA function to a non-streaming private-ZA function was
incorrectly removed from the DAG.

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/test/CodeGen/AArch64/sme-agnostic-za.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index d9877fef1437cb..278dd95cd969d8 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -9664,7 +9664,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
         DAG.getConstant(0, DL, MVT::i64));
     TPIDR2.Uses++;
   } else if (RequiresSaveAllZA) {
-    Result = emitSMEStateSaveRestore(*this, DAG, FuncInfo, DL, Chain,
+    Result = emitSMEStateSaveRestore(*this, DAG, FuncInfo, DL, Result,
                                      /*IsSave=*/false);
   }
 

diff  --git a/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll b/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll
index 97522b9a319c09..1f688154110973 100644
--- a/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll
+++ b/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll
@@ -82,3 +82,121 @@ define i64 @shared_caller_agnostic_callee(i64 %v) nounwind "aarch64_inout_za" "a
   %res = call i64 @agnostic_decl(i64 %v)
   ret i64 %res
 }
+
+; agnostic-ZA + streaming -> private-ZA + non-streaming
+define i64 @streaming_agnostic_caller_nonstreaming_private_za_callee(i64 %v) nounwind "aarch64_za_state_agnostic" "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: streaming_agnostic_caller_nonstreaming_private_za_callee:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stp d15, d14, [sp, #-112]! // 16-byte Folded Spill
+; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    mov x9, x0
+; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT:    bl __arm_get_current_vg
+; CHECK-NEXT:    str x0, [sp, #80] // 8-byte Folded Spill
+; CHECK-NEXT:    mov x0, x9
+; CHECK-NEXT:    add x29, sp, #64
+; CHECK-NEXT:    stp x20, x19, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT:    mov x8, x0
+; CHECK-NEXT:    bl __arm_sme_state_size
+; CHECK-NEXT:    sub sp, sp, x0
+; CHECK-NEXT:    mov x20, sp
+; CHECK-NEXT:    mov x0, x20
+; CHECK-NEXT:    bl __arm_sme_save
+; CHECK-NEXT:    smstop sm
+; CHECK-NEXT:    mov x0, x8
+; CHECK-NEXT:    bl private_za_decl
+; CHECK-NEXT:    mov x1, x0
+; CHECK-NEXT:    smstart sm
+; CHECK-NEXT:    mov x0, x20
+; CHECK-NEXT:    bl __arm_sme_restore
+; CHECK-NEXT:    mov x0, x20
+; CHECK-NEXT:    bl __arm_sme_save
+; CHECK-NEXT:    smstop sm
+; CHECK-NEXT:    mov x0, x1
+; CHECK-NEXT:    bl private_za_decl
+; CHECK-NEXT:    mov x1, x0
+; CHECK-NEXT:    smstart sm
+; CHECK-NEXT:    mov x0, x20
+; CHECK-NEXT:    bl __arm_sme_restore
+; CHECK-NEXT:    mov x0, x1
+; CHECK-NEXT:    sub sp, x29, #64
+; CHECK-NEXT:    ldp x20, x19, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d15, d14, [sp], #112 // 16-byte Folded Reload
+; CHECK-NEXT:    ret
+  %res = call i64 @private_za_decl(i64 %v)
+  %res2 = call i64 @private_za_decl(i64 %res)
+  ret i64 %res2
+}
+
+; agnostic-ZA + streaming-compatible -> private-ZA + non-streaming
+define i64 @streaming_compatible_agnostic_caller_nonstreaming_private_za_callee(i64 %v) nounwind "aarch64_za_state_agnostic" "aarch64_pstate_sm_compatible" {
+; CHECK-LABEL: streaming_compatible_agnostic_caller_nonstreaming_private_za_callee:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stp d15, d14, [sp, #-112]! // 16-byte Folded Spill
+; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    mov x9, x0
+; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT:    bl __arm_get_current_vg
+; CHECK-NEXT:    str x0, [sp, #80] // 8-byte Folded Spill
+; CHECK-NEXT:    mov x0, x9
+; CHECK-NEXT:    add x29, sp, #64
+; CHECK-NEXT:    stp x20, x19, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT:    mov x8, x0
+; CHECK-NEXT:    bl __arm_sme_state_size
+; CHECK-NEXT:    sub sp, sp, x0
+; CHECK-NEXT:    mov x19, sp
+; CHECK-NEXT:    mov x0, x19
+; CHECK-NEXT:    bl __arm_sme_save
+; CHECK-NEXT:    bl __arm_sme_state
+; CHECK-NEXT:    and x20, x0, #0x1
+; CHECK-NEXT:    tbz w20, #0, .LBB5_2
+; CHECK-NEXT:  // %bb.1:
+; CHECK-NEXT:    smstop sm
+; CHECK-NEXT:  .LBB5_2:
+; CHECK-NEXT:    mov x0, x8
+; CHECK-NEXT:    bl private_za_decl
+; CHECK-NEXT:    mov x2, x0
+; CHECK-NEXT:    tbz w20, #0, .LBB5_4
+; CHECK-NEXT:  // %bb.3:
+; CHECK-NEXT:    smstart sm
+; CHECK-NEXT:  .LBB5_4:
+; CHECK-NEXT:    mov x0, x19
+; CHECK-NEXT:    bl __arm_sme_restore
+; CHECK-NEXT:    mov x0, x19
+; CHECK-NEXT:    bl __arm_sme_save
+; CHECK-NEXT:    bl __arm_sme_state
+; CHECK-NEXT:    and x20, x0, #0x1
+; CHECK-NEXT:    tbz w20, #0, .LBB5_6
+; CHECK-NEXT:  // %bb.5:
+; CHECK-NEXT:    smstop sm
+; CHECK-NEXT:  .LBB5_6:
+; CHECK-NEXT:    mov x0, x2
+; CHECK-NEXT:    bl private_za_decl
+; CHECK-NEXT:    mov x1, x0
+; CHECK-NEXT:    tbz w20, #0, .LBB5_8
+; CHECK-NEXT:  // %bb.7:
+; CHECK-NEXT:    smstart sm
+; CHECK-NEXT:  .LBB5_8:
+; CHECK-NEXT:    mov x0, x19
+; CHECK-NEXT:    bl __arm_sme_restore
+; CHECK-NEXT:    mov x0, x1
+; CHECK-NEXT:    sub sp, x29, #64
+; CHECK-NEXT:    ldp x20, x19, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d15, d14, [sp], #112 // 16-byte Folded Reload
+; CHECK-NEXT:    ret
+  %res = call i64 @private_za_decl(i64 %v)
+  %res2 = call i64 @private_za_decl(i64 %res)
+  ret i64 %res2
+}


        


More information about the llvm-commits mailing list