[llvm] 13c2514 - [AArch64] Disable GlobalISel/FastISel for more SME functions

David Green via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 22 10:06:32 PDT 2023


Author: David Green
Date: 2023-08-22T18:06:27+01:00
New Revision: 13c2514df3b44a96e4ba39cd58c3fd0440868dff

URL: https://github.com/llvm/llvm-project/commit/13c2514df3b44a96e4ba39cd58c3fd0440868dff
DIFF: https://github.com/llvm/llvm-project/commit/13c2514df3b44a96e4ba39cd58c3fd0440868dff.diff

LOG: [AArch64] Disable GlobalISel/FastISel for more SME functions

The patch D136361 disabled GlobalISel and FastISel for some SME functions, as
the saving and restoring of SM is not yet handled. There were several tests
added for fp128 fadd, which will be expanded to a libcall, that only happened
to work by accident and did not handle other cases such as f32/f64 frem
libcalls.

This extends the cases where GlobalISel / FastISel is disabled for functions
with SME attributes, under the assumption that it is difficult to tell what
will become a libcall reliably, and so should fall back for all function until
GlobalISel and/or FastISel can handle them.

Differential Revision: https://reviews.llvm.org/D158490

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64FastISel.cpp
    llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
    llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index 1ae3709e9588ab..b2c46939e58469 100644
--- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -5187,8 +5187,8 @@ FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
                                         const TargetLibraryInfo *LibInfo) {
 
   SMEAttrs CallerAttrs(*FuncInfo.Fn);
-  if (CallerAttrs.hasZAState() ||
-      (!CallerAttrs.hasStreamingInterface() && CallerAttrs.hasStreamingBody()))
+  if (CallerAttrs.hasZAState() || CallerAttrs.hasStreamingInterfaceOrBody() ||
+      CallerAttrs.hasStreamingCompatibleInterface())
     return nullptr;
   return new AArch64FastISel(FuncInfo, LibInfo);
 }

diff  --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
index e78d8bb487a93f..1c5d1e904b87a0 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -532,8 +532,8 @@ bool AArch64CallLowering::fallBackToDAGISel(const MachineFunction &MF) const {
   }
 
   SMEAttrs Attrs(F);
-  if (Attrs.hasNewZAInterface() ||
-      (!Attrs.hasStreamingInterface() && Attrs.hasStreamingBody()))
+  if (Attrs.hasZAState() || Attrs.hasStreamingInterfaceOrBody() ||
+      Attrs.hasStreamingCompatibleInterface())
     return true;
 
   return false;

diff  --git a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
index 46ef8f96320a41..75cf0838e49139 100644
--- a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
+++ b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
@@ -68,57 +68,31 @@ entry:
 
 
 define double @streaming_caller_nonstreaming_callee(double %x) nounwind noinline optnone "aarch64_pstate_sm_enabled" {
-; CHECK-FISEL-LABEL: streaming_caller_nonstreaming_callee:
-; CHECK-FISEL:       // %bb.0: // %entry
-; CHECK-FISEL-NEXT:    sub sp, sp, #96
-; CHECK-FISEL-NEXT:    stp d15, d14, [sp, #16] // 16-byte Folded Spill
-; CHECK-FISEL-NEXT:    stp d13, d12, [sp, #32] // 16-byte Folded Spill
-; CHECK-FISEL-NEXT:    stp d11, d10, [sp, #48] // 16-byte Folded Spill
-; CHECK-FISEL-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
-; CHECK-FISEL-NEXT:    str x30, [sp, #80] // 8-byte Folded Spill
-; CHECK-FISEL-NEXT:    str d0, [sp, #8] // 8-byte Folded Spill
-; CHECK-FISEL-NEXT:    smstop sm
-; CHECK-FISEL-NEXT:    ldr d0, [sp, #8] // 8-byte Folded Reload
-; CHECK-FISEL-NEXT:    bl normal_callee
-; CHECK-FISEL-NEXT:    str d0, [sp, #88] // 8-byte Folded Spill
-; CHECK-FISEL-NEXT:    smstart sm
-; CHECK-FISEL-NEXT:    adrp x8, .LCPI1_0
-; CHECK-FISEL-NEXT:    ldr d0, [x8, :lo12:.LCPI1_0]
-; CHECK-FISEL-NEXT:    ldr d1, [sp, #88] // 8-byte Folded Reload
-; CHECK-FISEL-NEXT:    fadd d0, d1, d0
-; CHECK-FISEL-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
-; CHECK-FISEL-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
-; CHECK-FISEL-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
-; CHECK-FISEL-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
-; CHECK-FISEL-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
-; CHECK-FISEL-NEXT:    add sp, sp, #96
-; CHECK-FISEL-NEXT:    ret
-;
-; CHECK-GISEL-LABEL: streaming_caller_nonstreaming_callee:
-; CHECK-GISEL:       // %bb.0: // %entry
-; CHECK-GISEL-NEXT:    sub sp, sp, #96
-; CHECK-GISEL-NEXT:    stp d15, d14, [sp, #16] // 16-byte Folded Spill
-; CHECK-GISEL-NEXT:    stp d13, d12, [sp, #32] // 16-byte Folded Spill
-; CHECK-GISEL-NEXT:    stp d11, d10, [sp, #48] // 16-byte Folded Spill
-; CHECK-GISEL-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
-; CHECK-GISEL-NEXT:    str x30, [sp, #80] // 8-byte Folded Spill
-; CHECK-GISEL-NEXT:    str d0, [sp, #8] // 8-byte Folded Spill
-; CHECK-GISEL-NEXT:    smstop sm
-; CHECK-GISEL-NEXT:    ldr d0, [sp, #8] // 8-byte Folded Reload
-; CHECK-GISEL-NEXT:    bl normal_callee
-; CHECK-GISEL-NEXT:    str d0, [sp, #88] // 8-byte Folded Spill
-; CHECK-GISEL-NEXT:    smstart sm
-; CHECK-GISEL-NEXT:    mov x8, #4631107791820423168 // =0x4045000000000000
-; CHECK-GISEL-NEXT:    fmov d0, x8
-; CHECK-GISEL-NEXT:    ldr d1, [sp, #88] // 8-byte Folded Reload
-; CHECK-GISEL-NEXT:    fadd d0, d1, d0
-; CHECK-GISEL-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
-; CHECK-GISEL-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
-; CHECK-GISEL-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
-; CHECK-GISEL-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
-; CHECK-GISEL-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
-; CHECK-GISEL-NEXT:    add sp, sp, #96
-; CHECK-GISEL-NEXT:    ret
+; CHECK-COMMON-LABEL: streaming_caller_nonstreaming_callee:
+; CHECK-COMMON:       // %bb.0: // %entry
+; CHECK-COMMON-NEXT:    sub sp, sp, #96
+; CHECK-COMMON-NEXT:    stp d15, d14, [sp, #16] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT:    stp d13, d12, [sp, #32] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT:    stp d11, d10, [sp, #48] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT:    str x30, [sp, #80] // 8-byte Folded Spill
+; CHECK-COMMON-NEXT:    str d0, [sp, #8] // 8-byte Folded Spill
+; CHECK-COMMON-NEXT:    smstop sm
+; CHECK-COMMON-NEXT:    ldr d0, [sp, #8] // 8-byte Folded Reload
+; CHECK-COMMON-NEXT:    bl normal_callee
+; CHECK-COMMON-NEXT:    str d0, [sp, #88] // 8-byte Folded Spill
+; CHECK-COMMON-NEXT:    smstart sm
+; CHECK-COMMON-NEXT:    mov x8, #4631107791820423168 // =0x4045000000000000
+; CHECK-COMMON-NEXT:    fmov d0, x8
+; CHECK-COMMON-NEXT:    ldr d1, [sp, #88] // 8-byte Folded Reload
+; CHECK-COMMON-NEXT:    fadd d0, d1, d0
+; CHECK-COMMON-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
+; CHECK-COMMON-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT:    add sp, sp, #96
+; CHECK-COMMON-NEXT:    ret
 entry:
   %call = call double @normal_callee(double %x)
   %add = fadd double %call, 4.200000e+01
@@ -358,64 +332,95 @@ define fp128 @f128_call_sm(fp128 %a, fp128 %b) "aarch64_pstate_sm_enabled" nounw
   ret fp128 %res
 }
 
-; FIXME: As above this should use Selection DAG to make sure the libcall call is lowered correctly.
+; As above this should use Selection DAG to make sure the libcall call is lowered correctly.
 define double @frem_call_za(double %a, double %b) "aarch64_pstate_za_shared" nounwind {
-; CHECK-FISEL-LABEL: frem_call_za:
-; CHECK-FISEL:       // %bb.0:
-; CHECK-FISEL-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-FISEL-NEXT:    mov x29, sp
-; CHECK-FISEL-NEXT:    sub sp, sp, #16
-; CHECK-FISEL-NEXT:    rdsvl x8, #1
-; CHECK-FISEL-NEXT:    mov x9, sp
-; CHECK-FISEL-NEXT:    mul x8, x8, x8
-; CHECK-FISEL-NEXT:    sub x9, x9, x8
-; CHECK-FISEL-NEXT:    mov sp, x9
-; CHECK-FISEL-NEXT:    stur x9, [x29, #-16]
-; CHECK-FISEL-NEXT:    sub x9, x29, #16
-; CHECK-FISEL-NEXT:    sturh w8, [x29, #-8]
-; CHECK-FISEL-NEXT:    msr TPIDR2_EL0, x9
-; CHECK-FISEL-NEXT:    bl fmod
-; CHECK-FISEL-NEXT:    smstart za
-; CHECK-FISEL-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-FISEL-NEXT:    sub x0, x29, #16
-; CHECK-FISEL-NEXT:    cbnz x8, .LBB10_2
-; CHECK-FISEL-NEXT:  // %bb.1:
-; CHECK-FISEL-NEXT:    bl __arm_tpidr2_restore
-; CHECK-FISEL-NEXT:  .LBB10_2:
-; CHECK-FISEL-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-FISEL-NEXT:    mov sp, x29
-; CHECK-FISEL-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
-; CHECK-FISEL-NEXT:    ret
-;
-; CHECK-GISEL-LABEL: frem_call_za:
-; CHECK-GISEL:       // %bb.0:
-; CHECK-GISEL-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-GISEL-NEXT:    bl fmod
-; CHECK-GISEL-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-GISEL-NEXT:    ret
+; CHECK-COMMON-LABEL: frem_call_za:
+; CHECK-COMMON:       // %bb.0:
+; CHECK-COMMON-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-COMMON-NEXT:    mov x29, sp
+; CHECK-COMMON-NEXT:    sub sp, sp, #16
+; CHECK-COMMON-NEXT:    rdsvl x8, #1
+; CHECK-COMMON-NEXT:    mov x9, sp
+; CHECK-COMMON-NEXT:    mul x8, x8, x8
+; CHECK-COMMON-NEXT:    sub x9, x9, x8
+; CHECK-COMMON-NEXT:    mov sp, x9
+; CHECK-COMMON-NEXT:    stur x9, [x29, #-16]
+; CHECK-COMMON-NEXT:    sub x9, x29, #16
+; CHECK-COMMON-NEXT:    sturh w8, [x29, #-8]
+; CHECK-COMMON-NEXT:    msr TPIDR2_EL0, x9
+; CHECK-COMMON-NEXT:    bl fmod
+; CHECK-COMMON-NEXT:    smstart za
+; CHECK-COMMON-NEXT:    mrs x8, TPIDR2_EL0
+; CHECK-COMMON-NEXT:    sub x0, x29, #16
+; CHECK-COMMON-NEXT:    cbnz x8, .LBB10_2
+; CHECK-COMMON-NEXT:  // %bb.1:
+; CHECK-COMMON-NEXT:    bl __arm_tpidr2_restore
+; CHECK-COMMON-NEXT:  .LBB10_2:
+; CHECK-COMMON-NEXT:    msr TPIDR2_EL0, xzr
+; CHECK-COMMON-NEXT:    mov sp, x29
+; CHECK-COMMON-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-COMMON-NEXT:    ret
   %res = frem double %a, %b
   ret double %res
 }
 
-; FIXME: As above this should use Selection DAG to make sure the libcall is lowered correctly.
+; As above this should use Selection DAG to make sure the libcall is lowered correctly.
 define float @frem_call_sm(float %a, float %b) "aarch64_pstate_sm_enabled" nounwind {
 ; CHECK-COMMON-LABEL: frem_call_sm:
 ; CHECK-COMMON:       // %bb.0:
-; CHECK-COMMON-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-COMMON-NEXT:    stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-COMMON-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill
+; CHECK-COMMON-NEXT:    stp s0, s1, [sp, #72] // 8-byte Folded Spill
+; CHECK-COMMON-NEXT:    smstop sm
+; CHECK-COMMON-NEXT:    ldp s0, s1, [sp, #72] // 8-byte Folded Reload
 ; CHECK-COMMON-NEXT:    bl fmodf
-; CHECK-COMMON-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-COMMON-NEXT:    str s0, [sp, #76] // 4-byte Folded Spill
+; CHECK-COMMON-NEXT:    smstart sm
+; CHECK-COMMON-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT:    ldr s0, [sp, #76] // 4-byte Folded Reload
+; CHECK-COMMON-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-COMMON-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT:    ldp d15, d14, [sp], #80 // 16-byte Folded Reload
 ; CHECK-COMMON-NEXT:    ret
   %res = frem float %a, %b
   ret float %res
 }
 
-; FIXME: As above this should use Selection DAG to make sure the libcall is lowered correctly.
+; As above this should use Selection DAG to make sure the libcall is lowered correctly.
 define float @frem_call_sm_compat(float %a, float %b) "aarch64_pstate_sm_compatible" nounwind {
 ; CHECK-COMMON-LABEL: frem_call_sm_compat:
 ; CHECK-COMMON:       // %bb.0:
-; CHECK-COMMON-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-COMMON-NEXT:    sub sp, sp, #96
+; CHECK-COMMON-NEXT:    stp d15, d14, [sp, #16] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT:    stp d13, d12, [sp, #32] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT:    stp d11, d10, [sp, #48] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT:    stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT:    stp s0, s1, [sp, #8] // 8-byte Folded Spill
+; CHECK-COMMON-NEXT:    bl __arm_sme_state
+; CHECK-COMMON-NEXT:    and x19, x0, #0x1
+; CHECK-COMMON-NEXT:    tbz x19, #0, .LBB12_2
+; CHECK-COMMON-NEXT:  // %bb.1:
+; CHECK-COMMON-NEXT:    smstop sm
+; CHECK-COMMON-NEXT:  .LBB12_2:
+; CHECK-COMMON-NEXT:    ldp s0, s1, [sp, #8] // 8-byte Folded Reload
 ; CHECK-COMMON-NEXT:    bl fmodf
-; CHECK-COMMON-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-COMMON-NEXT:    str s0, [sp, #12] // 4-byte Folded Spill
+; CHECK-COMMON-NEXT:    tbz x19, #0, .LBB12_4
+; CHECK-COMMON-NEXT:  // %bb.3:
+; CHECK-COMMON-NEXT:    smstart sm
+; CHECK-COMMON-NEXT:  .LBB12_4:
+; CHECK-COMMON-NEXT:    ldp x30, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT:    ldr s0, [sp, #12] // 4-byte Folded Reload
+; CHECK-COMMON-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT:    add sp, sp, #96
 ; CHECK-COMMON-NEXT:    ret
   %res = frem float %a, %b
   ret float %res


        


More information about the llvm-commits mailing list