[compiler-rt] [Compiler-rt] Add AArch64 routines for __arm_agnostic("sme_za_state") (PR #120059)

Paul Walker via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 17 06:51:29 PST 2024


================
@@ -204,6 +206,163 @@ DEFINE_COMPILERRT_FUNCTION(__arm_get_current_vg)
   ret
 END_COMPILERRT_FUNCTION(__arm_get_current_vg)
 
+// __arm_sme_state_size: returns in x0 the number of bytes a buffer must have
+// to hold the current SME state.
+//   - 16 when SME is not available, SVCR bit 1 (ZA) is clear, or
+//     TPIDR2_EL0 is non-zero.
+//   - otherwise (SVLB * SVLB) + 32, or (SVLB * SVLB) + 96 when SME2 is
+//     available, where SVLB is the value returned by RDSVL #1.
+// Scratch registers written here: x16, x17 and the condition flags.
+DEFINE_COMPILERRT_FUNCTION(__arm_sme_state_size)
+  .variant_pcs __arm_sme_state_size
+  BTI_C
+
+  // Test if SME is available and ZA state is 'active'. The feature word is
+  // kept in x17 so the SME2 test below does not have to reload it.
+  adrp    x17, CPU_FEATS_SYMBOL
+  ldr     x17, [x17, CPU_FEATS_SYMBOL_OFFSET]
+  tbz     x17, #FEAT_SME_BIT, 0f
+  mrs     x16, SVCR
+  tbz     x16, #1, 0f
+  mrs     x16, TPIDR2_EL0
+  cbnz    x16, 0f
+
+  // Size = HAS_FEAT_SME2 ? 96 : 32
+  // (the flags from TST survive the two MOVs, which do not set flags)
+  tst     x17, #FEAT_SME2_MASK
+  mov     w17, #32
+  mov     w16, #96
+  csel    x16, x17, x16, eq
+
+  // Size = Size + (SVLB * SVLB)
+  rdsvl   x17, #1
+  madd    x0, x17, x17, x16
+  ret
+
+0:
+  // Default case, 16 bytes is minimum (to encode VALID bit, multiple of 16 bytes)
+  mov     w0, #16
+  ret
+END_COMPILERRT_FUNCTION(__arm_sme_state_size)
+
+// __arm_sme_save: prepare the buffer at x0 (PTR) for saving the SME state.
+//
+// Buffer layout as written by this routine:
+//   PTR + 0  : 16 bytes of internal state, zeroed first; bit 0 of the first
+//              doubleword is the VALID bit.
+//   PTR + 16 : 16-byte block whose address is written to TPIDR2_EL0:
+//                +0  pointer to the ZA save area
+//                +8  16-bit SVL in bytes (from RDSVL #1)
+//                +10 six zero bytes
+//   PTR + 32 : ZT0 (64 bytes) when SME2 is available; the ZA save area then
+//              starts at PTR + 96. Without SME2 the ZA save area starts at
+//              PTR + 32.
+//
+// ZA itself is not stored here; only the lazy-save block is set up.
+// If SME is not available, SVCR bit 1 (ZA) is clear, or TPIDR2_EL0 is already
+// non-zero, the routine returns with the internal state zeroed (VALID = 0).
+// Branches to do_abort if PTR is not 16-byte aligned.
+//
+// Scratch registers written here: x16, x17 and the condition flags. On the
+// path that sets up the lazy save, x0 is left at PTR + 16 by the pre-indexed
+// store.
+DEFINE_COMPILERRT_FUNCTION(__arm_sme_save)
+  .variant_pcs __arm_sme_save
+  BTI_C
+
+  // If PTR is not 16-byte aligned, abort before touching the buffer.
+  tst     x0, #0xF
+  b.ne    3f
+
+  // Clear internal state bits
+  stp     xzr, xzr, [x0]
+
+  // If SME is not available, PSTATE.ZA = 0 or TPIDR2_EL0 != 0, return.
+  // x17 keeps the feature word for the SME2 test further down.
+  adrp    x17, CPU_FEATS_SYMBOL
+  ldr     x17, [x17, CPU_FEATS_SYMBOL_OFFSET]
+  tbz     x17, #FEAT_SME_BIT, 2f
+  mrs     x16, SVCR
+  tbz     x16, #1, 2f
+  mrs     x16, TPIDR2_EL0
+  cbnz    x16, 2f
+
+  // ZA or ZT0 need saving, we can now set internal VALID bit to 1
+  mov     w16, #1
+  str     x16, [x0]
+
+  // x16 = PTR + 32: the ZT0 slot with SME2, otherwise the ZA save area.
+  add     x16, x0, #32
+  tbz     x17, #FEAT_SME2_BIT, 1f
+
+  // Store ZT0; the ZA save area follows its 64 bytes.
+  str     zt0, [x16]
+  add     x16, x16, #64
+
+1:
+  // Set up lazy-save (x16 = pointer to ZA save area). After the pre-indexed
+  // store x0 = PTR + 16, which is 16-byte aligned, so the halfword store at
+  // #10 and the word store at #12 are naturally aligned.
+  rdsvl   x17, #1
+  str     x16, [x0, #16]!
+  strh    w17, [x0, #8]
+  strh    wzr, [x0, #10]
+  str     wzr, [x0, #12]
+  msr     TPIDR2_EL0, x0
+  ret
+
+2:
+  // Do nothing
+  ret
+
+3:
+  b       SYMBOL_NAME(do_abort)
+END_COMPILERRT_FUNCTION(__arm_sme_save)
+
+DEFINE_COMPILERRT_FUNCTION(__arm_sme_restore)
+  .cfi_startproc
+  .variant_pcs __arm_sme_save
+  BTI_C
+
+  stp     x29, x30, [sp, #-16]!
+  .cfi_def_cfa_offset 16
+  mov     x29, sp
+  .cfi_def_cfa w29, 16
+  .cfi_offset w30, -8
+  .cfi_offset w29, -16
+
+  // If PTR is not 16-byte aligned, abort.
+  tst     x0, #0xF
+  b.ne    3f
+
+  // If the VALID bit is 0, return early.
+  ldr     x16, [x0]
+  tbz     x16, #0, 2f
+
+  // If SME is not available, abort.
+  adrp    x16, CPU_FEATS_SYMBOL
+  ldr     x16, [x16, CPU_FEATS_SYMBOL_OFFSET]
+  tbz     x16, #FEAT_SME_BIT, 3f
+
+  // If TPIDR2_EL0 != nullptr, no lazy-save was committed, try to reload zt0.
+  mrs     x16, TPIDR2_EL0
+  cbnz    x16, 0f
+
+  // If TPIDR2_EL0 == nullptr and PSTATE.ZA = 1 (<=> ZA state is 'active'),
+  // abort.
+  mrs     x16, SVCR
+  tbnz    x16, #1, 3f
+
+  // Restore za.
+  smstart za
+  mov     x16, x0
----------------
paulwalker-arm wrote:

The specification for `__arm_tpidr2_restore` says `x0` is call-preserved, so you can omit this `mov` and simply reverse the `add` after the call returns.

https://github.com/llvm/llvm-project/pull/120059


More information about the llvm-commits mailing list