[llvm] [AArch64] Fix a presumed typo in isFPImmLegal limit. NFC (PR #106716)

Fri Aug 30 05:29:18 PDT 2024

https://github.com/citymarina updated https://github.com/llvm/llvm-project/pull/106716

>From 74667af109fb1f570f6884733ab079bd2206c920 Mon Sep 17 00:00:00 2001
From: Marina Taylor <marina_taylor at apple.com>
Date: Fri, 30 Aug 2024 12:36:50 +0100
Subject: [PATCH 1/2] [AArch64] Fix a presumed typo in isFPImmLegal limit. NFC

The worst possible case for materializing a double literal looks like:

```
  mov ...
  movk ..., lsl #16
  movk ..., lsl #32
  movk ..., lsl #48
  fmov ...
```

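The limit of 5 in the code gives the impression that `Insn` includes
all instructions, including the `fmov`, but that's not true. It only
counts the integer moves. This led me astray on some other work in
this area. Because at most four integer moves are ever needed (see the
worst case above), lowering the limit to 4 does not change behavior.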
---
 llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 02390e0a85c0a5..98f6f30112a8c7 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -11463,7 +11463,8 @@ bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
     // movw+movk is fused). So we limit up to 2 instrdduction at most.
     SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
     AArch64_IMM::expandMOVImm(ImmInt.getZExtValue(), VT.getSizeInBits(), Insn);
-    unsigned Limit = (OptForSize ? 1 : (Subtarget->hasFuseLiterals() ? 5 : 2));
+    assert(Insn.size() <= 4);
+    unsigned Limit = (OptForSize ? 1 : (Subtarget->hasFuseLiterals() ? 4 : 2));
     IsLegal = Insn.size() <= Limit;
   }
 

>From c093fd85ab016181b64804cd4b88158705a8e85f Mon Sep 17 00:00:00 2001
From: Marina Taylor <marina_taylor at apple.com>
Date: Fri, 30 Aug 2024 13:02:12 +0100
Subject: [PATCH 2/2] [AArch64] Add tests for fused FP literals. NFC

This is for an upcoming change to the threshold on Apple targets
for using a constant pool for FP literals versus building them with
integer moves.

This file is based on literal_pools_float.ll. I tried to bolt these
cases onto the existing test, but it got messy because that file
already tests a matrix of combinations, so I am creating this new file
instead.
---
 .../AArch64/literal_pools_float_apple.ll      | 128 ++++++++++++++++++
 1 file changed, 128 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/literal_pools_float_apple.ll

diff --git a/llvm/test/CodeGen/AArch64/literal_pools_float_apple.ll b/llvm/test/CodeGen/AArch64/literal_pools_float_apple.ll
new file mode 100644
index 00000000000000..144f71ab1e4695
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/literal_pools_float_apple.ll
@@ -0,0 +1,128 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=arm64-apple-macosx -mcpu=apple-m1 < %s | FileCheck %s --check-prefix=APPLE
+
+define dso_local float @float_0mov() {
+; CHECK-LABEL: float_0mov:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov s0, #1.00000000
+; CHECK-NEXT:    ret
+;
+; APPLE-LABEL: float_0mov:
+; APPLE:       ; %bb.0:
+; APPLE-NEXT:    fmov s0, #1.00000000
+; APPLE-NEXT:    ret
+  ret float 1.0
+}
+
+define dso_local float @float_1mov() {
+; CHECK-LABEL: float_1mov:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #2143289344 // =0x7fc00000
+; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    ret
+;
+; APPLE-LABEL: float_1mov:
+; APPLE:       ; %bb.0:
+; APPLE-NEXT:    mov w8, #2143289344 ; =0x7fc00000
+; APPLE-NEXT:    fmov s0, w8
+; APPLE-NEXT:    ret
+  ret float 0x7FF8000000000000
+}
+
+define dso_local float @float_2mov() {
+; CHECK-LABEL: float_2mov:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #34952 // =0x8888
+; CHECK-NEXT:    movk w8, #32704, lsl #16
+; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    ret
+;
+; APPLE-LABEL: float_2mov:
+; APPLE:       ; %bb.0:
+; APPLE-NEXT:    mov w8, #34952 ; =0x8888
+; APPLE-NEXT:    movk w8, #32704, lsl #16
+; APPLE-NEXT:    fmov s0, w8
+; APPLE-NEXT:    ret
+  ret float 0x7FF8111100000000
+}
+
+define dso_local double @double_0mov() {
+; CHECK-LABEL: double_0mov:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov d0, #1.00000000
+; CHECK-NEXT:    ret
+;
+; APPLE-LABEL: double_0mov:
+; APPLE:       ; %bb.0:
+; APPLE-NEXT:    fmov d0, #1.00000000
+; APPLE-NEXT:    ret
+  ret double 1.0
+}
+
+define dso_local double @double_1mov() {
+; CHECK-LABEL: double_1mov:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #4096 // =0x1000
+; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    ret
+;
+; APPLE-LABEL: double_1mov:
+; APPLE:       ; %bb.0:
+; APPLE-NEXT:    mov x8, #4096 ; =0x1000
+; APPLE-NEXT:    fmov d0, x8
+; APPLE-NEXT:    ret
+  ret double 0x1000
+}
+
+define dso_local double @double_2mov() {
+; CHECK-LABEL: double_2mov:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #4096 // =0x1000
+; CHECK-NEXT:    movk x8, #8192, lsl #16
+; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    ret
+;
+; APPLE-LABEL: double_2mov:
+; APPLE:       ; %bb.0:
+; APPLE-NEXT:    mov x8, #4096 ; =0x1000
+; APPLE-NEXT:    movk x8, #8192, lsl #16
+; APPLE-NEXT:    fmov d0, x8
+; APPLE-NEXT:    ret
+  ret double 0x20001000
+}
+
+define dso_local double @double_3mov() {
+; CHECK-LABEL: double_3mov:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI6_0
+; CHECK-NEXT:    ldr d0, [x8, :lo12:.LCPI6_0]
+; CHECK-NEXT:    ret
+;
+; APPLE-LABEL: double_3mov:
+; APPLE:       ; %bb.0:
+; APPLE-NEXT:    mov x8, #4096 ; =0x1000
+; APPLE-NEXT:    movk x8, #8192, lsl #16
+; APPLE-NEXT:    movk x8, #12288, lsl #32
+; APPLE-NEXT:    fmov d0, x8
+; APPLE-NEXT:    ret
+  ret double 0x300020001000
+}
+
+define dso_local double @double_4mov() {
+; CHECK-LABEL: double_4mov:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI7_0
+; CHECK-NEXT:    ldr d0, [x8, :lo12:.LCPI7_0]
+; CHECK-NEXT:    ret
+;
+; APPLE-LABEL: double_4mov:
+; APPLE:       ; %bb.0:
+; APPLE-NEXT:    mov x8, #4096 ; =0x1000
+; APPLE-NEXT:    movk x8, #8192, lsl #16
+; APPLE-NEXT:    movk x8, #12288, lsl #32
+; APPLE-NEXT:    movk x8, #16384, lsl #48
+; APPLE-NEXT:    fmov d0, x8
+; APPLE-NEXT:    ret
+  ret double 0x4000300020001000
+}