[llvm] [AArch64] Remove redundant fmov instruction in i32 load, zero-extension to i64 and bitcast to f64 (PR #146920)

Fri Jul 4 03:46:10 PDT 2025

https://github.com/Amichaxx updated https://github.com/llvm/llvm-project/pull/146920

>From 6835ce4a207f2322e73739495624170a839aba14 Mon Sep 17 00:00:00 2001
From: Amina Chabane <amina.chabane at arm.com>
Date: Fri, 4 Jul 2025 10:31:38 +0000
Subject: [PATCH] Remove redundant fmov instruction in i32 load, zero-extension
 to i64 and bitcast to f64. Previously, a separate load, zext and FMOV
 instruction was emitted. This patch adds a new TableGen pattern to avoid the
 unnecessary FMOV. A test is included in
 test/CodeGen/AArch64/load_u64_from_u32.ll .

---
 llvm/lib/Target/AArch64/AArch64InstrInfo.td    |  6 +++++-
 llvm/test/CodeGen/AArch64/load_u64_from_u16.ll | 15 +++++++++++++++
 llvm/test/CodeGen/AArch64/load_u64_from_u32.ll | 14 ++++++++++++++
 3 files changed, 34 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AArch64/load_u64_from_u16.ll
 create mode 100644 llvm/test/CodeGen/AArch64/load_u64_from_u32.ll

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index efe6cc1aa8aec..588aede3193a8 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -3913,6 +3913,10 @@ defm LDRSW  : LoadUI<0b10, 0, 0b10, GPR64, uimm12s4, "ldrsw",
 def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
       (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;
 
+// load zero-extended i16, bitcast to f64
+def : Pat <(f64 (bitconvert (i64 (zextloadi16 (am_indexed32 GPR64sp:$Rn, uimm12s2:$offset))))),
+           (SUBREG_TO_REG (i64 0), (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
+    
 // Pre-fetch.
 def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm",
                         [(AArch64Prefetch timm:$Rt,
@@ -10986,4 +10990,4 @@ defm FMMLA : SIMDThreeSameVectorFP8MatrixMul<"fmmla">;
 include "AArch64InstrAtomics.td"
 include "AArch64SVEInstrInfo.td"
 include "AArch64SMEInstrInfo.td"
-include "AArch64InstrGISel.td"
+include "AArch64InstrGISel.td"
\ No newline at end of file
diff --git a/llvm/test/CodeGen/AArch64/load_u64_from_u16.ll b/llvm/test/CodeGen/AArch64/load_u64_from_u16.ll
new file mode 100644
index 0000000000000..ee02895b55221
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/load_u64_from_u16.ll
@@ -0,0 +1,15 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s
+
+define double @_Z9load_u64_from_u16_testPj(ptr %n){
+; CHECK-LABEL: _Z9load_u64_from_u16_testPj:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr h0, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = load i16, ptr %n, align 2
+  %conv = zext i16 %0 to i64
+  %1 = bitcast i64 %conv to double
+  ret double %1
+}
+
diff --git a/llvm/test/CodeGen/AArch64/load_u64_from_u32.ll b/llvm/test/CodeGen/AArch64/load_u64_from_u32.ll
new file mode 100644
index 0000000000000..ad30981012112
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/load_u64_from_u32.ll
@@ -0,0 +1,14 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s
+
+define double @_Z9load_u64_from_u32_testPj(ptr %n) {
+; CHECK-LABEL: _Z9load_u64_from_u32_testPj:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr s0, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = load i32, ptr %n, align 4
+  %conv = zext i32 %0 to i64
+  %1 = bitcast i64 %conv to double
+  ret double %1
+}