[llvm] [AArch64] Remove redundant fmov instruction in i32 load, zero-extension to i64 and bitcast to f64 (PR #146920)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 3 09:19:24 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Amina Chabane (Amichaxx)
<details>
<summary>Changes</summary>
Previously, a separate load, zext and FMOV instruction was emitted. This patch adds a new TableGen pattern to avoid the unnecessary FMOV. A test is included in test/CodeGen/AArch64/load_u64_from_u32.ll
---
Full diff: https://github.com/llvm/llvm-project/pull/146920.diff
2 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.td (+6-1)
- (added) llvm/test/CodeGen/AArch64/load_u64_from_u32.ll (+14)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index efe6cc1aa8aec..2b75e38232384 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -3913,6 +3913,10 @@ defm LDRSW : LoadUI<0b10, 0, 0b10, GPR64, uimm12s4, "ldrsw",
def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
(SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;
+// load zero-extended word, bitcast to double
+def : Pat <(f64 (bitconvert (i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
+
// Pre-fetch.
def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm",
[(AArch64Prefetch timm:$Rt,
@@ -9414,6 +9418,7 @@ def : Pat<(v4i32 (mulhu V128:$Rn, V128:$Rm)),
(EXTRACT_SUBREG V128:$Rm, dsub)),
(UMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>;
+
// Conversions within AdvSIMD types in the same register size are free.
// But because we need a consistent lane ordering, in big endian many
// conversions require one or more REV instructions.
@@ -10986,4 +10991,4 @@ defm FMMLA : SIMDThreeSameVectorFP8MatrixMul<"fmmla">;
include "AArch64InstrAtomics.td"
include "AArch64SVEInstrInfo.td"
include "AArch64SMEInstrInfo.td"
-include "AArch64InstrGISel.td"
+include "AArch64InstrGISel.td"
\ No newline at end of file
diff --git a/llvm/test/CodeGen/AArch64/load_u64_from_u32.ll b/llvm/test/CodeGen/AArch64/load_u64_from_u32.ll
new file mode 100644
index 0000000000000..ad30981012112
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/load_u64_from_u32.ll
@@ -0,0 +1,14 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s
+
+define double @_Z9load_u64_from_u32_testPj(ptr %n) {
+; CHECK-LABEL: _Z9load_u64_from_u32_testPj:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldr s0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %0 = load i32, ptr %n, align 4
+ %conv = zext i32 %0 to i64
+ %1 = bitcast i64 %conv to double
+ ret double %1
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/146920
More information about the llvm-commits
mailing list