[llvm] 3b19717 - [AArch64] Removed redundant FMOV instruction for truncstores of f64/f32 via bitcast to i64/i32/i8. (#149997)

Mon Sep 8 02:35:08 PDT 2025

Author: Amina Chabane
Date: 2025-09-08T10:35:04+01:00
New Revision: 3b19717fb4ce17e450f13f1fd114c26a9682accf

URL: https://github.com/llvm/llvm-project/commit/3b19717fb4ce17e450f13f1fd114c26a9682accf
DIFF: https://github.com/llvm/llvm-project/commit/3b19717fb4ce17e450f13f1fd114c26a9682accf.diff

LOG: [AArch64] Removed redundant FMOV instruction for truncstores of f64/f32 via bitcast to i64/i32/i8. (#149997)

Previously, storing the low bits of a double or float that had been bitcast
to i64/i32 and then truncated to i32, i16 or i8 would emit a redundant FMOV.
This patch introduces new TableGen patterns that store directly from the
matching FP sub-register, avoiding the unnecessary FMOV. Tests added:
bitcast_truncstore.ll

Added: 
    llvm/test/CodeGen/AArch64/bitcast_truncstore.ll

Modified: 
    llvm/lib/Target/AArch64/AArch64InstrInfo.td

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 62b26b5239365..2a90288c35751 100644

--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4751,6 +4751,26 @@ let Predicates = [IsLE] in {
             (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
 }
 
+// truncstorei32 of f64 bitcast to i64: store the ssub sub-register directly.
+def : Pat<(truncstorei32 (i64 (bitconvert (f64 FPR64:$Rt))), (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)),
+          (STRSui (EXTRACT_SUBREG FPR64:$Rt, ssub), GPR64sp:$Rn, uimm12s4:$offset)>;
+
+// truncstorei16 of f64 bitcast to i64: store the hsub sub-register directly.
+def : Pat<(truncstorei16 (i64 (bitconvert (f64 FPR64:$Rt))), (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
+          (STRHui (f16 (EXTRACT_SUBREG FPR64:$Rt, hsub)), GPR64sp:$Rn, uimm12s2:$offset)>;
+
+// truncstorei16 of f32 bitcast to i32: store the hsub sub-register directly.
+def : Pat<(truncstorei16 (i32 (bitconvert (f32 FPR32:$Rt))), (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
+          (STRHui (f16 (EXTRACT_SUBREG FPR32:$Rt, hsub)), GPR64sp:$Rn, uimm12s2:$offset)>;
+
+// truncstorei8 of f64 bitcast to i64: store the bsub sub-register directly.
+def : Pat<(truncstorei8 (i64 (bitconvert (f64 FPR64:$Rt))), (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)),
+          (STRBui (aarch64mfp8 (EXTRACT_SUBREG FPR64:$Rt, bsub)), GPR64sp:$Rn, uimm12s1:$offset)>;
+
+// truncstorei8 of f32 bitcast to i32: store the bsub sub-register directly.
+def : Pat<(truncstorei8 (i32 (bitconvert (f32 FPR32:$Rt))), (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)),
+          (STRBui (aarch64mfp8 (EXTRACT_SUBREG FPR32:$Rt, bsub)), GPR64sp:$Rn, uimm12s1:$offset)>;
+
 // truncstore i64
 def : Pat<(truncstorei32 GPR64:$Rt,
                          (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)),

diff  --git a/llvm/test/CodeGen/AArch64/bitcast_truncstore.ll b/llvm/test/CodeGen/AArch64/bitcast_truncstore.ll
new file mode 100644
index 0000000000000..e1f1bb429409a
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/bitcast_truncstore.ll
@@ -0,0 +1,83 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
+
+define void @_Z10test_truncstore_f64toi32Pjd(ptr %n, double %x) {
+; CHECK-LABEL: _Z10test_truncstore_f64toi32Pjd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str s0, [x0]
+; CHECK-NEXT:    ret
+  %bits = bitcast double %x to i64 ; reinterpret the double as its raw 64-bit pattern
+  %lo32 = trunc i64 %bits to i32 ; keep only the low 32 bits
+  store i32 %lo32, ptr %n, align 4 ; expected to lower to one S-register store
+  ret void
+}
+
+define void @_Z9test_truncstore_f64toi16Ptd(ptr %n, double %x) {
+; CHECK-LABEL: _Z9test_truncstore_f64toi16Ptd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str h0, [x0]
+; CHECK-NEXT:    ret
+  %bits = bitcast double %x to i64 ; reinterpret the double as its raw 64-bit pattern
+  %lo16 = trunc i64 %bits to i16 ; keep only the low 16 bits
+  store i16 %lo16, ptr %n, align 2 ; expected to lower to one H-register store
+  ret void
+}
+
+define void @_Z13test_truncstore_f64toi8Phd(ptr %n, double %x) {
+; CHECK-LABEL: _Z13test_truncstore_f64toi8Phd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str b0, [x0]
+; CHECK-NEXT:    ret
+  %bits = bitcast double %x to i64 ; reinterpret the double as its raw 64-bit pattern
+  %lo8 = trunc i64 %bits to i8 ; keep only the low 8 bits
+  store i8 %lo8, ptr %n, align 1 ; expected to lower to one B-register store
+  ret void
+}
+
+define void @_Z17test_truncstore_f32toi16Ptf(ptr %n, float %x) {
+; CHECK-LABEL: _Z17test_truncstore_f32toi16Ptf:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str h0, [x0]
+; CHECK-NEXT:    ret
+  %bits = bitcast float %x to i32 ; reinterpret the float as its raw 32-bit pattern
+  %lo16 = trunc i32 %bits to i16 ; keep only the low 16 bits
+  store i16 %lo16, ptr %n, align 2 ; expected to lower to one H-register store
+  ret void
+}
+
+define void @_Z16test_truncstore_f32toi8Phf(ptr %n, float %x) {
+; CHECK-LABEL: _Z16test_truncstore_f32toi8Phf:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str b0, [x0]
+; CHECK-NEXT:    ret
+  %bits = bitcast float %x to i32 ; reinterpret the float as its raw 32-bit pattern
+  %lo8 = trunc i32 %bits to i8 ; keep only the low 8 bits
+  store i8 %lo8, ptr %n, align 1 ; expected to lower to one B-register store
+  ret void
+}
+
+define void @test_truncstore_i64tof32(ptr %n, i64 %x) {
+; CHECK-LABEL: test_truncstore_i64tof32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov d0, x1
+; CHECK-NEXT:    fcvt s0, d0
+; CHECK-NEXT:    str s0, [x0]
+; CHECK-NEXT:    ret
+  %wide = bitcast i64 %x to double ; integer bits reinterpreted as a double
+  %narrow = fptrunc double %wide to float ; numeric conversion, not a bit truncation
+  store float %narrow, ptr %n, align 4 ; fmov and fcvt are expected to remain
+  ret void
+}
+
+define void @test_truncstore_i32tof16(ptr %n, i32 %x) {
+; CHECK-LABEL: test_truncstore_i32tof16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov s0, w1
+; CHECK-NEXT:    fcvt h0, s0
+; CHECK-NEXT:    str h0, [x0]
+; CHECK-NEXT:    ret
+  %wide = bitcast i32 %x to float ; integer bits reinterpreted as a float
+  %narrow = fptrunc float %wide to half ; numeric conversion, not a bit truncation
+  store half %narrow, ptr %n, align 2 ; fmov and fcvt are expected to remain
+  ret void
+}