[llvm] [AArch64] Removed redundant FMOV instruction for truncstores of f64/f32 via bitcast to i64/i32/i8. (PR #149997)

Wed Aug 20 02:13:51 PDT 2025

https://github.com/Amichaxx updated https://github.com/llvm/llvm-project/pull/149997

>From 97f61b4cce1514a20c06d9687d4e81f0dc7b3af6 Mon Sep 17 00:00:00 2001
From: Amina Chabane <amina.chabane at arm.com>
Date: Sun, 17 Aug 2025 23:24:34 +0000
Subject: [PATCH 1/4] [AArch64] Add TableGen patterns for truncstore of bitcasted FP values (f64/f32 → i32/i16/i8X)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   | 24 ++++++++
 .../CodeGen/AArch64/bitcast_truncstore.ll     | 60 +++++++++++++++++++
 2 files changed, 84 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/bitcast_truncstore.ll

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 4fa91a4dc8270..42fce52ef65e5 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4706,6 +4706,30 @@ let Predicates = [IsLE] in {
             (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
 }
 
+// truncstorei32 of f64 bitcasted to i64
+def : Pat<(truncstorei32 (i64 (bitconvert (f64 FPR64:$Rt))), (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)),
+          (STRSui (EXTRACT_SUBREG FPR64:$Rt, ssub), GPR64sp:$Rn, uimm12s4:$offset)>;
+
+// truncstorei16 of f64 bitcasted to i64
+def : Pat<(truncstorei16 (i64 (bitconvert (f64 FPR64:$Rt))), (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
+          (STRHui (f16 (EXTRACT_SUBREG FPR64:$Rt, hsub)), GPR64sp:$Rn, uimm12s2:$offset)>;      
+
+let Predicates = [HasFullFP16] in {
+  // truncstorei16 of f32 bitcasted to i32
+  def : Pat<(truncstorei16 (i32 (bitconvert (f32 FPR32:$Rt))), (am_indexed16 GPR64sp:$Rn, uimm12s2:$off)),
+          (STRHui (f16 (EXTRACT_SUBREG FPR32:$Rt, hsub)), GPR64sp:$Rn, uimm12s2:$off)>;
+}
+
+let Predicates = [HasFPARMv8] in {
+  // truncstorei8 of f64 bitcasted to i64
+  def : Pat<(truncstorei8 (i64 (bitconvert (f64 FPR64:$Rt))), (am_indexed8 GPR64sp:$Rn, uimm12s1:$off)),
+          (STRBui (aarch64mfp8 (EXTRACT_SUBREG FPR64:$Rt, bsub)), GPR64sp:$Rn, uimm12s1:$off)>;
+
+  // truncstorei8 of f32 bitcasted to i32
+  def : Pat<(truncstorei8 (i32 (bitconvert (f32 FPR32:$Rt))), (am_indexed8 GPR64sp:$Rn, uimm12s1:$off)),
+        (STRBui (aarch64mfp8 (EXTRACT_SUBREG FPR32:$Rt, bsub)), GPR64sp:$Rn, uimm12s1:$off)>;
+}
+
 // truncstore i64
 def : Pat<(truncstorei32 GPR64:$Rt,
                          (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)),
diff --git a/llvm/test/CodeGen/AArch64/bitcast_truncstore.ll b/llvm/test/CodeGen/AArch64/bitcast_truncstore.ll
new file mode 100644
index 0000000000000..5806df9d29601
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/bitcast_truncstore.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu -mattr=+fp-armv8,+fullfp16 < %s | FileCheck %s
+
+
+define void @_Z10store_f64i32Pjd(ptr %n, double noundef %x){
+; CHECK-LABEL: _Z10store_f64i32Pjd:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str s0, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = bitcast double %x to i64
+  %conv = trunc i64 %0 to i32
+  store i32 %conv, ptr %n, align 4
+  ret void
+}
+
+define void @_Z9store_f64i16Ptd(ptr %n, double noundef %x){
+; CHECK-LABEL: _Z9store_f64i16Ptd:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str h0, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = bitcast double %x to i64
+  %conv = trunc i64 %0 to i16
+  store i16 %conv, ptr %n, align 2
+  ret void
+}
+
+define void @_Z13store_f64i8Phd(ptr %0, double noundef %1){
+; CHECK-LABEL: _Z13store_f64i8Phd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str b0, [x0]
+; CHECK-NEXT:    ret
+  %3 = bitcast double %1 to i64
+  %4 = trunc i64 %3 to i8
+  store i8 %4, ptr %0, align 1
+  ret void
+}
+
+define void @_Z17store_f32i16Ptf(ptr %0, float noundef %1){
+; CHECK-LABEL: _Z17store_f32i16Ptf:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str h0, [x0]
+; CHECK-NEXT:    ret
+  %3 = bitcast float %1 to i32
+  %4 = trunc i32 %3 to i16
+  store i16 %4, ptr %0, align 2
+  ret void
+}
+
+define void @_Z16store_f32i8Phf(ptr  %0, float noundef %1){
+; CHECK-LABEL: _Z16store_f32i8Phf:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str b0, [x0]
+; CHECK-NEXT:    ret
+  %3 = bitcast float %1 to i32
+  %4 = trunc i32 %3 to i8
+  store i8 %4, ptr %0, align 1
+  ret void
+}

>From 3faa3029ac607cccaada0844e8baca7e5ac35355 Mon Sep 17 00:00:00 2001
From: Amina Chabane <amina.chabane at arm.com>
Date: Mon, 18 Aug 2025 14:37:31 +0000
Subject: [PATCH 2/4] Added tests for converting from int to fp, renamed tests

---
 .../CodeGen/AArch64/bitcast_truncstore.ll     | 79 ++++++++++++-------
 1 file changed, 51 insertions(+), 28 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/bitcast_truncstore.ll b/llvm/test/CodeGen/AArch64/bitcast_truncstore.ll
index 5806df9d29601..147c41e859dc9 100644
--- a/llvm/test/CodeGen/AArch64/bitcast_truncstore.ll
+++ b/llvm/test/CodeGen/AArch64/bitcast_truncstore.ll
@@ -1,60 +1,83 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=aarch64-unknown-linux-gnu -mattr=+fp-armv8,+fullfp16 < %s | FileCheck %s
 
-
-define void @_Z10store_f64i32Pjd(ptr %n, double noundef %x){
-; CHECK-LABEL: _Z10store_f64i32Pjd:
-; CHECK:       // %bb.0: // %entry
+define void @_Z10test_truncstore_f64toi32Pjd(ptr %n, double %x) {
+; CHECK-LABEL: _Z10test_truncstore_f64toi32Pjd:
+; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str s0, [x0]
 ; CHECK-NEXT:    ret
-entry:
-  %0 = bitcast double %x to i64
-  %conv = trunc i64 %0 to i32
+  %i64 = bitcast double %x to i64
+  %conv = trunc i64 %i64 to i32
   store i32 %conv, ptr %n, align 4
   ret void
 }
 
-define void @_Z9store_f64i16Ptd(ptr %n, double noundef %x){
-; CHECK-LABEL: _Z9store_f64i16Ptd:
-; CHECK:       // %bb.0: // %entry
+define void @_Z9test_truncstore_f64toi16Ptd(ptr %n, double %x) {
+; CHECK-LABEL: _Z9test_truncstore_f64toi16Ptd:
+; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str h0, [x0]
 ; CHECK-NEXT:    ret
-entry:
-  %0 = bitcast double %x to i64
-  %conv = trunc i64 %0 to i16
+  %i64 = bitcast double %x to i64
+  %conv = trunc i64 %i64 to i16
   store i16 %conv, ptr %n, align 2
   ret void
 }
 
-define void @_Z13store_f64i8Phd(ptr %0, double noundef %1){
-; CHECK-LABEL: _Z13store_f64i8Phd:
+define void @_Z13test_truncstore_f64toi8Phd(ptr %n, double %x) {
+; CHECK-LABEL: _Z13test_truncstore_f64toi8Phd:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str b0, [x0]
 ; CHECK-NEXT:    ret
-  %3 = bitcast double %1 to i64
-  %4 = trunc i64 %3 to i8
-  store i8 %4, ptr %0, align 1
+  %i64 = bitcast double %x to i64
+  %conv = trunc i64 %i64 to i8
+  store i8 %conv, ptr %n, align 1
   ret void
 }
 
-define void @_Z17store_f32i16Ptf(ptr %0, float noundef %1){
-; CHECK-LABEL: _Z17store_f32i16Ptf:
+define void @_Z17test_truncstore_f32toi16Ptf(ptr %n, float %x) {
+; CHECK-LABEL: _Z17test_truncstore_f32toi16Ptf:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str h0, [x0]
 ; CHECK-NEXT:    ret
-  %3 = bitcast float %1 to i32
-  %4 = trunc i32 %3 to i16
-  store i16 %4, ptr %0, align 2
+  %i32 = bitcast float %x to i32
+  %conv = trunc i32 %i32 to i16
+  store i16 %conv, ptr %n, align 2
   ret void
 }
 
-define void @_Z16store_f32i8Phf(ptr  %0, float noundef %1){
-; CHECK-LABEL: _Z16store_f32i8Phf:
+define void @_Z16test_truncstore_f32toi8Phf(ptr %n, float %x) {
+; CHECK-LABEL: _Z16test_truncstore_f32toi8Phf:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str b0, [x0]
 ; CHECK-NEXT:    ret
-  %3 = bitcast float %1 to i32
-  %4 = trunc i32 %3 to i8
-  store i8 %4, ptr %0, align 1
+  %i32 = bitcast float %x to i32
+  %conv = trunc i32 %i32 to i8
+  store i8 %conv, ptr %n, align 1
+  ret void
+}
+
+define void @test_truncstore_i64tof32(ptr %n, i64 %x) {
+; CHECK-LABEL: test_truncstore_i64tof32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov d0, x1
+; CHECK-NEXT:    fcvt s0, d0
+; CHECK-NEXT:    str s0, [x0]
+; CHECK-NEXT:    ret
+  %d = bitcast i64 %x to double
+  %f = fptrunc double %d to float
+  store float %f, ptr %n, align 4
+  ret void
+}
+
+define void @test_truncstore_i32tof16(ptr %n, i32 %x) {
+; CHECK-LABEL: test_truncstore_i32tof16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov s0, w1
+; CHECK-NEXT:    fcvt h0, s0
+; CHECK-NEXT:    str h0, [x0]
+; CHECK-NEXT:    ret
+  %f = bitcast i32 %x to float
+  %h = fptrunc float %f to half
+  store half %h, ptr %n, align 2
   ret void
 }

>From efa6fabea3e34b81e76099c639ce40e76a06a965 Mon Sep 17 00:00:00 2001
From: Amina Chabane <amina.chabane at arm.com>
Date: Tue, 19 Aug 2025 12:02:43 +0000
Subject: [PATCH 3/4] Removed predicates from patterns and feature flags from
 test

---
 llvm/lib/Target/AArch64/AArch64InstrInfo.td     | 14 +++++---------
 llvm/test/CodeGen/AArch64/bitcast_truncstore.ll |  2 +-
 2 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 42fce52ef65e5..44a60b61fb2c4 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4714,21 +4714,17 @@ def : Pat<(truncstorei32 (i64 (bitconvert (f64 FPR64:$Rt))), (am_indexed32 GPR64
 def : Pat<(truncstorei16 (i64 (bitconvert (f64 FPR64:$Rt))), (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
           (STRHui (f16 (EXTRACT_SUBREG FPR64:$Rt, hsub)), GPR64sp:$Rn, uimm12s2:$offset)>;      
 
-let Predicates = [HasFullFP16] in {
   // truncstorei16 of f32 bitcasted to i32
-  def : Pat<(truncstorei16 (i32 (bitconvert (f32 FPR32:$Rt))), (am_indexed16 GPR64sp:$Rn, uimm12s2:$off)),
-          (STRHui (f16 (EXTRACT_SUBREG FPR32:$Rt, hsub)), GPR64sp:$Rn, uimm12s2:$off)>;
-}
+def : Pat<(truncstorei16 (i32 (bitconvert (f32 FPR32:$Rt))), (am_indexed16 GPR64sp:$Rn, uimm12s2:$off)),
+        (STRHui (f16 (EXTRACT_SUBREG FPR32:$Rt, hsub)), GPR64sp:$Rn, uimm12s2:$off)>;
 
-let Predicates = [HasFPARMv8] in {
   // truncstorei8 of f64 bitcasted to i64
-  def : Pat<(truncstorei8 (i64 (bitconvert (f64 FPR64:$Rt))), (am_indexed8 GPR64sp:$Rn, uimm12s1:$off)),
-          (STRBui (aarch64mfp8 (EXTRACT_SUBREG FPR64:$Rt, bsub)), GPR64sp:$Rn, uimm12s1:$off)>;
+def : Pat<(truncstorei8 (i64 (bitconvert (f64 FPR64:$Rt))), (am_indexed8 GPR64sp:$Rn, uimm12s1:$off)),
+        (STRBui (aarch64mfp8 (EXTRACT_SUBREG FPR64:$Rt, bsub)), GPR64sp:$Rn, uimm12s1:$off)>;
 
   // truncstorei8 of f32 bitcasted to i32
-  def : Pat<(truncstorei8 (i32 (bitconvert (f32 FPR32:$Rt))), (am_indexed8 GPR64sp:$Rn, uimm12s1:$off)),
+def : Pat<(truncstorei8 (i32 (bitconvert (f32 FPR32:$Rt))), (am_indexed8 GPR64sp:$Rn, uimm12s1:$off)),
         (STRBui (aarch64mfp8 (EXTRACT_SUBREG FPR32:$Rt, bsub)), GPR64sp:$Rn, uimm12s1:$off)>;
-}
 
 // truncstore i64
 def : Pat<(truncstorei32 GPR64:$Rt,
diff --git a/llvm/test/CodeGen/AArch64/bitcast_truncstore.ll b/llvm/test/CodeGen/AArch64/bitcast_truncstore.ll
index 147c41e859dc9..e1f1bb429409a 100644
--- a/llvm/test/CodeGen/AArch64/bitcast_truncstore.ll
+++ b/llvm/test/CodeGen/AArch64/bitcast_truncstore.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=aarch64-unknown-linux-gnu -mattr=+fp-armv8,+fullfp16 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
 
 define void @_Z10test_truncstore_f64toi32Pjd(ptr %n, double %x) {
 ; CHECK-LABEL: _Z10test_truncstore_f64toi32Pjd:

>From a2169a97724c7ab9402c99fe20e25e3a483fbd26 Mon Sep 17 00:00:00 2001
From: Amina Chabane <amina.chabane at arm.com>
Date: Wed, 20 Aug 2025 09:07:53 +0000
Subject: [PATCH 4/4] Alignment

---
 llvm/lib/Target/AArch64/AArch64InstrInfo.td | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 44a60b61fb2c4..84c830a0b8017 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4716,15 +4716,15 @@ def : Pat<(truncstorei16 (i64 (bitconvert (f64 FPR64:$Rt))), (am_indexed16 GPR64
 
   // truncstorei16 of f32 bitcasted to i32
 def : Pat<(truncstorei16 (i32 (bitconvert (f32 FPR32:$Rt))), (am_indexed16 GPR64sp:$Rn, uimm12s2:$off)),
-        (STRHui (f16 (EXTRACT_SUBREG FPR32:$Rt, hsub)), GPR64sp:$Rn, uimm12s2:$off)>;
+          (STRHui (f16 (EXTRACT_SUBREG FPR32:$Rt, hsub)), GPR64sp:$Rn, uimm12s2:$off)>;
 
   // truncstorei8 of f64 bitcasted to i64
 def : Pat<(truncstorei8 (i64 (bitconvert (f64 FPR64:$Rt))), (am_indexed8 GPR64sp:$Rn, uimm12s1:$off)),
-        (STRBui (aarch64mfp8 (EXTRACT_SUBREG FPR64:$Rt, bsub)), GPR64sp:$Rn, uimm12s1:$off)>;
+          (STRBui (aarch64mfp8 (EXTRACT_SUBREG FPR64:$Rt, bsub)), GPR64sp:$Rn, uimm12s1:$off)>;
 
   // truncstorei8 of f32 bitcasted to i32
 def : Pat<(truncstorei8 (i32 (bitconvert (f32 FPR32:$Rt))), (am_indexed8 GPR64sp:$Rn, uimm12s1:$off)),
-        (STRBui (aarch64mfp8 (EXTRACT_SUBREG FPR32:$Rt, bsub)), GPR64sp:$Rn, uimm12s1:$off)>;
+          (STRBui (aarch64mfp8 (EXTRACT_SUBREG FPR32:$Rt, bsub)), GPR64sp:$Rn, uimm12s1:$off)>;
 
 // truncstore i64
 def : Pat<(truncstorei32 GPR64:$Rt,