[llvm] 3b19717 - [AArch64] Removed redundant FMOV instruction for truncstores of f64/f32 via bitcast to i64/i32/i8. (#149997)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 8 02:35:08 PDT 2025
Author: Amina Chabane
Date: 2025-09-08T10:35:04+01:00
New Revision: 3b19717fb4ce17e450f13f1fd114c26a9682accf
URL: https://github.com/llvm/llvm-project/commit/3b19717fb4ce17e450f13f1fd114c26a9682accf
DIFF: https://github.com/llvm/llvm-project/commit/3b19717fb4ce17e450f13f1fd114c26a9682accf.diff
LOG: [AArch64] Removed redundant FMOV instruction for truncstores of f64/f32 via bitcast to i64/i32/i8. (#149997)
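Previously, storing the low bits of a double that was bitcast to i64 and
truncated to i32, i16 or i8 (or of a float that was bitcast to i32 and
truncated to i16 or i8) would emit a redundant FMOV before the store. This
patch introduces new TableGen patterns that store the matching FP
sub-register directly (e.g. `str s0, [x0]`), avoiding the unnecessary FMOV.
Tests added: bitcast_truncstore.ll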
Added:
llvm/test/CodeGen/AArch64/bitcast_truncstore.ll
Modified:
llvm/lib/Target/AArch64/AArch64InstrInfo.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 62b26b5239365..2a90288c35751 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4751,6 +4751,26 @@ let Predicates = [IsLE] in {
(STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
}
+// truncstorei32 of f64 bitcasted to i64
+def : Pat<(truncstorei32 (i64 (bitconvert (f64 FPR64:$Rt))), (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)),
+ (STRSui (EXTRACT_SUBREG FPR64:$Rt, ssub), GPR64sp:$Rn, uimm12s4:$offset)>;
+
+// truncstorei16 of f64 bitcasted to i64
+def : Pat<(truncstorei16 (i64 (bitconvert (f64 FPR64:$Rt))), (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
+ (STRHui (f16 (EXTRACT_SUBREG FPR64:$Rt, hsub)), GPR64sp:$Rn, uimm12s2:$offset)>;
+
+ // truncstorei16 of f32 bitcasted to i32
+def : Pat<(truncstorei16 (i32 (bitconvert (f32 FPR32:$Rt))), (am_indexed16 GPR64sp:$Rn, uimm12s2:$off)),
+ (STRHui (f16 (EXTRACT_SUBREG FPR32:$Rt, hsub)), GPR64sp:$Rn, uimm12s2:$off)>;
+
+ // truncstorei8 of f64 bitcasted to i64
+def : Pat<(truncstorei8 (i64 (bitconvert (f64 FPR64:$Rt))), (am_indexed8 GPR64sp:$Rn, uimm12s1:$off)),
+ (STRBui (aarch64mfp8 (EXTRACT_SUBREG FPR64:$Rt, bsub)), GPR64sp:$Rn, uimm12s1:$off)>;
+
+ // truncstorei8 of f32 bitcasted to i32
+def : Pat<(truncstorei8 (i32 (bitconvert (f32 FPR32:$Rt))), (am_indexed8 GPR64sp:$Rn, uimm12s1:$off)),
+ (STRBui (aarch64mfp8 (EXTRACT_SUBREG FPR32:$Rt, bsub)), GPR64sp:$Rn, uimm12s1:$off)>;
+
// truncstore i64
def : Pat<(truncstorei32 GPR64:$Rt,
(am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)),
diff --git a/llvm/test/CodeGen/AArch64/bitcast_truncstore.ll b/llvm/test/CodeGen/AArch64/bitcast_truncstore.ll
new file mode 100644
index 0000000000000..e1f1bb429409a
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/bitcast_truncstore.ll
@@ -0,0 +1,83 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
+
+define void @_Z10test_truncstore_f64toi32Pjd(ptr %n, double %x) {
+; CHECK-LABEL: _Z10test_truncstore_f64toi32Pjd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str s0, [x0]
+; CHECK-NEXT: ret
+ %i64 = bitcast double %x to i64
+ %conv = trunc i64 %i64 to i32
+ store i32 %conv, ptr %n, align 4
+ ret void
+}
+
+define void @_Z9test_truncstore_f64toi16Ptd(ptr %n, double %x) {
+; CHECK-LABEL: _Z9test_truncstore_f64toi16Ptd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str h0, [x0]
+; CHECK-NEXT: ret
+ %i64 = bitcast double %x to i64
+ %conv = trunc i64 %i64 to i16
+ store i16 %conv, ptr %n, align 2
+ ret void
+}
+
+define void @_Z13test_truncstore_f64toi8Phd(ptr %n, double %x) {
+; CHECK-LABEL: _Z13test_truncstore_f64toi8Phd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str b0, [x0]
+; CHECK-NEXT: ret
+ %i64 = bitcast double %x to i64
+ %conv = trunc i64 %i64 to i8
+ store i8 %conv, ptr %n, align 1
+ ret void
+}
+
+define void @_Z17test_truncstore_f32toi16Ptf(ptr %n, float %x) {
+; CHECK-LABEL: _Z17test_truncstore_f32toi16Ptf:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str h0, [x0]
+; CHECK-NEXT: ret
+ %i32 = bitcast float %x to i32
+ %conv = trunc i32 %i32 to i16
+ store i16 %conv, ptr %n, align 2
+ ret void
+}
+
+define void @_Z16test_truncstore_f32toi8Phf(ptr %n, float %x) {
+; CHECK-LABEL: _Z16test_truncstore_f32toi8Phf:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str b0, [x0]
+; CHECK-NEXT: ret
+ %i32 = bitcast float %x to i32
+ %conv = trunc i32 %i32 to i8
+ store i8 %conv, ptr %n, align 1
+ ret void
+}
+
+define void @test_truncstore_i64tof32(ptr %n, i64 %x) {
+; CHECK-LABEL: test_truncstore_i64tof32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov d0, x1
+; CHECK-NEXT: fcvt s0, d0
+; CHECK-NEXT: str s0, [x0]
+; CHECK-NEXT: ret
+ %d = bitcast i64 %x to double
+ %f = fptrunc double %d to float
+ store float %f, ptr %n, align 4
+ ret void
+}
+
+define void @test_truncstore_i32tof16(ptr %n, i32 %x) {
+; CHECK-LABEL: test_truncstore_i32tof16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov s0, w1
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: str h0, [x0]
+; CHECK-NEXT: ret
+ %f = bitcast i32 %x to float
+ %h = fptrunc float %f to half
+ store half %h, ptr %n, align 2
+ ret void
+}
More information about the llvm-commits mailing list