[llvm] [DAG] Enable bitcast STLF for Constant/Undef (PR #172523)

Thu Dec 25 05:14:49 PST 2025

https://github.com/Michael-Chen-NJU updated https://github.com/llvm/llvm-project/pull/172523

>From fdb2856d75bb1898f33d3b5ba451b455da9848ce Mon Sep 17 00:00:00 2001
From: Michael-Chen-NJU <2802328816 at qq.com>
Date: Wed, 17 Dec 2025 01:23:35 +0800
Subject: [PATCH 1/3] [DAGCombiner] Fix crash and enable bitcast forwarding in
 ForwardStoreValueToDirectLoad

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 14 +++++-
 ...ing-mode-fixed-length-insert-vector-elt.ll |  7 +--
 .../sve-streaming-mode-fixed-length-stores.ll |  8 +---
 llvm/test/CodeGen/AArch64/v3f-to-int.ll       |  3 +-
 llvm/test/CodeGen/X86/dag-stlf-mismatch.ll    | 43 +++++++++++++++++++
 llvm/test/CodeGen/X86/pr38533.ll              |  9 ++--
 6 files changed, 63 insertions(+), 21 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/dag-stlf-mismatch.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 6a99d4e29b64f..111218ef2a01a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -20478,8 +20478,18 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
       break;
     if (STMemType != LDMemType) {
       // TODO: Support vectors? This requires extract_subvector/bitcast.
-      if (!STMemType.isVector() && !LDMemType.isVector() &&
-          STMemType.isInteger() && LDMemType.isInteger())
+      if (LdMemSize == StMemSize) {
+        bool IsConstantOrUndef = isa<ConstantSDNode>(Val) ||
+                                 isa<ConstantFPSDNode>(Val) ||
+                                 isa<ConstantPoolSDNode>(Val) || Val.isUndef();
+
+        if (IsConstantOrUndef && isTypeLegal(Val.getValueType()) &&
+            TLI.isOperationLegalOrCustom(ISD::BITCAST, LDMemType))
+          Val = DAG.getBitcast(LDMemType, Val);
+        else
+          break;
+      } else if (!STMemType.isVector() && !LDMemType.isVector() &&
+                 STMemType.isInteger() && LDMemType.isInteger())
         Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
       else
         break;
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll
index ad00e99b704dd..5f21c80c2fdd0 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll
@@ -426,8 +426,7 @@ define <1 x i64> @insertelement_v1i64(<1 x i64> %op1) {
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #16
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
 ; NONEON-NOSVE-NEXT:    mov w8, #5 // =0x5
-; NONEON-NOSVE-NEXT:    str x8, [sp, #8]
-; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT:    fmov d0, x8
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
     %r = insertelement <1 x i64> %op1, i64 5, i64 0
@@ -760,9 +759,7 @@ define <1 x double> @insertelement_v1f64(<1 x double> %op1) {
 ; NONEON-NOSVE:       // %bb.0:
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #16
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
-; NONEON-NOSVE-NEXT:    mov x8, #4617315517961601024 // =0x4014000000000000
-; NONEON-NOSVE-NEXT:    str x8, [sp, #8]
-; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT:    fmov d0, #5.00000000
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
     %r = insertelement <1 x double> %op1, double 5.0, i64 0
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll
index 17579d79896da..d2e9fd79e50a9 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll
@@ -314,9 +314,7 @@ define void @store_v1i64(ptr %a) {
 ; NONEON-NOSVE:       // %bb.0:
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #16
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
-; NONEON-NOSVE-NEXT:    str xzr, [sp, #8]
-; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
-; NONEON-NOSVE-NEXT:    str d0, [x0]
+; NONEON-NOSVE-NEXT:    str xzr, [x0]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   store <1 x i64> zeroinitializer, ptr %a
@@ -334,9 +332,7 @@ define void @store_v1f64(ptr %a) {
 ; NONEON-NOSVE:       // %bb.0:
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #16
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
-; NONEON-NOSVE-NEXT:    str xzr, [sp, #8]
-; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
-; NONEON-NOSVE-NEXT:    str d0, [x0]
+; NONEON-NOSVE-NEXT:    str xzr, [x0]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   store <1 x double> zeroinitializer, ptr %a
diff --git a/llvm/test/CodeGen/AArch64/v3f-to-int.ll b/llvm/test/CodeGen/AArch64/v3f-to-int.ll
index 6d4061fb02cff..2e4041f60049b 100644
--- a/llvm/test/CodeGen/AArch64/v3f-to-int.ll
+++ b/llvm/test/CodeGen/AArch64/v3f-to-int.ll
@@ -6,8 +6,7 @@ define void @convert_v3f32() {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    str wzr, [sp, #12]
-; CHECK-NEXT:    ldr s0, [sp, #12]
+; CHECK-NEXT:    movi d0, #0000000000000000
 ; CHECK-NEXT:    strb wzr, [x8]
 ; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
 ; CHECK-NEXT:    str h0, [x8]
diff --git a/llvm/test/CodeGen/X86/dag-stlf-mismatch.ll b/llvm/test/CodeGen/X86/dag-stlf-mismatch.ll
new file mode 100644
index 0000000000000..7d65ff77494cf
--- /dev/null
+++ b/llvm/test/CodeGen/X86/dag-stlf-mismatch.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define float @test_stlf_integer(ptr %p, float %v) {
+; CHECK-LABEL: test_stlf_integer:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl $0, (%rdi)
+; CHECK-NEXT:    xorps %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  store i32 0, ptr %p, align 4
+  %f = load float, ptr %p, align 4
+  %r = fmul fast float %f, %v
+  ret float %r
+}
+
+define float @test_stlf_vector(ptr %p, float %v) {
+; CHECK-LABEL: test_stlf_vector:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorps %xmm1, %xmm1
+; CHECK-NEXT:    movups %xmm1, (%rdi)
+; CHECK-NEXT:    mulss (%rdi), %xmm0
+; CHECK-NEXT:    retq
+  store <4 x float> zeroinitializer, ptr %p, align 4
+  %f = load float, ptr %p, align 4
+  %r = fmul fast float %f, %v
+  ret float %r
+}
+
+define float @test_stlf_bitcast(ptr %p, float %v) {
+; CHECK-LABEL: test_stlf_bitcast:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorps %xmm1, %xmm1
+; CHECK-NEXT:    movups %xmm1, (%rdi)
+; CHECK-NEXT:    mulss (%rdi), %xmm0
+; CHECK-NEXT:    retq
+  store <2 x i64> zeroinitializer, ptr %p, align 4
+  %f = load float, ptr %p, align 4
+  %r = fmul fast float %f, %v
+  ret float %r
+}
diff --git a/llvm/test/CodeGen/X86/pr38533.ll b/llvm/test/CodeGen/X86/pr38533.ll
index f1bbb2ffdffd0..11db6bfa99207 100644
--- a/llvm/test/CodeGen/X86/pr38533.ll
+++ b/llvm/test/CodeGen/X86/pr38533.ll
@@ -7,23 +7,20 @@
 define void @constant_fold_vector_to_half() {
 ; SSE2-LABEL: constant_fold_vector_to_half:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    movw $16384, -{{[0-9]+}}(%rsp) # imm = 0x4000
-; SSE2-NEXT:    pinsrw $0, -{{[0-9]+}}(%rsp), %xmm0
+; SSE2-NEXT:    pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE2-NEXT:    pextrw $0, %xmm0, %eax
 ; SSE2-NEXT:    movw %ax, (%rax)
 ; SSE2-NEXT:    retq
 ;
 ; AVX512-LABEL: constant_fold_vector_to_half:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    movw $16384, -{{[0-9]+}}(%rsp) # imm = 0x4000
-; AVX512-NEXT:    vpinsrw $0, -{{[0-9]+}}(%rsp), %xmm0, %xmm0
+; AVX512-NEXT:    vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
 ; AVX512-NEXT:    vpextrw $0, %xmm0, (%rax)
 ; AVX512-NEXT:    retq
 ;
 ; AVX512FP16-LABEL: constant_fold_vector_to_half:
 ; AVX512FP16:       # %bb.0:
-; AVX512FP16-NEXT:    movw $16384, -{{[0-9]+}}(%rsp) # imm = 0x4000
-; AVX512FP16-NEXT:    vmovsh -{{[0-9]+}}(%rsp), %xmm0
+; AVX512FP16-NEXT:    vmovsh {{.*#+}} xmm0 = [2.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0]
 ; AVX512FP16-NEXT:    vmovsh %xmm0, (%rax)
 ; AVX512FP16-NEXT:    retq
   store volatile half bitcast (<4 x i4> <i4 0, i4 0, i4 0, i4 4> to half), ptr undef

>From 5152170ab0895b2651125f3833789bd86787dae5 Mon Sep 17 00:00:00 2001
From: Michael-Chen-NJU <2802328816 at qq.com>
Date: Wed, 17 Dec 2025 11:45:43 +0800
Subject: [PATCH 2/3] [DAG] uses standard DAG.isConstant* helpers

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  6 ++---
 llvm/test/CodeGen/X86/dag-stlf-mismatch.ll    | 22 ++++++++++++++++---
 2 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 111218ef2a01a..984a01a61e9a0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -20479,9 +20479,9 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
     if (STMemType != LDMemType) {
       // TODO: Support vectors? This requires extract_subvector/bitcast.
       if (LdMemSize == StMemSize) {
-        bool IsConstantOrUndef = isa<ConstantSDNode>(Val) ||
-                                 isa<ConstantFPSDNode>(Val) ||
-                                 isa<ConstantPoolSDNode>(Val) || Val.isUndef();
+        bool IsConstantOrUndef =
+            Val.isUndef() || DAG.isConstantIntBuildVectorOrConstantInt(Val) ||
+            DAG.isConstantFPBuildVectorOrConstantFP(Val);
 
         if (IsConstantOrUndef && isTypeLegal(Val.getValueType()) &&
             TLI.isOperationLegalOrCustom(ISD::BITCAST, LDMemType))
diff --git a/llvm/test/CodeGen/X86/dag-stlf-mismatch.ll b/llvm/test/CodeGen/X86/dag-stlf-mismatch.ll
index 7d65ff77494cf..f37cb20c04a07 100644
--- a/llvm/test/CodeGen/X86/dag-stlf-mismatch.ll
+++ b/llvm/test/CodeGen/X86/dag-stlf-mismatch.ll
@@ -1,8 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
 ; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s
 
-target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
+%struct.Data = type { float }
 
 define float @test_stlf_integer(ptr %p, float %v) {
 ; CHECK-LABEL: test_stlf_integer:
@@ -38,6 +37,23 @@ define float @test_stlf_bitcast(ptr %p, float %v) {
 ; CHECK-NEXT:    retq
   store <2 x i64> zeroinitializer, ptr %p, align 4
   %f = load float, ptr %p, align 4
-  %r = fmul fast float %f, %v
+  %r = fmul float %f, %v
   ret float %r
 }
+
+declare void @ext_func(ptr byval(%struct.Data) align 4 %p)
+define void @test_stlf_late_byval(ptr %ptr) {
+; CHECK-LABEL: test_stlf_late_byval:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    movl $0, (%rdi)
+; CHECK-NEXT:    movl $0, (%rsp)
+; CHECK-NEXT:    callq ext_func@PLT
+; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+  store i32 0, ptr %ptr, align 4
+  call void @ext_func(ptr byval(%struct.Data) align 4 %ptr)
+  ret void
+}
\ No newline at end of file

>From c3abf924422ed0013408a4b8abbeeea35510c927 Mon Sep 17 00:00:00 2001
From: Michael-Chen-NJU <2802328816 at qq.com>
Date: Thu, 25 Dec 2025 21:13:39 +0800
Subject: [PATCH 3/3] fix

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 7 +------
 llvm/test/CodeGen/X86/dag-stlf-mismatch.ll    | 2 +-
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 984a01a61e9a0..647acd809bdb0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -20479,12 +20479,7 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
     if (STMemType != LDMemType) {
       // TODO: Support vectors? This requires extract_subvector/bitcast.
       if (LdMemSize == StMemSize) {
-        bool IsConstantOrUndef =
-            Val.isUndef() || DAG.isConstantIntBuildVectorOrConstantInt(Val) ||
-            DAG.isConstantFPBuildVectorOrConstantFP(Val);
-
-        if (IsConstantOrUndef && isTypeLegal(Val.getValueType()) &&
-            TLI.isOperationLegalOrCustom(ISD::BITCAST, LDMemType))
+        if (DAG.isConstantValueOfAnyType(Val) && isTypeLegal(STMemType))
           Val = DAG.getBitcast(LDMemType, Val);
         else
           break;
diff --git a/llvm/test/CodeGen/X86/dag-stlf-mismatch.ll b/llvm/test/CodeGen/X86/dag-stlf-mismatch.ll
index f37cb20c04a07..82550d675d930 100644
--- a/llvm/test/CodeGen/X86/dag-stlf-mismatch.ll
+++ b/llvm/test/CodeGen/X86/dag-stlf-mismatch.ll
@@ -56,4 +56,4 @@ define void @test_stlf_late_byval(ptr %ptr) {
   store i32 0, ptr %ptr, align 4
   call void @ext_func(ptr byval(%struct.Data) align 4 %ptr)
   ret void
-}
\ No newline at end of file
+}