[llvm] [SelectionDAG] Fix crash in SimplifyDemandedBits for bf16 BITCAST sign bit extraction (PR #189129)

Sat Mar 28 17:02:34 PDT 2026

https://github.com/lfmeadow updated https://github.com/llvm/llvm-project/pull/189129

>From 6594491ce604b1322e24423a6846743caf7f6ecc Mon Sep 17 00:00:00 2001
From: Larry Meadows <lmeadows at amd.com>
Date: Fri, 27 Mar 2026 18:16:33 -0500
Subject: [PATCH] [SelectionDAG] Exclude bf16 from BITCAST->FGETSIGN sign bit
 optimization

SimplifyDemandedBits in TargetLowering.cpp lowers "only sign bit demanded
from bitcast(fp)" to FGETSIGN+SHL. This path already excludes f16 and f128
but not bf16. When SrcVT is bf16, the FGETSIGN result is i32 (since i16 is
not legal for FGETSIGN), but the SHL expects i16 operands, causing a type
mismatch assertion in debug builds and silent miscompilation in release
builds.

Add bf16 to the exclusion list alongside f16.

Made-with: Cursor
---
 .../CodeGen/SelectionDAG/TargetLowering.cpp   |  2 +-
 llvm/test/CodeGen/X86/int-to-fp-demanded.ll   | 31 +++++++++++++++++++
 2 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index a0b5f67c2e6c7..3db886f89021c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2742,7 +2742,7 @@ bool TargetLowering::SimplifyDemandedBits(
       bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
       bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
       if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
-          SrcVT != MVT::f128) {
+          SrcVT != MVT::bf16 && SrcVT != MVT::f128) {
         // Cannot eliminate/lower SHL for f128 yet.
         EVT Ty = OpVTLegal ? VT : MVT::i32;
         // Make a FGETSIGN + SHL to move the sign bit into the appropriate
diff --git a/llvm/test/CodeGen/X86/int-to-fp-demanded.ll b/llvm/test/CodeGen/X86/int-to-fp-demanded.ll
index cdde03fb0534b..0c6b0deb14716 100644
--- a/llvm/test/CodeGen/X86/int-to-fp-demanded.ll
+++ b/llvm/test/CodeGen/X86/int-to-fp-demanded.ll
@@ -4,6 +4,37 @@
 
 declare void @use.i1(i1)
 declare void @use.i32(i32)
+
+; Verify that extracting the sign bit of a bfloat via bitcast does not crash
+; or miscompile. SimplifyDemandedBits previously lowered this through FGETSIGN
+; and created a type-mismatched SHL because bf16 was not excluded like f16.
+define i16 @bf16_bitcast_signbit(float %a) nounwind {
+; X86-LABEL: bf16_bitcast_signbit:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    flds {{[0-9]+}}(%esp)
+; X86-NEXT:    fstps (%esp)
+; X86-NEXT:    calll __truncsfbf2
+; X86-NEXT:    # kill: def $ax killed $ax def $eax
+; X86-NEXT:    andl $32768, %eax # imm = 0x8000
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    popl %ecx
+; X86-NEXT:    retl
+;
+; X64-LABEL: bf16_bitcast_signbit:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    callq __truncsfbf2@PLT
+; X64-NEXT:    pextrw $0, %xmm0, %eax
+; X64-NEXT:    andl $32768, %eax # imm = 0x8000
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %bf = fptrunc float %a to bfloat
+  %i = bitcast bfloat %bf to i16
+  %r = and i16 %i, -32768
+  ret i16 %r
+}
 define i32 @sitofp_signbit_only(i32 %i_in) nounwind {
 ; X86-LABEL: sitofp_signbit_only:
 ; X86:       # %bb.0: