[llvm] DAG: Fix assuming f16 is the only 16-bit fp type in concat vector combine (PR #121637)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Sat Jan 4 03:18:12 PST 2025
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/121637
Fixes #121601
>From 3a385f7b77a2f1a99326c67c78985a3c76769a4e Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Sat, 4 Jan 2025 18:07:08 +0700
Subject: [PATCH] DAG: Fix assuming f16 is the only 16-bit fp type in concat
vector combine
Fixes #121601
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 30 ++++++++-----------
...1601-combine-concat-vectors-assumes-f16.ll | 19 ++++++++++++
2 files changed, 32 insertions(+), 17 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/issue121601-combine-concat-vectors-assumes-f16.ll
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 6b2501591c81a3..9ef0660bf07199 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -24291,8 +24291,8 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
// Keep track of what we encounter.
- bool AnyInteger = false;
- bool AnyFP = false;
+ EVT AnyFPVT;
+
for (const SDValue &Op : N->ops()) {
if (ISD::BITCAST == Op.getOpcode() &&
!Op.getOperand(0).getValueType().isVector())
@@ -24306,27 +24306,23 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
// If it's neither, bail out, it could be something weird like x86mmx.
EVT LastOpVT = Ops.back().getValueType();
if (LastOpVT.isFloatingPoint())
- AnyFP = true;
- else if (LastOpVT.isInteger())
- AnyInteger = true;
- else
+ AnyFPVT = LastOpVT;
+ else if (!LastOpVT.isInteger())
return SDValue();
}
// If any of the operands is a floating point scalar bitcast to a vector,
// use floating point types throughout, and bitcast everything.
// Replace UNDEFs by another scalar UNDEF node, of the final desired type.
- if (AnyFP) {
- SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
- if (AnyInteger) {
- for (SDValue &Op : Ops) {
- if (Op.getValueType() == SVT)
- continue;
- if (Op.isUndef())
- Op = DAG.getNode(ISD::UNDEF, DL, SVT);
- else
- Op = DAG.getBitcast(SVT, Op);
- }
+ if (AnyFPVT != EVT()) {
+ SVT = AnyFPVT;
+ for (SDValue &Op : Ops) {
+ if (Op.getValueType() == SVT)
+ continue;
+ if (Op.isUndef())
+ Op = DAG.getNode(ISD::UNDEF, DL, SVT);
+ else
+ Op = DAG.getBitcast(SVT, Op);
}
}
diff --git a/llvm/test/CodeGen/AMDGPU/issue121601-combine-concat-vectors-assumes-f16.ll b/llvm/test/CodeGen/AMDGPU/issue121601-combine-concat-vectors-assumes-f16.ll
new file mode 100644
index 00000000000000..1a87887e28d72e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/issue121601-combine-concat-vectors-assumes-f16.ll
@@ -0,0 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck %s
+
+define <4 x float> @issue121601(bfloat %fptrunc) {
+; CHECK-LABEL: issue121601:
+; CHECK: ; %bb.0: ; %bb
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; CHECK-NEXT: v_mov_b32_e32 v1, v0
+; CHECK-NEXT: v_mov_b32_e32 v2, 0
+; CHECK-NEXT: v_mov_b32_e32 v3, 0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+bb:
+ %bitcast = bitcast bfloat %fptrunc to <1 x bfloat>
+ %shufflevector = shufflevector <1 x bfloat> %bitcast, <1 x bfloat> zeroinitializer, <2 x i32> zeroinitializer
+ %fpext = fpext <2 x bfloat> %shufflevector to <2 x float>
+ %shufflevector1 = shufflevector <2 x float> %fpext, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x float> %shufflevector1
+}
More information about the llvm-commits
mailing list