[llvm] da72822 - GlobalISel: Fix CSEMIRBuilder mishandling constant folds of vectors
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 18 14:21:11 PST 2022
Author: Matt Arsenault
Date: 2022-01-18T17:21:02-05:00
New Revision: da7282276385f82092fed4e73373bbfaf7cd8331
URL: https://github.com/llvm/llvm-project/commit/da7282276385f82092fed4e73373bbfaf7cd8331
DIFF: https://github.com/llvm/llvm-project/commit/da7282276385f82092fed4e73373bbfaf7cd8331.diff
LOG: GlobalISel: Fix CSEMIRBuilder mishandling constant folds of vectors
The vector constant fold was ignoring the requested result register, resulting in a
missing def when the fold occurred in the IRTranslator. Fixes some crashes
and verifier errors at -O0.
Alternatively we could pass DstOps to the constant fold functions.
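For reference, the change also alters the contract of the utility: ConstantFoldVectorBinop
now returns the Register of the folded G_BUILD_VECTOR (or an invalid Register() on failure)
instead of Optional<MachineInstr *>, and the caller is responsible for placing the result
into its own destination, e.g. with buildCopy as CSEMIRBuilder now does. A minimal usage
sketch from a caller's point of view follows; the names B, MRI, Dst and Add are illustrative
placeholders, not part of the patch, and the snippet assumes it runs inside LLVM with a
MachineIRBuilder whose insertion point is already set:

  // Assumed context: `Add` is a G_ADD whose operands are G_BUILD_VECTOR
  // constants, `Dst` is the register we want defined, `B` is a
  // MachineIRBuilder with a valid insertion point, `MRI` its
  // MachineRegisterInfo.
  Register Folded = llvm::ConstantFoldVectorBinop(
      TargetOpcode::G_ADD, Add.getOperand(1).getReg(),
      Add.getOperand(2).getReg(), MRI, B);
  if (Folded)
    B.buildCopy(Dst, Folded); // always defines Dst, which is what the fix requires
  // else: fold did not apply; emit the G_ADD normally.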
Added:
llvm/test/CodeGen/AMDGPU/GlobalISel/combiner-crash.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll
Modified:
llvm/include/llvm/CodeGen/GlobalISel/Utils.h
llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
llvm/lib/CodeGen/GlobalISel/Utils.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index 586df8b7d1166..aed915d2cc4b7 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -270,9 +270,10 @@ Optional<APFloat> ConstantFoldFPBinOp(unsigned Opcode, const Register Op1,
/// If successful, returns the G_BUILD_VECTOR representing the folded vector
/// constant. \p MIB should have an insertion point already set to create new
/// G_CONSTANT instructions as needed.
-Optional<MachineInstr *>
-ConstantFoldVectorBinop(unsigned Opcode, const Register Op1, const Register Op2,
- const MachineRegisterInfo &MRI, MachineIRBuilder &MIB);
+Register ConstantFoldVectorBinop(unsigned Opcode, const Register Op1,
+ const Register Op2,
+ const MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIB);
Optional<APInt> ConstantFoldExtOp(unsigned Opcode, const Register Op1,
uint64_t Imm, const MachineRegisterInfo &MRI);
diff --git a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
index 2676becdd8077..1a642e233a6ac 100644
--- a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
@@ -191,10 +191,10 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc,
assert(DstOps.size() == 1 && "Invalid dsts");
if (SrcOps[0].getLLTTy(*getMRI()).isVector()) {
// Try to constant fold vector constants.
- auto VecCst = ConstantFoldVectorBinop(
+ Register VecCst = ConstantFoldVectorBinop(
Opc, SrcOps[0].getReg(), SrcOps[1].getReg(), *getMRI(), *this);
if (VecCst)
- return MachineInstrBuilder(getMF(), *VecCst);
+ return buildCopy(DstOps[0], VecCst);
break;
}
if (Optional<APInt> Cst = ConstantFoldBinOp(Opc, SrcOps[0].getReg(),
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index ec74ace49b496..5cdf9be16c06d 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -592,17 +592,16 @@ Optional<APFloat> llvm::ConstantFoldFPBinOp(unsigned Opcode, const Register Op1,
return None;
}
-Optional<MachineInstr *>
-llvm::ConstantFoldVectorBinop(unsigned Opcode, const Register Op1,
- const Register Op2,
- const MachineRegisterInfo &MRI,
- MachineIRBuilder &MIB) {
+Register llvm::ConstantFoldVectorBinop(unsigned Opcode, const Register Op1,
+ const Register Op2,
+ const MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIB) {
auto *SrcVec1 = getOpcodeDef<GBuildVector>(Op1, MRI);
if (!SrcVec1)
- return None;
+ return Register();
auto *SrcVec2 = getOpcodeDef<GBuildVector>(Op2, MRI);
if (!SrcVec2)
- return None;
+ return Register();
const LLT EltTy = MRI.getType(SrcVec1->getSourceReg(0));
@@ -611,14 +610,14 @@ llvm::ConstantFoldVectorBinop(unsigned Opcode, const Register Op1,
auto MaybeCst = ConstantFoldBinOp(Opcode, SrcVec1->getSourceReg(Idx),
SrcVec2->getSourceReg(Idx), MRI);
if (!MaybeCst)
- return None;
+ return Register();
auto FoldedCstReg = MIB.buildConstant(EltTy, *MaybeCst).getReg(0);
FoldedElements.emplace_back(FoldedCstReg);
}
// Create the new vector constant.
auto CstVec =
MIB.buildBuildVector(MRI.getType(SrcVec1->getReg(0)), FoldedElements);
- return &*CstVec;
+ return CstVec.getReg(0);
}
bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combiner-crash.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combiner-crash.ll
new file mode 100644
index 0000000000000..b337a37dae99f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combiner-crash.ll
@@ -0,0 +1,8 @@
+; RUN: llc -O0 -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s
+
+define amdgpu_kernel void @test_long_add4() {
+entry:
+ %add = add <4 x i64> zeroinitializer, zeroinitializer
+ store <4 x i64> %add, <4 x i64> addrspace(1)* null, align 32
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll
new file mode 100644
index 0000000000000..9ee4c4b000ab9
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll
@@ -0,0 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -O0 -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -stop-after=irtranslator -o - %s | FileCheck %s
+
+; The CSEMIRBuilder was mishandling the result operand when constant
+; folding a vector operation, resulting in a missing def for the
+; stored value.
+define amdgpu_kernel void @constant_fold_vector_add() {
+ ; CHECK-LABEL: name: constant_fold_vector_add
+ ; CHECK: bb.1.entry:
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[C2]](s64), [[C2]](s64), [[C2]](s64), [[C2]](s64)
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY [[BUILD_VECTOR1]](<4 x s64>)
+ ; CHECK-NEXT: G_STORE [[COPY]](<4 x s64>), [[C1]](p1) :: (store (<4 x s64>) into `<4 x i64> addrspace(1)* null`, addrspace 1)
+ ; CHECK-NEXT: S_ENDPGM 0
+entry:
+ %add = add <4 x i64> zeroinitializer, zeroinitializer
+ store <4 x i64> %add, <4 x i64> addrspace(1)* null, align 32
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll
index c3cc9ce6e54c7..45f1baf1a0a23 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll
@@ -195,15 +195,16 @@ define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_v2i64_constant(<2 x i32 ad
; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C2]](s64), [[C2]](s64)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C2]](s64), [[C3]](s64)
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[BUILD_VECTOR4]](<2 x s64>)
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY9]](<2 x p1>)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s64>) = COPY [[BUILD_VECTOR4]](<2 x s64>)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[COPY9]](<2 x s64>)
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY10]](<2 x p1>)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32)
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
- ; CHECK-NEXT: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
+ ; CHECK-NEXT: S_SETPC_B64_return [[COPY11]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
%gep = getelementptr i32, <2 x i32 addrspace(1)*> %ptr, <2 x i64> <i64 1, i64 2>
ret <2 x i32 addrspace(1)*> %gep
}