[llvm] [SPIRV] Add support for the SPIR-V extension SPV_KHR_bfloat16 (PR #155645)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 4 05:09:27 PDT 2025
https://github.com/YixingZhang007 updated https://github.com/llvm/llvm-project/pull/155645
>From 1754281f3e4e13c322c7066dee0bc72b07d23d4f Mon Sep 17 00:00:00 2001
From: "Zhang, Yixing" <yixing.zhang at intel.com>
Date: Wed, 27 Aug 2025 03:42:59 -0700
Subject: [PATCH 1/3] add support for the SPIR-V extension SPV_KHR_bfloat16
---
llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 415 +++++++++---------
llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp | 3 +-
llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp | 40 +-
llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h | 13 +
llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp | 38 +-
.../lib/Target/SPIRV/SPIRVSymbolicOperands.td | 4 +
.../extensions/SPV_KHR_bfloat16/bfloat16.ll | 22 +
.../bfloat16_cooperative_matrix.ll | 20 +
.../SPV_KHR_bfloat16/bfloat16_dot.ll | 21 +
9 files changed, 355 insertions(+), 221 deletions(-)
create mode 100644 llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_bfloat16/bfloat16.ll
create mode 100644 llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_bfloat16/bfloat16_cooperative_matrix.ll
create mode 100644 llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_bfloat16/bfloat16_dot.ll
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 541269ab6bfce..7aeff7f2396ab 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -101,14 +101,14 @@ static cl::opt<bool>
char IRTranslator::ID = 0;
INITIALIZE_PASS_BEGIN(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
- false, false)
+ false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(StackProtector)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
- false, false)
+ false, false)
static void reportTranslationError(MachineFunction &MF,
const TargetPassConfig &TPC,
@@ -168,7 +168,6 @@ class DILocationVerifier : public GISelChangeObserver {
} // namespace
#endif // ifndef NDEBUG
-
void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<StackProtector>();
AU.addRequired<TargetPassConfig>();
@@ -360,8 +359,8 @@ bool IRTranslator::translateCompare(const User &U,
if (CmpInst::isIntPredicate(Pred))
MIRBuilder.buildICmp(Pred, Res, Op0, Op1, Flags);
else if (Pred == CmpInst::FCMP_FALSE)
- MIRBuilder.buildCopy(
- Res, getOrCreateVReg(*Constant::getNullValue(U.getType())));
+ MIRBuilder.buildCopy(Res,
+ getOrCreateVReg(*Constant::getNullValue(U.getType())));
else if (Pred == CmpInst::FCMP_TRUE)
MIRBuilder.buildCopy(
Res, getOrCreateVReg(*Constant::getAllOnesValue(U.getType())));
@@ -951,8 +950,8 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB,
assert(CB.PredInfo.Pred == CmpInst::ICMP_SLE &&
"Can only handle SLE ranges");
- const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
- const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
+ const APInt &Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
+ const APInt &High = cast<ConstantInt>(CB.CmpRHS)->getValue();
Register CmpOpReg = getOrCreateVReg(*CB.CmpMHS);
if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
@@ -987,16 +986,12 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB,
MIB.setDebugLoc(OldDbgLoc);
}
-bool IRTranslator::lowerJumpTableWorkItem(SwitchCG::SwitchWorkListItem W,
- MachineBasicBlock *SwitchMBB,
- MachineBasicBlock *CurMBB,
- MachineBasicBlock *DefaultMBB,
- MachineIRBuilder &MIB,
- MachineFunction::iterator BBI,
- BranchProbability UnhandledProbs,
- SwitchCG::CaseClusterIt I,
- MachineBasicBlock *Fallthrough,
- bool FallthroughUnreachable) {
+bool IRTranslator::lowerJumpTableWorkItem(
+ SwitchCG::SwitchWorkListItem W, MachineBasicBlock *SwitchMBB,
+ MachineBasicBlock *CurMBB, MachineBasicBlock *DefaultMBB,
+ MachineIRBuilder &MIB, MachineFunction::iterator BBI,
+ BranchProbability UnhandledProbs, SwitchCG::CaseClusterIt I,
+ MachineBasicBlock *Fallthrough, bool FallthroughUnreachable) {
using namespace SwitchCG;
MachineFunction *CurMF = SwitchMBB->getParent();
// FIXME: Optimize away range check based on pivot comparisons.
@@ -1058,14 +1053,11 @@ bool IRTranslator::lowerJumpTableWorkItem(SwitchCG::SwitchWorkListItem W,
}
return true;
}
-bool IRTranslator::lowerSwitchRangeWorkItem(SwitchCG::CaseClusterIt I,
- Value *Cond,
- MachineBasicBlock *Fallthrough,
- bool FallthroughUnreachable,
- BranchProbability UnhandledProbs,
- MachineBasicBlock *CurMBB,
- MachineIRBuilder &MIB,
- MachineBasicBlock *SwitchMBB) {
+bool IRTranslator::lowerSwitchRangeWorkItem(
+ SwitchCG::CaseClusterIt I, Value *Cond, MachineBasicBlock *Fallthrough,
+ bool FallthroughUnreachable, BranchProbability UnhandledProbs,
+ MachineBasicBlock *CurMBB, MachineIRBuilder &MIB,
+ MachineBasicBlock *SwitchMBB) {
using namespace SwitchCG;
const Value *RHS, *LHS, *MHS;
CmpInst::Predicate Pred;
@@ -1700,8 +1692,7 @@ bool IRTranslator::translateGetElementPtr(const User &U,
}
if (Offset != 0) {
- auto OffsetMIB =
- MIRBuilder.buildConstant(OffsetTy, Offset);
+ auto OffsetMIB = MIRBuilder.buildConstant(OffsetTy, Offset);
MIRBuilder.buildPtrAdd(getOrCreateVReg(U), BaseReg, OffsetMIB.getReg(0),
PtrAddFlagsWithConst(Offset));
@@ -1898,145 +1889,145 @@ bool IRTranslator::translateFixedPointIntrinsic(unsigned Op, const CallInst &CI,
Register Src0 = getOrCreateVReg(*CI.getOperand(0));
Register Src1 = getOrCreateVReg(*CI.getOperand(1));
uint64_t Scale = cast<ConstantInt>(CI.getOperand(2))->getZExtValue();
- MIRBuilder.buildInstr(Op, {Dst}, { Src0, Src1, Scale });
+ MIRBuilder.buildInstr(Op, {Dst}, {Src0, Src1, Scale});
return true;
}
unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
switch (ID) {
- default:
- break;
- case Intrinsic::acos:
- return TargetOpcode::G_FACOS;
- case Intrinsic::asin:
- return TargetOpcode::G_FASIN;
- case Intrinsic::atan:
- return TargetOpcode::G_FATAN;
- case Intrinsic::atan2:
- return TargetOpcode::G_FATAN2;
- case Intrinsic::bswap:
- return TargetOpcode::G_BSWAP;
- case Intrinsic::bitreverse:
- return TargetOpcode::G_BITREVERSE;
- case Intrinsic::fshl:
- return TargetOpcode::G_FSHL;
- case Intrinsic::fshr:
- return TargetOpcode::G_FSHR;
- case Intrinsic::ceil:
- return TargetOpcode::G_FCEIL;
- case Intrinsic::cos:
- return TargetOpcode::G_FCOS;
- case Intrinsic::cosh:
- return TargetOpcode::G_FCOSH;
- case Intrinsic::ctpop:
- return TargetOpcode::G_CTPOP;
- case Intrinsic::exp:
- return TargetOpcode::G_FEXP;
- case Intrinsic::exp2:
- return TargetOpcode::G_FEXP2;
- case Intrinsic::exp10:
- return TargetOpcode::G_FEXP10;
- case Intrinsic::fabs:
- return TargetOpcode::G_FABS;
- case Intrinsic::copysign:
- return TargetOpcode::G_FCOPYSIGN;
- case Intrinsic::minnum:
- return TargetOpcode::G_FMINNUM;
- case Intrinsic::maxnum:
- return TargetOpcode::G_FMAXNUM;
- case Intrinsic::minimum:
- return TargetOpcode::G_FMINIMUM;
- case Intrinsic::maximum:
- return TargetOpcode::G_FMAXIMUM;
- case Intrinsic::minimumnum:
- return TargetOpcode::G_FMINIMUMNUM;
- case Intrinsic::maximumnum:
- return TargetOpcode::G_FMAXIMUMNUM;
- case Intrinsic::canonicalize:
- return TargetOpcode::G_FCANONICALIZE;
- case Intrinsic::floor:
- return TargetOpcode::G_FFLOOR;
- case Intrinsic::fma:
- return TargetOpcode::G_FMA;
- case Intrinsic::log:
- return TargetOpcode::G_FLOG;
- case Intrinsic::log2:
- return TargetOpcode::G_FLOG2;
- case Intrinsic::log10:
- return TargetOpcode::G_FLOG10;
- case Intrinsic::ldexp:
- return TargetOpcode::G_FLDEXP;
- case Intrinsic::nearbyint:
- return TargetOpcode::G_FNEARBYINT;
- case Intrinsic::pow:
- return TargetOpcode::G_FPOW;
- case Intrinsic::powi:
- return TargetOpcode::G_FPOWI;
- case Intrinsic::rint:
- return TargetOpcode::G_FRINT;
- case Intrinsic::round:
- return TargetOpcode::G_INTRINSIC_ROUND;
- case Intrinsic::roundeven:
- return TargetOpcode::G_INTRINSIC_ROUNDEVEN;
- case Intrinsic::sin:
- return TargetOpcode::G_FSIN;
- case Intrinsic::sinh:
- return TargetOpcode::G_FSINH;
- case Intrinsic::sqrt:
- return TargetOpcode::G_FSQRT;
- case Intrinsic::tan:
- return TargetOpcode::G_FTAN;
- case Intrinsic::tanh:
- return TargetOpcode::G_FTANH;
- case Intrinsic::trunc:
- return TargetOpcode::G_INTRINSIC_TRUNC;
- case Intrinsic::readcyclecounter:
- return TargetOpcode::G_READCYCLECOUNTER;
- case Intrinsic::readsteadycounter:
- return TargetOpcode::G_READSTEADYCOUNTER;
- case Intrinsic::ptrmask:
- return TargetOpcode::G_PTRMASK;
- case Intrinsic::lrint:
- return TargetOpcode::G_INTRINSIC_LRINT;
- case Intrinsic::llrint:
- return TargetOpcode::G_INTRINSIC_LLRINT;
- // FADD/FMUL require checking the FMF, so are handled elsewhere.
- case Intrinsic::vector_reduce_fmin:
- return TargetOpcode::G_VECREDUCE_FMIN;
- case Intrinsic::vector_reduce_fmax:
- return TargetOpcode::G_VECREDUCE_FMAX;
- case Intrinsic::vector_reduce_fminimum:
- return TargetOpcode::G_VECREDUCE_FMINIMUM;
- case Intrinsic::vector_reduce_fmaximum:
- return TargetOpcode::G_VECREDUCE_FMAXIMUM;
- case Intrinsic::vector_reduce_add:
- return TargetOpcode::G_VECREDUCE_ADD;
- case Intrinsic::vector_reduce_mul:
- return TargetOpcode::G_VECREDUCE_MUL;
- case Intrinsic::vector_reduce_and:
- return TargetOpcode::G_VECREDUCE_AND;
- case Intrinsic::vector_reduce_or:
- return TargetOpcode::G_VECREDUCE_OR;
- case Intrinsic::vector_reduce_xor:
- return TargetOpcode::G_VECREDUCE_XOR;
- case Intrinsic::vector_reduce_smax:
- return TargetOpcode::G_VECREDUCE_SMAX;
- case Intrinsic::vector_reduce_smin:
- return TargetOpcode::G_VECREDUCE_SMIN;
- case Intrinsic::vector_reduce_umax:
- return TargetOpcode::G_VECREDUCE_UMAX;
- case Intrinsic::vector_reduce_umin:
- return TargetOpcode::G_VECREDUCE_UMIN;
- case Intrinsic::experimental_vector_compress:
- return TargetOpcode::G_VECTOR_COMPRESS;
- case Intrinsic::lround:
- return TargetOpcode::G_LROUND;
- case Intrinsic::llround:
- return TargetOpcode::G_LLROUND;
- case Intrinsic::get_fpenv:
- return TargetOpcode::G_GET_FPENV;
- case Intrinsic::get_fpmode:
- return TargetOpcode::G_GET_FPMODE;
+ default:
+ break;
+ case Intrinsic::acos:
+ return TargetOpcode::G_FACOS;
+ case Intrinsic::asin:
+ return TargetOpcode::G_FASIN;
+ case Intrinsic::atan:
+ return TargetOpcode::G_FATAN;
+ case Intrinsic::atan2:
+ return TargetOpcode::G_FATAN2;
+ case Intrinsic::bswap:
+ return TargetOpcode::G_BSWAP;
+ case Intrinsic::bitreverse:
+ return TargetOpcode::G_BITREVERSE;
+ case Intrinsic::fshl:
+ return TargetOpcode::G_FSHL;
+ case Intrinsic::fshr:
+ return TargetOpcode::G_FSHR;
+ case Intrinsic::ceil:
+ return TargetOpcode::G_FCEIL;
+ case Intrinsic::cos:
+ return TargetOpcode::G_FCOS;
+ case Intrinsic::cosh:
+ return TargetOpcode::G_FCOSH;
+ case Intrinsic::ctpop:
+ return TargetOpcode::G_CTPOP;
+ case Intrinsic::exp:
+ return TargetOpcode::G_FEXP;
+ case Intrinsic::exp2:
+ return TargetOpcode::G_FEXP2;
+ case Intrinsic::exp10:
+ return TargetOpcode::G_FEXP10;
+ case Intrinsic::fabs:
+ return TargetOpcode::G_FABS;
+ case Intrinsic::copysign:
+ return TargetOpcode::G_FCOPYSIGN;
+ case Intrinsic::minnum:
+ return TargetOpcode::G_FMINNUM;
+ case Intrinsic::maxnum:
+ return TargetOpcode::G_FMAXNUM;
+ case Intrinsic::minimum:
+ return TargetOpcode::G_FMINIMUM;
+ case Intrinsic::maximum:
+ return TargetOpcode::G_FMAXIMUM;
+ case Intrinsic::minimumnum:
+ return TargetOpcode::G_FMINIMUMNUM;
+ case Intrinsic::maximumnum:
+ return TargetOpcode::G_FMAXIMUMNUM;
+ case Intrinsic::canonicalize:
+ return TargetOpcode::G_FCANONICALIZE;
+ case Intrinsic::floor:
+ return TargetOpcode::G_FFLOOR;
+ case Intrinsic::fma:
+ return TargetOpcode::G_FMA;
+ case Intrinsic::log:
+ return TargetOpcode::G_FLOG;
+ case Intrinsic::log2:
+ return TargetOpcode::G_FLOG2;
+ case Intrinsic::log10:
+ return TargetOpcode::G_FLOG10;
+ case Intrinsic::ldexp:
+ return TargetOpcode::G_FLDEXP;
+ case Intrinsic::nearbyint:
+ return TargetOpcode::G_FNEARBYINT;
+ case Intrinsic::pow:
+ return TargetOpcode::G_FPOW;
+ case Intrinsic::powi:
+ return TargetOpcode::G_FPOWI;
+ case Intrinsic::rint:
+ return TargetOpcode::G_FRINT;
+ case Intrinsic::round:
+ return TargetOpcode::G_INTRINSIC_ROUND;
+ case Intrinsic::roundeven:
+ return TargetOpcode::G_INTRINSIC_ROUNDEVEN;
+ case Intrinsic::sin:
+ return TargetOpcode::G_FSIN;
+ case Intrinsic::sinh:
+ return TargetOpcode::G_FSINH;
+ case Intrinsic::sqrt:
+ return TargetOpcode::G_FSQRT;
+ case Intrinsic::tan:
+ return TargetOpcode::G_FTAN;
+ case Intrinsic::tanh:
+ return TargetOpcode::G_FTANH;
+ case Intrinsic::trunc:
+ return TargetOpcode::G_INTRINSIC_TRUNC;
+ case Intrinsic::readcyclecounter:
+ return TargetOpcode::G_READCYCLECOUNTER;
+ case Intrinsic::readsteadycounter:
+ return TargetOpcode::G_READSTEADYCOUNTER;
+ case Intrinsic::ptrmask:
+ return TargetOpcode::G_PTRMASK;
+ case Intrinsic::lrint:
+ return TargetOpcode::G_INTRINSIC_LRINT;
+ case Intrinsic::llrint:
+ return TargetOpcode::G_INTRINSIC_LLRINT;
+ // FADD/FMUL require checking the FMF, so are handled elsewhere.
+ case Intrinsic::vector_reduce_fmin:
+ return TargetOpcode::G_VECREDUCE_FMIN;
+ case Intrinsic::vector_reduce_fmax:
+ return TargetOpcode::G_VECREDUCE_FMAX;
+ case Intrinsic::vector_reduce_fminimum:
+ return TargetOpcode::G_VECREDUCE_FMINIMUM;
+ case Intrinsic::vector_reduce_fmaximum:
+ return TargetOpcode::G_VECREDUCE_FMAXIMUM;
+ case Intrinsic::vector_reduce_add:
+ return TargetOpcode::G_VECREDUCE_ADD;
+ case Intrinsic::vector_reduce_mul:
+ return TargetOpcode::G_VECREDUCE_MUL;
+ case Intrinsic::vector_reduce_and:
+ return TargetOpcode::G_VECREDUCE_AND;
+ case Intrinsic::vector_reduce_or:
+ return TargetOpcode::G_VECREDUCE_OR;
+ case Intrinsic::vector_reduce_xor:
+ return TargetOpcode::G_VECREDUCE_XOR;
+ case Intrinsic::vector_reduce_smax:
+ return TargetOpcode::G_VECREDUCE_SMAX;
+ case Intrinsic::vector_reduce_smin:
+ return TargetOpcode::G_VECREDUCE_SMIN;
+ case Intrinsic::vector_reduce_umax:
+ return TargetOpcode::G_VECREDUCE_UMAX;
+ case Intrinsic::vector_reduce_umin:
+ return TargetOpcode::G_VECREDUCE_UMIN;
+ case Intrinsic::experimental_vector_compress:
+ return TargetOpcode::G_VECTOR_COMPRESS;
+ case Intrinsic::lround:
+ return TargetOpcode::G_LROUND;
+ case Intrinsic::llround:
+ return TargetOpcode::G_LLROUND;
+ case Intrinsic::get_fpenv:
+ return TargetOpcode::G_GET_FPENV;
+ case Intrinsic::get_fpmode:
+ return TargetOpcode::G_GET_FPMODE;
}
return Intrinsic::not_intrinsic;
}
@@ -2086,7 +2077,7 @@ static unsigned getConstrainedOpcode(Intrinsic::ID ID) {
}
bool IRTranslator::translateConstrainedFPIntrinsic(
- const ConstrainedFPIntrinsic &FPI, MachineIRBuilder &MIRBuilder) {
+ const ConstrainedFPIntrinsic &FPI, MachineIRBuilder &MIRBuilder) {
fp::ExceptionBehavior EB = *FPI.getExceptionBehavior();
unsigned Opcode = getConstrainedOpcode(FPI.getIntrinsicID());
@@ -2226,17 +2217,18 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
case Intrinsic::dbg_declare: {
const DbgDeclareInst &DI = cast<DbgDeclareInst>(CI);
assert(DI.getVariable() && "Missing variable");
- translateDbgDeclareRecord(DI.getAddress(), DI.hasArgList(), DI.getVariable(),
- DI.getExpression(), DI.getDebugLoc(), MIRBuilder);
+ translateDbgDeclareRecord(DI.getAddress(), DI.hasArgList(),
+ DI.getVariable(), DI.getExpression(),
+ DI.getDebugLoc(), MIRBuilder);
return true;
}
case Intrinsic::dbg_label: {
const DbgLabelInst &DI = cast<DbgLabelInst>(CI);
assert(DI.getLabel() && "Missing label");
- assert(DI.getLabel()->isValidLocationForIntrinsic(
- MIRBuilder.getDebugLoc()) &&
- "Expected inlined-at fields to agree");
+ assert(
+ DI.getLabel()->isValidLocationForIntrinsic(MIRBuilder.getDebugLoc()) &&
+ "Expected inlined-at fields to agree");
MIRBuilder.buildDbgLabel(DI.getLabel());
return true;
@@ -2267,7 +2259,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
// This form of DBG_VALUE is target-independent.
const DbgValueInst &DI = cast<DbgValueInst>(CI);
translateDbgValueRecord(DI.getValue(), DI.hasArgList(), DI.getVariable(),
- DI.getExpression(), DI.getDebugLoc(), MIRBuilder);
+ DI.getExpression(), DI.getDebugLoc(), MIRBuilder);
return true;
}
case Intrinsic::uadd_with_overflow:
@@ -2306,21 +2298,29 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
// TODO: Preserve "int min is poison" arg in GMIR?
return translateUnaryOp(TargetOpcode::G_ABS, CI, MIRBuilder);
case Intrinsic::smul_fix:
- return translateFixedPointIntrinsic(TargetOpcode::G_SMULFIX, CI, MIRBuilder);
+ return translateFixedPointIntrinsic(TargetOpcode::G_SMULFIX, CI,
+ MIRBuilder);
case Intrinsic::umul_fix:
- return translateFixedPointIntrinsic(TargetOpcode::G_UMULFIX, CI, MIRBuilder);
+ return translateFixedPointIntrinsic(TargetOpcode::G_UMULFIX, CI,
+ MIRBuilder);
case Intrinsic::smul_fix_sat:
- return translateFixedPointIntrinsic(TargetOpcode::G_SMULFIXSAT, CI, MIRBuilder);
+ return translateFixedPointIntrinsic(TargetOpcode::G_SMULFIXSAT, CI,
+ MIRBuilder);
case Intrinsic::umul_fix_sat:
- return translateFixedPointIntrinsic(TargetOpcode::G_UMULFIXSAT, CI, MIRBuilder);
+ return translateFixedPointIntrinsic(TargetOpcode::G_UMULFIXSAT, CI,
+ MIRBuilder);
case Intrinsic::sdiv_fix:
- return translateFixedPointIntrinsic(TargetOpcode::G_SDIVFIX, CI, MIRBuilder);
+ return translateFixedPointIntrinsic(TargetOpcode::G_SDIVFIX, CI,
+ MIRBuilder);
case Intrinsic::udiv_fix:
- return translateFixedPointIntrinsic(TargetOpcode::G_UDIVFIX, CI, MIRBuilder);
+ return translateFixedPointIntrinsic(TargetOpcode::G_UDIVFIX, CI,
+ MIRBuilder);
case Intrinsic::sdiv_fix_sat:
- return translateFixedPointIntrinsic(TargetOpcode::G_SDIVFIXSAT, CI, MIRBuilder);
+ return translateFixedPointIntrinsic(TargetOpcode::G_SDIVFIXSAT, CI,
+ MIRBuilder);
case Intrinsic::udiv_fix_sat:
- return translateFixedPointIntrinsic(TargetOpcode::G_UDIVFIXSAT, CI, MIRBuilder);
+ return translateFixedPointIntrinsic(TargetOpcode::G_UDIVFIXSAT, CI,
+ MIRBuilder);
case Intrinsic::fmuladd: {
const TargetMachine &TM = MF->getTarget();
Register Dst = getOrCreateVReg(CI);
@@ -2435,11 +2435,11 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
case Intrinsic::ctlz: {
ConstantInt *Cst = cast<ConstantInt>(CI.getArgOperand(1));
bool isTrailing = ID == Intrinsic::cttz;
- unsigned Opcode = isTrailing
- ? Cst->isZero() ? TargetOpcode::G_CTTZ
- : TargetOpcode::G_CTTZ_ZERO_UNDEF
- : Cst->isZero() ? TargetOpcode::G_CTLZ
- : TargetOpcode::G_CTLZ_ZERO_UNDEF;
+ unsigned Opcode = isTrailing ? Cst->isZero()
+ ? TargetOpcode::G_CTTZ
+ : TargetOpcode::G_CTTZ_ZERO_UNDEF
+ : Cst->isZero() ? TargetOpcode::G_CTLZ
+ : TargetOpcode::G_CTLZ_ZERO_UNDEF;
MIRBuilder.buildInstr(Opcode, {getOrCreateVReg(CI)},
{getOrCreateVReg(*CI.getArgOperand(0))});
return true;
@@ -2478,8 +2478,8 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
case Intrinsic::write_register: {
Value *Arg = CI.getArgOperand(0);
MIRBuilder.buildInstr(TargetOpcode::G_WRITE_REGISTER)
- .addMetadata(cast<MDNode>(cast<MetadataAsValue>(Arg)->getMetadata()))
- .addUse(getOrCreateVReg(*CI.getArgOperand(1)));
+ .addMetadata(cast<MDNode>(cast<MetadataAsValue>(Arg)->getMetadata()))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(1)));
return true;
}
case Intrinsic::localescape: {
@@ -2659,7 +2659,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
return translateVectorDeinterleave2Intrinsic(CI, MIRBuilder);
}
-#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
+#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
case Intrinsic::INTRINSIC:
#include "llvm/IR/ConstrainedOps.def"
return translateConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(CI),
@@ -2765,9 +2765,6 @@ bool IRTranslator::translateCallBase(const CallBase &CB,
}
bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
- if (containsBF16Type(U))
- return false;
-
const CallInst &CI = cast<CallInst>(U);
const Function *F = CI.getCalledFunction();
@@ -2878,8 +2875,7 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
}
bool IRTranslator::findUnwindDestinations(
- const BasicBlock *EHPadBB,
- BranchProbability Prob,
+ const BasicBlock *EHPadBB, BranchProbability Prob,
SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
&UnwindDests) {
EHPersonality Personality = classifyEHPersonality(
@@ -2962,8 +2958,8 @@ bool IRTranslator::translateInvoke(const User &U,
// FIXME: support Windows dllimport function calls and calls through
// weak symbols.
if (Fn && (Fn->hasDLLImportStorageClass() ||
- (MF->getTarget().getTargetTriple().isOSWindows() &&
- Fn->hasExternalWeakLinkage())))
+ (MF->getTarget().getTargetTriple().isOSWindows() &&
+ Fn->hasExternalWeakLinkage())))
return false;
bool LowerInlineAsm = I.isInlineAsm();
@@ -3050,8 +3046,7 @@ bool IRTranslator::translateLandingPad(const User &U,
// Add a label to mark the beginning of the landing pad. Deletion of the
// landing pad can thus be detected via the MachineModuleInfo.
- MIRBuilder.buildInstr(TargetOpcode::EH_LABEL)
- .addSym(MF->addLandingPad(&MBB));
+ MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(MF->addLandingPad(&MBB));
// If the unwinder does not preserve all registers, ensure that the
// function marks the clobbered registers as used.
@@ -3479,8 +3474,7 @@ bool IRTranslator::translateAtomicRMW(const User &U,
return true;
}
-bool IRTranslator::translateFence(const User &U,
- MachineIRBuilder &MIRBuilder) {
+bool IRTranslator::translateFence(const User &U, MachineIRBuilder &MIRBuilder) {
const FenceInst &Fence = cast<FenceInst>(U);
MIRBuilder.buildFence(static_cast<unsigned>(Fence.getOrdering()),
Fence.getSyncScopeID());
@@ -3538,10 +3532,10 @@ void IRTranslator::finishPendingPhis() {
}
void IRTranslator::translateDbgValueRecord(Value *V, bool HasArgList,
- const DILocalVariable *Variable,
- const DIExpression *Expression,
- const DebugLoc &DL,
- MachineIRBuilder &MIRBuilder) {
+ const DILocalVariable *Variable,
+ const DIExpression *Expression,
+ const DebugLoc &DL,
+ MachineIRBuilder &MIRBuilder) {
assert(Variable->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
// Act as if we're handling a debug intrinsic.
@@ -3584,10 +3578,10 @@ void IRTranslator::translateDbgValueRecord(Value *V, bool HasArgList,
}
void IRTranslator::translateDbgDeclareRecord(Value *Address, bool HasArgList,
- const DILocalVariable *Variable,
- const DIExpression *Expression,
- const DebugLoc &DL,
- MachineIRBuilder &MIRBuilder) {
+ const DILocalVariable *Variable,
+ const DIExpression *Expression,
+ const DebugLoc &DL,
+ MachineIRBuilder &MIRBuilder) {
if (!Address || isa<UndefValue>(Address)) {
LLVM_DEBUG(dbgs() << "Dropping debug info for " << *Variable << "\n");
return;
@@ -3599,13 +3593,12 @@ void IRTranslator::translateDbgDeclareRecord(Value *Address, bool HasArgList,
if (AI && AI->isStaticAlloca()) {
// Static allocas are tracked at the MF level, no need for DBG_VALUE
// instructions (in fact, they get ignored if they *do* exist).
- MF->setVariableDbgInfo(Variable, Expression,
- getOrCreateFrameIndex(*AI), DL);
+ MF->setVariableDbgInfo(Variable, Expression, getOrCreateFrameIndex(*AI),
+ DL);
return;
}
- if (translateIfEntryValueArgument(true, Address, Variable,
- Expression, DL,
+ if (translateIfEntryValueArgument(true, Address, Variable, Expression, DL,
MIRBuilder))
return;
@@ -3617,7 +3610,7 @@ void IRTranslator::translateDbgDeclareRecord(Value *Address, bool HasArgList,
}
void IRTranslator::translateDbgInfo(const Instruction &Inst,
- MachineIRBuilder &MIRBuilder) {
+ MachineIRBuilder &MIRBuilder) {
for (DbgRecord &DR : Inst.getDbgRecordRange()) {
if (DbgLabelRecord *DLR = dyn_cast<DbgLabelRecord>(&DR)) {
MIRBuilder.setDebugLoc(DLR->getDebugLoc());
@@ -3708,7 +3701,7 @@ bool IRTranslator::translate(const Constant &C, Register Reg) {
}
EntryBuilder->buildBuildVector(Reg, Ops);
} else if (auto CE = dyn_cast<ConstantExpr>(&C)) {
- switch(CE->getOpcode()) {
+ switch (CE->getOpcode()) {
#define HANDLE_INST(NUM, OPCODE, CLASS) \
case Instruction::OPCODE: \
return translate##OPCODE(*CE, *EntryBuilder.get());
@@ -4074,7 +4067,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
// Create all blocks, in IR order, to preserve the layout.
FuncInfo.MBBMap.resize(F.getMaxBlockNumber());
- for (const BasicBlock &BB: F) {
+ for (const BasicBlock &BB : F) {
auto *&MBB = FuncInfo.MBBMap[BB.getNumber()];
MBB = MF->CreateMachineBasicBlock(&BB);
@@ -4103,7 +4096,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
// Lower the actual args into this basic block.
SmallVector<ArrayRef<Register>, 8> VRegArgs;
- for (const Argument &Arg: F.args()) {
+ for (const Argument &Arg : F.args()) {
if (DL->getTypeStoreSize(Arg.getType()).isZero())
continue; // Don't handle zero sized types.
ArrayRef<Register> VRegs = getOrCreateVRegs(Arg);
diff --git a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
index e7da5504b2d58..993de9e9f64ec 100644
--- a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
@@ -147,7 +147,8 @@ static const std::map<std::string, SPIRV::Extension::Extension, std::less<>>
{"SPV_KHR_float_controls2",
SPIRV::Extension::Extension::SPV_KHR_float_controls2},
{"SPV_INTEL_tensor_float32_conversion",
- SPIRV::Extension::Extension::SPV_INTEL_tensor_float32_conversion}};
+ SPIRV::Extension::Extension::SPV_INTEL_tensor_float32_conversion},
+ {"SPV_KHR_bfloat16", SPIRV::Extension::Extension::SPV_KHR_bfloat16}};
bool SPIRVExtensionsParser::parse(cl::Option &O, StringRef ArgName,
StringRef ArgValue,
diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
index cfe24c84941a9..ce9ebb619f242 100644
--- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
@@ -1122,7 +1122,19 @@ SPIRVType *SPIRVGlobalRegistry::restOfCreateSPIRVType(
SPIRVType *SpirvType = createSPIRVType(Ty, MIRBuilder, AccessQual,
ExplicitLayoutRequired, EmitIR);
TypesInProcessing.erase(Ty);
- VRegToTypeMap[&MIRBuilder.getMF()][getSPIRVTypeID(SpirvType)] = SpirvType;
+
+ // Record the FPVariant of the floating-point registers in the
+ // VRegFPVariantMap.
+ MachineFunction *MF = &MIRBuilder.getMF();
+ Register TypeReg = getSPIRVTypeID(SpirvType);
+ if (Ty->isFloatingPointTy()) {
+ if (Ty->isBFloatTy()) {
+ VRegFPVariantMap[MF][TypeReg] = FPVariant::BRAIN_FLOAT;
+ } else {
+ VRegFPVariantMap[MF][TypeReg] = FPVariant::IEEE_FLOAT;
+ }
+ }
+ VRegToTypeMap[MF][TypeReg] = SpirvType;
// TODO: We could end up with two SPIR-V types pointing to the same llvm type.
// Is that a problem?
@@ -1679,11 +1691,15 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVType(unsigned BitWidth,
MachineIRBuilder MIRBuilder(DepMBB, DepMBB.getFirstNonPHI());
const MachineInstr *NewMI =
createOpType(MIRBuilder, [&](MachineIRBuilder &MIRBuilder) {
- return BuildMI(MIRBuilder.getMBB(), *MIRBuilder.getInsertPt(),
- MIRBuilder.getDL(), TII.get(SPIRVOPcode))
- .addDef(createTypeVReg(CurMF->getRegInfo()))
- .addImm(BitWidth)
- .addImm(0);
+ auto MIB = BuildMI(MIRBuilder.getMBB(), *MIRBuilder.getInsertPt(),
+ MIRBuilder.getDL(), TII.get(SPIRVOPcode))
+ .addDef(createTypeVReg(CurMF->getRegInfo()))
+ .addImm(BitWidth);
+
+ if (SPIRVOPcode != SPIRV::OpTypeFloat)
+ MIB.addImm(0);
+
+ return MIB;
});
add(Ty, false, NewMI);
return finishCreatingSPIRVType(Ty, NewMI);
@@ -2088,3 +2104,15 @@ bool SPIRVGlobalRegistry::hasBlockDecoration(SPIRVType *Type) const {
}
return false;
}
+
+SPIRVGlobalRegistry::FPVariant
+SPIRVGlobalRegistry::getFPVariantForVReg(Register VReg,
+ const MachineFunction *MF) {
+ auto t = VRegFPVariantMap.find(MF ? MF : CurMF);
+ if (t != VRegFPVariantMap.end()) {
+ auto tt = t->second.find(VReg);
+ if (tt != t->second.end())
+ return tt->second;
+ }
+ return FPVariant::NONE;
+}
diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
index 7ef812828b7cc..1f8c30dc01f7f 100644
--- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
+++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
@@ -29,6 +29,10 @@ using SPIRVType = const MachineInstr;
using StructOffsetDecorator = std::function<void(Register)>;
class SPIRVGlobalRegistry : public SPIRVIRMapping {
+public:
+ enum class FPVariant { NONE, IEEE_FLOAT, BRAIN_FLOAT };
+
+private:
// Registers holding values which have types associated with them.
// Initialized upon VReg definition in IRTranslator.
// Do not confuse this with DuplicatesTracker as DT maps Type* to <MF, Reg>
@@ -88,6 +92,11 @@ class SPIRVGlobalRegistry : public SPIRVIRMapping {
// map of aliasing decorations to aliasing metadata
std::unordered_map<const MDNode *, MachineInstr *> AliasInstMDMap;
+ // Maps floating-point registers to their FPVariant (float type kind), given
+ // the MachineFunction.
+ DenseMap<const MachineFunction *, DenseMap<Register, FPVariant>>
+ VRegFPVariantMap;
+
// Add a new OpTypeXXX instruction without checking for duplicates.
SPIRVType *createSPIRVType(const Type *Type, MachineIRBuilder &MIRBuilder,
SPIRV::AccessQualifier::AccessQualifier AQ,
@@ -422,6 +431,10 @@ class SPIRVGlobalRegistry : public SPIRVIRMapping {
// structures referring this instruction.
void invalidateMachineInstr(MachineInstr *MI);
+ // Return the FPVariant of the given floating-point register.
+ FPVariant getFPVariantForVReg(Register VReg,
+ const MachineFunction *MF = nullptr);
+
private:
SPIRVType *getOpTypeBool(MachineIRBuilder &MIRBuilder);
diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
index 8039cf0c432fa..b8041725c9050 100644
--- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
@@ -1261,12 +1261,35 @@ void addInstrRequirements(const MachineInstr &MI,
Reqs.addCapability(SPIRV::Capability::Int8);
break;
}
+ case SPIRV::OpDot: {
+ const MachineFunction *MF = MI.getMF();
+ SPIRVGlobalRegistry *GR = ST.getSPIRVGlobalRegistry();
+ SPIRVGlobalRegistry::FPVariant FPV =
+ GR->getFPVariantForVReg(MI.getOperand(1).getReg(), MF);
+ if (FPV == SPIRVGlobalRegistry::FPVariant::BRAIN_FLOAT) {
+ Reqs.addCapability(SPIRV::Capability::BFloat16DotProductKHR);
+ }
+ break;
+ }
case SPIRV::OpTypeFloat: {
unsigned BitWidth = MI.getOperand(1).getImm();
if (BitWidth == 64)
Reqs.addCapability(SPIRV::Capability::Float64);
- else if (BitWidth == 16)
+ else if (BitWidth == 16) {
+ SPIRVGlobalRegistry *GR = ST.getSPIRVGlobalRegistry();
+ const MachineFunction *MF = MI.getMF();
+ SPIRVGlobalRegistry::FPVariant FPV =
+ GR->getFPVariantForVReg(MI.getOperand(0).getReg(), MF);
+ if (FPV == SPIRVGlobalRegistry::FPVariant::BRAIN_FLOAT) {
+ if (!ST.canUseExtension(SPIRV::Extension::SPV_KHR_bfloat16))
+ report_fatal_error("OpTypeFloat type with bfloat requires the "
+ "following SPIR-V extension: SPV_KHR_bfloat16",
+ false);
+ Reqs.addExtension(SPIRV::Extension::SPV_KHR_bfloat16);
+ Reqs.addCapability(SPIRV::Capability::BFloat16TypeKHR);
+ }
Reqs.addCapability(SPIRV::Capability::Float16);
+ }
break;
}
case SPIRV::OpTypeVector: {
@@ -1593,15 +1616,24 @@ void addInstrRequirements(const MachineInstr &MI,
Reqs.addCapability(SPIRV::Capability::AsmINTEL);
}
break;
- case SPIRV::OpTypeCooperativeMatrixKHR:
+ case SPIRV::OpTypeCooperativeMatrixKHR: {
if (!ST.canUseExtension(SPIRV::Extension::SPV_KHR_cooperative_matrix))
report_fatal_error(
"OpTypeCooperativeMatrixKHR type requires the "
"following SPIR-V extension: SPV_KHR_cooperative_matrix",
false);
Reqs.addExtension(SPIRV::Extension::SPV_KHR_cooperative_matrix);
- Reqs.addCapability(SPIRV::Capability::CooperativeMatrixKHR);
+ const MachineFunction *MF = MI.getMF();
+ SPIRVGlobalRegistry *GR = ST.getSPIRVGlobalRegistry();
+ SPIRVGlobalRegistry::FPVariant FPV =
+ GR->getFPVariantForVReg(MI.getOperand(1).getReg(), MF);
+ if (FPV == SPIRVGlobalRegistry::FPVariant::BRAIN_FLOAT) {
+ Reqs.addCapability(SPIRV::Capability::BFloat16CooperativeMatrixKHR);
+ } else {
+ Reqs.addCapability(SPIRV::Capability::CooperativeMatrixKHR);
+ }
break;
+ }
case SPIRV::OpArithmeticFenceEXT:
if (!ST.canUseExtension(SPIRV::Extension::SPV_EXT_arithmetic_fence))
report_fatal_error("OpArithmeticFenceEXT requires the "
diff --git a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
index d2824ee2d2caf..9d630356e8ffb 100644
--- a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
+++ b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
@@ -382,6 +382,7 @@ defm SPV_INTEL_2d_block_io : ExtensionOperand<122, [EnvOpenCL]>;
defm SPV_INTEL_int4 : ExtensionOperand<123, [EnvOpenCL]>;
defm SPV_KHR_float_controls2 : ExtensionOperand<124, [EnvVulkan, EnvOpenCL]>;
defm SPV_INTEL_tensor_float32_conversion : ExtensionOperand<125, [EnvOpenCL]>;
+defm SPV_KHR_bfloat16 : ExtensionOperand<126, [EnvOpenCL]>;
//===----------------------------------------------------------------------===//
// Multiclass used to define Capabilities enum values and at the same time
@@ -594,6 +595,9 @@ defm Subgroup2DBlockTransposeINTEL : CapabilityOperand<6230, 0, 0, [SPV_INTEL_2d
defm Int4TypeINTEL : CapabilityOperand<5112, 0, 0, [SPV_INTEL_int4], []>;
defm Int4CooperativeMatrixINTEL : CapabilityOperand<5114, 0, 0, [SPV_INTEL_int4], [Int4TypeINTEL, CooperativeMatrixKHR]>;
defm TensorFloat32RoundingINTEL : CapabilityOperand<6425, 0, 0, [SPV_INTEL_tensor_float32_conversion], []>;
+defm BFloat16TypeKHR : CapabilityOperand<5116, 0, 0, [SPV_KHR_bfloat16], []>;
+defm BFloat16DotProductKHR : CapabilityOperand<5117, 0, 0, [SPV_KHR_bfloat16], [BFloat16TypeKHR]>;
+defm BFloat16CooperativeMatrixKHR : CapabilityOperand<5118, 0, 0, [SPV_KHR_bfloat16], [BFloat16TypeKHR, CooperativeMatrixKHR]>;
//===----------------------------------------------------------------------===//
// Multiclass used to define SourceLanguage enum values and at the same time
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_bfloat16/bfloat16.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_bfloat16/bfloat16.ll
new file mode 100644
index 0000000000000..bfc84691f6945
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_bfloat16/bfloat16.ll
@@ -0,0 +1,22 @@
+; RUN: not llc -O0 -mtriple=spirv32-unknown-unknown %s -o %t.spvt 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_KHR_bfloat16 %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_KHR_bfloat16 %s -o - -filetype=obj | spirv-val %}
+
+; CHECK-ERROR: LLVM ERROR: OpTypeFloat type with bfloat requires the following SPIR-V extension: SPV_KHR_bfloat16
+
+; CHECK-DAG: OpCapability BFloat16TypeKHR
+; CHECK-DAG: OpExtension "SPV_KHR_bfloat16"
+; CHECK: %[[#BFLOAT:]] = OpTypeFloat 16
+; CHECK: %[[#]] = OpTypeVector %[[#BFLOAT]] 2
+
+target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
+target triple = "spir64-unknown-unknown"
+
+define spir_kernel void @test() {
+entry:
+ %addr1 = alloca bfloat
+ %addr2 = alloca <2 x bfloat>
+ %data1 = load bfloat, ptr %addr1
+ %data2 = load <2 x bfloat>, ptr %addr2
+ ret void
+}
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_bfloat16/bfloat16_cooperative_matrix.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_bfloat16/bfloat16_cooperative_matrix.ll
new file mode 100644
index 0000000000000..5a6e6d88ca6a0
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_bfloat16/bfloat16_cooperative_matrix.ll
@@ -0,0 +1,20 @@
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_KHR_bfloat16,+SPV_KHR_cooperative_matrix %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_KHR_bfloat16,+SPV_KHR_cooperative_matrix %s -o - -filetype=obj | spirv-val %}
+
+; CHECK-DAG: OpCapability BFloat16TypeKHR
+; CHECK-DAG: OpCapability BFloat16CooperativeMatrixKHR
+; CHECK-DAG: OpExtension "SPV_KHR_bfloat16"
+; CHECK: %[[#BFLOAT:]] = OpTypeFloat 16
+; CHECK: %[[#MatTy:]] = OpTypeCooperativeMatrixKHR %[[#BFLOAT]] %[[#]] %[[#]] %[[#]] %[[#]]
+; CHECK: OpCompositeConstruct %[[#MatTy]] %[[#]]
+
+define spir_kernel void @matr_mult(ptr addrspace(1) align 1 %_arg_accA, ptr addrspace(1) align 1 %_arg_accB, ptr addrspace(1) align 4 %_arg_accC, i64 %_arg_N, i64 %_arg_K) {
+entry:
+ %addr1 = alloca target("spirv.CooperativeMatrixKHR", bfloat, 3, 12, 12, 2), align 4
+ %res = alloca target("spirv.CooperativeMatrixKHR", bfloat, 3, 12, 12, 2), align 4
+ %m1 = tail call spir_func target("spirv.CooperativeMatrixKHR", bfloat, 3, 12, 12, 2) @_Z26__spirv_CompositeConstruct(bfloat 1.0)
+ store target("spirv.CooperativeMatrixKHR", bfloat, 3, 12, 12, 2) %m1, ptr %addr1, align 4
+ ret void
+}
+
+declare dso_local spir_func target("spirv.CooperativeMatrixKHR", bfloat, 3, 12, 12, 2) @_Z26__spirv_CompositeConstruct(bfloat)
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_bfloat16/bfloat16_dot.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_bfloat16/bfloat16_dot.ll
new file mode 100644
index 0000000000000..7cfe29261f2cd
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_bfloat16/bfloat16_dot.ll
@@ -0,0 +1,21 @@
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_KHR_bfloat16 %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_KHR_bfloat16 %s -o - -filetype=obj | spirv-val %}
+
+; CHECK-DAG: OpCapability BFloat16TypeKHR
+; CHECK-DAG: OpCapability BFloat16DotProductKHR
+; CHECK-DAG: OpExtension "SPV_KHR_bfloat16"
+; CHECK: %[[#BFLOAT:]] = OpTypeFloat 16
+; CHECK: %[[#]] = OpTypeVector %[[#BFLOAT]] 2
+; CHECK: OpDot
+
+declare spir_func bfloat @_Z3dotDv2_u6__bf16Dv2_S_(<2 x bfloat>, <2 x bfloat>)
+
+define spir_kernel void @test() {
+entry:
+ %addrA = alloca <2 x bfloat>
+ %addrB = alloca <2 x bfloat>
+ %dataA = load <2 x bfloat>, ptr %addrA
+ %dataB = load <2 x bfloat>, ptr %addrB
+ %call = call spir_func bfloat @_Z3dotDv2_u6__bf16Dv2_S_(<2 x bfloat> %dataA, <2 x bfloat> %dataB)
+ ret void
+}
>From 9bfd01702f672b04e55e4e6e229ee121668c3eaa Mon Sep 17 00:00:00 2001
From: "Zhang, Yixing" <yixing.zhang at intel.com>
Date: Thu, 4 Sep 2025 05:08:32 -0700
Subject: [PATCH 2/3] undo incorrect clang format change
---
llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 412 ++++++++++---------
1 file changed, 208 insertions(+), 204 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 7aeff7f2396ab..8354929e11cd2 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -101,14 +101,14 @@ static cl::opt<bool>
char IRTranslator::ID = 0;
INITIALIZE_PASS_BEGIN(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
- false, false)
+ false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(StackProtector)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
- false, false)
+ false, false)
static void reportTranslationError(MachineFunction &MF,
const TargetPassConfig &TPC,
@@ -168,6 +168,7 @@ class DILocationVerifier : public GISelChangeObserver {
} // namespace
#endif // ifndef NDEBUG
+
void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<StackProtector>();
AU.addRequired<TargetPassConfig>();
@@ -359,8 +360,8 @@ bool IRTranslator::translateCompare(const User &U,
if (CmpInst::isIntPredicate(Pred))
MIRBuilder.buildICmp(Pred, Res, Op0, Op1, Flags);
else if (Pred == CmpInst::FCMP_FALSE)
- MIRBuilder.buildCopy(Res,
- getOrCreateVReg(*Constant::getNullValue(U.getType())));
+ MIRBuilder.buildCopy(
+ Res, getOrCreateVReg(*Constant::getNullValue(U.getType())));
else if (Pred == CmpInst::FCMP_TRUE)
MIRBuilder.buildCopy(
Res, getOrCreateVReg(*Constant::getAllOnesValue(U.getType())));
@@ -950,8 +951,8 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB,
assert(CB.PredInfo.Pred == CmpInst::ICMP_SLE &&
"Can only handle SLE ranges");
- const APInt &Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
- const APInt &High = cast<ConstantInt>(CB.CmpRHS)->getValue();
+ const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
+ const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
Register CmpOpReg = getOrCreateVReg(*CB.CmpMHS);
if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
@@ -986,12 +987,16 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB,
MIB.setDebugLoc(OldDbgLoc);
}
-bool IRTranslator::lowerJumpTableWorkItem(
- SwitchCG::SwitchWorkListItem W, MachineBasicBlock *SwitchMBB,
- MachineBasicBlock *CurMBB, MachineBasicBlock *DefaultMBB,
- MachineIRBuilder &MIB, MachineFunction::iterator BBI,
- BranchProbability UnhandledProbs, SwitchCG::CaseClusterIt I,
- MachineBasicBlock *Fallthrough, bool FallthroughUnreachable) {
+bool IRTranslator::lowerJumpTableWorkItem(SwitchCG::SwitchWorkListItem W,
+ MachineBasicBlock *SwitchMBB,
+ MachineBasicBlock *CurMBB,
+ MachineBasicBlock *DefaultMBB,
+ MachineIRBuilder &MIB,
+ MachineFunction::iterator BBI,
+ BranchProbability UnhandledProbs,
+ SwitchCG::CaseClusterIt I,
+ MachineBasicBlock *Fallthrough,
+ bool FallthroughUnreachable) {
using namespace SwitchCG;
MachineFunction *CurMF = SwitchMBB->getParent();
// FIXME: Optimize away range check based on pivot comparisons.
@@ -1053,11 +1058,14 @@ bool IRTranslator::lowerJumpTableWorkItem(
}
return true;
}
-bool IRTranslator::lowerSwitchRangeWorkItem(
- SwitchCG::CaseClusterIt I, Value *Cond, MachineBasicBlock *Fallthrough,
- bool FallthroughUnreachable, BranchProbability UnhandledProbs,
- MachineBasicBlock *CurMBB, MachineIRBuilder &MIB,
- MachineBasicBlock *SwitchMBB) {
+bool IRTranslator::lowerSwitchRangeWorkItem(SwitchCG::CaseClusterIt I,
+ Value *Cond,
+ MachineBasicBlock *Fallthrough,
+ bool FallthroughUnreachable,
+ BranchProbability UnhandledProbs,
+ MachineBasicBlock *CurMBB,
+ MachineIRBuilder &MIB,
+ MachineBasicBlock *SwitchMBB) {
using namespace SwitchCG;
const Value *RHS, *LHS, *MHS;
CmpInst::Predicate Pred;
@@ -1692,7 +1700,8 @@ bool IRTranslator::translateGetElementPtr(const User &U,
}
if (Offset != 0) {
- auto OffsetMIB = MIRBuilder.buildConstant(OffsetTy, Offset);
+ auto OffsetMIB =
+ MIRBuilder.buildConstant(OffsetTy, Offset);
MIRBuilder.buildPtrAdd(getOrCreateVReg(U), BaseReg, OffsetMIB.getReg(0),
PtrAddFlagsWithConst(Offset));
@@ -1889,145 +1898,145 @@ bool IRTranslator::translateFixedPointIntrinsic(unsigned Op, const CallInst &CI,
Register Src0 = getOrCreateVReg(*CI.getOperand(0));
Register Src1 = getOrCreateVReg(*CI.getOperand(1));
uint64_t Scale = cast<ConstantInt>(CI.getOperand(2))->getZExtValue();
- MIRBuilder.buildInstr(Op, {Dst}, {Src0, Src1, Scale});
+ MIRBuilder.buildInstr(Op, {Dst}, { Src0, Src1, Scale });
return true;
}
unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
switch (ID) {
- default:
- break;
- case Intrinsic::acos:
- return TargetOpcode::G_FACOS;
- case Intrinsic::asin:
- return TargetOpcode::G_FASIN;
- case Intrinsic::atan:
- return TargetOpcode::G_FATAN;
- case Intrinsic::atan2:
- return TargetOpcode::G_FATAN2;
- case Intrinsic::bswap:
- return TargetOpcode::G_BSWAP;
- case Intrinsic::bitreverse:
- return TargetOpcode::G_BITREVERSE;
- case Intrinsic::fshl:
- return TargetOpcode::G_FSHL;
- case Intrinsic::fshr:
- return TargetOpcode::G_FSHR;
- case Intrinsic::ceil:
- return TargetOpcode::G_FCEIL;
- case Intrinsic::cos:
- return TargetOpcode::G_FCOS;
- case Intrinsic::cosh:
- return TargetOpcode::G_FCOSH;
- case Intrinsic::ctpop:
- return TargetOpcode::G_CTPOP;
- case Intrinsic::exp:
- return TargetOpcode::G_FEXP;
- case Intrinsic::exp2:
- return TargetOpcode::G_FEXP2;
- case Intrinsic::exp10:
- return TargetOpcode::G_FEXP10;
- case Intrinsic::fabs:
- return TargetOpcode::G_FABS;
- case Intrinsic::copysign:
- return TargetOpcode::G_FCOPYSIGN;
- case Intrinsic::minnum:
- return TargetOpcode::G_FMINNUM;
- case Intrinsic::maxnum:
- return TargetOpcode::G_FMAXNUM;
- case Intrinsic::minimum:
- return TargetOpcode::G_FMINIMUM;
- case Intrinsic::maximum:
- return TargetOpcode::G_FMAXIMUM;
- case Intrinsic::minimumnum:
- return TargetOpcode::G_FMINIMUMNUM;
- case Intrinsic::maximumnum:
- return TargetOpcode::G_FMAXIMUMNUM;
- case Intrinsic::canonicalize:
- return TargetOpcode::G_FCANONICALIZE;
- case Intrinsic::floor:
- return TargetOpcode::G_FFLOOR;
- case Intrinsic::fma:
- return TargetOpcode::G_FMA;
- case Intrinsic::log:
- return TargetOpcode::G_FLOG;
- case Intrinsic::log2:
- return TargetOpcode::G_FLOG2;
- case Intrinsic::log10:
- return TargetOpcode::G_FLOG10;
- case Intrinsic::ldexp:
- return TargetOpcode::G_FLDEXP;
- case Intrinsic::nearbyint:
- return TargetOpcode::G_FNEARBYINT;
- case Intrinsic::pow:
- return TargetOpcode::G_FPOW;
- case Intrinsic::powi:
- return TargetOpcode::G_FPOWI;
- case Intrinsic::rint:
- return TargetOpcode::G_FRINT;
- case Intrinsic::round:
- return TargetOpcode::G_INTRINSIC_ROUND;
- case Intrinsic::roundeven:
- return TargetOpcode::G_INTRINSIC_ROUNDEVEN;
- case Intrinsic::sin:
- return TargetOpcode::G_FSIN;
- case Intrinsic::sinh:
- return TargetOpcode::G_FSINH;
- case Intrinsic::sqrt:
- return TargetOpcode::G_FSQRT;
- case Intrinsic::tan:
- return TargetOpcode::G_FTAN;
- case Intrinsic::tanh:
- return TargetOpcode::G_FTANH;
- case Intrinsic::trunc:
- return TargetOpcode::G_INTRINSIC_TRUNC;
- case Intrinsic::readcyclecounter:
- return TargetOpcode::G_READCYCLECOUNTER;
- case Intrinsic::readsteadycounter:
- return TargetOpcode::G_READSTEADYCOUNTER;
- case Intrinsic::ptrmask:
- return TargetOpcode::G_PTRMASK;
- case Intrinsic::lrint:
- return TargetOpcode::G_INTRINSIC_LRINT;
- case Intrinsic::llrint:
- return TargetOpcode::G_INTRINSIC_LLRINT;
- // FADD/FMUL require checking the FMF, so are handled elsewhere.
- case Intrinsic::vector_reduce_fmin:
- return TargetOpcode::G_VECREDUCE_FMIN;
- case Intrinsic::vector_reduce_fmax:
- return TargetOpcode::G_VECREDUCE_FMAX;
- case Intrinsic::vector_reduce_fminimum:
- return TargetOpcode::G_VECREDUCE_FMINIMUM;
- case Intrinsic::vector_reduce_fmaximum:
- return TargetOpcode::G_VECREDUCE_FMAXIMUM;
- case Intrinsic::vector_reduce_add:
- return TargetOpcode::G_VECREDUCE_ADD;
- case Intrinsic::vector_reduce_mul:
- return TargetOpcode::G_VECREDUCE_MUL;
- case Intrinsic::vector_reduce_and:
- return TargetOpcode::G_VECREDUCE_AND;
- case Intrinsic::vector_reduce_or:
- return TargetOpcode::G_VECREDUCE_OR;
- case Intrinsic::vector_reduce_xor:
- return TargetOpcode::G_VECREDUCE_XOR;
- case Intrinsic::vector_reduce_smax:
- return TargetOpcode::G_VECREDUCE_SMAX;
- case Intrinsic::vector_reduce_smin:
- return TargetOpcode::G_VECREDUCE_SMIN;
- case Intrinsic::vector_reduce_umax:
- return TargetOpcode::G_VECREDUCE_UMAX;
- case Intrinsic::vector_reduce_umin:
- return TargetOpcode::G_VECREDUCE_UMIN;
- case Intrinsic::experimental_vector_compress:
- return TargetOpcode::G_VECTOR_COMPRESS;
- case Intrinsic::lround:
- return TargetOpcode::G_LROUND;
- case Intrinsic::llround:
- return TargetOpcode::G_LLROUND;
- case Intrinsic::get_fpenv:
- return TargetOpcode::G_GET_FPENV;
- case Intrinsic::get_fpmode:
- return TargetOpcode::G_GET_FPMODE;
+ default:
+ break;
+ case Intrinsic::acos:
+ return TargetOpcode::G_FACOS;
+ case Intrinsic::asin:
+ return TargetOpcode::G_FASIN;
+ case Intrinsic::atan:
+ return TargetOpcode::G_FATAN;
+ case Intrinsic::atan2:
+ return TargetOpcode::G_FATAN2;
+ case Intrinsic::bswap:
+ return TargetOpcode::G_BSWAP;
+ case Intrinsic::bitreverse:
+ return TargetOpcode::G_BITREVERSE;
+ case Intrinsic::fshl:
+ return TargetOpcode::G_FSHL;
+ case Intrinsic::fshr:
+ return TargetOpcode::G_FSHR;
+ case Intrinsic::ceil:
+ return TargetOpcode::G_FCEIL;
+ case Intrinsic::cos:
+ return TargetOpcode::G_FCOS;
+ case Intrinsic::cosh:
+ return TargetOpcode::G_FCOSH;
+ case Intrinsic::ctpop:
+ return TargetOpcode::G_CTPOP;
+ case Intrinsic::exp:
+ return TargetOpcode::G_FEXP;
+ case Intrinsic::exp2:
+ return TargetOpcode::G_FEXP2;
+ case Intrinsic::exp10:
+ return TargetOpcode::G_FEXP10;
+ case Intrinsic::fabs:
+ return TargetOpcode::G_FABS;
+ case Intrinsic::copysign:
+ return TargetOpcode::G_FCOPYSIGN;
+ case Intrinsic::minnum:
+ return TargetOpcode::G_FMINNUM;
+ case Intrinsic::maxnum:
+ return TargetOpcode::G_FMAXNUM;
+ case Intrinsic::minimum:
+ return TargetOpcode::G_FMINIMUM;
+ case Intrinsic::maximum:
+ return TargetOpcode::G_FMAXIMUM;
+ case Intrinsic::minimumnum:
+ return TargetOpcode::G_FMINIMUMNUM;
+ case Intrinsic::maximumnum:
+ return TargetOpcode::G_FMAXIMUMNUM;
+ case Intrinsic::canonicalize:
+ return TargetOpcode::G_FCANONICALIZE;
+ case Intrinsic::floor:
+ return TargetOpcode::G_FFLOOR;
+ case Intrinsic::fma:
+ return TargetOpcode::G_FMA;
+ case Intrinsic::log:
+ return TargetOpcode::G_FLOG;
+ case Intrinsic::log2:
+ return TargetOpcode::G_FLOG2;
+ case Intrinsic::log10:
+ return TargetOpcode::G_FLOG10;
+ case Intrinsic::ldexp:
+ return TargetOpcode::G_FLDEXP;
+ case Intrinsic::nearbyint:
+ return TargetOpcode::G_FNEARBYINT;
+ case Intrinsic::pow:
+ return TargetOpcode::G_FPOW;
+ case Intrinsic::powi:
+ return TargetOpcode::G_FPOWI;
+ case Intrinsic::rint:
+ return TargetOpcode::G_FRINT;
+ case Intrinsic::round:
+ return TargetOpcode::G_INTRINSIC_ROUND;
+ case Intrinsic::roundeven:
+ return TargetOpcode::G_INTRINSIC_ROUNDEVEN;
+ case Intrinsic::sin:
+ return TargetOpcode::G_FSIN;
+ case Intrinsic::sinh:
+ return TargetOpcode::G_FSINH;
+ case Intrinsic::sqrt:
+ return TargetOpcode::G_FSQRT;
+ case Intrinsic::tan:
+ return TargetOpcode::G_FTAN;
+ case Intrinsic::tanh:
+ return TargetOpcode::G_FTANH;
+ case Intrinsic::trunc:
+ return TargetOpcode::G_INTRINSIC_TRUNC;
+ case Intrinsic::readcyclecounter:
+ return TargetOpcode::G_READCYCLECOUNTER;
+ case Intrinsic::readsteadycounter:
+ return TargetOpcode::G_READSTEADYCOUNTER;
+ case Intrinsic::ptrmask:
+ return TargetOpcode::G_PTRMASK;
+ case Intrinsic::lrint:
+ return TargetOpcode::G_INTRINSIC_LRINT;
+ case Intrinsic::llrint:
+ return TargetOpcode::G_INTRINSIC_LLRINT;
+ // FADD/FMUL require checking the FMF, so are handled elsewhere.
+ case Intrinsic::vector_reduce_fmin:
+ return TargetOpcode::G_VECREDUCE_FMIN;
+ case Intrinsic::vector_reduce_fmax:
+ return TargetOpcode::G_VECREDUCE_FMAX;
+ case Intrinsic::vector_reduce_fminimum:
+ return TargetOpcode::G_VECREDUCE_FMINIMUM;
+ case Intrinsic::vector_reduce_fmaximum:
+ return TargetOpcode::G_VECREDUCE_FMAXIMUM;
+ case Intrinsic::vector_reduce_add:
+ return TargetOpcode::G_VECREDUCE_ADD;
+ case Intrinsic::vector_reduce_mul:
+ return TargetOpcode::G_VECREDUCE_MUL;
+ case Intrinsic::vector_reduce_and:
+ return TargetOpcode::G_VECREDUCE_AND;
+ case Intrinsic::vector_reduce_or:
+ return TargetOpcode::G_VECREDUCE_OR;
+ case Intrinsic::vector_reduce_xor:
+ return TargetOpcode::G_VECREDUCE_XOR;
+ case Intrinsic::vector_reduce_smax:
+ return TargetOpcode::G_VECREDUCE_SMAX;
+ case Intrinsic::vector_reduce_smin:
+ return TargetOpcode::G_VECREDUCE_SMIN;
+ case Intrinsic::vector_reduce_umax:
+ return TargetOpcode::G_VECREDUCE_UMAX;
+ case Intrinsic::vector_reduce_umin:
+ return TargetOpcode::G_VECREDUCE_UMIN;
+ case Intrinsic::experimental_vector_compress:
+ return TargetOpcode::G_VECTOR_COMPRESS;
+ case Intrinsic::lround:
+ return TargetOpcode::G_LROUND;
+ case Intrinsic::llround:
+ return TargetOpcode::G_LLROUND;
+ case Intrinsic::get_fpenv:
+ return TargetOpcode::G_GET_FPENV;
+ case Intrinsic::get_fpmode:
+ return TargetOpcode::G_GET_FPMODE;
}
return Intrinsic::not_intrinsic;
}
@@ -2077,7 +2086,7 @@ static unsigned getConstrainedOpcode(Intrinsic::ID ID) {
}
bool IRTranslator::translateConstrainedFPIntrinsic(
- const ConstrainedFPIntrinsic &FPI, MachineIRBuilder &MIRBuilder) {
+ const ConstrainedFPIntrinsic &FPI, MachineIRBuilder &MIRBuilder) {
fp::ExceptionBehavior EB = *FPI.getExceptionBehavior();
unsigned Opcode = getConstrainedOpcode(FPI.getIntrinsicID());
@@ -2217,18 +2226,17 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
case Intrinsic::dbg_declare: {
const DbgDeclareInst &DI = cast<DbgDeclareInst>(CI);
assert(DI.getVariable() && "Missing variable");
- translateDbgDeclareRecord(DI.getAddress(), DI.hasArgList(),
- DI.getVariable(), DI.getExpression(),
- DI.getDebugLoc(), MIRBuilder);
+ translateDbgDeclareRecord(DI.getAddress(), DI.hasArgList(), DI.getVariable(),
+ DI.getExpression(), DI.getDebugLoc(), MIRBuilder);
return true;
}
case Intrinsic::dbg_label: {
const DbgLabelInst &DI = cast<DbgLabelInst>(CI);
assert(DI.getLabel() && "Missing label");
- assert(
- DI.getLabel()->isValidLocationForIntrinsic(MIRBuilder.getDebugLoc()) &&
- "Expected inlined-at fields to agree");
+ assert(DI.getLabel()->isValidLocationForIntrinsic(
+ MIRBuilder.getDebugLoc()) &&
+ "Expected inlined-at fields to agree");
MIRBuilder.buildDbgLabel(DI.getLabel());
return true;
@@ -2259,7 +2267,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
// This form of DBG_VALUE is target-independent.
const DbgValueInst &DI = cast<DbgValueInst>(CI);
translateDbgValueRecord(DI.getValue(), DI.hasArgList(), DI.getVariable(),
- DI.getExpression(), DI.getDebugLoc(), MIRBuilder);
+ DI.getExpression(), DI.getDebugLoc(), MIRBuilder);
return true;
}
case Intrinsic::uadd_with_overflow:
@@ -2298,29 +2306,21 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
// TODO: Preserve "int min is poison" arg in GMIR?
return translateUnaryOp(TargetOpcode::G_ABS, CI, MIRBuilder);
case Intrinsic::smul_fix:
- return translateFixedPointIntrinsic(TargetOpcode::G_SMULFIX, CI,
- MIRBuilder);
+ return translateFixedPointIntrinsic(TargetOpcode::G_SMULFIX, CI, MIRBuilder)
case Intrinsic::umul_fix:
- return translateFixedPointIntrinsic(TargetOpcode::G_UMULFIX, CI,
- MIRBuilder);
+ return translateFixedPointIntrinsic(TargetOpcode::G_UMULFIX, CI, MIRBuilder);
case Intrinsic::smul_fix_sat:
- return translateFixedPointIntrinsic(TargetOpcode::G_SMULFIXSAT, CI,
- MIRBuilder);
+ return translateFixedPointIntrinsic(TargetOpcode::G_SMULFIXSAT, CI, MIRBuilder);
case Intrinsic::umul_fix_sat:
- return translateFixedPointIntrinsic(TargetOpcode::G_UMULFIXSAT, CI,
- MIRBuilder);
+ return translateFixedPointIntrinsic(TargetOpcode::G_UMULFIXSAT, CI, MIRBuilder);
case Intrinsic::sdiv_fix:
- return translateFixedPointIntrinsic(TargetOpcode::G_SDIVFIX, CI,
- MIRBuilder);
+ return translateFixedPointIntrinsic(TargetOpcode::G_SDIVFIX, CI, MIRBuilder);
case Intrinsic::udiv_fix:
- return translateFixedPointIntrinsic(TargetOpcode::G_UDIVFIX, CI,
- MIRBuilder);
+ return translateFixedPointIntrinsic(TargetOpcode::G_UDIVFIX, CI, MIRBuilder);
case Intrinsic::sdiv_fix_sat:
- return translateFixedPointIntrinsic(TargetOpcode::G_SDIVFIXSAT, CI,
- MIRBuilder);
+ return translateFixedPointIntrinsic(TargetOpcode::G_SDIVFIXSAT, CI, MIRBuilder);
case Intrinsic::udiv_fix_sat:
- return translateFixedPointIntrinsic(TargetOpcode::G_UDIVFIXSAT, CI,
- MIRBuilder);
+ return translateFixedPointIntrinsic(TargetOpcode::G_UDIVFIXSAT, CI, MIRBuilder);
case Intrinsic::fmuladd: {
const TargetMachine &TM = MF->getTarget();
Register Dst = getOrCreateVReg(CI);
@@ -2435,11 +2435,11 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
case Intrinsic::ctlz: {
ConstantInt *Cst = cast<ConstantInt>(CI.getArgOperand(1));
bool isTrailing = ID == Intrinsic::cttz;
- unsigned Opcode = isTrailing ? Cst->isZero()
- ? TargetOpcode::G_CTTZ
- : TargetOpcode::G_CTTZ_ZERO_UNDEF
- : Cst->isZero() ? TargetOpcode::G_CTLZ
- : TargetOpcode::G_CTLZ_ZERO_UNDEF;
+ unsigned Opcode = isTrailing
+ ? Cst->isZero() ? TargetOpcode::G_CTTZ
+ : TargetOpcode::G_CTTZ_ZERO_UNDEF
+ : Cst->isZero() ? TargetOpcode::G_CTLZ
+ : TargetOpcode::G_CTLZ_ZERO_UNDEF;
MIRBuilder.buildInstr(Opcode, {getOrCreateVReg(CI)},
{getOrCreateVReg(*CI.getArgOperand(0))});
return true;
@@ -2478,8 +2478,8 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
case Intrinsic::write_register: {
Value *Arg = CI.getArgOperand(0);
MIRBuilder.buildInstr(TargetOpcode::G_WRITE_REGISTER)
- .addMetadata(cast<MDNode>(cast<MetadataAsValue>(Arg)->getMetadata()))
- .addUse(getOrCreateVReg(*CI.getArgOperand(1)));
+ .addMetadata(cast<MDNode>(cast<MetadataAsValue>(Arg)->getMetadata()))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(1)));
return true;
}
case Intrinsic::localescape: {
@@ -2659,7 +2659,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
return translateVectorDeinterleave2Intrinsic(CI, MIRBuilder);
}
-#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
+#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
case Intrinsic::INTRINSIC:
#include "llvm/IR/ConstrainedOps.def"
return translateConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(CI),
@@ -2875,7 +2875,8 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
}
bool IRTranslator::findUnwindDestinations(
- const BasicBlock *EHPadBB, BranchProbability Prob,
+ const BasicBlock *EHPadBB,
+ BranchProbability Prob,
SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
&UnwindDests) {
EHPersonality Personality = classifyEHPersonality(
@@ -2958,8 +2959,8 @@ bool IRTranslator::translateInvoke(const User &U,
// FIXME: support Windows dllimport function calls and calls through
// weak symbols.
if (Fn && (Fn->hasDLLImportStorageClass() ||
- (MF->getTarget().getTargetTriple().isOSWindows() &&
- Fn->hasExternalWeakLinkage())))
+ (MF->getTarget().getTargetTriple().isOSWindows() &&
+ Fn->hasExternalWeakLinkage())))
return false;
bool LowerInlineAsm = I.isInlineAsm();
@@ -3046,7 +3047,8 @@ bool IRTranslator::translateLandingPad(const User &U,
// Add a label to mark the beginning of the landing pad. Deletion of the
// landing pad can thus be detected via the MachineModuleInfo.
- MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(MF->addLandingPad(&MBB));
+ MIRBuilder.buildInstr(TargetOpcode::EH_LABEL)
+ .addSym(MF->addLandingPad(&MBB));
// If the unwinder does not preserve all registers, ensure that the
// function marks the clobbered registers as used.
@@ -3474,7 +3476,8 @@ bool IRTranslator::translateAtomicRMW(const User &U,
return true;
}
-bool IRTranslator::translateFence(const User &U, MachineIRBuilder &MIRBuilder) {
+bool IRTranslator::translateFence(const User &U,
+ MachineIRBuilder &MIRBuilder) {
const FenceInst &Fence = cast<FenceInst>(U);
MIRBuilder.buildFence(static_cast<unsigned>(Fence.getOrdering()),
Fence.getSyncScopeID());
@@ -3532,10 +3535,10 @@ void IRTranslator::finishPendingPhis() {
}
void IRTranslator::translateDbgValueRecord(Value *V, bool HasArgList,
- const DILocalVariable *Variable,
- const DIExpression *Expression,
- const DebugLoc &DL,
- MachineIRBuilder &MIRBuilder) {
+ const DILocalVariable *Variable,
+ const DIExpression *Expression,
+ const DebugLoc &DL,
+ MachineIRBuilder &MIRBuilder) {
assert(Variable->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
// Act as if we're handling a debug intrinsic.
@@ -3578,10 +3581,10 @@ void IRTranslator::translateDbgValueRecord(Value *V, bool HasArgList,
}
void IRTranslator::translateDbgDeclareRecord(Value *Address, bool HasArgList,
- const DILocalVariable *Variable,
- const DIExpression *Expression,
- const DebugLoc &DL,
- MachineIRBuilder &MIRBuilder) {
+ const DILocalVariable *Variable,
+ const DIExpression *Expression,
+ const DebugLoc &DL,
+ MachineIRBuilder &MIRBuilder) {
if (!Address || isa<UndefValue>(Address)) {
LLVM_DEBUG(dbgs() << "Dropping debug info for " << *Variable << "\n");
return;
@@ -3593,12 +3596,13 @@ void IRTranslator::translateDbgDeclareRecord(Value *Address, bool HasArgList,
if (AI && AI->isStaticAlloca()) {
// Static allocas are tracked at the MF level, no need for DBG_VALUE
// instructions (in fact, they get ignored if they *do* exist).
- MF->setVariableDbgInfo(Variable, Expression, getOrCreateFrameIndex(*AI),
- DL);
+ MF->setVariableDbgInfo(Variable, Expression,
+ getOrCreateFrameIndex(*AI), DL);
return;
}
- if (translateIfEntryValueArgument(true, Address, Variable, Expression, DL,
+ if (translateIfEntryValueArgument(true, Address, Variable,
+ Expression, DL,
MIRBuilder))
return;
@@ -3610,7 +3614,7 @@ void IRTranslator::translateDbgDeclareRecord(Value *Address, bool HasArgList,
}
void IRTranslator::translateDbgInfo(const Instruction &Inst,
- MachineIRBuilder &MIRBuilder) {
+ MachineIRBuilder &MIRBuilder) {
for (DbgRecord &DR : Inst.getDbgRecordRange()) {
if (DbgLabelRecord *DLR = dyn_cast<DbgLabelRecord>(&DR)) {
MIRBuilder.setDebugLoc(DLR->getDebugLoc());
@@ -3701,7 +3705,7 @@ bool IRTranslator::translate(const Constant &C, Register Reg) {
}
EntryBuilder->buildBuildVector(Reg, Ops);
} else if (auto CE = dyn_cast<ConstantExpr>(&C)) {
- switch (CE->getOpcode()) {
+ switch(CE->getOpcode()) {
#define HANDLE_INST(NUM, OPCODE, CLASS) \
case Instruction::OPCODE: \
return translate##OPCODE(*CE, *EntryBuilder.get());
@@ -4067,7 +4071,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
// Create all blocks, in IR order, to preserve the layout.
FuncInfo.MBBMap.resize(F.getMaxBlockNumber());
- for (const BasicBlock &BB : F) {
+ for (const BasicBlock &BB: F) {
auto *&MBB = FuncInfo.MBBMap[BB.getNumber()];
MBB = MF->CreateMachineBasicBlock(&BB);
@@ -4096,7 +4100,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
// Lower the actual args into this basic block.
SmallVector<ArrayRef<Register>, 8> VRegArgs;
- for (const Argument &Arg : F.args()) {
+ for (const Argument &Arg: F.args()) {
if (DL->getTypeStoreSize(Arg.getType()).isZero())
continue; // Don't handle zero sized types.
ArrayRef<Register> VRegs = getOrCreateVRegs(Arg);
>From 6de71367cb1727d62efa179ae7685c595c0dd4cc Mon Sep 17 00:00:00 2001
From: "Zhang, Yixing" <yixing.zhang at intel.com>
Date: Thu, 4 Sep 2025 05:09:16 -0700
Subject: [PATCH 3/3] nit
---
llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 8354929e11cd2..8a4b1afec3d26 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -2306,7 +2306,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
// TODO: Preserve "int min is poison" arg in GMIR?
return translateUnaryOp(TargetOpcode::G_ABS, CI, MIRBuilder);
case Intrinsic::smul_fix:
- return translateFixedPointIntrinsic(TargetOpcode::G_SMULFIX, CI, MIRBuilder)
+ return translateFixedPointIntrinsic(TargetOpcode::G_SMULFIX, CI, MIRBuilder);
case Intrinsic::umul_fix:
return translateFixedPointIntrinsic(TargetOpcode::G_UMULFIX, CI, MIRBuilder);
case Intrinsic::smul_fix_sat:
More information about the llvm-commits
mailing list