[llvm] [GlobalISel][AMDGPU] Implement FPInfo for LLT (PR #122503)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 10 09:50:07 PST 2025
github-actions[bot] wrote:
:warning: The C/C++ code formatter, clang-format, found issues in your code. :warning:
<details>
<summary>
You can test this locally with the following command:
</summary>
``````````bash
git-clang-format --diff cc88a5e61578e58afdd8ef4e9f1b7cd10d77fba3 9c8056668c78aa31b103b742cd439639ff61034a --extensions cpp,h -- llvm/include/llvm/CodeGen/GlobalISel/InferTypeInfoPass.h llvm/lib/CodeGen/GlobalISel/InferTypeInfoPass.cpp llvm/include/llvm/CodeGen/Analysis.h llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h llvm/include/llvm/CodeGen/LowLevelTypeUtils.h llvm/include/llvm/CodeGenTypes/LowLevelType.h llvm/include/llvm/InitializePasses.h llvm/lib/CodeGen/Analysis.cpp llvm/lib/CodeGen/GlobalISel/CallLowering.cpp llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp llvm/lib/CodeGen/LowLevelTypeUtils.cpp llvm/lib/CodeGen/MIRParser/MIParser.cpp llvm/lib/CodeGenTypes/LowLevelType.cpp llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp
``````````
</details>
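If the reported diff looks right, one way to apply it locally (a sketch, assuming your PR changes are already committed in your checkout and `git-clang-format` is on your PATH) is to drop `--diff` and the second commit, so the lines that differ from the base commit are reformatted in place in your working tree:

``````````bash
# Reformat the changed lines relative to the base commit; only one header path
# is shown here for brevity -- pass the same file list as in the command above.
git-clang-format cc88a5e61578e58afdd8ef4e9f1b7cd10d77fba3 \
  --extensions cpp,h -- llvm/include/llvm/CodeGenTypes/LowLevelType.h
``````````

Afterwards, stage the reformatted files and commit (or amend) as usual before pushing the branch again.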
<details>
<summary>
View the diff from clang-format here.
</summary>
``````````diff
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
index 983758e306..3b0af1f74d 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
@@ -774,7 +774,8 @@ private:
BranchProbability Prob = BranchProbability::getUnknown());
public:
- IRTranslator(CodeGenOptLevel OptLevel = CodeGenOptLevel::None, bool EnableFPInfo = false);
+ IRTranslator(CodeGenOptLevel OptLevel = CodeGenOptLevel::None,
+ bool EnableFPInfo = false);
StringRef getPassName() const override { return "IRTranslator"; }
diff --git a/llvm/include/llvm/CodeGenTypes/LowLevelType.h b/llvm/include/llvm/CodeGenTypes/LowLevelType.h
index d42c4a5ed0..2f252ac4df 100644
--- a/llvm/include/llvm/CodeGenTypes/LowLevelType.h
+++ b/llvm/include/llvm/CodeGenTypes/LowLevelType.h
@@ -139,25 +139,39 @@ public:
}
// Get a 16-bit brain float value.
- static constexpr LLT bfloat() { return floatingPoint(16, FPInfo::VARIANT_FLOAT_1); }
+ static constexpr LLT bfloat() {
+ return floatingPoint(16, FPInfo::VARIANT_FLOAT_1);
+ }
/// Get a 16-bit IEEE half value.
- static constexpr LLT float16() { return floatingPoint(16, FPInfo::IEEE_FLOAT); }
+ static constexpr LLT float16() {
+ return floatingPoint(16, FPInfo::IEEE_FLOAT);
+ }
/// Get a 32-bit IEEE float value.
- static constexpr LLT float32() { return floatingPoint(32, FPInfo::IEEE_FLOAT); }
+ static constexpr LLT float32() {
+ return floatingPoint(32, FPInfo::IEEE_FLOAT);
+ }
/// Get a 64-bit IEEE double value.
- static constexpr LLT float64() { return floatingPoint(64, FPInfo::IEEE_FLOAT); }
+ static constexpr LLT float64() {
+ return floatingPoint(64, FPInfo::IEEE_FLOAT);
+ }
/// Get a 80-bit X86 floating point value.
- static constexpr LLT x86fp80() { return floatingPoint(80, FPInfo::VARIANT_FLOAT_1); }
+ static constexpr LLT x86fp80() {
+ return floatingPoint(80, FPInfo::VARIANT_FLOAT_1);
+ }
/// Get a 128-bit IEEE quad value.
- static constexpr LLT float128() { return floatingPoint(128, FPInfo::IEEE_FLOAT); }
+ static constexpr LLT float128() {
+ return floatingPoint(128, FPInfo::IEEE_FLOAT);
+ }
/// Get a 128-bit PowerPC double double value.
- static constexpr LLT ppcf128() { return floatingPoint(128, FPInfo::VARIANT_FLOAT_1); }
+ static constexpr LLT ppcf128() {
+ return floatingPoint(128, FPInfo::VARIANT_FLOAT_1);
+ }
/// Get a low-level fixed-width vector of some number of elements and element
/// width.
@@ -209,9 +223,7 @@ public:
explicit LLT(MVT VT, bool EnableFPInfo = false);
explicit constexpr LLT() : Info(static_cast<Kind>(0)), RawData(0) {}
- constexpr bool isValid() const {
- return isToken() || RawData != 0;
- }
+ constexpr bool isValid() const { return isToken() || RawData != 0; }
constexpr bool isScalar() const {
return Info == Kind::INTEGER || Info == Kind::FLOAT;
}
@@ -236,9 +248,15 @@ public:
constexpr bool isFloatVector() const {
return isVector() && Info == Kind::VECTOR_FLOAT;
}
- constexpr bool isBFloat() const { return isVariantFloat(16, FPInfo::VARIANT_FLOAT_1); }
- constexpr bool isX86FP80() const { return isVariantFloat(80, FPInfo::VARIANT_FLOAT_1); }
- constexpr bool isPPCF128() const { return isVariantFloat(128, FPInfo::VARIANT_FLOAT_1); }
+ constexpr bool isBFloat() const {
+ return isVariantFloat(16, FPInfo::VARIANT_FLOAT_1);
+ }
+ constexpr bool isX86FP80() const {
+ return isVariantFloat(80, FPInfo::VARIANT_FLOAT_1);
+ }
+ constexpr bool isPPCF128() const {
+ return isVariantFloat(128, FPInfo::VARIANT_FLOAT_1);
+ }
constexpr bool isToken() const {
return Info == Kind::INTEGER && RawData == 0;
}
@@ -474,9 +492,9 @@ private:
### (6)
%%% (7)
- (1) ScalarSize (2) PointerSize (3) PointerAddressSpace
+ (1) ScalarSize (2) PointerSize (3) PointerAddressSpace
(4) VectorElements (5) VectorScalable (6) FPInfo (7) Kind
-
+
*/
typedef int BitFieldInfo[2];
///
@@ -537,11 +555,12 @@ private:
RawData = maskAndShift(SizeInBits, PointerSizeFieldInfo) |
maskAndShift(AddressSpace, PointerAddressSpaceFieldInfo);
} else {
- RawData = maskAndShift(SizeInBits, ScalarSizeFieldInfo) |
- maskAndShift((uint64_t) FP, ScalarFPFieldInfo);
+ RawData = maskAndShift(SizeInBits, ScalarSizeFieldInfo) |
+ maskAndShift((uint64_t)FP, ScalarFPFieldInfo);
}
- if (Info == Kind::VECTOR_INTEGER || Info == Kind::VECTOR_FLOAT || Info == Kind::VECTOR_POINTER) {
+ if (Info == Kind::VECTOR_INTEGER || Info == Kind::VECTOR_FLOAT ||
+ Info == Kind::VECTOR_POINTER) {
RawData |= maskAndShift(EC.getKnownMinValue(), VectorElementsFieldInfo) |
maskAndShift(EC.isScalable() ? 1 : 0, VectorScalableFieldInfo);
}
diff --git a/llvm/lib/CodeGen/Analysis.cpp b/llvm/lib/CodeGen/Analysis.cpp
index 7a433354cd..c81ec850cf 100644
--- a/llvm/lib/CodeGen/Analysis.cpp
+++ b/llvm/lib/CodeGen/Analysis.cpp
@@ -150,8 +150,8 @@ void llvm::computeValueLLTs(const DataLayout &DL, Type &Ty,
const StructLayout *SL = Offsets ? DL.getStructLayout(STy) : nullptr;
for (unsigned I = 0, E = STy->getNumElements(); I != E; ++I) {
uint64_t EltOffset = SL ? SL->getElementOffset(I) : 0;
- computeValueLLTs(DL, *STy->getElementType(I), ValueTys, EnableFPInfo, Offsets,
- StartingOffset + EltOffset);
+ computeValueLLTs(DL, *STy->getElementType(I), ValueTys, EnableFPInfo,
+ Offsets, StartingOffset + EltOffset);
}
return;
}
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 32702ee465..36f6713744 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -1006,7 +1006,8 @@ void CallLowering::insertSRetLoads(MachineIRBuilder &MIRBuilder, Type *RetTy,
Align BaseAlign = DL.getPrefTypeAlign(RetTy);
Type *RetPtrTy =
PointerType::get(RetTy->getContext(), DL.getAllocaAddrSpace());
- LLT OffsetLLTy = getLLTForType(*DL.getIndexType(RetPtrTy), DL, /* EnableFPInfo */ true);
+ LLT OffsetLLTy =
+ getLLTForType(*DL.getIndexType(RetPtrTy), DL, /* EnableFPInfo */ true);
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
@@ -1036,7 +1037,8 @@ void CallLowering::insertSRetStores(MachineIRBuilder &MIRBuilder, Type *RetTy,
unsigned NumValues = SplitVTs.size();
Align BaseAlign = DL.getPrefTypeAlign(RetTy);
unsigned AS = DL.getAllocaAddrSpace();
- LLT OffsetLLTy = getLLTForType(*DL.getIndexType(RetTy->getContext(), AS), DL, /* EnableFPInfo */ true);
+ LLT OffsetLLTy = getLLTForType(*DL.getIndexType(RetTy->getContext(), AS), DL,
+ /* EnableFPInfo */ true);
MachinePointerInfo PtrInfo(AS);
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index f196dd8dc8..6493067c69 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -218,7 +218,7 @@ ArrayRef<Register> IRTranslator::getOrCreateVRegs(const Value &Val) {
"Don't know how to create an empty vreg");
SmallVector<LLT, 4> SplitTys;
- computeValueLLTs(*DL, *Val.getType(), SplitTys, EnableFPInfo,
+ computeValueLLTs(*DL, *Val.getType(), SplitTys, EnableFPInfo,
Offsets->empty() ? Offsets : nullptr);
if (!isa<Constant>(Val)) {
@@ -1125,8 +1125,8 @@ void IRTranslator::emitBitTestHeader(SwitchCG::BitTestBlock &B,
// Conditional branch to the default block.
auto RangeCst = MIB.buildConstant(SwitchOpTy, B.Range);
LLT CmpTy = LLT::integer(1);
- auto RangeCmp = MIB.buildICmp(CmpInst::Predicate::ICMP_UGT, CmpTy,
- RangeSub, RangeCst);
+ auto RangeCmp =
+ MIB.buildICmp(CmpInst::Predicate::ICMP_UGT, CmpTy, RangeSub, RangeCst);
MIB.buildBrCond(RangeCmp, *B.Default);
}
@@ -1153,14 +1153,12 @@ void IRTranslator::emitBitTestCase(SwitchCG::BitTestBlock &BB,
auto MaskTrailingZeros =
MIB.buildConstant(SwitchTy, llvm::countr_zero(B.Mask));
Cmp =
- MIB.buildICmp(ICmpInst::ICMP_EQ, I1, Reg, MaskTrailingZeros)
- .getReg(0);
+ MIB.buildICmp(ICmpInst::ICMP_EQ, I1, Reg, MaskTrailingZeros).getReg(0);
} else if (PopCount == BB.Range) {
// There is only one zero bit in the range, test for it directly.
auto MaskTrailingOnes =
MIB.buildConstant(SwitchTy, llvm::countr_one(B.Mask));
- Cmp = MIB.buildICmp(CmpInst::ICMP_NE, I1, Reg, MaskTrailingOnes)
- .getReg(0);
+ Cmp = MIB.buildICmp(CmpInst::ICMP_NE, I1, Reg, MaskTrailingOnes).getReg(0);
} else {
// Make desired shift.
auto CstOne = MIB.buildConstant(SwitchTy, 1);
@@ -1170,8 +1168,7 @@ void IRTranslator::emitBitTestCase(SwitchCG::BitTestBlock &BB,
auto CstMask = MIB.buildConstant(SwitchTy, B.Mask);
auto AndOp = MIB.buildAnd(SwitchTy, SwitchVal, CstMask);
auto CstZero = MIB.buildConstant(SwitchTy, 0);
- Cmp = MIB.buildICmp(CmpInst::ICMP_NE, I1, AndOp, CstZero)
- .getReg(0);
+ Cmp = MIB.buildICmp(CmpInst::ICMP_NE, I1, AndOp, CstZero).getReg(0);
}
// The branch probability from SwitchBB to B.TargetBB is B.ExtraProb.
@@ -2379,7 +2376,8 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
getStackGuard(getOrCreateVReg(CI), MIRBuilder);
return true;
case Intrinsic::stackprotector: {
- LLT PtrTy = getLLTForType(*CI.getArgOperand(0)->getType(), *DL, EnableFPInfo);
+ LLT PtrTy =
+ getLLTForType(*CI.getArgOperand(0)->getType(), *DL, EnableFPInfo);
Register GuardVal;
if (TLI->useLoadStackGuardNode(*CI.getModule())) {
GuardVal = MRI->createGenericVirtualRegister(PtrTy);
@@ -2422,7 +2420,8 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
return true;
}
case Intrinsic::invariant_start: {
- LLT PtrTy = getLLTForType(*CI.getArgOperand(0)->getType(), *DL, EnableFPInfo);
+ LLT PtrTy =
+ getLLTForType(*CI.getArgOperand(0)->getType(), *DL, EnableFPInfo);
Register Undef = MRI->createGenericVirtualRegister(PtrTy);
MIRBuilder.buildUndef(Undef);
return true;
@@ -2621,7 +2620,8 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
case Intrinsic::vector_deinterleave2: {
// Both intrinsics have at least one operand.
Value *Op0 = CI.getOperand(0);
- LLT ResTy = getLLTForType(*Op0->getType(), MIRBuilder.getDataLayout(), EnableFPInfo);
+ LLT ResTy = getLLTForType(*Op0->getType(), MIRBuilder.getDataLayout(),
+ EnableFPInfo);
if (!ResTy.isFixedVector())
return false;
@@ -3880,8 +3880,7 @@ bool IRTranslator::emitSPDescriptorParent(StackProtectorDescriptor &SPD,
// Otherwise, emit a volatile load to retrieve the stack guard value.
if (TLI->useLoadStackGuardNode(*ParentBB->getBasicBlock()->getModule())) {
LLT RegTy = LLT::integer(PtrTy.getSizeInBits());
- Guard =
- MRI->createGenericVirtualRegister(RegTy);
+ Guard = MRI->createGenericVirtualRegister(RegTy);
getStackGuard(Guard, *CurBuilder);
} else {
// TODO: test using android subtarget when we support @llvm.thread.pointer.
@@ -3898,8 +3897,7 @@ bool IRTranslator::emitSPDescriptorParent(StackProtectorDescriptor &SPD,
// Perform the comparison.
LLT I1 = LLT::integer(1);
- auto Cmp =
- CurBuilder->buildICmp(CmpInst::ICMP_NE, I1, Guard, GuardVal);
+ auto Cmp = CurBuilder->buildICmp(CmpInst::ICMP_NE, I1, Guard, GuardVal);
// If the guard/stackslot do not equal, branch to failure MBB.
CurBuilder->buildBrCond(Cmp, *SPD.getFailureMBB());
// Otherwise branch to success MBB.
diff --git a/llvm/lib/CodeGen/GlobalISel/InferTypeInfoPass.cpp b/llvm/lib/CodeGen/GlobalISel/InferTypeInfoPass.cpp
index 471f26d0e0..034e5577d7 100644
--- a/llvm/lib/CodeGen/GlobalISel/InferTypeInfoPass.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/InferTypeInfoPass.cpp
@@ -55,7 +55,7 @@ void InferTypeInfo::getAnalysisUsage(AnalysisUsage &AU) const {
static LLT updateType(LLT Ty, bool FP) {
LLT InferredScalarTy =
- FP ? LLT::floatingPoint(Ty.getScalarSizeInBits(), LLT::FPInfo::IEEE_FLOAT)
+ FP ? LLT::floatingPoint(Ty.getScalarSizeInBits(), LLT::FPInfo::IEEE_FLOAT)
: LLT::integer(Ty.getScalarSizeInBits());
LLT InferredTy =
Ty.isVector() ? Ty.changeElementType(InferredScalarTy) : InferredScalarTy;
@@ -200,12 +200,14 @@ bool InferTypeInfo::shouldBeFP(MachineOperand &Op, unsigned Depth = 0) const {
case TargetOpcode::G_STORE:
case TargetOpcode::G_INDEXED_STORE: {
MachineOperand &Val = MI.getOperand(0);
- return Op.getReg() == Val.getReg() && all_of(MRI->def_operands(Op.getReg()), Pred);
- }
+ return Op.getReg() == Val.getReg() &&
+ all_of(MRI->def_operands(Op.getReg()), Pred);
+ }
case TargetOpcode::G_INDEXED_LOAD:
case TargetOpcode::G_LOAD: {
MachineOperand &Dst = MI.getOperand(0);
- return Op.getReg() == Dst.getReg() && all_of(MRI->use_operands(Dst.getReg()), Pred);
+ return Op.getReg() == Dst.getReg() &&
+ all_of(MRI->use_operands(Dst.getReg()), Pred);
}
case TargetOpcode::G_ATOMICRMW_FADD:
case TargetOpcode::G_ATOMICRMW_FSUB:
@@ -219,23 +221,23 @@ bool InferTypeInfo::shouldBeFP(MachineOperand &Op, unsigned Depth = 0) const {
case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
case TargetOpcode::G_INTRINSIC: {
- GIntrinsic *Intrinsic = dyn_cast<GIntrinsic>(&MI);
- if (!Intrinsic)
- return false;
-
- switch (Intrinsic->getIntrinsicID()) {
- case Intrinsic::amdgcn_rcp:
- case Intrinsic::amdgcn_log:
- case Intrinsic::amdgcn_exp2:
- case Intrinsic::amdgcn_rsq:
- case Intrinsic::amdgcn_sqrt:
- case Intrinsic::amdgcn_fdot2_f16_f16:
- case Intrinsic::amdgcn_mfma_f32_4x4x4f16:
- return true;
- default:
- return false;
- }
+ GIntrinsic *Intrinsic = dyn_cast<GIntrinsic>(&MI);
+ if (!Intrinsic)
+ return false;
+
+ switch (Intrinsic->getIntrinsicID()) {
+ case Intrinsic::amdgcn_rcp:
+ case Intrinsic::amdgcn_log:
+ case Intrinsic::amdgcn_exp2:
+ case Intrinsic::amdgcn_rsq:
+ case Intrinsic::amdgcn_sqrt:
+ case Intrinsic::amdgcn_fdot2_f16_f16:
+ case Intrinsic::amdgcn_mfma_f32_4x4x4f16:
+ return true;
+ default:
return false;
+ }
+ return false;
}
default:
break;
@@ -273,16 +275,18 @@ bool InferTypeInfo::inferTypeInfo(MachineFunction &MF) {
}
}
- for (auto &MemOp: MI.memoperands()) {
- bool IsFP = any_of(MI.all_defs(), [&](MachineOperand &O){ return shouldBeFP(O); }) ||
- any_of(MI.all_uses(), [&](MachineOperand &O){ return shouldBeFP(O); });
+ for (auto &MemOp : MI.memoperands()) {
+ bool IsFP = any_of(MI.all_defs(),
+ [&](MachineOperand &O) { return shouldBeFP(O); }) ||
+ any_of(MI.all_uses(),
+ [&](MachineOperand &O) { return shouldBeFP(O); });
- if (!IsFP)
- continue;
+ if (!IsFP)
+ continue;
- LLT Ty = MemOp->getType();
- LLT NewTy = updateType(Ty, true);
- MemOp->setType(NewTy);
+ LLT Ty = MemOp->getType();
+ LLT NewTy = updateType(Ty, true);
+ MemOp->setType(NewTy);
}
}
}
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 63cb2e6ef9..013497a7ea 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -2041,7 +2041,8 @@ void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
if (DstTy.isFloat() && TruncOpcode != TargetOpcode::G_FPTRUNC) {
- auto Trunc = MIRBuilder.buildInstr(TruncOpcode, {DstTy.dropType()}, {DstExt});
+ auto Trunc =
+ MIRBuilder.buildInstr(TruncOpcode, {DstTy.dropType()}, {DstExt});
MIRBuilder.buildBitcast(MO, Trunc);
MO.setReg(DstExt);
return;
@@ -7723,7 +7724,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
// FIXME: Only f32 to i64 conversions are supported.
- if (!SrcTy.getScalarType().isScalar(32) || !DstTy.getScalarType().isScalar(64))
+ if (!SrcTy.getScalarType().isScalar(32) ||
+ !DstTy.getScalarType().isScalar(64))
return UnableToLegalize;
// Expand f32 -> i64 conversion
diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index adfec6f35d..2e1f2c5862 100644
--- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -563,7 +563,8 @@ MachineInstrBuilder MachineIRBuilder::buildExtOrTrunc(unsigned ExtOpc,
Op.getLLTTy(*getMRI()).getSizeInBits())
Opcode = TargetOpcode::G_TRUNC;
else
- assert(Res.getLLTTy(*getMRI()).getSizeInBits() == Op.getLLTTy(*getMRI()).getSizeInBits());
+ assert(Res.getLLTTy(*getMRI()).getSizeInBits() ==
+ Op.getLLTTy(*getMRI()).getSizeInBits());
return buildInstr(Opcode, Res, Op);
}
diff --git a/llvm/lib/CodeGen/LowLevelTypeUtils.cpp b/llvm/lib/CodeGen/LowLevelTypeUtils.cpp
index f229d954db..9b73b72927 100644
--- a/llvm/lib/CodeGen/LowLevelTypeUtils.cpp
+++ b/llvm/lib/CodeGen/LowLevelTypeUtils.cpp
@@ -78,7 +78,8 @@ LLT llvm::getLLTForType(Type &Ty, const DataLayout &DL, bool EnableFPInfo) {
MVT llvm::getMVTForLLT(LLT Ty, bool EnableFPInfo) {
if (Ty.isVector()) {
- return MVT::getVectorVT(getMVTForLLT(Ty.getElementType()), Ty.getElementCount());
+ return MVT::getVectorVT(getMVTForLLT(Ty.getElementType()),
+ Ty.getElementCount());
}
if (Ty.isFloat()) {
@@ -94,7 +95,6 @@ MVT llvm::getMVTForLLT(LLT Ty, bool EnableFPInfo) {
return MVT::getFloatingPointVT(Ty.getSizeInBits());
}
-
return MVT::getIntegerVT(Ty.getSizeInBits());
}
@@ -114,8 +114,9 @@ LLT llvm::getLLTForMVT(MVT Ty, bool EnableFPInfo) {
if (!Ty.isVector())
return LLT::integer(Ty.getSizeInBits());
- return LLT::scalarOrVector(Ty.getVectorElementCount(),
- LLT::integer(Ty.getVectorElementType().getSizeInBits()));
+ return LLT::scalarOrVector(
+ Ty.getVectorElementCount(),
+ LLT::integer(Ty.getVectorElementType().getSizeInBits()));
}
const llvm::fltSemantics &llvm::getFltSemanticForLLT(LLT Ty) {
diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index c73f5230a7..6b3606853a 100644
--- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -1924,13 +1924,14 @@ static bool verifyAddrSpace(uint64_t AddrSpace) {
}
bool MIParser::parseLowLevelType(StringRef::iterator Loc, LLT &Ty) {
- if (Token.range().front() == 's' || Token.range().front() == 'i' || Token.range().front() == 'f' || Token.range().front() == 'p') {
+ if (Token.range().front() == 's' || Token.range().front() == 'i' ||
+ Token.range().front() == 'f' || Token.range().front() == 'p') {
StringRef SizeStr = Token.range().drop_front();
if (SizeStr.size() == 0 || !llvm::all_of(SizeStr, isdigit))
return error("expected integers after 's'/'i'/'f'/'p' type character");
}
-
- if (Token.range().substr(0,2) == "bf") {
+
+ if (Token.range().substr(0, 2) == "bf") {
StringRef SizeStr = Token.range().drop_front(2);
if (SizeStr.size() == 0 || !llvm::all_of(SizeStr, isdigit))
return error("expected integers after 'bf' type string");
@@ -1959,14 +1960,14 @@ bool MIParser::parseLowLevelType(StringRef::iterator Loc, LLT &Ty) {
} else if (Token.range().front() == 'f') {
auto ScalarSize = APSInt(Token.range().drop_front()).getZExtValue();
if (!ScalarSize || !verifyScalarSize(ScalarSize))
- return error("invalid size for scalar type");
+ return error("invalid size for scalar type");
Ty = LLT::floatingPoint(ScalarSize, LLT::FPInfo::IEEE_FLOAT);
lex();
return false;
} else if (Token.range().substr(0, 2) == "bf") {
auto ScalarSize = APSInt(Token.range().drop_front(2)).getZExtValue();
if (!ScalarSize || !verifyScalarSize(ScalarSize))
- return error("invalid size for scalar type");
+ return error("invalid size for scalar type");
Ty = LLT::floatingPoint(ScalarSize, LLT::FPInfo::VARIANT_FLOAT_1);
lex();
return false;
@@ -2054,9 +2055,10 @@ bool MIParser::parseTypedImmediateOperand(MachineOperand &Dest) {
assert(Token.is(MIToken::Identifier));
StringRef TypeStr = Token.range();
if (TypeStr.front() != 'i' && TypeStr.front() != 's' &&
- TypeStr.front() != 'p' && TypeStr.front() != 'f' && TypeStr.substr(0,2) != "bf")
- return error(
- "a typed immediate operand should start with one of 'i', 's','f','bf', or 'p'");
+ TypeStr.front() != 'p' && TypeStr.front() != 'f' &&
+ TypeStr.substr(0, 2) != "bf")
+ return error("a typed immediate operand should start with one of 'i', "
+ "'s','f','bf', or 'p'");
StringRef SizeStr = Token.range().drop_front();
if (SizeStr.size() == 0 || !llvm::all_of(SizeStr, isdigit))
return error("expected integers after 'i'/'s'/'f'/'bf'/'p' type character");
diff --git a/llvm/lib/CodeGenTypes/LowLevelType.cpp b/llvm/lib/CodeGenTypes/LowLevelType.cpp
index 947b22de67..79bd4e258a 100644
--- a/llvm/lib/CodeGenTypes/LowLevelType.cpp
+++ b/llvm/lib/CodeGenTypes/LowLevelType.cpp
@@ -35,30 +35,30 @@ static std::optional<LLT::FPInfo> deriveFPInfo(MVT VT) {
}
LLT::LLT(MVT VT, bool EnableFPInfo) {
- auto FP = EnableFPInfo ? deriveFPInfo(VT) : std::nullopt;
- bool AsVector = VT.isVector() && (VT.getVectorMinNumElements() > 1 || VT.isScalableVector());
-
- Kind Info;
- if (EnableFPInfo && FP.has_value())
- Info = AsVector ? Kind::VECTOR_FLOAT : Kind::FLOAT;
- else
- Info = AsVector ? Kind::VECTOR_INTEGER : Kind::INTEGER;
+ auto FP = EnableFPInfo ? deriveFPInfo(VT) : std::nullopt;
+ bool AsVector = VT.isVector() &&
+ (VT.getVectorMinNumElements() > 1 || VT.isScalableVector());
- if (VT.isVector()) {
- init(Info,
- VT.getVectorElementCount(),
- VT.getVectorElementType().getSizeInBits(),
- /*AddressSpace=*/0, FP.value_or(FPInfo::IEEE_FLOAT));
- } else if (VT.isValid() && !VT.isScalableTargetExtVT()) {
- // Aggregates are no different from real scalars as far as GlobalISel is
- // concerned.
- init(Info, ElementCount::getFixed(0), VT.getSizeInBits(),
- /*AddressSpace=*/0, FP.value_or(FPInfo::IEEE_FLOAT));
- } else {
- this->Info = static_cast<Kind>(0);
- this->RawData = 0;
- }
+ Kind Info;
+ if (EnableFPInfo && FP.has_value())
+ Info = AsVector ? Kind::VECTOR_FLOAT : Kind::FLOAT;
+ else
+ Info = AsVector ? Kind::VECTOR_INTEGER : Kind::INTEGER;
+
+ if (VT.isVector()) {
+ init(Info, VT.getVectorElementCount(),
+ VT.getVectorElementType().getSizeInBits(),
+ /*AddressSpace=*/0, FP.value_or(FPInfo::IEEE_FLOAT));
+ } else if (VT.isValid() && !VT.isScalableTargetExtVT()) {
+ // Aggregates are no different from real scalars as far as GlobalISel is
+ // concerned.
+ init(Info, ElementCount::getFixed(0), VT.getSizeInBits(),
+ /*AddressSpace=*/0, FP.value_or(FPInfo::IEEE_FLOAT));
+ } else {
+ this->Info = static_cast<Kind>(0);
+ this->RawData = 0;
}
+}
void LLT::print(raw_ostream &OS) const {
constexpr bool EnableFPInfo = false;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index f3e0c24796..d7bc2dd117 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -35,13 +35,13 @@ static Register extendRegisterMin32(CallLowering::ValueHandler &Handler,
LLT SrcTy = LLT(VA.getLocVT(), /*EnableFPInfo*/ true);
if (SrcTy.getSizeInBits() < 32) {
- LLT I32 = LLT::integer(32);
- LLT DstTy = LLT::integer(SrcTy.getSizeInBits());
+ LLT I32 = LLT::integer(32);
+ LLT DstTy = LLT::integer(SrcTy.getSizeInBits());
Register SrcReg = ValVReg;
if (SrcTy.isFloat())
SrcReg = Handler.MIRBuilder.buildBitcast(DstTy, ValVReg).getReg(0);
-
+
// 16-bit types are reported as legal for 32-bit registers. We need to
// extend and do a 32-bit copy to avoid the verifier complaining about it.
return Handler.MIRBuilder.buildAnyExt(I32, SrcReg).getReg(0);
@@ -133,13 +133,15 @@ struct AMDGPUIncomingArgHandler : public CallLowering::IncomingValueHandler {
if (LocTy.getSizeInBits() < 32) {
// 16-bit types are reported as legal for 32-bit registers. We need to do
// a 32-bit copy, and truncate to avoid the verifier complaining about it.
- Register CopyReg = MIRBuilder.buildCopy(LLT::scalar(32), PhysReg).getReg(0);
+ Register CopyReg =
+ MIRBuilder.buildCopy(LLT::scalar(32), PhysReg).getReg(0);
if (LocTy.getScalarType().isFloat()) {
- LLT TruncTy = LocTy.isVector()
- ? LLT::vector(LocTy.getElementCount(),
- LLT::integer(LocTy.getScalarSizeInBits()))
- : LLT::integer(LocTy.getScalarSizeInBits());
+ LLT TruncTy =
+ LocTy.isVector()
+ ? LLT::vector(LocTy.getElementCount(),
+ LLT::integer(LocTy.getScalarSizeInBits()))
+ : LLT::integer(LocTy.getScalarSizeInBits());
auto Extended = buildExtensionHint(VA, CopyReg, TruncTy);
auto Trunc = MIRBuilder.buildTrunc(TruncTy, Extended);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 4a3cbc9bc0..9deb44adf1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -652,8 +652,7 @@ bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR(MachineInstr &MI) const {
// For G_BUILD_VECTOR_TRUNC, additionally check that the operands are s32.
Register Dst = MI.getOperand(0).getReg();
if (!MRI->getType(Dst).isFixedVector(2, 16) ||
- (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC &&
- !SrcTy.isScalar(32)))
+ (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC && !SrcTy.isScalar(32)))
return selectImpl(MI, *CoverageInfo);
const RegisterBank *DstBank = RBI.getRegBank(Dst, *MRI, TRI);
@@ -2662,8 +2661,7 @@ bool AMDGPUInstructionSelector::selectG_FPEXT(MachineInstr &I) const {
Register Src = I.getOperand(1).getReg();
- if (MRI->getType(Dst).isScalar(32) &&
- MRI->getType(Src).isScalar(16)) {
+ if (MRI->getType(Dst).isScalar(32) && MRI->getType(Src).isScalar(16)) {
if (isExtractHiElt(*MRI, Src, Src)) {
MachineBasicBlock *BB = I.getParent();
BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_CVT_HI_F32_F16), Dst)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 119b7e6e59..5a704031e3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -54,7 +54,6 @@ static cl::opt<bool> EnableNewLegality(
static constexpr unsigned MaxRegisterSize = 1024;
-
static const LLT I1 = LLT::integer(1);
static const LLT I8 = LLT::integer(8);
static const LLT I16 = LLT::integer(16);
@@ -151,45 +150,24 @@ static const LLT V2I128 = LLT::fixed_vector(2, I128);
static const LLT V4I128 = LLT::fixed_vector(4, I128);
static std::initializer_list<LLT> AllScalarTypes = {
- I16, F16, BF16, I32, F32, I64, F64, I96, I128, I160, I192, I224, I256, I512, I1024};
+ I16, F16, BF16, I32, F32, I64, F64, I96,
+ I128, I160, I192, I224, I256, I512, I1024};
static std::initializer_list<LLT> AllS16Vectors{
- V2I16, V2F16, V2BF16,
- V4I16, V4F16, V4BF16,
- V6I16, V6F16, V6BF16,
- V8I16, V8F16, V8BF16,
- V10I16, V10F16, V10BF16,
- V12I16, V12F16, V12BF16,
- V16I16, V16F16, V16BF16,
- V2I128,
- V4I128,
+ V2I16, V2F16, V2BF16, V4I16, V4F16, V4BF16, V6I16, V6F16,
+ V6BF16, V8I16, V8F16, V8BF16, V10I16, V10F16, V10BF16, V12I16,
+ V12F16, V12BF16, V16I16, V16F16, V16BF16, V2I128, V4I128,
};
static std::initializer_list<LLT> AllS32Vectors = {
- V2I32, V2F32,
- V3I32, V3F32,
- V4I32, V4F32,
- V5I32, V5F32,
- V6I32, V6F32,
- V7I32, V7F32,
- V8I32, V8F32,
- V9I32, V9F32,
- V10I32, V10F32,
- V11I32, V11F32,
- V12I32, V12F32,
- V16I32, V16F32,
- V32I32, V32F32,
+ V2I32, V2F32, V3I32, V3F32, V4I32, V4F32, V5I32, V5F32, V6I32,
+ V6F32, V7I32, V7F32, V8I32, V8F32, V9I32, V9F32, V10I32, V10F32,
+ V11I32, V11F32, V12I32, V12F32, V16I32, V16F32, V32I32, V32F32,
};
static std::initializer_list<LLT> AllS64Vectors = {
- V2I64, V2F64,
- V3I64, V3F64,
- V4I64, V4F64,
- V5I64, V5F64,
- V6I64, V6F64,
- V7I64, V7F64,
- V8I64, V8F64,
- V16I64, V16F64,
+ V2I64, V2F64, V3I64, V3F64, V4I64, V4F64, V5I64, V5F64,
+ V6I64, V6F64, V7I64, V7F64, V8I64, V8F64, V16I64, V16F64,
};
// Round the number of elements to the next power of two elements
@@ -757,17 +735,12 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
const std::initializer_list<LLT> AddrSpaces128 = {RsrcPtr};
- const std::initializer_list<LLT> FPTypesBase = {
- F32, F64
- };
+ const std::initializer_list<LLT> FPTypesBase = {F32, F64};
- const std::initializer_list<LLT> FPTypes16 = {
- F32, F64, F16, BF16
- };
+ const std::initializer_list<LLT> FPTypes16 = {F32, F64, F16, BF16};
- const std::initializer_list<LLT> FPTypesPK16 = {
- F32, F64, F16, BF16, V2F16, V2BF16
- };
+ const std::initializer_list<LLT> FPTypesPK16 = {F32, F64, F16,
+ BF16, V2F16, V2BF16};
const LLT MinScalarFPTy = ST.has16BitInsts() ? F16 : F32;
@@ -777,7 +750,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
// TODO: All multiples of 32, vectors of pointers, all v2s16 pairs, more
// elements for v3s16
getActionDefinitionsBuilder(G_PHI)
- .legalFor({I32, F32, I64, F64, V2I16, V2F16, V2BF16, I16, F16, BF16, V4I16, V4F16, V4BF16, I1, I128, I256})
+ .legalFor({I32, F32, I64, F64, V2I16, V2F16, V2BF16, I16, F16, BF16,
+ V4I16, V4F16, V4BF16, I1, I128, I256})
.legalFor(AllS32Vectors)
.legalFor(AllS64Vectors)
.legalFor(AddrSpaces64)
@@ -830,55 +804,55 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
assert(ST.hasMad64_32());
getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT, G_SADDSAT, G_SSUBSAT})
- .legalFor({I32, I16, V2I16}) // Clamp modifier
- .minScalarOrElt(0, I16)
- .clampMaxNumElementsStrict(0, I16, 2)
- .scalarize(0)
- .widenScalarToNextPow2(0, 32)
- .lower();
+ .legalFor({I32, I16, V2I16}) // Clamp modifier
+ .minScalarOrElt(0, I16)
+ .clampMaxNumElementsStrict(0, I16, 2)
+ .scalarize(0)
+ .widenScalarToNextPow2(0, 32)
+ .lower();
} else if (ST.has16BitInsts()) {
getActionDefinitionsBuilder({G_ADD, G_SUB})
- .legalFor({I32, I16})
- .minScalar(0, I16)
- .widenScalarToNextMultipleOf(0, 32)
- .maxScalar(0, I32)
- .scalarize(0);
+ .legalFor({I32, I16})
+ .minScalar(0, I16)
+ .widenScalarToNextMultipleOf(0, 32)
+ .maxScalar(0, I32)
+ .scalarize(0);
getActionDefinitionsBuilder(G_MUL)
- .legalFor({I32, I16})
- .scalarize(0)
- .minScalar(0, I16)
- .widenScalarToNextMultipleOf(0, 32)
- .custom();
+ .legalFor({I32, I16})
+ .scalarize(0)
+ .minScalar(0, I16)
+ .widenScalarToNextMultipleOf(0, 32)
+ .custom();
assert(ST.hasMad64_32());
// Technically the saturating operations require clamp bit support, but this
// was introduced at the same time as 16-bit operations.
getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT})
- .legalFor({I32, I16}) // Clamp modifier
- .minScalar(0, I16)
- .scalarize(0)
- .widenScalarToNextPow2(0, 16)
- .lower();
+ .legalFor({I32, I16}) // Clamp modifier
+ .minScalar(0, I16)
+ .scalarize(0)
+ .widenScalarToNextPow2(0, 16)
+ .lower();
// We're just lowering this, but it helps get a better result to try to
// coerce to the desired type first.
getActionDefinitionsBuilder({G_SADDSAT, G_SSUBSAT})
- .minScalar(0, I16)
- .scalarize(0)
- .lower();
+ .minScalar(0, I16)
+ .scalarize(0)
+ .lower();
} else {
getActionDefinitionsBuilder({G_ADD, G_SUB})
- .legalFor({I32})
- .widenScalarToNextMultipleOf(0, 32)
- .clampScalar(0, I32, I32)
- .scalarize(0);
+ .legalFor({I32})
+ .widenScalarToNextMultipleOf(0, 32)
+ .clampScalar(0, I32, I32)
+ .scalarize(0);
auto &Mul = getActionDefinitionsBuilder(G_MUL)
- .legalFor({I32})
- .scalarize(0)
- .minScalar(0, I32)
- .widenScalarToNextMultipleOf(0, 32);
+ .legalFor({I32})
+ .scalarize(0)
+ .minScalar(0, I32)
+ .widenScalarToNextMultipleOf(0, 32);
if (ST.hasMad64_32())
Mul.custom();
@@ -887,24 +861,24 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
if (ST.hasIntClamp()) {
getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT})
- .legalFor({I32}) // Clamp modifier.
- .scalarize(0)
- .minScalarOrElt(0, I32)
- .lower();
+ .legalFor({I32}) // Clamp modifier.
+ .scalarize(0)
+ .minScalarOrElt(0, I32)
+ .lower();
} else {
// Clamp bit support was added in VI, along with 16-bit operations.
getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT})
- .minScalar(0, I32)
- .scalarize(0)
- .lower();
+ .minScalar(0, I32)
+ .scalarize(0)
+ .lower();
}
// FIXME: DAG expansion gets better results. The widening uses the smaller
// range values and goes for the min/max lowering directly.
getActionDefinitionsBuilder({G_SADDSAT, G_SSUBSAT})
- .minScalar(0, I32)
- .scalarize(0)
- .lower();
+ .minScalar(0, I32)
+ .scalarize(0)
+ .lower();
}
getActionDefinitionsBuilder(
@@ -919,9 +893,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.maxScalar(0, I32);
if (ST.hasVOP3PInsts()) {
- Mulh
- .clampMaxNumElements(0, I8, 2)
- .lowerFor({V2I8});
+ Mulh.clampMaxNumElements(0, I8, 2).lowerFor({V2I8});
}
Mulh
@@ -931,12 +903,12 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
// Report legal for any types we can handle anywhere. For the cases only legal
// on the SALU, RegBankSelect will be able to re-legalize.
getActionDefinitionsBuilder({G_AND, G_OR, G_XOR})
- .legalFor({I32, I1, I64, V2I32, I16, V2I16, V4I16})
- .clampScalar(0, I32, I64)
- .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
- .fewerElementsIf(vectorWiderThan(0, 64), fewerEltsToSize64Vector(0))
- .widenScalarToNextPow2(0)
- .scalarize(0);
+ .legalFor({I32, I1, I64, V2I32, I16, V2I16, V4I16})
+ .clampScalar(0, I32, I64)
+ .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
+ .fewerElementsIf(vectorWiderThan(0, 64), fewerEltsToSize64Vector(0))
+ .widenScalarToNextPow2(0)
+ .scalarize(0);
getActionDefinitionsBuilder(
{G_UADDO, G_USUBO, G_UADDE, G_SADDE, G_USUBE, G_SSUBE})
@@ -950,15 +922,15 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.lower();
getActionDefinitionsBuilder(G_CONSTANT)
- .legalFor({I1, I32, I64, I16, GlobalPtr,
- LocalPtr, ConstantPtr, PrivatePtr, FlatPtr })
- .legalIf(isPointer(0))
- .clampScalar(0, I32, I64)
- .widenScalarToNextPow2(0);
+ .legalFor({I1, I32, I64, I16, GlobalPtr, LocalPtr, ConstantPtr,
+ PrivatePtr, FlatPtr})
+ .legalIf(isPointer(0))
+ .clampScalar(0, I32, I64)
+ .widenScalarToNextPow2(0);
getActionDefinitionsBuilder(G_FCONSTANT)
- .legalFor({F32, F64, F16, BF16})
- .clampScalar(0, F16, F64);
+ .legalFor({F32, F64, F16, BF16})
+ .clampScalar(0, F16, F64);
getActionDefinitionsBuilder({G_IMPLICIT_DEF, G_FREEZE})
.legalIf(isRegisterClassType(0))
@@ -978,7 +950,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
// If the amount is divergent, we have to do a wave reduction to get the
// maximum value, so this is expanded during RegBankSelect.
getActionDefinitionsBuilder(G_DYN_STACKALLOC)
- .legalFor({{PrivatePtr, I32}, {PrivatePtr, F32}});
+ .legalFor({{PrivatePtr, I32}, {PrivatePtr, F32}});
getActionDefinitionsBuilder(G_STACKSAVE)
.customFor({PrivatePtr});
@@ -992,14 +964,13 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({CodePtr});
- auto &FPOpActions = getActionDefinitionsBuilder(
- { G_FADD, G_FMUL, G_FMA, G_FCANONICALIZE,
- G_STRICT_FADD, G_STRICT_FMUL, G_STRICT_FMA})
- .legalFor({F32, F64});
- auto &TrigActions = getActionDefinitionsBuilder({G_FSIN, G_FCOS})
- .customFor({F32, F64});
- auto &FDIVActions = getActionDefinitionsBuilder(G_FDIV)
- .customFor({F32, F64});
+ auto &FPOpActions =
+ getActionDefinitionsBuilder({G_FADD, G_FMUL, G_FMA, G_FCANONICALIZE,
+ G_STRICT_FADD, G_STRICT_FMUL, G_STRICT_FMA})
+ .legalFor({F32, F64});
+ auto &TrigActions =
+ getActionDefinitionsBuilder({G_FSIN, G_FCOS}).customFor({F32, F64});
+ auto &FDIVActions = getActionDefinitionsBuilder(G_FDIV).customFor({F32, F64});
if (ST.has16BitInsts()) {
if (ST.hasVOP3PInsts())
@@ -1021,96 +992,85 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
if (ST.hasVOP3PInsts()) {
MinNumMaxNum.customFor(FPTypesPK16)
- .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
- .clampMaxNumElements(0, F16, 2)
- .clampScalar(0, F16, F64)
- .scalarize(0);
+ .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
+ .clampMaxNumElements(0, F16, 2)
+ .clampScalar(0, F16, F64)
+ .scalarize(0);
} else if (ST.has16BitInsts()) {
- MinNumMaxNum.customFor(FPTypes16)
- .clampScalar(0, F16, F64)
- .scalarize(0);
+ MinNumMaxNum.customFor(FPTypes16).clampScalar(0, F16, F64).scalarize(0);
} else {
- MinNumMaxNum.customFor(FPTypesBase)
- .clampScalar(0, F32, F64)
- .scalarize(0);
+ MinNumMaxNum.customFor(FPTypesBase).clampScalar(0, F32, F64).scalarize(0);
}
if (ST.hasVOP3PInsts())
FPOpActions.clampMaxNumElementsStrict(0, F16, 2);
-
- FPOpActions
- .scalarize(0)
- .clampScalar(0, ST.has16BitInsts() ? F16 : F32, F64);
- TrigActions
- .scalarize(0)
- .clampScalar(0, ST.has16BitInsts() ? F16 : F32, F64);
+ FPOpActions.scalarize(0).clampScalar(0, ST.has16BitInsts() ? F16 : F32, F64);
- FDIVActions
- .scalarize(0)
- .clampScalar(0, ST.has16BitInsts() ? F16 : F32, F64);
+ TrigActions.scalarize(0).clampScalar(0, ST.has16BitInsts() ? F16 : F32, F64);
+
+ FDIVActions.scalarize(0).clampScalar(0, ST.has16BitInsts() ? F16 : F32, F64);
getActionDefinitionsBuilder({G_FNEG, G_FABS})
- .legalFor(FPTypesPK16)
- .clampMaxNumElementsStrict(0, F16, 2)
- .scalarize(0)
- .clampScalar(0, F16, F64);
+ .legalFor(FPTypesPK16)
+ .clampMaxNumElementsStrict(0, F16, 2)
+ .scalarize(0)
+ .clampScalar(0, F16, F64);
if (ST.has16BitInsts()) {
getActionDefinitionsBuilder(G_FSQRT)
- .legalFor({F16})
- .customFor({F32, F64})
- .scalarize(0)
- .unsupported();
+ .legalFor({F16})
+ .customFor({F32, F64})
+ .scalarize(0)
+ .unsupported();
getActionDefinitionsBuilder(G_FFLOOR)
- .legalFor({F32, F64, F16})
- .scalarize(0)
- .clampScalar(0, F16, F64);
+ .legalFor({F32, F64, F16})
+ .scalarize(0)
+ .clampScalar(0, F16, F64);
getActionDefinitionsBuilder({G_FLDEXP, G_STRICT_FLDEXP})
- .legalFor({{F32, I32}, {F64, I32}, {F16, I16}})
- .scalarize(0)
- .maxScalarIf(typeIs(0, F16), 1, I16)
- .clampScalar(1, I32, I32)
- .lower();
+ .legalFor({{F32, I32}, {F64, I32}, {F16, I16}})
+ .scalarize(0)
+ .maxScalarIf(typeIs(0, F16), 1, I16)
+ .clampScalar(1, I32, I32)
+ .lower();
getActionDefinitionsBuilder(G_FFREXP)
- .customFor({{F32, F32}, {F64, F32}, {F16, F16}, {F16, F32}})
- .scalarize(0)
- .lower();
+ .customFor({{F32, F32}, {F64, F32}, {F16, F16}, {F16, F32}})
+ .scalarize(0)
+ .lower();
} else {
getActionDefinitionsBuilder(G_FSQRT)
- .customFor({F32, F64, F16})
- .scalarize(0)
- .unsupported();
-
+ .customFor({F32, F64, F16})
+ .scalarize(0)
+ .unsupported();
if (ST.hasFractBug()) {
getActionDefinitionsBuilder(G_FFLOOR)
- .customFor({F64})
- .legalFor({F32, F64})
- .scalarize(0)
- .clampScalar(0, F32, F64);
+ .customFor({F64})
+ .legalFor({F32, F64})
+ .scalarize(0)
+ .clampScalar(0, F32, F64);
} else {
getActionDefinitionsBuilder(G_FFLOOR)
- .legalFor({F32, F64})
- .scalarize(0)
- .clampScalar(0, F32, F64);
+ .legalFor({F32, F64})
+ .scalarize(0)
+ .clampScalar(0, F32, F64);
}
getActionDefinitionsBuilder({G_FLDEXP, G_STRICT_FLDEXP})
- .legalFor({{F32, I32}, {F64, I32}})
- .scalarize(0)
- .clampScalar(0, F32, F64)
- .clampScalar(1, I32, I32)
- .lower();
+ .legalFor({{F32, I32}, {F64, I32}})
+ .scalarize(0)
+ .clampScalar(0, F32, F64)
+ .clampScalar(1, I32, I32)
+ .lower();
getActionDefinitionsBuilder(G_FFREXP)
- .customFor({{F32, F32}, {F64, F32}})
- .scalarize(0)
- .minScalar(0, I32)
- .clampScalar(1, I32, I32)
- .lower();
+ .customFor({{F32, F32}, {F64, F32}})
+ .scalarize(0)
+ .minScalar(0, I32)
+ .clampScalar(1, I32, I32)
+ .lower();
}
auto &FPTruncActions = getActionDefinitionsBuilder(G_FPTRUNC);
@@ -1123,29 +1083,27 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
FPTruncActions.scalarize(0).lower();
getActionDefinitionsBuilder(G_FPEXT)
- .legalFor({{F64, F32}, {F32, F16}})
- .customFor({{F32, BF16}})
- .narrowScalarFor({{I64, I16}}, changeTo(0, I32))
- .scalarize(0);
+ .legalFor({{F64, F32}, {F32, F16}})
+ .customFor({{F32, BF16}})
+ .narrowScalarFor({{I64, I16}}, changeTo(0, I32))
+ .scalarize(0);
auto &FSubActions = getActionDefinitionsBuilder({G_FSUB, G_STRICT_FSUB});
if (ST.has16BitInsts()) {
FSubActions
- // Use actual fsub instruction
- .legalFor({F32, F16})
- // Must use fadd + fneg
- .lowerFor({F64, V2F16});
+ // Use actual fsub instruction
+ .legalFor({F32, F16})
+ // Must use fadd + fneg
+ .lowerFor({F64, V2F16});
} else {
FSubActions
- // Use actual fsub instruction
- .legalFor({F32})
- // Must use fadd + fneg
- .lowerFor({F64, F16, V2F16});
+ // Use actual fsub instruction
+ .legalFor({F32})
+ // Must use fadd + fneg
+ .lowerFor({F64, F16, V2F16});
}
- FSubActions
- .scalarize(0)
- .clampScalar(0, F32, F64);
+ FSubActions.scalarize(0).clampScalar(0, F32, F64);
// Whether this is legal depends on the floating point mode for the function.
auto &FMad = getActionDefinitionsBuilder(G_FMAD);
@@ -1162,28 +1120,27 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
if (ST.has16BitInsts()) {
FRem.customFor({F16, F32, F64});
} else {
- FRem.minScalar(0, F32)
- .customFor({F32, F64});
+ FRem.minScalar(0, F32).customFor({F32, F64});
}
FRem.scalarize(0);
// TODO: Do we need to clamp maximum bitwidth?
getActionDefinitionsBuilder(G_TRUNC)
- .legalIf(isScalar(0))
- .legalFor({{V2F16, V2F32}})
- .clampMaxNumElements(0, F16, 2)
- // Avoid scalarizing in cases that should be truly illegal. In unresolvable
- // situations (like an invalid implicit use), we don't want to infinite loop
- // in the legalizer.
- .fewerElementsIf(elementTypeIsLegal(0), LegalizeMutations::scalarize(0))
- .alwaysLegal();
+ .legalIf(isScalar(0))
+ .legalFor({{V2F16, V2F32}})
+ .clampMaxNumElements(0, F16, 2)
+ // Avoid scalarizing in cases that should be truly illegal. In
+ // unresolvable situations (like an invalid implicit use), we don't want
+ // to infinite loop in the legalizer.
+ .fewerElementsIf(elementTypeIsLegal(0), LegalizeMutations::scalarize(0))
+ .alwaysLegal();
getActionDefinitionsBuilder({G_SEXT, G_ZEXT, G_ANYEXT})
- .legalFor({{I64, I32}, {I32, I16}, {I64, I16},
- {I32, I1}, {I64, I1}, {I16, I1}})
- .scalarize(0)
- .clampScalar(0, I32, I64)
- .widenScalarToNextPow2(1, 32);
+ .legalFor(
+ {{I64, I32}, {I32, I16}, {I64, I16}, {I32, I1}, {I64, I1}, {I16, I1}})
+ .scalarize(0)
+ .clampScalar(0, I32, I64)
+ .widenScalarToNextPow2(1, 32);
// TODO: Split s1->s64 during regbankselect for VALU.
auto &IToFP = getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
@@ -1193,23 +1150,20 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
if (ST.has16BitInsts())
IToFP.legalFor({{F16, I16}});
IToFP.clampScalar(1, I32, I64)
- .minScalar(0, I32)
- .scalarize(0)
- .widenScalarToNextPow2(1);
+ .minScalar(0, I32)
+ .scalarize(0)
+ .widenScalarToNextPow2(1);
auto &FPToI = getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
- .legalFor({{I32, F32}, {I32, F64}, {I32, F16}})
- .customFor({{I64, F32}, {I64, F64}})
- .narrowScalarFor({{I64, I16}}, changeTo(0, I32));
+ .legalFor({{I32, F32}, {I32, F64}, {I32, F16}})
+ .customFor({{I64, F32}, {I64, F64}})
+ .narrowScalarFor({{I64, I16}}, changeTo(0, I32));
if (ST.has16BitInsts())
FPToI.legalFor({{I16, F16}});
else
FPToI.minScalar(1, I32);
- FPToI.minScalar(0, I32)
- .widenScalarToNextPow2(0, 32)
- .scalarize(0)
- .lower();
+ FPToI.minScalar(0, I32).widenScalarToNextPow2(0, 32).scalarize(0).lower();
getActionDefinitionsBuilder({G_LROUND, G_LLROUND})
.clampScalar(0, F16, F64)
@@ -1259,35 +1213,34 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.scalarSameSizeAs(1, 0);
getActionDefinitionsBuilder(G_PTRMASK)
- .legalIf(all(sameSize(0, 1), typeInSet(1, {I64, I32})))
- .scalarSameSizeAs(1, 0)
- .scalarize(0);
+ .legalIf(all(sameSize(0, 1), typeInSet(1, {I64, I32})))
+ .scalarSameSizeAs(1, 0)
+ .scalarize(0);
auto &CmpBuilder =
- getActionDefinitionsBuilder(G_ICMP)
- // The compare output type differs based on the register bank of the output,
- // so make both s1 and s32 legal.
- //
- // Scalar compares producing output in scc will be promoted to s32, as that
- // is the allocatable register type that will be needed for the copy from
- // scc. This will be promoted during RegBankSelect, and we assume something
- // before that won't try to use s32 result types.
- //
- // Vector compares producing an output in vcc/SGPR will use s1 in VCC reg
- // bank.
- .legalForCartesianProduct(
- {I1}, {I32, I64, GlobalPtr, LocalPtr, ConstantPtr, PrivatePtr, FlatPtr})
- .legalForCartesianProduct(
- {I32}, {I32, I64, GlobalPtr, LocalPtr, ConstantPtr, PrivatePtr, FlatPtr});
+ getActionDefinitionsBuilder(G_ICMP)
+ // The compare output type differs based on the register bank of the
+ // output, so make both s1 and s32 legal.
+ //
+ // Scalar compares producing output in scc will be promoted to s32, as
+ // that is the allocatable register type that will be needed for the
+ // copy from scc. This will be promoted during RegBankSelect, and we
+ // assume something before that won't try to use s32 result types.
+ //
+ // Vector compares producing an output in vcc/SGPR will use s1 in VCC
+ // reg bank.
+ .legalForCartesianProduct({I1}, {I32, I64, GlobalPtr, LocalPtr,
+ ConstantPtr, PrivatePtr, FlatPtr})
+ .legalForCartesianProduct({I32}, {I32, I64, GlobalPtr, LocalPtr,
+ ConstantPtr, PrivatePtr, FlatPtr});
if (ST.has16BitInsts()) {
CmpBuilder.legalFor({{I1, I16}});
}
- CmpBuilder
- .widenScalarToNextPow2(1)
- .clampScalar(1, I32, I64)
- .scalarize(0)
- .legalIf(all(typeInSet(0, {I1, I32}), isPointer(1)));
+ CmpBuilder.widenScalarToNextPow2(1)
+ .clampScalar(1, I32, I64)
+ .scalarize(0)
+ .legalIf(all(typeInSet(0, {I1, I32}), isPointer(1)));
auto &FCmpBuilder =
getActionDefinitionsBuilder(G_FCMP).legalForCartesianProduct(
@@ -1296,10 +1249,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
if (ST.hasSALUFloatInsts())
FCmpBuilder.legalForCartesianProduct({I32}, {F16, F32});
- FCmpBuilder
- .widenScalarToNextPow2(1)
- .clampScalar(1, F32, F64)
- .scalarize(0);
+ FCmpBuilder.widenScalarToNextPow2(1).clampScalar(1, F32, F64).scalarize(0);
// FIXME: fpow has a selection pattern that should move to custom lowering.
auto &ExpOps = getActionDefinitionsBuilder(G_FPOW);
@@ -1307,12 +1257,11 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
ExpOps.customFor({{F32}, {F16}});
else
ExpOps.customFor({F32});
- ExpOps.clampScalar(0, MinScalarFPTy, F32)
- .scalarize(0);
+ ExpOps.clampScalar(0, MinScalarFPTy, F32).scalarize(0);
getActionDefinitionsBuilder(G_FPOWI)
- .clampScalar(0, MinScalarFPTy, F32)
- .lower();
+ .clampScalar(0, MinScalarFPTy, F32)
+ .lower();
auto &Log2Ops = getActionDefinitionsBuilder({G_FLOG2, G_FEXP2});
Log2Ops.customFor({F32});
@@ -1326,17 +1275,16 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
auto &LogOps =
getActionDefinitionsBuilder({G_FLOG, G_FLOG10, G_FEXP, G_FEXP10});
LogOps.customFor({F32, F16});
- LogOps.clampScalar(0, MinScalarFPTy, F32)
- .scalarize(0);
+ LogOps.clampScalar(0, MinScalarFPTy, F32).scalarize(0);
// The 64-bit versions produce 32-bit results, but only on the SALU.
getActionDefinitionsBuilder(G_CTPOP)
- .legalFor({{I32, I32}, {I32, I64}})
- .clampScalar(0, I32, I32)
- .widenScalarToNextPow2(1, 32)
- .clampScalar(1, I32, I64)
- .scalarize(0)
- .widenScalarToNextPow2(0, 32);
+ .legalFor({{I32, I32}, {I32, I64}})
+ .clampScalar(0, I32, I32)
+ .widenScalarToNextPow2(1, 32)
+ .clampScalar(1, I32, I64)
+ .scalarize(0)
+ .widenScalarToNextPow2(0, 32);
// If no 16 bit instr is available, lower into different instructions.
if (ST.has16BitInsts())
@@ -1357,12 +1305,12 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
// instructions expect. The hardware produces -1, but these produce the
// bitwidth.
getActionDefinitionsBuilder({G_CTLZ, G_CTTZ})
- .scalarize(0)
- .clampScalar(0, I32, I32)
- .clampScalar(1, I32, I64)
- .widenScalarToNextPow2(0, 32)
- .widenScalarToNextPow2(1, 32)
- .custom();
+ .scalarize(0)
+ .clampScalar(0, I32, I32)
+ .clampScalar(1, I32, I64)
+ .widenScalarToNextPow2(0, 32)
+ .widenScalarToNextPow2(1, 32)
+ .custom();
// The 64-bit versions produce 32-bit results, but only on the SALU.
getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF)
@@ -1385,55 +1333,55 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
// S64 is only legal on SALU, and needs to be broken into 32-bit elements in
// RegBankSelect.
getActionDefinitionsBuilder(G_BITREVERSE)
- .legalFor({I32, I64})
- .clampScalar(0, I32, I64)
- .scalarize(0)
- .widenScalarToNextPow2(0);
+ .legalFor({I32, I64})
+ .clampScalar(0, I32, I64)
+ .scalarize(0)
+ .widenScalarToNextPow2(0);
if (ST.has16BitInsts()) {
getActionDefinitionsBuilder(G_BSWAP)
- .legalFor({I16, I32, V2I16})
- .clampMaxNumElementsStrict(0, I16, 2)
- // FIXME: Fixing non-power-of-2 before clamp is workaround for
- // narrowScalar limitation.
- .widenScalarToNextPow2(0)
- .clampScalar(0, I16, I32)
- .scalarize(0);
+ .legalFor({I16, I32, V2I16})
+ .clampMaxNumElementsStrict(0, I16, 2)
+ // FIXME: Fixing non-power-of-2 before clamp is workaround for
+ // narrowScalar limitation.
+ .widenScalarToNextPow2(0)
+ .clampScalar(0, I16, I32)
+ .scalarize(0);
if (ST.hasVOP3PInsts()) {
getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX, G_ABS})
- .legalFor({I32, I16, V2I16})
- .clampMaxNumElements(0, I16, 2)
- .minScalar(0, I16)
- .widenScalarToNextPow2(0)
- .scalarize(0)
- .lower();
+ .legalFor({I32, I16, V2I16})
+ .clampMaxNumElements(0, I16, 2)
+ .minScalar(0, I16)
+ .widenScalarToNextPow2(0)
+ .scalarize(0)
+ .lower();
} else {
getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX, G_ABS})
- .legalFor({I32, I16})
- .widenScalarToNextPow2(0)
- .minScalar(0, I16)
- .scalarize(0)
- .lower();
+ .legalFor({I32, I16})
+ .widenScalarToNextPow2(0)
+ .minScalar(0, I16)
+ .scalarize(0)
+ .lower();
}
} else {
// TODO: Should have same legality without v_perm_b32
getActionDefinitionsBuilder(G_BSWAP)
- .legalFor({I32})
- .lowerIf(scalarNarrowerThan(0, 32))
- // FIXME: Fixing non-power-of-2 before clamp is workaround for
- // narrowScalar limitation.
- .widenScalarToNextPow2(0)
- .maxScalar(0, I32)
- .scalarize(0)
- .lower();
+ .legalFor({I32})
+ .lowerIf(scalarNarrowerThan(0, 32))
+ // FIXME: Fixing non-power-of-2 before clamp is workaround for
+ // narrowScalar limitation.
+ .widenScalarToNextPow2(0)
+ .maxScalar(0, I32)
+ .scalarize(0)
+ .lower();
getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX, G_ABS})
- .legalFor({I32})
- .minScalar(0, I32)
- .widenScalarToNextPow2(0)
- .scalarize(0)
- .lower();
+ .legalFor({I32})
+ .minScalar(0, I32)
+ .widenScalarToNextPow2(0)
+ .scalarize(0)
+ .lower();
}
getActionDefinitionsBuilder(G_INTTOPTR)
@@ -1518,50 +1466,51 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
auto &Actions = getActionDefinitionsBuilder(Op);
// Explicitly list some common cases.
// TODO: Does this help compile time at all?
- Actions.legalForTypesWithMemDesc({{I32, GlobalPtr, I32, GlobalAlign32},
- {F32, GlobalPtr, F32, GlobalAlign32},
-
- {V2I32, GlobalPtr, V2I32, GlobalAlign32},
- {V2F32, GlobalPtr, V2F32, GlobalAlign32},
-
- {V4I32, GlobalPtr, V4I32, GlobalAlign32},
- {V4F32, GlobalPtr, V4F32, GlobalAlign32},
-
- {I64, GlobalPtr, I64, GlobalAlign32},
- {F64, GlobalPtr, F64, GlobalAlign32},
-
- {V2I64, GlobalPtr, V2I64, GlobalAlign32},
- {V2F64, GlobalPtr, V2F64, GlobalAlign32},
- {V2I16, GlobalPtr, V2I16, GlobalAlign32},
- {V2F16, GlobalPtr, V2F16, GlobalAlign32},
- {V2BF16, GlobalPtr, V2BF16, GlobalAlign32},
-
- {I32, GlobalPtr, I8, GlobalAlign8},
- {I32, GlobalPtr, I16, GlobalAlign16},
-
- {I32, LocalPtr, I32, 32},
- {F32, LocalPtr, F32, 32},
- {I64, LocalPtr, I64, 32},
- {F64, LocalPtr, F64, 32},
- {V2I32, LocalPtr, V2I32, 32},
- {V2F32, LocalPtr, V2F32, 32},
- {I32, LocalPtr, I8, 8},
- {I32, LocalPtr, I16, 16},
- {V2I16, LocalPtr, I32, 32},
-
- {I32, PrivatePtr, I32, 32},
- {F32, PrivatePtr, F32, 32},
- {I32, PrivatePtr, I8, 8},
- {I32, PrivatePtr, I16, 16},
- {V2I16, PrivatePtr, I32, 32},
-
- {I32, ConstantPtr, I32, GlobalAlign32},
- {F32, ConstantPtr, F32, GlobalAlign32},
- {V2I32, ConstantPtr, V2I32, GlobalAlign32},
- {V4I32, ConstantPtr, V4I32, GlobalAlign32},
- {I64, ConstantPtr, I64, GlobalAlign32},
- {F64, ConstantPtr, F64, GlobalAlign32},
- {V2I32, ConstantPtr, V2I32, GlobalAlign32}});
+ Actions.legalForTypesWithMemDesc(
+ {{I32, GlobalPtr, I32, GlobalAlign32},
+ {F32, GlobalPtr, F32, GlobalAlign32},
+
+ {V2I32, GlobalPtr, V2I32, GlobalAlign32},
+ {V2F32, GlobalPtr, V2F32, GlobalAlign32},
+
+ {V4I32, GlobalPtr, V4I32, GlobalAlign32},
+ {V4F32, GlobalPtr, V4F32, GlobalAlign32},
+
+ {I64, GlobalPtr, I64, GlobalAlign32},
+ {F64, GlobalPtr, F64, GlobalAlign32},
+
+ {V2I64, GlobalPtr, V2I64, GlobalAlign32},
+ {V2F64, GlobalPtr, V2F64, GlobalAlign32},
+ {V2I16, GlobalPtr, V2I16, GlobalAlign32},
+ {V2F16, GlobalPtr, V2F16, GlobalAlign32},
+ {V2BF16, GlobalPtr, V2BF16, GlobalAlign32},
+
+ {I32, GlobalPtr, I8, GlobalAlign8},
+ {I32, GlobalPtr, I16, GlobalAlign16},
+
+ {I32, LocalPtr, I32, 32},
+ {F32, LocalPtr, F32, 32},
+ {I64, LocalPtr, I64, 32},
+ {F64, LocalPtr, F64, 32},
+ {V2I32, LocalPtr, V2I32, 32},
+ {V2F32, LocalPtr, V2F32, 32},
+ {I32, LocalPtr, I8, 8},
+ {I32, LocalPtr, I16, 16},
+ {V2I16, LocalPtr, I32, 32},
+
+ {I32, PrivatePtr, I32, 32},
+ {F32, PrivatePtr, F32, 32},
+ {I32, PrivatePtr, I8, 8},
+ {I32, PrivatePtr, I16, 16},
+ {V2I16, PrivatePtr, I32, 32},
+
+ {I32, ConstantPtr, I32, GlobalAlign32},
+ {F32, ConstantPtr, F32, GlobalAlign32},
+ {V2I32, ConstantPtr, V2I32, GlobalAlign32},
+ {V4I32, ConstantPtr, V4I32, GlobalAlign32},
+ {I64, ConstantPtr, I64, GlobalAlign32},
+ {F64, ConstantPtr, F64, GlobalAlign32},
+ {V2I32, ConstantPtr, V2I32, GlobalAlign32}});
Actions.legalIf(
[=](const LegalityQuery &Query) -> bool {
return isLoadStoreLegal(ST, Query);
@@ -1695,27 +1644,27 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
// May need relegalization for the scalars.
return std::pair(0, EltTy);
})
- .minScalar(0, I32)
- .narrowScalarIf(isWideScalarExtLoadTruncStore(0), changeTo(0, I32))
- .widenScalarToNextPow2(0)
- .moreElementsIf(vectorSmallerThan(0, 32), moreEltsToNext32Bit(0))
- .lower();
+ .minScalar(0, I32)
+ .narrowScalarIf(isWideScalarExtLoadTruncStore(0), changeTo(0, I32))
+ .widenScalarToNextPow2(0)
+ .moreElementsIf(vectorSmallerThan(0, 32), moreEltsToNext32Bit(0))
+ .lower();
}
// FIXME: Unaligned accesses not lowered.
- auto &ExtLoads = getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
- .legalForTypesWithMemDesc({{I32, GlobalPtr, I8, 8},
- {I32, GlobalPtr, I16, 2 * 8},
- {I32, LocalPtr, I8, 8},
- {I32, LocalPtr, I16, 16},
- {I32, PrivatePtr, I8, 8},
- {I32, PrivatePtr, I16, 16},
- {I32, ConstantPtr, I8, 8},
- {I32, ConstantPtr, I16, 2 * 8}})
- .legalIf(
- [=](const LegalityQuery &Query) -> bool {
- return isLoadStoreLegal(ST, Query);
- });
+ auto &ExtLoads =
+ getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
+ .legalForTypesWithMemDesc({{I32, GlobalPtr, I8, 8},
+ {I32, GlobalPtr, I16, 2 * 8},
+ {I32, LocalPtr, I8, 8},
+ {I32, LocalPtr, I16, 16},
+ {I32, PrivatePtr, I8, 8},
+ {I32, PrivatePtr, I16, 16},
+ {I32, ConstantPtr, I8, 8},
+ {I32, ConstantPtr, I16, 2 * 8}})
+ .legalIf([=](const LegalityQuery &Query) -> bool {
+ return isLoadStoreLegal(ST, Query);
+ });
if (ST.hasFlatAddressSpace()) {
ExtLoads.legalForTypesWithMemDesc(
@@ -1729,18 +1678,20 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
// inserting addrspacecasts.
ExtLoads.customIf(typeIs(1, Constant32Ptr));
- ExtLoads.clampScalar(0, I32, I32)
- .widenScalarToNextPow2(0)
- .lower();
-
- auto &Atomics = getActionDefinitionsBuilder(
- {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB,
- G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR,
- G_ATOMICRMW_MAX, G_ATOMICRMW_MIN, G_ATOMICRMW_UMAX,
- G_ATOMICRMW_UMIN, G_ATOMICRMW_UINC_WRAP, G_ATOMICRMW_UDEC_WRAP})
- .legalFor({{I32, GlobalPtr}, {I32, LocalPtr},
- {I64, GlobalPtr}, {I64, LocalPtr},
- {I32, RegionPtr}, {I64, RegionPtr}});
+ ExtLoads.clampScalar(0, I32, I32).widenScalarToNextPow2(0).lower();
+
+ auto &Atomics =
+ getActionDefinitionsBuilder(
+ {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND,
+ G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MAX, G_ATOMICRMW_MIN,
+ G_ATOMICRMW_UMAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_UINC_WRAP,
+ G_ATOMICRMW_UDEC_WRAP})
+ .legalFor({{I32, GlobalPtr},
+ {I32, LocalPtr},
+ {I64, GlobalPtr},
+ {I64, LocalPtr},
+ {I32, RegionPtr},
+ {I64, RegionPtr}});
if (ST.hasFlatAddressSpace()) {
Atomics.legalFor({{I32, FlatPtr}, {I64, FlatPtr}});
}
@@ -1763,11 +1714,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
// These are legal with some caveats, and should have undergone expansion in
// the IR in most situations
// TODO: Move atomic expansion into legalizer
- Atomic.legalFor({
- {F32, GlobalPtr},
- {F64, GlobalPtr},
- {F64, FlatPtr}
- });
+ Atomic.legalFor({{F32, GlobalPtr}, {F64, GlobalPtr}, {F64, FlatPtr}});
}
if (ST.hasAtomicBufferGlobalPkAddF16NoRtnInsts() ||
@@ -1797,16 +1744,35 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
// BUFFER/FLAT_ATOMIC_CMP_SWAP on GCN GPUs needs input marshalling, and output
// demarshalling
getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
- .customFor({{I32, GlobalPtr}, {I64, GlobalPtr},
- {I32, FlatPtr}, {I64, FlatPtr}})
- .legalFor({{I32, LocalPtr}, {I64, LocalPtr},
- {I32, RegionPtr}, {I64, RegionPtr}});
+ .customFor(
+ {{I32, GlobalPtr}, {I64, GlobalPtr}, {I32, FlatPtr}, {I64, FlatPtr}})
+ .legalFor({{I32, LocalPtr},
+ {I64, LocalPtr},
+ {I32, RegionPtr},
+ {I64, RegionPtr}});
// TODO: Pointer types, any 32-bit or 64-bit vector
// Condition should be s32 for scalar, s1 for vector.
getActionDefinitionsBuilder(G_SELECT)
- .legalForCartesianProduct({I16, F16, BF16, I32, F32, I64, F64, V2I32, V2F32, V2I16, V2F16, V2BF16, V4I16, V4F16, V4BF16, GlobalPtr,
- LocalPtr, FlatPtr, PrivatePtr,
+ .legalForCartesianProduct({I16,
+ F16,
+ BF16,
+ I32,
+ F32,
+ I64,
+ F64,
+ V2I32,
+ V2F32,
+ V2I16,
+ V2F16,
+ V2BF16,
+ V4I16,
+ V4F16,
+ V4BF16,
+ GlobalPtr,
+ LocalPtr,
+ FlatPtr,
+ PrivatePtr,
LLT::fixed_vector(2, LocalPtr),
LLT::fixed_vector(2, PrivatePtr)},
{I1, I32})
@@ -1824,33 +1790,33 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
// TODO: Only the low 4/5/6 bits of the shift amount are observed, so we can
// be more flexible with the shift amount type.
auto &Shifts = getActionDefinitionsBuilder({G_SHL, G_LSHR, G_ASHR})
- .legalFor({{I32, I32}, {I64, I32}});
+ .legalFor({{I32, I32}, {I64, I32}});
if (ST.has16BitInsts()) {
if (ST.hasVOP3PInsts()) {
Shifts.legalFor({{I16, I16}, {V2I16, V2I16}})
- .clampMaxNumElements(0, I16, 2);
+ .clampMaxNumElements(0, I16, 2);
} else
Shifts.legalFor({{I16, I16}});
// TODO: Support 16-bit shift amounts for all types
Shifts.widenScalarIf(
- [=](const LegalityQuery &Query) {
- // Use 16-bit shift amounts for any 16-bit shift. Otherwise we want a
- // 32-bit amount.
- const LLT ValTy = Query.Types[0];
- const LLT AmountTy = Query.Types[1];
- return ValTy.getSizeInBits() <= 16 &&
- AmountTy.getSizeInBits() < 16;
- }, changeTo(1, I16));
+ [=](const LegalityQuery &Query) {
+ // Use 16-bit shift amounts for any 16-bit shift. Otherwise we want a
+ // 32-bit amount.
+ const LLT ValTy = Query.Types[0];
+ const LLT AmountTy = Query.Types[1];
+ return ValTy.getSizeInBits() <= 16 && AmountTy.getSizeInBits() < 16;
+ },
+ changeTo(1, I16));
Shifts.maxScalarIf(typeIs(0, I16), 1, I16);
Shifts.clampScalar(1, I32, I32);
Shifts.widenScalarToNextPow2(0, 16);
Shifts.clampScalar(0, I16, I64);
getActionDefinitionsBuilder({G_SSHLSAT, G_USHLSAT})
- .minScalar(0, I16)
- .scalarize(0)
- .lower();
+ .minScalar(0, I16)
+ .scalarize(0)
+ .lower();
} else {
// Make sure we legalize the shift amount type first, as the general
// expansion for the shifted type will produce much worse code if it hasn't
@@ -1860,9 +1826,9 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
Shifts.clampScalar(0, I32, I64);
getActionDefinitionsBuilder({G_SSHLSAT, G_USHLSAT})
- .minScalar(0, I32)
- .scalarize(0)
- .lower();
+ .minScalar(0, I32)
+ .scalarize(0)
+ .lower();
}
Shifts.scalarize(0);
@@ -1872,7 +1838,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
unsigned IdxTypeIdx = 2;
getActionDefinitionsBuilder(Op)
- .customIf([=](const LegalityQuery &Query) {
+ .customIf([=](const LegalityQuery &Query) {
const LLT EltTy = Query.Types[EltTypeIdx];
const LLT VecTy = Query.Types[VecTypeIdx];
const LLT IdxTy = Query.Types[IdxTypeIdx];
@@ -1893,37 +1859,38 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
IdxTy.getSizeInBits() == 32 &&
isLegalVecType;
})
- .bitcastIf(all(sizeIsMultipleOf32(VecTypeIdx), scalarOrEltNarrowerThan(VecTypeIdx, 32)),
- bitcastToVectorElement32(VecTypeIdx))
- //.bitcastIf(vectorSmallerThan(1, 32), bitcastToScalar(1))
- .bitcastIf(
- all(sizeIsMultipleOf32(VecTypeIdx), scalarOrEltWiderThan(VecTypeIdx, 64)),
- [=](const LegalityQuery &Query) {
- // For > 64-bit element types, try to turn this into a 64-bit
- // element vector since we may be able to do better indexing
- // if this is scalar. If not, fall back to 32.
- const LLT EltTy = Query.Types[EltTypeIdx];
- const LLT VecTy = Query.Types[VecTypeIdx];
- const unsigned DstEltSize = EltTy.getSizeInBits();
- const unsigned VecSize = VecTy.getSizeInBits();
-
- const unsigned TargetEltSize = DstEltSize % 64 == 0 ? 64 : 32;
- return std::pair(
- VecTypeIdx,
- LLT::fixed_vector(VecSize / TargetEltSize, TargetEltSize));
- })
- .clampScalar(EltTypeIdx, I32, I64)
- .clampScalar(VecTypeIdx, I32, I64)
- .clampScalar(IdxTypeIdx, I32, I32)
- .clampMaxNumElements(VecTypeIdx, I32, 32)
- .clampMaxNumElements(VecTypeIdx, F32, 32)
- // TODO: Clamp elements for 64-bit vectors?
- .moreElementsIf(
- isIllegalRegisterType(VecTypeIdx),
- moreElementsToNextExistingRegClass(VecTypeIdx))
- // It should only be necessary with variable indexes.
- // As a last resort, lower to the stack
- .lower();
+ .bitcastIf(all(sizeIsMultipleOf32(VecTypeIdx),
+ scalarOrEltNarrowerThan(VecTypeIdx, 32)),
+ bitcastToVectorElement32(VecTypeIdx))
+ //.bitcastIf(vectorSmallerThan(1, 32), bitcastToScalar(1))
+ .bitcastIf(all(sizeIsMultipleOf32(VecTypeIdx),
+ scalarOrEltWiderThan(VecTypeIdx, 64)),
+ [=](const LegalityQuery &Query) {
+ // For > 64-bit element types, try to turn this into a
+ // 64-bit element vector since we may be able to do better
+ // indexing if this is scalar. If not, fall back to 32.
+ const LLT EltTy = Query.Types[EltTypeIdx];
+ const LLT VecTy = Query.Types[VecTypeIdx];
+ const unsigned DstEltSize = EltTy.getSizeInBits();
+ const unsigned VecSize = VecTy.getSizeInBits();
+
+ const unsigned TargetEltSize =
+ DstEltSize % 64 == 0 ? 64 : 32;
+ return std::pair(VecTypeIdx,
+ LLT::fixed_vector(VecSize / TargetEltSize,
+ TargetEltSize));
+ })
+ .clampScalar(EltTypeIdx, I32, I64)
+ .clampScalar(VecTypeIdx, I32, I64)
+ .clampScalar(IdxTypeIdx, I32, I32)
+ .clampMaxNumElements(VecTypeIdx, I32, 32)
+ .clampMaxNumElements(VecTypeIdx, F32, 32)
+ // TODO: Clamp elements for 64-bit vectors?
+ .moreElementsIf(isIllegalRegisterType(VecTypeIdx),
+ moreElementsToNextExistingRegClass(VecTypeIdx))
+ // It should only be necessary with variable indexes.
+ // As a last resort, lower to the stack
+ .lower();
}
getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
@@ -1938,76 +1905,75 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
// FIXME: Doesn't handle extract of illegal sizes.
getActionDefinitionsBuilder(Op)
- .lowerIf(all(typeIs(LitTyIdx, I16), sizeIs(BigTyIdx, 32)))
- .lowerIf([=](const LegalityQuery &Query) {
+ .lowerIf(all(typeIs(LitTyIdx, I16), sizeIs(BigTyIdx, 32)))
+ .lowerIf([=](const LegalityQuery &Query) {
// Sub-vector(or single element) insert and extract.
// TODO: verify immediate offset here since lower only works with
// whole elements.
const LLT BigTy = Query.Types[BigTyIdx];
return BigTy.isVector();
})
- // FIXME: Multiples of 16 should not be legal.
- .legalIf([=](const LegalityQuery &Query) {
+ // FIXME: Multiples of 16 should not be legal.
+ .legalIf([=](const LegalityQuery &Query) {
const LLT BigTy = Query.Types[BigTyIdx];
const LLT LitTy = Query.Types[LitTyIdx];
return (BigTy.getSizeInBits() % 32 == 0) &&
(LitTy.getSizeInBits() % 16 == 0);
})
- .widenScalarIf(
- [=](const LegalityQuery &Query) {
- const LLT BigTy = Query.Types[BigTyIdx];
- return (BigTy.getScalarSizeInBits() < 16);
- },
- LegalizeMutations::widenScalarOrEltToNextPow2(BigTyIdx, 16))
- .widenScalarIf(
- [=](const LegalityQuery &Query) {
- const LLT LitTy = Query.Types[LitTyIdx];
- return (LitTy.getScalarSizeInBits() < 16);
- },
- LegalizeMutations::widenScalarOrEltToNextPow2(LitTyIdx, 16))
- .moreElementsIf(isSmallOddVector(BigTyIdx), oneMoreElement(BigTyIdx))
- .widenScalarToNextPow2(BigTyIdx, 32);
-
- }
-
- auto &BuildVector = getActionDefinitionsBuilder(G_BUILD_VECTOR)
- .legalForCartesianProduct(AllS32Vectors, {I32})
- .legalForCartesianProduct(AllS32Vectors, {F32})
- .legalForCartesianProduct(AllS64Vectors, {I64})
- .legalForCartesianProduct(AllS64Vectors, {F64})
- .clampNumElements(0, V16I32, V32I32)
- .clampNumElements(0, V2I64, V16I64)
- .fewerElementsIf(isWideVec16(0), changeElementCountTo(0, V2I16))
- .moreElementsIf(
- isIllegalRegisterType(0),
- moreElementsToNextExistingRegClass(0));
+ .widenScalarIf(
+ [=](const LegalityQuery &Query) {
+ const LLT BigTy = Query.Types[BigTyIdx];
+ return (BigTy.getScalarSizeInBits() < 16);
+ },
+ LegalizeMutations::widenScalarOrEltToNextPow2(BigTyIdx, 16))
+ .widenScalarIf(
+ [=](const LegalityQuery &Query) {
+ const LLT LitTy = Query.Types[LitTyIdx];
+ return (LitTy.getScalarSizeInBits() < 16);
+ },
+ LegalizeMutations::widenScalarOrEltToNextPow2(LitTyIdx, 16))
+ .moreElementsIf(isSmallOddVector(BigTyIdx), oneMoreElement(BigTyIdx))
+ .widenScalarToNextPow2(BigTyIdx, 32);
+ }
+
+ auto &BuildVector =
+ getActionDefinitionsBuilder(G_BUILD_VECTOR)
+ .legalForCartesianProduct(AllS32Vectors, {I32})
+ .legalForCartesianProduct(AllS32Vectors, {F32})
+ .legalForCartesianProduct(AllS64Vectors, {I64})
+ .legalForCartesianProduct(AllS64Vectors, {F64})
+ .clampNumElements(0, V16I32, V32I32)
+ .clampNumElements(0, V2I64, V16I64)
+ .fewerElementsIf(isWideVec16(0), changeElementCountTo(0, V2I16))
+ .moreElementsIf(isIllegalRegisterType(0),
+ moreElementsToNextExistingRegClass(0));
if (ST.hasScalarPackInsts()) {
BuildVector
- // FIXME: Should probably widen s1 vectors straight to s32
- .minScalarOrElt(0, I16)
- .minScalar(1, I16);
+ // FIXME: Should probably widen s1 vectors straight to s32
+ .minScalarOrElt(0, I16)
+ .minScalar(1, I16);
getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC)
- .legalFor({V2I16, I32})
- .lower();
+ .legalFor({V2I16, I32})
+ .lower();
} else {
BuildVector.customFor({V2I16, I16});
BuildVector.minScalarOrElt(0, I32);
getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC)
- .customFor({V2I16, I32})
- .lower();
+ .customFor({V2I16, I32})
+ .lower();
}
BuildVector.legalIf(isRegisterType(0));
// FIXME: Clamp maximum size
getActionDefinitionsBuilder(G_CONCAT_VECTORS)
- .legalIf(all(isRegisterType(0), isRegisterType(1)))
- .clampMaxNumElements(0, I32, 32)
- .clampMaxNumElements(1, I16, 2) // TODO: Make 4?
- .clampMaxNumElements(0, I16, 64);
+ .legalIf(all(isRegisterType(0), isRegisterType(1)))
+ .clampMaxNumElements(0, I32, 32)
+ .clampMaxNumElements(1, I16, 2) // TODO: Make 4?
+ .clampMaxNumElements(0, I16, 64);
getActionDefinitionsBuilder(G_SHUFFLE_VECTOR).lower();
@@ -2028,43 +1994,49 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
return false;
};
- auto &Builder = getActionDefinitionsBuilder(Op)
- .legalIf(all(isRegisterType(0), isRegisterType(1)))
- .lowerFor({{I16, V2I16}})
- .lowerIf([=](const LegalityQuery &Query) {
- const LLT BigTy = Query.Types[BigTyIdx];
- return BigTy.getSizeInBits() == 32;
- })
- // Try to widen to s16 first for small types.
- // TODO: Only do this on targets with legal s16 shifts
- .minScalarOrEltIf(scalarNarrowerThan(LitTyIdx, 16), LitTyIdx, I16)
- .widenScalarToNextPow2(LitTyIdx, /*Min*/ 16)
- .moreElementsIf(isSmallOddVector(BigTyIdx), oneMoreElement(BigTyIdx))
- .fewerElementsIf(all(typeIs(0, I16), vectorWiderThan(1, 32),
- elementTypeIs(1, I16)),
- changeTo(1, V2I16))
- // Clamp the little scalar to s8-s256 and make it a power of 2. It's not
- // worth considering the multiples of 64 since 2*192 and 2*384 are not
- // valid.
- .clampScalar(LitTyIdx, I32, I512)
- .widenScalarToNextPow2(LitTyIdx, /*Min*/ 32)
- // Break up vectors with weird elements into scalars
- .fewerElementsIf(
- [=](const LegalityQuery &Query) { return notValidElt(Query, LitTyIdx); },
- scalarize(0))
- .fewerElementsIf(
- [=](const LegalityQuery &Query) { return notValidElt(Query, BigTyIdx); },
- scalarize(1))
- .clampScalar(BigTyIdx, I32, MaxScalar);
+ auto &Builder =
+ getActionDefinitionsBuilder(Op)
+ .legalIf(all(isRegisterType(0), isRegisterType(1)))
+ .lowerFor({{I16, V2I16}})
+ .lowerIf([=](const LegalityQuery &Query) {
+ const LLT BigTy = Query.Types[BigTyIdx];
+ return BigTy.getSizeInBits() == 32;
+ })
+ // Try to widen to s16 first for small types.
+ // TODO: Only do this on targets with legal s16 shifts
+ .minScalarOrEltIf(scalarNarrowerThan(LitTyIdx, 16), LitTyIdx, I16)
+ .widenScalarToNextPow2(LitTyIdx, /*Min*/ 16)
+ .moreElementsIf(isSmallOddVector(BigTyIdx),
+ oneMoreElement(BigTyIdx))
+ .fewerElementsIf(all(typeIs(0, I16), vectorWiderThan(1, 32),
+ elementTypeIs(1, I16)),
+ changeTo(1, V2I16))
+ // Clamp the little scalar to s8-s256 and make it a power of 2. It's
+ // not worth considering the multiples of 64 since 2*192 and 2*384
+ // are not valid.
+ .clampScalar(LitTyIdx, I32, I512)
+ .widenScalarToNextPow2(LitTyIdx, /*Min*/ 32)
+ // Break up vectors with weird elements into scalars
+ .fewerElementsIf(
+ [=](const LegalityQuery &Query) {
+ return notValidElt(Query, LitTyIdx);
+ },
+ scalarize(0))
+ .fewerElementsIf(
+ [=](const LegalityQuery &Query) {
+ return notValidElt(Query, BigTyIdx);
+ },
+ scalarize(1))
+ .clampScalar(BigTyIdx, I32, MaxScalar);
if (Op == G_MERGE_VALUES) {
Builder.widenScalarIf(
- // TODO: Use 16-bit shifts if legal for 8-bit values?
- [=](const LegalityQuery &Query) {
- const LLT Ty = Query.Types[LitTyIdx];
- return Ty.getSizeInBits() < 32;
- },
- changeTo(LitTyIdx, I32));
+ // TODO: Use 16-bit shifts if legal for 8-bit values?
+ [=](const LegalityQuery &Query) {
+ const LLT Ty = Query.Types[LitTyIdx];
+ return Ty.getSizeInBits() < 32;
+ },
+ changeTo(LitTyIdx, I32));
}
Builder.widenScalarIf(
@@ -2091,15 +2063,16 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
// S64 is only legal on SALU, and needs to be broken into 32-bit elements in
// RegBankSelect.
- auto &SextInReg = getActionDefinitionsBuilder(G_SEXT_INREG)
- .legalFor({{I32}, {I64}});
+ auto &SextInReg =
+ getActionDefinitionsBuilder(G_SEXT_INREG).legalFor({{I32}, {I64}});
if (ST.hasVOP3PInsts()) {
- SextInReg.lowerFor({{V2I16}})
- // Prefer to reduce vector widths for 16-bit vectors before lowering, to
- // get more vector shift opportunities, since we'll get those when
- // expanded.
- .clampMaxNumElementsStrict(0, I16, 2);
+ SextInReg
+ .lowerFor({{V2I16}})
+ // Prefer to reduce vector widths for 16-bit vectors before lowering, to
+ // get more vector shift opportunities, since we'll get those when
+ // expanded.
+ .clampMaxNumElementsStrict(0, I16, 2);
} else if (ST.has16BitInsts()) {
SextInReg.lowerFor({{I32}, {I64}, {I16}});
} else {
@@ -2108,10 +2081,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
SextInReg.lowerFor({{I32}, {I64}});
}
- SextInReg
- .scalarize(0)
- .clampScalar(0, I32, I64)
- .lower();
+ SextInReg.scalarize(0).clampScalar(0, I32, I64).lower();
getActionDefinitionsBuilder({G_ROTR, G_ROTL})
.scalarize(0)
@@ -2119,26 +2089,25 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
// TODO: Only Try to form v2s16 with legal packed instructions.
getActionDefinitionsBuilder(G_FSHR)
- .legalFor({{I32, I32}})
- .lowerFor({{V2I16, V2I16}})
- .clampMaxNumElementsStrict(0, I16, 2)
- .scalarize(0)
- .lower();
-
- if (ST.hasVOP3PInsts()) {
- getActionDefinitionsBuilder(G_FSHL)
+ .legalFor({{I32, I32}})
.lowerFor({{V2I16, V2I16}})
.clampMaxNumElementsStrict(0, I16, 2)
.scalarize(0)
.lower();
+
+ if (ST.hasVOP3PInsts()) {
+ getActionDefinitionsBuilder(G_FSHL)
+ .lowerFor({{V2I16, V2I16}})
+ .clampMaxNumElementsStrict(0, I16, 2)
+ .scalarize(0)
+ .lower();
} else {
getActionDefinitionsBuilder(G_FSHL)
.scalarize(0)
.lower();
}
- getActionDefinitionsBuilder(G_READCYCLECOUNTER)
- .legalFor({I64});
+ getActionDefinitionsBuilder(G_READCYCLECOUNTER).legalFor({I64});
getActionDefinitionsBuilder(G_READSTEADYCOUNTER).legalFor({I64});
@@ -2456,8 +2425,7 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast(
// Extract low 32-bits of the pointer.
auto PtrLo32 = B.buildExtract(DstTy, Src, 0);
- auto CmpRes =
- B.buildICmp(CmpInst::ICMP_NE, I1, Src, FlatNull.getReg(0));
+ auto CmpRes = B.buildICmp(CmpInst::ICMP_NE, I1, Src, FlatNull.getReg(0));
B.buildSelect(Dst, CmpRes, PtrLo32, SegmentNull.getReg(0));
MI.eraseFromParent();
@@ -2494,8 +2462,7 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast(
auto SegmentNull = B.buildConstant(SrcTy, TM.getNullPointerValue(SrcAS));
auto FlatNull = B.buildConstant(DstTy, TM.getNullPointerValue(DestAS));
- auto CmpRes = B.buildICmp(CmpInst::ICMP_NE, I1, Src,
- SegmentNull.getReg(0));
+ auto CmpRes = B.buildICmp(CmpInst::ICMP_NE, I1, Src, SegmentNull.getReg(0));
B.buildSelect(Dst, CmpRes, BuildPtr, FlatNull);
@@ -2558,9 +2525,9 @@ bool AMDGPULegalizerInfo::legalizeFroundeven(MachineInstr &MI,
return true;
}
-bool AMDGPULegalizerInfo::legalizeFceil(
- MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &B) const {
+bool AMDGPULegalizerInfo::legalizeFceil(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
Register Src = MI.getOperand(1).getReg();
assert(MRI.getType(Src).isFloat(64));
@@ -2616,9 +2583,9 @@ static MachineInstrBuilder extractF64Exponent(Register Hi,
return B.buildSub(I32, ExpPart, B.buildConstant(I32, 1023));
}
-bool AMDGPULegalizerInfo::legalizeIntrinsicTrunc(
- MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &B) const {
+bool AMDGPULegalizerInfo::legalizeIntrinsicTrunc(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
Register Src = MI.getOperand(1).getReg();
assert(MRI.getType(Src).isFloat(64));
@@ -2721,7 +2688,8 @@ bool AMDGPULegalizerInfo::legalizeFPTOI(MachineInstr &MI,
Register Src = MI.getOperand(1).getReg();
const LLT SrcLT = MRI.getType(Src);
- assert((SrcLT.isFloat(32) || SrcLT.isFloat(64)) && MRI.getType(Dst).isInteger(64));
+ assert((SrcLT.isFloat(32) || SrcLT.isFloat(64)) &&
+ MRI.getType(Dst).isInteger(64));
unsigned Flags = MI.getFlags();
@@ -2763,7 +2731,7 @@ bool AMDGPULegalizerInfo::legalizeFPTOI(MachineInstr &MI,
auto Fma = B.buildFMA(SrcLT, FloorMul, K1, Trunc, Flags);
auto Hi = (Signed && SrcLT.isFloat(64)) ? B.buildFPTOSI(I32, FloorMul)
- : B.buildFPTOUI(I32, FloorMul);
+ : B.buildFPTOUI(I32, FloorMul);
auto Lo = B.buildFPTOUI(I32, Fma);
if (Signed && SrcLT.isFloat(32)) {
@@ -3266,48 +3234,48 @@ bool AMDGPULegalizerInfo::legalizeStore(LegalizerHelper &Helper,
return false;
}
-bool AMDGPULegalizerInfo::legalizeFPExt(
- MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &B) const {
- // TODO: move to LegalizerHelper
- const SITargetLowering *TLI = ST.getTargetLowering();
+bool AMDGPULegalizerInfo::legalizeFPExt(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
+ // TODO: move to LegalizerHelper
+ const SITargetLowering *TLI = ST.getTargetLowering();
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
- auto ShiftTy = TLI->getPreferredShiftAmountTy(I32);
+ auto ShiftTy = TLI->getPreferredShiftAmountTy(I32);
- B.buildBitcast(
- DstReg, B.buildShl(I32, B.buildAnyExt(I32, B.buildBitcast(I16, SrcReg)),
- B.buildConstant(ShiftTy, 16)));
+ B.buildBitcast(
+ DstReg, B.buildShl(I32, B.buildAnyExt(I32, B.buildBitcast(I16, SrcReg)),
+ B.buildConstant(ShiftTy, 16)));
- MI.eraseFromParent();
- return true;
+ MI.eraseFromParent();
+ return true;
}
-bool AMDGPULegalizerInfo::legalizeFPTrunc(
- MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &B) const {
- // TODO: move to LegalizerHelper
- const SITargetLowering *TLI = ST.getTargetLowering();
+bool AMDGPULegalizerInfo::legalizeFPTrunc(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
+ // TODO: move to LegalizerHelper
+ const SITargetLowering *TLI = ST.getTargetLowering();
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
- auto ShiftTy = TLI->getPreferredShiftAmountTy(I32);
+ auto ShiftTy = TLI->getPreferredShiftAmountTy(I32);
- // FIXME:
- // if (!DAG.isKnownNeverSNaN(Op)) {
- // Op = DAG.getNode(ISD::FCANONICALIZE, dl, MVT::f32, Op,
- // Node->getFlags());
- // }
+ // FIXME:
+ // if (!DAG.isKnownNeverSNaN(Op)) {
+ // Op = DAG.getNode(ISD::FCANONICALIZE, dl, MVT::f32, Op,
+ // Node->getFlags());
+ // }
- B.buildBitcast(
- DstReg, B.buildTrunc(I16, B.buildLShr(I32, B.buildBitcast(I32, SrcReg),
- B.buildConstant(ShiftTy, 16))));
+ B.buildBitcast(DstReg,
+ B.buildTrunc(I16, B.buildLShr(I32, B.buildBitcast(I32, SrcReg),
+ B.buildConstant(ShiftTy, 16))));
- MI.eraseFromParent();
- return true;
+ MI.eraseFromParent();
+ return true;
}
bool AMDGPULegalizerInfo::legalizeFMad(
@@ -3564,8 +3532,7 @@ bool AMDGPULegalizerInfo::legalizeFlogCommon(MachineInstr &MI,
// Expand isfinite(x) => fabs(x) < inf
auto Inf = B.buildFConstant(Ty, APFloat::getInf(APFloat::IEEEsingle()));
auto Fabs = B.buildFAbs(Ty, Y);
- auto IsFinite =
- B.buildFCmp(CmpInst::FCMP_OLT, I1, Fabs, Inf, Flags);
+ auto IsFinite = B.buildFCmp(CmpInst::FCMP_OLT, I1, Fabs, Inf, Flags);
R = B.buildSelect(Ty, IsFinite, R, Y, Flags).getReg(0);
}
@@ -3837,8 +3804,7 @@ bool AMDGPULegalizerInfo::legalizeFExp(MachineInstr &MI,
auto UnderflowCheckConst =
B.buildFConstant(Ty, IsExp10 ? -0x1.66d3e8p+5f : -0x1.9d1da0p+6f);
auto Zero = B.buildFConstant(Ty, 0.0);
- auto Underflow =
- B.buildFCmp(CmpInst::FCMP_OLT, I1, X, UnderflowCheckConst);
+ auto Underflow = B.buildFCmp(CmpInst::FCMP_OLT, I1, X, UnderflowCheckConst);
R = B.buildSelect(Ty, Underflow, Zero, R);
@@ -3848,8 +3814,7 @@ bool AMDGPULegalizerInfo::legalizeFExp(MachineInstr &MI,
auto OverflowCheckConst =
B.buildFConstant(Ty, IsExp10 ? 0x1.344136p+5f : 0x1.62e430p+6f);
- auto Overflow =
- B.buildFCmp(CmpInst::FCMP_OGT, I1, X, OverflowCheckConst);
+ auto Overflow = B.buildFCmp(CmpInst::FCMP_OGT, I1, X, OverflowCheckConst);
auto Inf = B.buildFConstant(Ty, APFloat::getInf(APFloat::IEEEsingle()));
R = B.buildSelect(Ty, Overflow, Inf, R, Flags);
}
@@ -4215,7 +4180,7 @@ void AMDGPULegalizerInfo::buildMultiply(LegalizerHelper &Helper,
if (!IsHighest) {
auto Hi = B.buildUAdde(I32, I1, Accum[2 * i], SeparateOddOut[1],
- Lo->getOperand(1).getReg());
+ Lo->getOperand(1).getReg());
Accum[2 * i] = Hi.getReg(0);
SeparateOddCarry = Hi.getReg(1);
}
@@ -4926,8 +4891,8 @@ bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI,
// For f16 require afn or arcp.
// For f32 require afn.
- if (!AllowInaccurateRcp && (!ResTy.isScalar(16) ||
- !MI.getFlag(MachineInstr::FmArcp)))
+ if (!AllowInaccurateRcp &&
+ (!ResTy.isScalar(16) || !MI.getFlag(MachineInstr::FmArcp)))
return false;
// x / y -> x * (1.0 / y)
@@ -5242,8 +5207,7 @@ bool AMDGPULegalizerInfo::legalizeFFREXP(MachineInstr &MI,
if (ST.hasFractBug()) {
auto Fabs = B.buildFAbs(Ty, Val);
auto Inf = B.buildFConstant(Ty, APFloat::getInf(getFltSemanticForLLT(Ty)));
- auto IsFinite =
- B.buildFCmp(CmpInst::FCMP_OLT, I1, Fabs, Inf, Flags);
+ auto IsFinite = B.buildFCmp(CmpInst::FCMP_OLT, I1, Fabs, Inf, Flags);
auto Zero = B.buildConstant(InstrExpTy, 0);
Exp = B.buildSelect(InstrExpTy, IsFinite, Exp, Zero);
Mant = B.buildSelect(Ty, IsFinite, Mant, Val);
@@ -5331,7 +5295,7 @@ bool AMDGPULegalizerInfo::legalizeFSQRTF32(MachineInstr &MI,
B.buildIntrinsic(Intrinsic::amdgcn_sqrt, ArrayRef<Register>({SqrtS}))
.addUse(SqrtX.getReg(0))
.setMIFlags(Flags);
- auto SqrtSInt = B.buildBitcast(I32, SqrtS);
+ auto SqrtSInt = B.buildBitcast(I32, SqrtS);
auto NegOne = B.buildConstant(I32, -1);
auto SqrtSNextDown = B.buildBitcast(F32, B.buildAdd(I32, SqrtSInt, NegOne));
@@ -5447,7 +5411,7 @@ bool AMDGPULegalizerInfo::legalizeFSQRTF64(MachineInstr &MI,
// Scale down the result.
auto ScaleDownFactor = B.buildConstant(I32, -128);
- auto ScaleDown = B.buildSelect(I32, Scaling, ScaleDownFactor, ZeroInt);
+ auto ScaleDown = B.buildSelect(I32, Scaling, ScaleDownFactor, ZeroInt);
SqrtRet = B.buildFLdexp(F64, SqrtRet, ScaleDown, Flags);
// TODO: Switch to fcmp oeq 0 for finite only. Can't fully remove this check
@@ -5495,7 +5459,7 @@ bool AMDGPULegalizerInfo::legalizeRsqClampIntrinsic(MachineInstr &MI,
LLT Ty = MRI.getType(Dst);
if (!Ty.isFloat())
- return false;
+ return false;
const llvm::fltSemantics &FltSemantics = getFltSemanticForLLT(Ty);
@@ -5856,8 +5820,7 @@ Register AMDGPULegalizerInfo::handleD16VData(MachineIRBuilder &B,
Reg = B.buildBitcast(I32, Reg).getReg(0);
PackedRegs.push_back(Reg);
PackedRegs.resize(2, B.buildUndef(I32).getReg(0));
- return B.buildBuildVector(V2I32, PackedRegs)
- .getReg(0);
+ return B.buildBuildVector(V2I32, PackedRegs).getReg(0);
}
if (StoreVT.getNumElements() == 3) {
@@ -5877,8 +5840,7 @@ Register AMDGPULegalizerInfo::handleD16VData(MachineIRBuilder &B,
for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
PackedRegs.push_back(Unmerge.getReg(I));
PackedRegs.resize(4, B.buildUndef(I32).getReg(0));
- return B.buildBuildVector(V4I32, PackedRegs)
- .getReg(0);
+ return B.buildBuildVector(V4I32, PackedRegs).getReg(0);
}
llvm_unreachable("invalid data type");
@@ -6193,11 +6155,12 @@ bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI,
Repack.push_back(B.buildTrunc(EltTy, Unmerge.getReg(I)).getReg(0));
if (Ty.isFloatVector()) {
- B.buildBitcast(Dst, B.buildMergeLikeInstr(Ty.changeElementType(I16), Repack));
+ B.buildBitcast(Dst,
+ B.buildMergeLikeInstr(Ty.changeElementType(I16), Repack));
} else {
B.buildMergeLikeInstr(Dst, Repack);
}
-
+
} else {
buildBufferLoad(Opc, Dst, RSrc, VIndex, VOffset, SOffset, ImmOffset, Format,
AuxiliaryData, MMO, IsTyped, HasVIndex, B);
@@ -6495,8 +6458,9 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
MRI->getType(MI.getOperand(ArgOffset + Intr->GradientStart).getReg());
LLT AddrTy =
MRI->getType(MI.getOperand(ArgOffset + Intr->CoordStart).getReg());
- const bool IsG16 =
- ST.hasG16() ? (BaseOpcode->Gradients && GradTy.isInteger(16)) : GradTy.isInteger(16);
+ const bool IsG16 = ST.hasG16()
+ ? (BaseOpcode->Gradients && GradTy.isInteger(16))
+ : GradTy.isInteger(16);
const bool IsA16 = AddrTy.isInteger(16);
const bool IsD16 = !IsAtomicPacked16Bit && Ty.getScalarType().isFloat(16);
@@ -6756,7 +6720,7 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
B.buildUnmerge({DstReg, Dst1Reg}, NewResultReg);
return true;
}
-
+
if (Ty.isFloat(32)) {
auto DstI32 = MRI->createGenericVirtualRegister(I32);
B.buildUnmerge({DstI32, Dst1Reg}, NewResultReg);
@@ -6918,10 +6882,10 @@ bool AMDGPULegalizerInfo::legalizeSBufferLoad(LegalizerHelper &Helper,
B.setInsertPt(B.getMBB(), ++B.getInsertPt());
if (Ty.isFloat()) {
- auto Trunc = B.buildTrunc(Ty.dropType(), Dst);
- B.buildBitcast(OrigDst, Trunc);
+ auto Trunc = B.buildTrunc(Ty.dropType(), Dst);
+ B.buildBitcast(OrigDst, Trunc);
} else {
- B.buildTrunc(OrigDst, Dst);
+ B.buildTrunc(OrigDst, Dst);
}
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index 2deda39224..5ba88c2c71 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -77,8 +77,10 @@ public:
bool legalizeLoad(LegalizerHelper &Helper, MachineInstr &MI) const;
bool legalizeStore(LegalizerHelper &Helper, MachineInstr &MI) const;
- bool legalizeFPExt(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const;
- bool legalizeFPTrunc(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const;
+ bool legalizeFPExt(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
+ bool legalizeFPTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
bool legalizeFMad(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 9ffb235010..dad278189b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -1568,8 +1568,9 @@ bool AMDGPURegisterBankInfo::applyMappingBFE(MachineIRBuilder &B,
// TODO: It might be worth using a pseudo here to avoid scc clobber and
// register class constraints.
- unsigned Opc = Ty.isScalar(32) ? (Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32) :
- (Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64);
+ unsigned Opc = Ty.isScalar(32)
+ ? (Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32)
+ : (Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64);
auto MIB = B.buildInstr(Opc, {DstReg}, {SrcReg, MergedInputs});
if (!constrainSelectedInstRegOperands(*MIB, *TII, *TRI, *this))
@@ -2588,8 +2589,9 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
Register SrcReg1 = MI.getOperand(2).getReg();
const LLT S32 = LLT::scalar(32);
const LLT S64 = LLT::scalar(64);
- assert(MRI.getType(DstReg).isScalar(64) && "This is a special case for s_mul_u64 "
- "that handles only 64-bit operands.");
+ assert(MRI.getType(DstReg).isScalar(64) &&
+ "This is a special case for s_mul_u64 "
+ "that handles only 64-bit operands.");
const RegisterBank *DstBank =
OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
@@ -3787,8 +3789,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// For COPY between a physical reg and an s1, there is no type associated so
// we need to take the virtual register's type as a hint on how to interpret
// s1 values.
- if (!SrcReg.isVirtual() && !DstBank &&
- MRI.getType(DstReg).isScalar(1))
+ if (!SrcReg.isVirtual() && !DstBank && MRI.getType(DstReg).isScalar(1))
DstBank = &AMDGPU::VCCRegBank;
else if (!DstReg.isVirtual() && MRI.getType(SrcReg).isScalar(1))
DstBank = &AMDGPU::VCCRegBank;
diff --git a/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp b/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp
index fe2757ac66..4b3d4b314c 100644
--- a/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp
+++ b/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp
@@ -457,7 +457,7 @@ std::optional<LLTCodeGen> MVTToLLT(MVT::SimpleValueType SVT) {
return LLTCodeGen(LLT(VT, true));
if (VT.isInteger() || VT.isFloatingPoint())
- return LLTCodeGen(LLT(VT, true));
+ return LLTCodeGen(LLT(VT, true));
return std::nullopt;
}
``````````
</details>
https://github.com/llvm/llvm-project/pull/122503