[llvm] 77c6743 - LLT: Add some stub constructors for FP types
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 3 05:33:30 PDT 2023
Author: Matt Arsenault
Date: 2023-09-03T08:33:19-04:00
New Revision: 77c67436d9e7e63c50752d990a271cf860ba9a0e
URL: https://github.com/llvm/llvm-project/commit/77c67436d9e7e63c50752d990a271cf860ba9a0e
DIFF: https://github.com/llvm/llvm-project/commit/77c67436d9e7e63c50752d990a271cf860ba9a0e.diff
LOG: LLT: Add some stub constructors for FP types
This is to start documenting uses to ease a future migration
to supporting different types with the same size.
https://reviews.llvm.org/D150605
Added:
Modified:
llvm/include/llvm/CodeGen/LowLevelType.h
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/LowLevelType.h b/llvm/include/llvm/CodeGen/LowLevelType.h
index a1e778cd718f6e..b7477834a1ff5e 100644
--- a/llvm/include/llvm/CodeGen/LowLevelType.h
+++ b/llvm/include/llvm/CodeGen/LowLevelType.h
@@ -71,6 +71,22 @@ class LLT {
ScalarTy.isPointer() ? ScalarTy.getAddressSpace() : 0};
}
+ /// Get a 16-bit IEEE half value.
+ /// TODO: Add IEEE semantics to type - This currently returns a simple `scalar(16)`.
+ static constexpr LLT float16() {
+ return scalar(16);
+ }
+
+ /// Get a 32-bit IEEE float value.
+ static constexpr LLT float32() {
+ return scalar(32);
+ }
+
+ /// Get a 64-bit IEEE double value.
+ static constexpr LLT float64() {
+ return scalar(64);
+ }
+
/// Get a low-level fixed-width vector of some number of elements and element
/// width.
static constexpr LLT fixed_vector(unsigned NumElements,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 332ab0e472d5ee..421fc41ad3b62c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -2979,10 +2979,10 @@ bool AMDGPULegalizerInfo::legalizeFMad(
// TODO: Always legal with future ftz flag.
// FIXME: Do we need just output?
- if (Ty == LLT::scalar(32) &&
+ if (Ty == LLT::float32() &&
MFI->getMode().FP32Denormals == DenormalMode::getPreserveSign())
return true;
- if (Ty == LLT::scalar(16) &&
+ if (Ty == LLT::float16() &&
MFI->getMode().FP64FP16Denormals == DenormalMode::getPreserveSign())
return true;
@@ -3513,27 +3513,26 @@ bool AMDGPULegalizerInfo::legalizeFPow(MachineInstr &MI,
Register Src1 = MI.getOperand(2).getReg();
unsigned Flags = MI.getFlags();
LLT Ty = B.getMRI()->getType(Dst);
- const LLT S16 = LLT::scalar(16);
- const LLT S32 = LLT::scalar(32);
+ const LLT F16 = LLT::float16();
+ const LLT F32 = LLT::float32();
- if (Ty == S32) {
- auto Log = B.buildFLog2(S32, Src0, Flags);
- auto Mul = B.buildIntrinsic(Intrinsic::amdgcn_fmul_legacy, {S32})
+ if (Ty == F32) {
+ auto Log = B.buildFLog2(F32, Src0, Flags);
+ auto Mul = B.buildIntrinsic(Intrinsic::amdgcn_fmul_legacy, {F32})
.addUse(Log.getReg(0))
.addUse(Src1)
.setMIFlags(Flags);
B.buildFExp2(Dst, Mul, Flags);
- } else if (Ty == S16) {
+ } else if (Ty == F16) {
// There's no f16 fmul_legacy, so we need to convert for it.
- auto Log = B.buildFLog2(S16, Src0, Flags);
- auto Ext0 = B.buildFPExt(S32, Log, Flags);
- auto Ext1 = B.buildFPExt(S32, Src1, Flags);
- auto Mul = B.buildIntrinsic(Intrinsic::amdgcn_fmul_legacy, {S32})
+ auto Log = B.buildFLog2(F16, Src0, Flags);
+ auto Ext0 = B.buildFPExt(F32, Log, Flags);
+ auto Ext1 = B.buildFPExt(F32, Src1, Flags);
+ auto Mul = B.buildIntrinsic(Intrinsic::amdgcn_fmul_legacy, {F32})
.addUse(Ext0.getReg(0))
.addUse(Ext1.getReg(0))
.setMIFlags(Flags);
-
- B.buildFExp2(Dst, B.buildFPTrunc(S16, Mul), Flags);
+ B.buildFExp2(Dst, B.buildFPTrunc(F16, Mul), Flags);
} else
return false;
@@ -3558,11 +3557,11 @@ bool AMDGPULegalizerInfo::legalizeFFloor(MachineInstr &MI,
MachineIRBuilder &B) const {
const LLT S1 = LLT::scalar(1);
- const LLT S64 = LLT::scalar(64);
+ const LLT F64 = LLT::float64();
Register Dst = MI.getOperand(0).getReg();
Register OrigSrc = MI.getOperand(1).getReg();
unsigned Flags = MI.getFlags();
- assert(ST.hasFractBug() && MRI.getType(Dst) == S64 &&
+ assert(ST.hasFractBug() && MRI.getType(Dst) == F64 &&
"this should not have been custom lowered");
// V_FRACT is buggy on SI, so the F32 version is never used and (x-floor(x))
@@ -3573,7 +3572,7 @@ bool AMDGPULegalizerInfo::legalizeFFloor(MachineInstr &MI,
//
// Convert floor(x) to (x - fract(x))
- auto Fract = B.buildIntrinsic(Intrinsic::amdgcn_fract, {S64})
+ auto Fract = B.buildIntrinsic(Intrinsic::amdgcn_fract, {F64})
.addUse(OrigSrc)
.setMIFlags(Flags);
@@ -3585,9 +3584,9 @@ bool AMDGPULegalizerInfo::legalizeFFloor(MachineInstr &MI,
Register ModSrc = stripAnySourceMods(OrigSrc, MRI);
auto Const =
- B.buildFConstant(S64, llvm::bit_cast<double>(0x3fefffffffffffff));
+ B.buildFConstant(F64, llvm::bit_cast<double>(0x3fefffffffffffff));
- Register Min = MRI.createGenericVirtualRegister(S64);
+ Register Min = MRI.createGenericVirtualRegister(F64);
// We don't need to concern ourselves with the snan handling
diff erence, so
// use the one which will directly select.
@@ -3600,10 +3599,10 @@ bool AMDGPULegalizerInfo::legalizeFFloor(MachineInstr &MI,
Register CorrectedFract = Min;
if (!MI.getFlag(MachineInstr::FmNoNans)) {
auto IsNan = B.buildFCmp(CmpInst::FCMP_ORD, S1, ModSrc, ModSrc, Flags);
- CorrectedFract = B.buildSelect(S64, IsNan, ModSrc, Min, Flags).getReg(0);
+ CorrectedFract = B.buildSelect(F64, IsNan, ModSrc, Min, Flags).getReg(0);
}
- auto NegFract = B.buildFNeg(S64, CorrectedFract, Flags);
+ auto NegFract = B.buildFNeg(F64, CorrectedFract, Flags);
B.buildFAdd(Dst, OrigSrc, NegFract, Flags);
MI.eraseFromParent();
More information about the llvm-commits
mailing list