[llvm] [AMDGPU] Fold llvm.amdgcn.cvt.pkrtz when either operand is fpext (PR #108237)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 11 07:50:02 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Jay Foad (jayfoad)
<details>
<summary>Changes</summary>
This also generalizes the Undef handling and adds Poison handling.
---
Full diff: https://github.com/llvm/llvm-project/pull/108237.diff
2 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp (+29-18)
- (modified) llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll (+42)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 4da3618357c420..389cc35b51f9da 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -643,27 +643,38 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
break;
}
case Intrinsic::amdgcn_cvt_pkrtz: {
- Value *Src0 = II.getArgOperand(0);
- Value *Src1 = II.getArgOperand(1);
- if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
- if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
- const fltSemantics &HalfSem =
- II.getType()->getScalarType()->getFltSemantics();
+ auto foldFPTruncToF16RTZ = [](Value *Arg) -> Value * {
+ Type *HalfTy = Type::getHalfTy(Arg->getContext());
+
+ if (isa<PoisonValue>(Arg))
+ return PoisonValue::get(HalfTy);
+ if (isa<UndefValue>(Arg))
+ return UndefValue::get(HalfTy);
+
+ ConstantFP *CFP = nullptr;
+ if (match(Arg, m_ConstantFP(CFP))) {
bool LosesInfo;
- APFloat Val0 = C0->getValueAPF();
- APFloat Val1 = C1->getValueAPF();
- Val0.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
- Val1.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
-
- Constant *Folded =
- ConstantVector::get({ConstantFP::get(II.getContext(), Val0),
- ConstantFP::get(II.getContext(), Val1)});
- return IC.replaceInstUsesWith(II, Folded);
+ APFloat Val(CFP->getValueAPF());
+ Val.convert(APFloat::IEEEhalf(), APFloat::rmTowardZero, &LosesInfo);
+ return ConstantFP::get(HalfTy, Val);
}
- }
- if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1)) {
- return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
+ Value *Src = nullptr;
+ if (match(Arg, m_OneUse(m_FPExt(m_Value(Src))))) {
+ if (Src->getType()->isHalfTy())
+ return Src;
+ }
+
+ return nullptr;
+ };
+
+ if (Value *Src0 = foldFPTruncToF16RTZ(II.getArgOperand(0))) {
+ if (Value *Src1 = foldFPTruncToF16RTZ(II.getArgOperand(1))) {
+ Value *V = PoisonValue::get(II.getType());
+ V = IC.Builder.CreateInsertElement(V, Src0, (uint64_t)0);
+ V = IC.Builder.CreateInsertElement(V, Src1, (uint64_t)1);
+ return IC.replaceInstUsesWith(II, V);
+ }
}
break;
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
index f3a3b8c1dc5d8a..176b3560b4064d 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -1161,6 +1161,48 @@ define <2 x half> @constant_rtz_pkrtz() {
ret <2 x half> %cvt
}
+define <2 x half> @fpext_const_cvt_pkrtz(half %x) {
+; CHECK-LABEL: @fpext_const_cvt_pkrtz(
+; CHECK-NEXT: [[CVT:%.*]] = insertelement <2 x half> <half poison, half 0xH4200>, half [[X:%.*]], i64 0
+; CHECK-NEXT: ret <2 x half> [[CVT]]
+;
+ %ext = fpext half %x to float
+ %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %ext, float 3.0)
+ ret <2 x half> %cvt
+}
+
+define <2 x half> @const_fpext_cvt_pkrtz(half %y) {
+; CHECK-LABEL: @const_fpext_cvt_pkrtz(
+; CHECK-NEXT: [[CVT:%.*]] = insertelement <2 x half> <half 0xH4500, half poison>, half [[Y:%.*]], i64 1
+; CHECK-NEXT: ret <2 x half> [[CVT]]
+;
+ %ext = fpext half %y to float
+ %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 5.0, float %ext)
+ ret <2 x half> %cvt
+}
+
+define <2 x half> @fpext_fpext_cvt_pkrtz(half %x, half %y) {
+; CHECK-LABEL: @fpext_fpext_cvt_pkrtz(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x half> poison, half [[X:%.*]], i64 0
+; CHECK-NEXT: [[CVT:%.*]] = insertelement <2 x half> [[TMP1]], half [[Y:%.*]], i64 1
+; CHECK-NEXT: ret <2 x half> [[CVT]]
+;
+ %extx = fpext half %x to float
+ %exty = fpext half %y to float
+ %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %extx, float %exty)
+ ret <2 x half> %cvt
+}
+
+define <2 x half> @poison_fpext_cvt_pkrtz(half %y) {
+; CHECK-LABEL: @poison_fpext_cvt_pkrtz(
+; CHECK-NEXT: [[CVT:%.*]] = insertelement <2 x half> poison, half [[Y:%.*]], i64 1
+; CHECK-NEXT: ret <2 x half> [[CVT]]
+;
+ %ext = fpext half %y to float
+ %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float poison, float %ext)
+ ret <2 x half> %cvt
+}
+
; --------------------------------------------------------------------
; llvm.amdgcn.cvt.pknorm.i16
; --------------------------------------------------------------------
``````````
</details>
https://github.com/llvm/llvm-project/pull/108237
More information about the llvm-commits mailing list