[llvm] [AMDGPU] Fold llvm.amdgcn.cvt.pkrtz when either operand is fpext (PR #108237)
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 16 06:45:20 PDT 2024
https://github.com/jayfoad updated https://github.com/llvm/llvm-project/pull/108237
>From 0810b94df2bd4de3b76c9c1aaa5475ceae2df02c Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Wed, 11 Sep 2024 15:25:55 +0100
Subject: [PATCH 1/2] [AMDGPU] Fold llvm.amdgcn.cvt.pkrtz when either operand
is fpext
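This also generalizes the undef handling (undef is now folded per operand, in combination with constant or fpext operands, rather than only when both operands are undef) and adds equivalent poison handling.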
---
.../AMDGPU/AMDGPUInstCombineIntrinsic.cpp | 47 ++++++++++++-------
.../InstCombine/AMDGPU/amdgcn-intrinsics.ll | 42 +++++++++++++++++
2 files changed, 71 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 4da3618357c420..389cc35b51f9da 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -643,27 +643,38 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
break;
}
case Intrinsic::amdgcn_cvt_pkrtz: {
- Value *Src0 = II.getArgOperand(0);
- Value *Src1 = II.getArgOperand(1);
- if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
- if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
- const fltSemantics &HalfSem =
- II.getType()->getScalarType()->getFltSemantics();
+ auto foldFPTruncToF16RTZ = [](Value *Arg) -> Value * {
+ Type *HalfTy = Type::getHalfTy(Arg->getContext());
+
+ if (isa<PoisonValue>(Arg))
+ return PoisonValue::get(HalfTy);
+ if (isa<UndefValue>(Arg))
+ return UndefValue::get(HalfTy);
+
+ ConstantFP *CFP = nullptr;
+ if (match(Arg, m_ConstantFP(CFP))) {
bool LosesInfo;
- APFloat Val0 = C0->getValueAPF();
- APFloat Val1 = C1->getValueAPF();
- Val0.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
- Val1.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
-
- Constant *Folded =
- ConstantVector::get({ConstantFP::get(II.getContext(), Val0),
- ConstantFP::get(II.getContext(), Val1)});
- return IC.replaceInstUsesWith(II, Folded);
+ APFloat Val(CFP->getValueAPF());
+ Val.convert(APFloat::IEEEhalf(), APFloat::rmTowardZero, &LosesInfo);
+ return ConstantFP::get(HalfTy, Val);
}
- }
- if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1)) {
- return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
+ Value *Src = nullptr;
+ if (match(Arg, m_OneUse(m_FPExt(m_Value(Src))))) {
+ if (Src->getType()->isHalfTy())
+ return Src;
+ }
+
+ return nullptr;
+ };
+
+ if (Value *Src0 = foldFPTruncToF16RTZ(II.getArgOperand(0))) {
+ if (Value *Src1 = foldFPTruncToF16RTZ(II.getArgOperand(1))) {
+ Value *V = PoisonValue::get(II.getType());
+ V = IC.Builder.CreateInsertElement(V, Src0, (uint64_t)0);
+ V = IC.Builder.CreateInsertElement(V, Src1, (uint64_t)1);
+ return IC.replaceInstUsesWith(II, V);
+ }
}
break;
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
index f3a3b8c1dc5d8a..176b3560b4064d 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -1161,6 +1161,48 @@ define <2 x half> @constant_rtz_pkrtz() {
ret <2 x half> %cvt
}
+define <2 x half> @fpext_const_cvt_pkrtz(half %x) {
+; CHECK-LABEL: @fpext_const_cvt_pkrtz(
+; CHECK-NEXT: [[CVT:%.*]] = insertelement <2 x half> <half poison, half 0xH4200>, half [[X:%.*]], i64 0
+; CHECK-NEXT: ret <2 x half> [[CVT]]
+;
+ %ext = fpext half %x to float
+ %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %ext, float 3.0)
+ ret <2 x half> %cvt
+}
+
+define <2 x half> @const_fpext_cvt_pkrtz(half %y) {
+; CHECK-LABEL: @const_fpext_cvt_pkrtz(
+; CHECK-NEXT: [[CVT:%.*]] = insertelement <2 x half> <half 0xH4500, half poison>, half [[Y:%.*]], i64 1
+; CHECK-NEXT: ret <2 x half> [[CVT]]
+;
+ %ext = fpext half %y to float
+ %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 5.0, float %ext)
+ ret <2 x half> %cvt
+}
+
+define <2 x half> @fpext_fpext_cvt_pkrtz(half %x, half %y) {
+; CHECK-LABEL: @fpext_fpext_cvt_pkrtz(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x half> poison, half [[X:%.*]], i64 0
+; CHECK-NEXT: [[CVT:%.*]] = insertelement <2 x half> [[TMP1]], half [[Y:%.*]], i64 1
+; CHECK-NEXT: ret <2 x half> [[CVT]]
+;
+ %extx = fpext half %x to float
+ %exty = fpext half %y to float
+ %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %extx, float %exty)
+ ret <2 x half> %cvt
+}
+
+define <2 x half> @poison_fpext_cvt_pkrtz(half %y) {
+; CHECK-LABEL: @poison_fpext_cvt_pkrtz(
+; CHECK-NEXT: [[CVT:%.*]] = insertelement <2 x half> poison, half [[Y:%.*]], i64 1
+; CHECK-NEXT: ret <2 x half> [[CVT]]
+;
+ %ext = fpext half %y to float
+ %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float poison, float %ext)
+ ret <2 x half> %cvt
+}
+
; --------------------------------------------------------------------
; llvm.amdgcn.cvt.pknorm.i16
; --------------------------------------------------------------------
>From cde137309c2ae65d03f7935210676815361ceee8 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Mon, 16 Sep 2024 14:43:10 +0100
Subject: [PATCH 2/2] Add tests and remove single-use restriction
---
.../AMDGPU/AMDGPUInstCombineIntrinsic.cpp | 2 +-
.../InstCombine/AMDGPU/amdgcn-intrinsics.ll | 37 +++++++++++++++++++
2 files changed, 38 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 389cc35b51f9da..c1cf5ca8f72ea5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -660,7 +660,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
}
Value *Src = nullptr;
- if (match(Arg, m_OneUse(m_FPExt(m_Value(Src))))) {
+ if (match(Arg, m_FPExt(m_Value(Src)))) {
if (Src->getType()->isHalfTy())
return Src;
}
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
index 176b3560b4064d..fabf8ab51764b9 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -1181,6 +1181,20 @@ define <2 x half> @const_fpext_cvt_pkrtz(half %y) {
ret <2 x half> %cvt
}
+define <2 x half> @const_fpext_multi_cvt_pkrtz(half %y) {
+; CHECK-LABEL: @const_fpext_multi_cvt_pkrtz(
+; CHECK-NEXT: [[CVT1:%.*]] = insertelement <2 x half> <half 0xH4500, half poison>, half [[Y:%.*]], i64 1
+; CHECK-NEXT: [[CVT2:%.*]] = insertelement <2 x half> <half 0xH4200, half poison>, half [[Y]], i64 1
+; CHECK-NEXT: [[ADD:%.*]] = fadd <2 x half> [[CVT1]], [[CVT2]]
+; CHECK-NEXT: ret <2 x half> [[ADD]]
+;
+ %ext = fpext half %y to float
+ %cvt1 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 5.0, float %ext)
+ %cvt2 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 3.0, float %ext)
+ %add = fadd <2 x half> %cvt1, %cvt2
+ ret <2 x half> %add
+}
+
define <2 x half> @fpext_fpext_cvt_pkrtz(half %x, half %y) {
; CHECK-LABEL: @fpext_fpext_cvt_pkrtz(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x half> poison, half [[X:%.*]], i64 0
@@ -1193,6 +1207,19 @@ define <2 x half> @fpext_fpext_cvt_pkrtz(half %x, half %y) {
ret <2 x half> %cvt
}
+define <2 x half> @fpext_fpext_bf16_cvt_pkrtz(bfloat %x, bfloat %y) {
+; CHECK-LABEL: @fpext_fpext_bf16_cvt_pkrtz(
+; CHECK-NEXT: [[EXTX:%.*]] = fpext bfloat [[X:%.*]] to float
+; CHECK-NEXT: [[EXTY:%.*]] = fpext bfloat [[Y:%.*]] to float
+; CHECK-NEXT: [[CVT:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float [[EXTX]], float [[EXTY]])
+; CHECK-NEXT: ret <2 x half> [[CVT]]
+;
+ %extx = fpext bfloat %x to float
+ %exty = fpext bfloat %y to float
+ %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %extx, float %exty)
+ ret <2 x half> %cvt
+}
+
define <2 x half> @poison_fpext_cvt_pkrtz(half %y) {
; CHECK-LABEL: @poison_fpext_cvt_pkrtz(
; CHECK-NEXT: [[CVT:%.*]] = insertelement <2 x half> poison, half [[Y:%.*]], i64 1
@@ -1203,6 +1230,16 @@ define <2 x half> @poison_fpext_cvt_pkrtz(half %y) {
ret <2 x half> %cvt
}
+define <2 x half> @fpext_poison_cvt_pkrtz(half %x) {
+; CHECK-LABEL: @fpext_poison_cvt_pkrtz(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x half> poison, half [[X:%.*]], i64 0
+; CHECK-NEXT: ret <2 x half> [[TMP1]]
+;
+ %ext = fpext half %x to float
+ %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %ext, float poison)
+ ret <2 x half> %cvt
+}
+
; --------------------------------------------------------------------
; llvm.amdgcn.cvt.pknorm.i16
; --------------------------------------------------------------------
More information about the llvm-commits
mailing list