[llvm] [AMDGPU] Fold llvm.amdgcn.cvt.pkrtz when either operand is fpext (PR #108237)

Mon Sep 16 06:45:20 PDT 2024

https://github.com/jayfoad updated https://github.com/llvm/llvm-project/pull/108237

>From 0810b94df2bd4de3b76c9c1aaa5475ceae2df02c Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Wed, 11 Sep 2024 15:25:55 +0100
Subject: [PATCH 1/2] [AMDGPU] Fold llvm.amdgcn.cvt.pkrtz when either operand
 is fpext

This also generalizes the Undef handling and adds Poison handling.
---
 .../AMDGPU/AMDGPUInstCombineIntrinsic.cpp     | 47 ++++++++++++-------
 .../InstCombine/AMDGPU/amdgcn-intrinsics.ll   | 42 +++++++++++++++++
 2 files changed, 71 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 4da3618357c420..389cc35b51f9da 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -643,27 +643,38 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
     break;
   }
   case Intrinsic::amdgcn_cvt_pkrtz: {
-    Value *Src0 = II.getArgOperand(0);
-    Value *Src1 = II.getArgOperand(1);
-    if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
-      if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
-        const fltSemantics &HalfSem =
-            II.getType()->getScalarType()->getFltSemantics();
+    auto foldFPTruncToF16RTZ = [](Value *Arg) -> Value * {
+      Type *HalfTy = Type::getHalfTy(Arg->getContext());
+
+      if (isa<PoisonValue>(Arg))
+        return PoisonValue::get(HalfTy);
+      if (isa<UndefValue>(Arg))
+        return UndefValue::get(HalfTy);
+
+      ConstantFP *CFP = nullptr;
+      if (match(Arg, m_ConstantFP(CFP))) {
         bool LosesInfo;
-        APFloat Val0 = C0->getValueAPF();
-        APFloat Val1 = C1->getValueAPF();
-        Val0.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
-        Val1.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
-
-        Constant *Folded =
-            ConstantVector::get({ConstantFP::get(II.getContext(), Val0),
-                                 ConstantFP::get(II.getContext(), Val1)});
-        return IC.replaceInstUsesWith(II, Folded);
+        APFloat Val(CFP->getValueAPF());
+        Val.convert(APFloat::IEEEhalf(), APFloat::rmTowardZero, &LosesInfo);
+        return ConstantFP::get(HalfTy, Val);
       }
-    }
 
-    if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1)) {
-      return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
+      Value *Src = nullptr;
+      if (match(Arg, m_OneUse(m_FPExt(m_Value(Src))))) {
+        if (Src->getType()->isHalfTy())
+          return Src;
+      }
+
+      return nullptr;
+    };
+
+    if (Value *Src0 = foldFPTruncToF16RTZ(II.getArgOperand(0))) {
+      if (Value *Src1 = foldFPTruncToF16RTZ(II.getArgOperand(1))) {
+        Value *V = PoisonValue::get(II.getType());
+        V = IC.Builder.CreateInsertElement(V, Src0, (uint64_t)0);
+        V = IC.Builder.CreateInsertElement(V, Src1, (uint64_t)1);
+        return IC.replaceInstUsesWith(II, V);
+      }
     }
 
     break;
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
index f3a3b8c1dc5d8a..176b3560b4064d 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -1161,6 +1161,48 @@ define <2 x half> @constant_rtz_pkrtz() {
   ret <2 x half> %cvt
 }
 
+define <2 x half> @fpext_const_cvt_pkrtz(half %x) {
+; CHECK-LABEL: @fpext_const_cvt_pkrtz(
+; CHECK-NEXT:    [[CVT:%.*]] = insertelement <2 x half> <half poison, half 0xH4200>, half [[X:%.*]], i64 0
+; CHECK-NEXT:    ret <2 x half> [[CVT]]
+;
+  %ext = fpext half %x to float
+  %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %ext, float 3.0)
+  ret <2 x half> %cvt
+}
+
+define <2 x half> @const_fpext_cvt_pkrtz(half %y) {
+; CHECK-LABEL: @const_fpext_cvt_pkrtz(
+; CHECK-NEXT:    [[CVT:%.*]] = insertelement <2 x half> <half 0xH4500, half poison>, half [[Y:%.*]], i64 1
+; CHECK-NEXT:    ret <2 x half> [[CVT]]
+;
+  %ext = fpext half %y to float
+  %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 5.0, float %ext)
+  ret <2 x half> %cvt
+}
+
+define <2 x half> @fpext_fpext_cvt_pkrtz(half %x, half %y) {
+; CHECK-LABEL: @fpext_fpext_cvt_pkrtz(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x half> poison, half [[X:%.*]], i64 0
+; CHECK-NEXT:    [[CVT:%.*]] = insertelement <2 x half> [[TMP1]], half [[Y:%.*]], i64 1
+; CHECK-NEXT:    ret <2 x half> [[CVT]]
+;
+  %extx = fpext half %x to float
+  %exty = fpext half %y to float
+  %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %extx, float %exty)
+  ret <2 x half> %cvt
+}
+
+define <2 x half> @poison_fpext_cvt_pkrtz(half %y) {
+; CHECK-LABEL: @poison_fpext_cvt_pkrtz(
+; CHECK-NEXT:    [[CVT:%.*]] = insertelement <2 x half> poison, half [[Y:%.*]], i64 1
+; CHECK-NEXT:    ret <2 x half> [[CVT]]
+;
+  %ext = fpext half %y to float
+  %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float poison, float %ext)
+  ret <2 x half> %cvt
+}
+
 ; --------------------------------------------------------------------
 ; llvm.amdgcn.cvt.pknorm.i16
 ; --------------------------------------------------------------------

>From cde137309c2ae65d03f7935210676815361ceee8 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Mon, 16 Sep 2024 14:43:10 +0100
Subject: [PATCH 2/2] Add tests and remove single-use restriction

---
 .../AMDGPU/AMDGPUInstCombineIntrinsic.cpp     |  2 +-
 .../InstCombine/AMDGPU/amdgcn-intrinsics.ll   | 37 +++++++++++++++++++
 2 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 389cc35b51f9da..c1cf5ca8f72ea5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -660,7 +660,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
       }
 
       Value *Src = nullptr;
-      if (match(Arg, m_OneUse(m_FPExt(m_Value(Src))))) {
+      if (match(Arg, m_FPExt(m_Value(Src)))) {
         if (Src->getType()->isHalfTy())
           return Src;
       }
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
index 176b3560b4064d..fabf8ab51764b9 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -1181,6 +1181,20 @@ define <2 x half> @const_fpext_cvt_pkrtz(half %y) {
   ret <2 x half> %cvt
 }
 
+define <2 x half> @const_fpext_multi_cvt_pkrtz(half %y) {
+; CHECK-LABEL: @const_fpext_multi_cvt_pkrtz(
+; CHECK-NEXT:    [[CVT1:%.*]] = insertelement <2 x half> <half 0xH4500, half poison>, half [[Y:%.*]], i64 1
+; CHECK-NEXT:    [[CVT2:%.*]] = insertelement <2 x half> <half 0xH4200, half poison>, half [[Y]], i64 1
+; CHECK-NEXT:    [[ADD:%.*]] = fadd <2 x half> [[CVT1]], [[CVT2]]
+; CHECK-NEXT:    ret <2 x half> [[ADD]]
+;
+  %ext = fpext half %y to float
+  %cvt1 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 5.0, float %ext)
+  %cvt2 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 3.0, float %ext)
+  %add = fadd <2 x half> %cvt1, %cvt2
+  ret <2 x half> %add
+}
+
 define <2 x half> @fpext_fpext_cvt_pkrtz(half %x, half %y) {
 ; CHECK-LABEL: @fpext_fpext_cvt_pkrtz(
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x half> poison, half [[X:%.*]], i64 0
@@ -1193,6 +1207,19 @@ define <2 x half> @fpext_fpext_cvt_pkrtz(half %x, half %y) {
   ret <2 x half> %cvt
 }
 
+define <2 x half> @fpext_fpext_bf16_cvt_pkrtz(bfloat %x, bfloat %y) {
+; CHECK-LABEL: @fpext_fpext_bf16_cvt_pkrtz(
+; CHECK-NEXT:    [[EXTX:%.*]] = fpext bfloat [[X:%.*]] to float
+; CHECK-NEXT:    [[EXTY:%.*]] = fpext bfloat [[Y:%.*]] to float
+; CHECK-NEXT:    [[CVT:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float [[EXTX]], float [[EXTY]])
+; CHECK-NEXT:    ret <2 x half> [[CVT]]
+;
+  %extx = fpext bfloat %x to float
+  %exty = fpext bfloat %y to float
+  %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %extx, float %exty)
+  ret <2 x half> %cvt
+}
+
 define <2 x half> @poison_fpext_cvt_pkrtz(half %y) {
 ; CHECK-LABEL: @poison_fpext_cvt_pkrtz(
 ; CHECK-NEXT:    [[CVT:%.*]] = insertelement <2 x half> poison, half [[Y:%.*]], i64 1
@@ -1203,6 +1230,16 @@ define <2 x half> @poison_fpext_cvt_pkrtz(half %y) {
   ret <2 x half> %cvt
 }
 
+define <2 x half> @fpext_poison_cvt_pkrtz(half %x) {
+; CHECK-LABEL: @fpext_poison_cvt_pkrtz(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x half> poison, half [[X:%.*]], i64 0
+; CHECK-NEXT:    ret <2 x half> [[TMP1]]
+;
+  %ext = fpext half %x to float
+  %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %ext, float poison)
+  ret <2 x half> %cvt
+}
+
 ; --------------------------------------------------------------------
 ; llvm.amdgcn.cvt.pknorm.i16
 ; --------------------------------------------------------------------