[llvm] [DXIL] Implement pow lowering (PR #86733)

Tue Mar 26 13:28:43 PDT 2024

https://github.com/farzonl created https://github.com/llvm/llvm-project/pull/86733

closes #86179
- `DXILIntrinsicExpansion.cpp` - add the pow expansion to exp2(y*log2(x))

>From 7b27e939b9db3e81595cc98464a001e5d1d3dcca Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Tue, 26 Mar 2024 16:24:30 -0400
Subject: [PATCH] [DXIL] Implement pow lowering closes #86179 -
 `DXILIntrinsicExpansion.cpp` - add the pow expansion to exp2(y*log2(x))

---
 .../Target/DirectX/DXILIntrinsicExpansion.cpp | 23 +++++++++++++++
 llvm/test/CodeGen/DirectX/pow-vec.ll          | 15 ++++++++++
 llvm/test/CodeGen/DirectX/pow.ll              | 29 +++++++++++++++++++
 3 files changed, 67 insertions(+)
 create mode 100644 llvm/test/CodeGen/DirectX/pow-vec.ll
 create mode 100644 llvm/test/CodeGen/DirectX/pow.ll

diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index 3cc375edabde92..3e2d10f5ee7a23 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -37,6 +37,7 @@ static bool isIntrinsicExpansion(Function &F) {
   case Intrinsic::exp:
   case Intrinsic::log:
   case Intrinsic::log10:
+  case Intrinsic::pow:
   case Intrinsic::dx_any:
   case Intrinsic::dx_clamp:
   case Intrinsic::dx_uclamp:
@@ -197,6 +198,26 @@ static bool expandLog10Intrinsic(CallInst *Orig) {
   return expandLogIntrinsic(Orig, numbers::ln2f / numbers::ln10f);
 }
 
+static bool expandPowIntrinsic(CallInst *Orig) {
+
+  Value *X = Orig->getOperand(0);
+  Value *Y = Orig->getOperand(1);
+  Type *Ty = X->getType();
+  IRBuilder<> Builder(Orig->getParent());
+  Builder.SetInsertPoint(Orig);
+
+  auto *Log2Call =
+      Builder.CreateIntrinsic(Ty, Intrinsic::log2, {X}, nullptr, "elt.log2");
+  auto *Mul = Builder.CreateFMul(Log2Call, Y);
+  auto *Exp2Call =
+      Builder.CreateIntrinsic(Ty, Intrinsic::exp2, {Mul}, nullptr, "elt.exp2");
+  Exp2Call->setTailCall(Orig->isTailCall());
+  Exp2Call->setAttributes(Orig->getAttributes());
+  Orig->replaceAllUsesWith(Exp2Call);
+  Orig->eraseFromParent();
+  return true;
+}
+
 static bool expandRcpIntrinsic(CallInst *Orig) {
   Value *X = Orig->getOperand(0);
   IRBuilder<> Builder(Orig->getParent());
@@ -270,6 +291,8 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
     return expandLogIntrinsic(Orig);
   case Intrinsic::log10:
     return expandLog10Intrinsic(Orig);
+  case Intrinsic::pow:
+    return expandPowIntrinsic(Orig);
   case Intrinsic::dx_any:
     return expandAnyIntrinsic(Orig);
   case Intrinsic::dx_uclamp:
diff --git a/llvm/test/CodeGen/DirectX/pow-vec.ll b/llvm/test/CodeGen/DirectX/pow-vec.ll
new file mode 100644
index 00000000000000..781fa5b8cb2409
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/pow-vec.ll
@@ -0,0 +1,15 @@
+; RUN: opt -S  -dxil-intrinsic-expansion  < %s | FileCheck %s
+
+; Make sure dxil operation function calls for pow are generated for float and half.
+
+; CHECK-LABEL: pow_float4
+; CHECK: call <4 x float> @llvm.log2.v4f32(<4 x float>  %a)
+; CHECK: fmul <4 x float> %{{.*}}, %b
+; CHECK: call <4 x float> @llvm.exp2.v4f32(<4 x float>  %{{.*}})
+define noundef <4 x float> @pow_float4(<4 x float> noundef %a, <4 x float> noundef %b) {
+entry:
+  %elt.pow = call <4 x float> @llvm.pow.v4f32(<4 x float> %a, <4 x float> %b)
+  ret <4 x float> %elt.pow
+}
+
+declare <4 x float> @llvm.pow.v4f32(<4 x float>,<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/pow.ll b/llvm/test/CodeGen/DirectX/pow.ll
new file mode 100644
index 00000000000000..25ce0fe731d0ba
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/pow.ll
@@ -0,0 +1,29 @@
+; RUN: opt -S  -dxil-intrinsic-expansion  < %s | FileCheck %s --check-prefixes=CHECK,EXPCHECK
+; RUN: opt -S  -dxil-op-lower  < %s | FileCheck %s --check-prefixes=CHECK,DOPCHECK
+
+; Make sure dxil operation function calls for pow are generated.
+
+define noundef float @pow_float(float noundef %a, float noundef %b) {
+entry:
+; DOPCHECK: call float @dx.op.unary.f32(i32 23, float %a)
+; EXPCHECK: call float @llvm.log2.f32(float %a)
+; CHECK: fmul float %{{.*}}, %b
+; DOPCHECK: call float @dx.op.unary.f32(i32 21, float %{{.*}})
+; EXPCHECK: call float @llvm.exp2.f32(float %{{.*}})
+  %elt.pow = call float @llvm.pow.f32(float %a, float %b)
+  ret float %elt.pow
+}
+
+define noundef half @pow_half(half noundef %a, half noundef %b) {
+entry:
+; DOPCHECK: call half @dx.op.unary.f16(i32 23, half %a)
+; EXPCHECK: call half @llvm.log2.f16(half %a)
+; CHECK: fmul half %{{.*}}, %b
+; DOPCHECK: call half @dx.op.unary.f16(i32 21, half %{{.*}})
+; EXPCHECK: call half @llvm.exp2.f16(half %{{.*}})
+  %elt.pow = call half @llvm.pow.f16(half %a, half %b)
+  ret half %elt.pow
+}
+
+declare half @llvm.pow.f16(half,half)
+declare float @llvm.pow.f32(float,float)