[llvm] [DXIL] Add sign intrinsic part 2 (PR #101988)

Thu Aug 15 06:21:57 PDT 2024

https://github.com/tgymnich updated https://github.com/llvm/llvm-project/pull/101988

>From ced51b90d800f3ddbefa1b92c7c64e3e537212a8 Mon Sep 17 00:00:00 2001
From: Tim Gymnich <tgymnich at icloud.com>
Date: Fri, 2 Aug 2024 21:40:24 +0200
Subject: [PATCH 1/3] [DXIL] Add sign intrinsic part 2

---
 llvm/include/llvm/IR/IntrinsicsDirectX.td     |  1 +
 .../Target/DirectX/DXILIntrinsicExpansion.cpp | 34 +++++++++++++++++++
 2 files changed, 35 insertions(+)

diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index 312c3862f240d8..b39591c3211ef0 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -60,4 +60,5 @@ def int_dx_imad : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLV
 def int_dx_umad : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
 def int_dx_rcp  : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
 def int_dx_rsqrt  : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+def int_dx_sign : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_any_ty]>;
 }
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index ac85859af8a53e..d8e4f903db73c0 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -16,6 +16,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Intrinsics.h"
@@ -45,6 +46,7 @@ static bool isIntrinsicExpansion(Function &F) {
   case Intrinsic::dx_length:
   case Intrinsic::dx_sdot:
   case Intrinsic::dx_udot:
+  case Intrinsic::dx_sign:
     return true;
   }
   return false;
@@ -293,6 +295,36 @@ static bool expandClampIntrinsic(CallInst *Orig, Intrinsic::ID ClampIntrinsic) {
   return true;
 }
 
+static bool expandSignIntrinsic(CallInst *Orig) {
+  IRBuilder<> Builder(Orig->getParent());
+  Value *X = Orig->getOperand(0);
+  Type *Ty = X->getType();
+  Type *ScalarTy = Ty->getScalarType();
+  Type *RetTy = Orig->getType();
+  Constant *Zero = Constant::getNullValue(Ty);
+  Builder.SetInsertPoint(Orig);
+
+  Value *GT;
+  Value *LT;
+  if (ScalarTy->isFloatingPointTy()) {
+    GT = Builder.CreateFCmpOLT(Zero, X);
+    LT = Builder.CreateFCmpOLT(X, Zero);
+  } else {
+    assert(ScalarTy->isIntegerTy());
+    GT = Builder.CreateICmpSLT(Zero, X);
+    LT = Builder.CreateICmpSLT(X, Zero);
+  }
+
+  Value *ZextGT = Builder.CreateZExt(GT, RetTy);
+  Value *ZextLT = Builder.CreateZExt(LT, RetTy);
+
+  Value *Ret = Builder.CreateSub(ZextGT, ZextLT);
+
+  Orig->replaceAllUsesWith(Ret);
+  Orig->eraseFromParent();
+  return true;
+}
+
 static bool expandIntrinsic(Function &F, CallInst *Orig) {
   switch (F.getIntrinsicID()) {
   case Intrinsic::abs:
@@ -317,6 +349,8 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
   case Intrinsic::dx_sdot:
   case Intrinsic::dx_udot:
     return expandIntegerDot(Orig, F.getIntrinsicID());
+  case Intrinsic::dx_sign:
+    return expandSignIntrinsic(Orig);
   }
   return false;
 }

>From a37044ddb2ebc6083eb7d76a435b33cb2b6940da Mon Sep 17 00:00:00 2001
From: Tim Gymnich <tgymnich at icloud.com>
Date: Mon, 5 Aug 2024 16:04:36 +0200
Subject: [PATCH 2/3] add test

---
 llvm/test/CodeGen/DirectX/sign.ll | 216 ++++++++++++++++++++++++++++++
 1 file changed, 216 insertions(+)
 create mode 100644 llvm/test/CodeGen/DirectX/sign.ll

diff --git a/llvm/test/CodeGen/DirectX/sign.ll b/llvm/test/CodeGen/DirectX/sign.ll
new file mode 100644
index 00000000000000..2d9254a3abc77f
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/sign.ll
@@ -0,0 +1,216 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S  -dxil-intrinsic-expansion  -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s --check-prefixes=CHECK,EXPCHECK
+; RUN: opt -S  -dxil-op-lower  -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s --check-prefixes=CHECK,DOPCHECK
+
+
+define noundef i32 @sign_half(half noundef %a) {
+; CHECK-LABEL: define noundef i32 @sign_half(
+; CHECK-SAME: half noundef [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = fcmp olt half 0xH0000, [[A]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp olt half [[A]], 0xH0000
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i1 [[TMP0]] to i32
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT:    [[TMP4:%.*]] = sub i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    ret i32 [[TMP4]]
+;
+entry:
+  %elt.sign = call i32 @llvm.dx.sign.f16(half %a)
+  ret i32 %elt.sign
+}
+
+define noundef i32 @sign_float(float noundef %a) {
+; CHECK-LABEL: define noundef i32 @sign_float(
+; CHECK-SAME: float noundef [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = fcmp olt float 0.000000e+00, [[A]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp olt float [[A]], 0.000000e+00
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i1 [[TMP0]] to i32
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT:    [[TMP4:%.*]] = sub i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    ret i32 [[TMP4]]
+;
+entry:
+  %elt.sign = call i32 @llvm.dx.sign.f32(float %a)
+  ret i32 %elt.sign
+}
+
+define noundef i32 @sign_double(double noundef %a) {
+; CHECK-LABEL: define noundef i32 @sign_double(
+; CHECK-SAME: double noundef [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = fcmp olt double 0.000000e+00, [[A]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp olt double [[A]], 0.000000e+00
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i1 [[TMP0]] to i32
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT:    [[TMP4:%.*]] = sub i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    ret i32 [[TMP4]]
+;
+entry:
+  %elt.sign = call i32 @llvm.dx.sign.f64(double %a)
+  ret i32 %elt.sign
+}
+
+define noundef i32 @sign_i16(i16 noundef %a) {
+; CHECK-LABEL: define noundef i32 @sign_i16(
+; CHECK-SAME: i16 noundef [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp slt i16 0, [[A]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i16 [[A]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i1 [[TMP0]] to i32
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT:    [[TMP4:%.*]] = sub i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    ret i32 [[TMP4]]
+;
+entry:
+  %elt.sign = call i32 @llvm.dx.sign.i16(i16 %a)
+  ret i32 %elt.sign
+}
+
+define noundef i32 @sign_i32(i32 noundef %a) {
+; CHECK-LABEL: define noundef i32 @sign_i32(
+; CHECK-SAME: i32 noundef [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp slt i32 0, [[A]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[A]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i1 [[TMP0]] to i32
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT:    [[TMP4:%.*]] = sub i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    ret i32 [[TMP4]]
+;
+entry:
+  %elt.sign = call i32 @llvm.dx.sign.i32(i32 %a)
+  ret i32 %elt.sign
+}
+
+define noundef i32 @sign_i64(i64 noundef %a) {
+; CHECK-LABEL: define noundef i32 @sign_i64(
+; CHECK-SAME: i64 noundef [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp slt i64 0, [[A]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i64 [[A]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i1 [[TMP0]] to i32
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT:    [[TMP4:%.*]] = sub i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    ret i32 [[TMP4]]
+;
+entry:
+  %elt.sign = call i32 @llvm.dx.sign.i64(i64 %a)
+  ret i32 %elt.sign
+}
+
+define noundef <4 x i32> @sign_half_vector(<4 x half> noundef %a) {
+; CHECK-LABEL: define noundef <4 x i32> @sign_half_vector(
+; CHECK-SAME: <4 x half> noundef [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = fcmp olt <4 x half> zeroinitializer, [[A]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp olt <4 x half> [[A]], zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i1> [[TMP0]] to <4 x i32>
+; CHECK-NEXT:    [[TMP3:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT:    [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    ret <4 x i32> [[TMP4]]
+;
+entry:
+  %elt.sign = call <4 x i32> @llvm.dx.sign.v4f16(<4 x half> %a)
+  ret <4 x i32> %elt.sign
+}
+
+define noundef <4 x i32> @sign_float_vector(<4 x float> noundef %a) {
+; CHECK-LABEL: define noundef <4 x i32> @sign_float_vector(
+; CHECK-SAME: <4 x float> noundef [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = fcmp olt <4 x float> zeroinitializer, [[A]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp olt <4 x float> [[A]], zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i1> [[TMP0]] to <4 x i32>
+; CHECK-NEXT:    [[TMP3:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT:    [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    ret <4 x i32> [[TMP4]]
+;
+entry:
+  %elt.sign = call <4 x i32> @llvm.dx.sign.v4f32(<4 x float> %a)
+  ret <4 x i32> %elt.sign
+}
+
+define noundef <4 x i32> @sign_double_vector(<4 x double> noundef %a) {
+; CHECK-LABEL: define noundef <4 x i32> @sign_double_vector(
+; CHECK-SAME: <4 x double> noundef [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = fcmp olt <4 x double> zeroinitializer, [[A]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp olt <4 x double> [[A]], zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i1> [[TMP0]] to <4 x i32>
+; CHECK-NEXT:    [[TMP3:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT:    [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    ret <4 x i32> [[TMP4]]
+;
+entry:
+  %elt.sign = call <4 x i32> @llvm.dx.sign.v4f64(<4 x double> %a)
+  ret <4 x i32> %elt.sign
+}
+
+define noundef <4 x i32> @sign_i16_vector(<4 x i16> noundef %a) {
+; CHECK-LABEL: define noundef <4 x i32> @sign_i16_vector(
+; CHECK-SAME: <4 x i16> noundef [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp slt <4 x i16> zeroinitializer, [[A]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i16> [[A]], zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i1> [[TMP0]] to <4 x i32>
+; CHECK-NEXT:    [[TMP3:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT:    [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    ret <4 x i32> [[TMP4]]
+;
+entry:
+  %elt.sign = call <4 x i32> @llvm.dx.sign.v4i16(<4 x i16> %a)
+  ret <4 x i32> %elt.sign
+}
+
+define noundef <4 x i32> @sign_i32_vector(<4 x i32> noundef %a) {
+; CHECK-LABEL: define noundef <4 x i32> @sign_i32_vector(
+; CHECK-SAME: <4 x i32> noundef [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp slt <4 x i32> zeroinitializer, [[A]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[A]], zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i1> [[TMP0]] to <4 x i32>
+; CHECK-NEXT:    [[TMP3:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT:    [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    ret <4 x i32> [[TMP4]]
+;
+entry:
+  %elt.sign = call <4 x i32> @llvm.dx.sign.v4i32(<4 x i32> %a)
+  ret <4 x i32> %elt.sign
+}
+
+define noundef <4 x i32> @sign_i64_vector(<4 x i64> noundef %a) {
+; CHECK-LABEL: define noundef <4 x i32> @sign_i64_vector(
+; CHECK-SAME: <4 x i64> noundef [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp slt <4 x i64> zeroinitializer, [[A]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i64> [[A]], zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i1> [[TMP0]] to <4 x i32>
+; CHECK-NEXT:    [[TMP3:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT:    [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    ret <4 x i32> [[TMP4]]
+;
+entry:
+  %elt.sign = call <4 x i32> @llvm.dx.sign.v4i64(<4 x i64> %a)
+  ret <4 x i32> %elt.sign
+}
+
+
+declare i32 @llvm.dx.sign.f16(half)
+declare i32 @llvm.dx.sign.f32(float)
+declare i32 @llvm.dx.sign.f64(double)
+
+declare i32 @llvm.dx.sign.i16(i16)
+declare i32 @llvm.dx.sign.i32(i32)
+declare i32 @llvm.dx.sign.i64(i64)
+
+declare <4 x i32> @llvm.dx.sign.v4f16(<4 x half>)
+declare <4 x i32> @llvm.dx.sign.v4f32(<4 x float>)
+declare <4 x i32> @llvm.dx.sign.v4f64(<4 x double>)
+
+declare <4 x i32> @llvm.dx.sign.v4i16(<4 x i16>)
+declare <4 x i32> @llvm.dx.sign.v4i32(<4 x i32>)
+declare <4 x i32> @llvm.dx.sign.v4i64(<4 x i64>)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; DOPCHECK: {{.*}}
+; EXPCHECK: {{.*}}

>From 61bc2a831436866cb4e704d8d1b6e85f6ff574fb Mon Sep 17 00:00:00 2001
From: Tim Gymnich <tgymnich at icloud.com>
Date: Thu, 15 Aug 2024 14:48:39 +0200
Subject: [PATCH 3/3] keep the builder setup code close together

---
 llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index d8e4f903db73c0..6535f9c288fb7d 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -297,12 +297,12 @@ static bool expandClampIntrinsic(CallInst *Orig, Intrinsic::ID ClampIntrinsic) {
 
 static bool expandSignIntrinsic(CallInst *Orig) {
   IRBuilder<> Builder(Orig->getParent());
+  Builder.SetInsertPoint(Orig);
   Value *X = Orig->getOperand(0);
   Type *Ty = X->getType();
   Type *ScalarTy = Ty->getScalarType();
   Type *RetTy = Orig->getType();
   Constant *Zero = Constant::getNullValue(Ty);
-  Builder.SetInsertPoint(Orig);
 
   Value *GT;
   Value *LT;