[llvm] 6f5e993 - [DirectX] legalize usub.sat (#135288)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 11 11:32:50 PDT 2025
Author: Farzon Lotfi
Date: 2025-04-11T14:32:44-04:00
New Revision: 6f5e993b179a6f93b40d375a8864f2a482f3ca5d
URL: https://github.com/llvm/llvm-project/commit/6f5e993b179a6f93b40d375a8864f2a482f3ca5d
DIFF: https://github.com/llvm/llvm-project/commit/6f5e993b179a6f93b40d375a8864f2a482f3ca5d.diff
LOG: [DirectX] legalize usub.sat (#135288)
fixes #135285
This change implements the `usub.sat` intrinsic to perform an unsigned
saturating subtraction on the 2 arguments.
The minimum value this operation is clamped to is 0.
Added:
llvm/test/CodeGen/DirectX/usub_sat.ll
Modified:
llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index 84acf4d536d0c..70f284e08b250 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -65,12 +65,27 @@ static bool isIntrinsicExpansion(Function &F) {
case Intrinsic::dx_sign:
case Intrinsic::dx_step:
case Intrinsic::dx_radians:
+ case Intrinsic::usub_sat:
case Intrinsic::vector_reduce_add:
case Intrinsic::vector_reduce_fadd:
return true;
}
return false;
}
+
+static Value *expandUsubSat(CallInst *Orig) {
+ Value *A = Orig->getArgOperand(0);
+ Value *B = Orig->getArgOperand(1);
+ Type *Ty = A->getType();
+
+ IRBuilder<> Builder(Orig);
+
+ Value *Cmp = Builder.CreateICmpULT(A, B, "usub.cmp");
+ Value *Sub = Builder.CreateSub(A, B, "usub.sub");
+ Value *Zero = ConstantInt::get(Ty, 0);
+ return Builder.CreateSelect(Cmp, Zero, Sub, "usub.sat");
+}
+
static Value *expandVecReduceAdd(CallInst *Orig, Intrinsic::ID IntrinsicId) {
assert(IntrinsicId == Intrinsic::vector_reduce_add ||
IntrinsicId == Intrinsic::vector_reduce_fadd);
@@ -586,6 +601,9 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
case Intrinsic::dx_radians:
Result = expandRadiansIntrinsic(Orig);
break;
+ case Intrinsic::usub_sat:
+ Result = expandUsubSat(Orig);
+ break;
case Intrinsic::vector_reduce_add:
case Intrinsic::vector_reduce_fadd:
Result = expandVecReduceAdd(Orig, IntrinsicId);
diff --git a/llvm/test/CodeGen/DirectX/usub_sat.ll b/llvm/test/CodeGen/DirectX/usub_sat.ll
new file mode 100644
index 0000000000000..8cfb1a1fe9bd1
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/usub_sat.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -dxil-intrinsic-expansion -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+
+; Make sure llvm.usub.sat is expanded into an icmp ult, a sub, and a select that clamps the result to 0.
+
+define noundef i16 @usub_sat_i16(i16 noundef %a, i16 noundef %b) {
+; CHECK-LABEL: define noundef i16 @usub_sat_i16(
+; CHECK-SAME: i16 noundef [[A:%.*]], i16 noundef [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[USUB_CMP:%.*]] = icmp ult i16 [[A]], [[B]]
+; CHECK-NEXT: [[USUB_SUB:%.*]] = sub i16 [[A]], [[B]]
+; CHECK-NEXT: [[ELT_USUB_SAT:%.*]] = select i1 [[USUB_CMP]], i16 0, i16 [[USUB_SUB]]
+; CHECK-NEXT: ret i16 [[ELT_USUB_SAT]]
+;
+entry:
+ %elt.usub_sat = call i16 @llvm.usub.sat.i16(i16 %a, i16 %b)
+ ret i16 %elt.usub_sat
+}
+
+define noundef i32 @usub_sat_i32(i32 noundef %a, i32 noundef %b) {
+; CHECK-LABEL: define noundef i32 @usub_sat_i32(
+; CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[USUB_CMP:%.*]] = icmp ult i32 [[A]], [[B]]
+; CHECK-NEXT: [[USUB_SUB:%.*]] = sub i32 [[A]], [[B]]
+; CHECK-NEXT: [[ELT_USUB_SAT:%.*]] = select i1 [[USUB_CMP]], i32 0, i32 [[USUB_SUB]]
+; CHECK-NEXT: ret i32 [[ELT_USUB_SAT]]
+;
+entry:
+ %elt.usub_sat = call i32 @llvm.usub.sat.i32(i32 %a, i32 %b)
+ ret i32 %elt.usub_sat
+}
+
+define noundef i64 @usub_sat_i64(i64 noundef %a, i64 noundef %b) {
+; CHECK-LABEL: define noundef i64 @usub_sat_i64(
+; CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[USUB_CMP:%.*]] = icmp ult i64 [[A]], [[B]]
+; CHECK-NEXT: [[USUB_SUB:%.*]] = sub i64 [[A]], [[B]]
+; CHECK-NEXT: [[ELT_USUB_SAT:%.*]] = select i1 [[USUB_CMP]], i64 0, i64 [[USUB_SUB]]
+; CHECK-NEXT: ret i64 [[ELT_USUB_SAT]]
+;
+entry:
+ %elt.usub_sat = call i64 @llvm.usub.sat.i64(i64 %a, i64 %b)
+ ret i64 %elt.usub_sat
+}
+
+define noundef <4 x i32> @usub_sat_vec(<4 x i32> noundef %a, <4 x i32> noundef %b) {
+; CHECK-LABEL: define noundef <4 x i32> @usub_sat_vec(
+; CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[USUB_CMP:%.*]] = icmp ult <4 x i32> [[A]], [[B]]
+; CHECK-NEXT: [[USUB_SUB:%.*]] = sub <4 x i32> [[A]], [[B]]
+; CHECK-NEXT: [[ELT_USUB_SAT:%.*]] = select <4 x i1> [[USUB_CMP]], <4 x i32> zeroinitializer, <4 x i32> [[USUB_SUB]]
+; CHECK-NEXT: ret <4 x i32> [[ELT_USUB_SAT]]
+;
+entry:
+ %elt.usub_sat = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i32> %elt.usub_sat
+}
More information about the llvm-commits
mailing list