[clang] [llvm] [HLSL] [DXIL] Implement the AddUint64 HLSL function and the UAddc DXIL op (PR #127137)
Deric Cheung via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 26 09:17:11 PST 2025
https://github.com/Icohedron updated https://github.com/llvm/llvm-project/pull/127137
>From 4fae5642c6e8e305cdc687b4968ba5eabaa44b50 Mon Sep 17 00:00:00 2001
From: Icohedron <cheung.deric at gmail.com>
Date: Mon, 27 Jan 2025 11:18:09 -0800
Subject: [PATCH 01/13] Add the AddUint64 HLSL builtin function
- Defines the AddUint64 HLSL builtin function
- Implements the UAddc DXIL op to lower AddUint64 to DXIL
---
clang/include/clang/Basic/Builtins.td | 6 ++
.../clang/Basic/DiagnosticSemaKinds.td | 2 +
clang/lib/CodeGen/CGBuiltin.cpp | 45 ++++++++++++
clang/lib/Headers/hlsl/hlsl_intrinsics.h | 21 ++++++
clang/lib/Sema/SemaHLSL.cpp | 47 ++++++++++++
.../test/CodeGenHLSL/builtins/AddUint64.hlsl | 71 +++++++++++++++++++
.../SemaHLSL/BuiltIns/AddUint64-errors.hlsl | 41 +++++++++++
llvm/lib/Target/DirectX/DXIL.td | 13 ++++
llvm/lib/Target/DirectX/DXILOpBuilder.cpp | 14 ++++
llvm/lib/Target/DirectX/DXILOpBuilder.h | 3 +
llvm/lib/Target/DirectX/DXILOpLowering.cpp | 22 ++++--
llvm/test/CodeGen/DirectX/UAddc.ll | 40 +++++++++++
llvm/test/CodeGen/DirectX/UAddc_errors.ll | 30 ++++++++
13 files changed, 348 insertions(+), 7 deletions(-)
create mode 100644 clang/test/CodeGenHLSL/builtins/AddUint64.hlsl
create mode 100644 clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl
create mode 100644 llvm/test/CodeGen/DirectX/UAddc.ll
create mode 100644 llvm/test/CodeGen/DirectX/UAddc_errors.ll
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 29939242596ba..2433427a89429 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -4753,6 +4753,12 @@ def GetDeviceSideMangledName : LangBuiltin<"CUDA_LANG"> {
}
// HLSL
+def HLSLAddUint64: LangBuiltin<"HLSL_LANG"> {
+ let Spellings = ["__builtin_hlsl_adduint64"];
+ let Attributes = [NoThrow, Const];
+ let Prototype = "void(...)";
+}
+
def HLSLResourceGetPointer : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_resource_getpointer"];
let Attributes = [NoThrow];
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 2fce5e88ba8a0..e78339ee924ff 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -10655,6 +10655,8 @@ def err_second_argument_to_cwsc_not_pointer : Error<
def err_vector_incorrect_num_elements : Error<
"%select{too many|too few}0 elements in vector %select{initialization|operand}3 (expected %1 elements, have %2)">;
+def err_invalid_even_odd_vector_element_count : Error<
+ "invalid element count of %0 in vector %select{initialization|operand}4 (expected an %select{even|odd}3 element count in the range of %1 and %2)">;
def err_altivec_empty_initializer : Error<"expected initializer">;
def err_invalid_neon_type_code : Error<
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 361e4c4bf2e2e..5322b38458b26 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -19445,6 +19445,51 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
return nullptr;
switch (BuiltinID) {
+ case Builtin::BI__builtin_hlsl_adduint64: {
+ Value *OpA = EmitScalarExpr(E->getArg(0));
+ Value *OpB = EmitScalarExpr(E->getArg(1));
+ assert(E->getArg(0)->getType()->hasIntegerRepresentation() &&
+ E->getArg(1)->getType()->hasIntegerRepresentation() &&
+ "AddUint64 operands must have an integer representation");
+ assert(((E->getArg(0)->getType()->castAs<VectorType>()->getNumElements() ==
+ 2 &&
+ E->getArg(1)->getType()->castAs<VectorType>()->getNumElements() ==
+ 2) ||
+ (E->getArg(0)->getType()->castAs<VectorType>()->getNumElements() ==
+ 4 &&
+ E->getArg(1)->getType()->castAs<VectorType>()->getNumElements() ==
+ 4)) &&
+ "input vectors must have 2 or 4 elements each");
+
+ llvm::Value *Result = PoisonValue::get(OpA->getType());
+ uint64_t NumElements =
+ E->getArg(0)->getType()->castAs<VectorType>()->getNumElements();
+ for (uint64_t i = 0; i < NumElements / 2; ++i) {
+
+ // Obtain low and high words of inputs A and B
+ llvm::Value *LowA = Builder.CreateExtractElement(OpA, 2 * i + 0);
+ llvm::Value *HighA = Builder.CreateExtractElement(OpA, 2 * i + 1);
+ llvm::Value *LowB = Builder.CreateExtractElement(OpB, 2 * i + 0);
+ llvm::Value *HighB = Builder.CreateExtractElement(OpB, 2 * i + 1);
+
+ // Use an uadd_with_overflow to compute the sum of low words and obtain a
+ // carry value
+ llvm::Value *Carry;
+ llvm::Value *LowSum = EmitOverflowIntrinsic(
+ *this, llvm::Intrinsic::uadd_with_overflow, LowA, LowB, Carry);
+ llvm::Value *ZExtCarry = Builder.CreateZExt(Carry, HighA->getType());
+
+ // Sum the high words and the carry
+ llvm::Value *HighSum = Builder.CreateAdd(HighA, HighB);
+ llvm::Value *HighSumPlusCarry = Builder.CreateAdd(HighSum, ZExtCarry);
+
+ // Insert the low and high word sums into the result vector
+ Result = Builder.CreateInsertElement(Result, LowSum, 2 * i + 0);
+ Result = Builder.CreateInsertElement(Result, HighSumPlusCarry, 2 * i + 1,
+ "hlsl.AddUint64");
+ }
+ return Result;
+ }
case Builtin::BI__builtin_hlsl_resource_getpointer: {
Value *HandleOp = EmitScalarExpr(E->getArg(0));
Value *IndexOp = EmitScalarExpr(E->getArg(1));
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index d1f5fdff8b600..513639ed1b81d 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -138,6 +138,27 @@ _HLSL_BUILTIN_ALIAS(__builtin_elementwise_acos)
float4 acos(float4);
//===----------------------------------------------------------------------===//
+// AddUint64 builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn T AddUint64(T a, T b)
+/// \brief Implements unsigned 64-bit integer addition using pairs of unsigned
+/// 32-bit integers.
+/// \param a [in] The first unsigned 32-bit integer pair(s)
+/// \param b [in] The second unsigned 32-bit integer pair(s)
+///
+/// This function takes one or two pairs (low, high) of unsigned 32-bit integer
+/// values and returns pairs (low, high) of unsigned 32-bit integer
+/// values representing the result of unsigned 64-bit integer addition.
+
+_HLSL_AVAILABILITY(shadermodel, 6.0)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_adduint64)
+uint32_t2 AddUint64(uint32_t2, uint32_t2);
+_HLSL_AVAILABILITY(shadermodel, 6.0)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_adduint64)
+uint32_t4 AddUint64(uint32_t4, uint32_t4);
+
+//===----------------------------------------------------------------------===//
// all builtins
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 4abd870ad6aaa..99eb5360ec356 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -2038,6 +2038,18 @@ static bool CheckAllArgsHaveFloatRepresentation(Sema *S, CallExpr *TheCall) {
checkAllFloatTypes);
}
+static bool CheckUnsignedIntRepresentations(Sema *S, CallExpr *TheCall) {
+ auto checkUnsignedInteger = [](clang::QualType PassedType) -> bool {
+ clang::QualType BaseType =
+ PassedType->isVectorType()
+ ? PassedType->getAs<clang::VectorType>()->getElementType()
+ : PassedType;
+ return !BaseType->isUnsignedIntegerType();
+ };
+ return CheckAllArgTypesAreCorrect(S, TheCall, S->Context.UnsignedIntTy,
+ checkUnsignedInteger);
+}
+
static bool CheckFloatOrHalfRepresentations(Sema *S, CallExpr *TheCall) {
auto checkFloatorHalf = [](clang::QualType PassedType) -> bool {
clang::QualType BaseType =
@@ -2229,6 +2241,41 @@ static bool CheckResourceHandle(
// returning an ExprError
bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
switch (BuiltinID) {
+ case Builtin::BI__builtin_hlsl_adduint64: {
+ if (SemaRef.checkArgCount(TheCall, 2))
+ return true;
+ if (CheckVectorElementCallArgs(&SemaRef, TheCall))
+ return true;
+ if (CheckUnsignedIntRepresentations(&SemaRef, TheCall))
+ return true;
+
+ // CheckVectorElementCallArgs(...) guarantees both args are the same type.
+ assert(TheCall->getArg(0)->getType() == TheCall->getArg(1)->getType() &&
+ "Both args must be of the same type");
+
+ // ensure both args are vectors
+ auto *VTy = TheCall->getArg(0)->getType()->getAs<VectorType>();
+ if (!VTy) {
+ SemaRef.Diag(TheCall->getBeginLoc(), diag::err_vec_builtin_non_vector)
+ << "AddUint64" << /*all*/ 1;
+ return true;
+ }
+
+ // ensure both args have 2 elements, or both args have 4 elements
+ int NumElementsArg = VTy->getNumElements();
+ if (NumElementsArg != 2 && NumElementsArg != 4) {
+ SemaRef.Diag(TheCall->getBeginLoc(),
+ diag::err_invalid_even_odd_vector_element_count)
+ << NumElementsArg << 2 << 4 << /*even*/ 0 << /*operand*/ 1;
+ return true;
+ }
+
+ ExprResult A = TheCall->getArg(0);
+ QualType ArgTyA = A.get()->getType();
+ // return type is the same as the input type
+ TheCall->setType(ArgTyA);
+ break;
+ }
case Builtin::BI__builtin_hlsl_resource_getpointer: {
if (SemaRef.checkArgCount(TheCall, 2) ||
CheckResourceHandle(&SemaRef, TheCall, 0) ||
diff --git a/clang/test/CodeGenHLSL/builtins/AddUint64.hlsl b/clang/test/CodeGenHLSL/builtins/AddUint64.hlsl
new file mode 100644
index 0000000000000..4141aef69323d
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/AddUint64.hlsl
@@ -0,0 +1,71 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library %s \
+// RUN: -emit-llvm -disable-llvm-passes -o - | \
+// RUN: FileCheck %s --check-prefixes=CHECK
+
+
+// CHECK-LABEL: define noundef <2 x i32> @_Z20test_AddUint64_uint2Dv2_jS_(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x i32>, align 8
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x i32>, align 8
+// CHECK-NEXT: store <2 x i32> [[A]], ptr [[A_ADDR]], align 8
+// CHECK-NEXT: store <2 x i32> [[B]], ptr [[B_ADDR]], align 8
+// CHECK-NEXT: [[A_LOAD:%.*]] = load <2 x i32>, ptr [[A_ADDR]], align 8
+// CHECK-NEXT: [[B_LOAD:%.*]] = load <2 x i32>, ptr [[B_ADDR]], align 8
+// CHECK-NEXT: [[LowA:%.*]] = extractelement <2 x i32> [[A_LOAD]], i64 0
+// CHECK-NEXT: [[HighA:%.*]] = extractelement <2 x i32> [[A_LOAD]], i64 1
+// CHECK-NEXT: [[LowB:%.*]] = extractelement <2 x i32> [[B_LOAD]], i64 0
+// CHECK-NEXT: [[HighB:%.*]] = extractelement <2 x i32> [[B_LOAD]], i64 1
+// CHECK-NEXT: [[UAddc:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[LowA]], i32 [[LowB]])
+// CHECK-NEXT: [[Carry:%.*]] = extractvalue { i32, i1 } [[UAddc]], 1
+// CHECK-NEXT: [[LowSum:%.*]] = extractvalue { i32, i1 } [[UAddc]], 0
+// CHECK-NEXT: [[CarryZExt:%.*]] = zext i1 [[Carry]] to i32
+// CHECK-NEXT: [[HighSum:%.*]] = add i32 [[HighA]], [[HighB]]
+// CHECK-NEXT: [[HighSumPlusCarry:%.*]] = add i32 [[HighSum]], [[CarryZExt]]
+// CHECK-NEXT: [[HLSL_ADDUINT64_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[LowSum]], i64 0
+// CHECK-NEXT: [[HLSL_ADDUINT64:%.*]] = insertelement <2 x i32> [[HLSL_ADDUINT64_UPTO0]], i32 [[HighSumPlusCarry]], i64 1
+// CHECK-NEXT: ret <2 x i32> [[HLSL_ADDUINT64]]
+//
+uint2 test_AddUint64_uint2(uint2 a, uint2 b) {
+ return AddUint64(a, b);
+}
+
+// CHECK-LABEL: define noundef <4 x i32> @_Z20test_AddUint64_uint4Dv4_jS_(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x i32>, align 16
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x i32>, align 16
+// CHECK-NEXT: store <4 x i32> [[A]], ptr [[A_ADDR]], align 16
+// CHECK-NEXT: store <4 x i32> [[B]], ptr [[B_ADDR]], align 16
+// CHECK-NEXT: [[A_LOAD:%.*]] = load <4 x i32>, ptr [[A_ADDR]], align 16
+// CHECK-NEXT: [[B_LOAD:%.*]] = load <4 x i32>, ptr [[B_ADDR]], align 16
+// CHECK-NEXT: [[LowA:%.*]] = extractelement <4 x i32> [[A_LOAD]], i64 0
+// CHECK-NEXT: [[HighA:%.*]] = extractelement <4 x i32> [[A_LOAD]], i64 1
+// CHECK-NEXT: [[LowB:%.*]] = extractelement <4 x i32> [[B_LOAD]], i64 0
+// CHECK-NEXT: [[HighB:%.*]] = extractelement <4 x i32> [[B_LOAD]], i64 1
+// CHECK-NEXT: [[UAddc:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[LowA]], i32 [[LowB]])
+// CHECK-NEXT: [[Carry:%.*]] = extractvalue { i32, i1 } [[UAddc]], 1
+// CHECK-NEXT: [[LowSum:%.*]] = extractvalue { i32, i1 } [[UAddc]], 0
+// CHECK-NEXT: [[CarryZExt:%.*]] = zext i1 [[Carry]] to i32
+// CHECK-NEXT: [[HighSum:%.*]] = add i32 [[HighA]], [[HighB]]
+// CHECK-NEXT: [[HighSumPlusCarry:%.*]] = add i32 [[HighSum]], [[CarryZExt]]
+// CHECK-NEXT: [[HLSL_ADDUINT64_UPTO0:%.*]] = insertelement <4 x i32> poison, i32 [[LowSum]], i64 0
+// CHECK-NEXT: [[HLSL_ADDUINT64_UPTO1:%.*]] = insertelement <4 x i32> [[HLSL_ADDUINT64_UPTO0]], i32 [[HighSumPlusCarry]], i64 1
+// CHECK-NEXT: [[LowA1:%.*]] = extractelement <4 x i32> [[A_LOAD]], i64 2
+// CHECK-NEXT: [[HighA1:%.*]] = extractelement <4 x i32> [[A_LOAD]], i64 3
+// CHECK-NEXT: [[LowB1:%.*]] = extractelement <4 x i32> [[B_LOAD]], i64 2
+// CHECK-NEXT: [[HighB1:%.*]] = extractelement <4 x i32> [[B_LOAD]], i64 3
+// CHECK-NEXT: [[UAddc1:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[LowA1]], i32 [[LowB1]])
+// CHECK-NEXT: [[Carry1:%.*]] = extractvalue { i32, i1 } [[UAddc1]], 1
+// CHECK-NEXT: [[LowSum1:%.*]] = extractvalue { i32, i1 } [[UAddc1]], 0
+// CHECK-NEXT: [[CarryZExt1:%.*]] = zext i1 [[Carry1]] to i32
+// CHECK-NEXT: [[HighSum1:%.*]] = add i32 [[HighA1]], [[HighB1]]
+// CHECK-NEXT: [[HighSumPlusCarry1:%.*]] = add i32 [[HighSum1]], [[CarryZExt1]]
+// CHECK-NEXT: [[HLSL_ADDUINT64_UPTO2:%.*]] = insertelement <4 x i32> [[HLSL_ADDUINT64_UPTO1]], i32 [[LowSum1]], i64 2
+// CHECK-NEXT: [[HLSL_ADDUINT64:%.*]] = insertelement <4 x i32> [[HLSL_ADDUINT64_UPTO2]], i32 [[HighSumPlusCarry1]], i64 3
+// CHECK-NEXT: ret <4 x i32> [[HLSL_ADDUINT64]]
+//
+uint4 test_AddUint64_uint4(uint4 a, uint4 b) {
+ return AddUint64(a, b);
+}
diff --git a/clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl
new file mode 100644
index 0000000000000..ec9d026bb6fe7
--- /dev/null
+++ b/clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl
@@ -0,0 +1,41 @@
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify
+
+uint2 test_too_few_arg() {
+ return __builtin_hlsl_adduint64();
+ // expected-error at -1 {{too few arguments to function call, expected 2, have 0}}
+}
+
+uint4 test_too_many_arg(uint4 a) {
+ return __builtin_hlsl_adduint64(a, a, a);
+ // expected-error at -1 {{too many arguments to function call, expected 2, have 3}}
+}
+
+uint2 test_mismatched_arg_types(uint2 a, uint4 b) {
+ return __builtin_hlsl_adduint64(a, b);
+ // expected-error at -1 {{all arguments to '__builtin_hlsl_adduint64' must have the same type}}
+}
+
+uint2 test_bad_num_arg_elements(uint3 a, uint3 b) {
+ return __builtin_hlsl_adduint64(a, b);
+ // expected-error at -1 {{invalid element count of 3 in vector operand (expected an even element count in the range of 2 and 4)}}
+}
+
+uint2 test_scalar_arg_type(uint a) {
+ return __builtin_hlsl_adduint64(a, a);
+ // expected-error at -1 {{all arguments to AddUint64 must be vectors}}
+}
+
+uint2 test_signed_integer_args(int2 a, int2 b) {
+ return __builtin_hlsl_adduint64(a, b);
+// expected-error at -1 {{passing 'int2' (aka 'vector<int, 2>') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(unsigned int)))) unsigned int' (vector of 2 'unsigned int' values)}}
+}
+
+struct S {
+ uint2 a;
+};
+
+uint2 test_incorrect_arg_type(S a) {
+ return __builtin_hlsl_adduint64(a, a);
+ // expected-error at -1 {{passing 'S' to parameter of incompatible type 'unsigned int'}}
+}
+
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 7cb841d9bd5b5..2f6b4d676edfd 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -50,6 +50,7 @@ def HandleTy : DXILOpParamType;
def ResBindTy : DXILOpParamType;
def ResPropsTy : DXILOpParamType;
def SplitDoubleTy : DXILOpParamType;
+def BinaryWithCarryTy : DXILOpParamType;
class DXILOpClass;
@@ -738,6 +739,18 @@ def UMin : DXILOp<40, binary> {
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
}
+def UAddc : DXILOp<44, binaryWithCarryOrBorrow > {
+ let Doc = "Unsigned 32-bit integer arithmetic add with carry. uaddc(a,b) = (a+b, a+b overflowed ? 1 : 0)";
+ // TODO: This `let intrinsics = ...` line may be uncommented when
+ // https://github.com/llvm/llvm-project/issues/113192 is fixed
+ // let intrinsics = [IntrinSelect<int_uadd_with_overflow>];
+ let arguments = [OverloadTy, OverloadTy];
+ let result = BinaryWithCarryTy;
+ let overloads = [Overloads<DXIL1_0, [Int32Ty]>];
+ let stages = [Stages<DXIL1_0, [all_stages]>];
+ let attributes = [Attributes<DXIL1_0, [ReadNone]>];
+}
+
def FMad : DXILOp<46, tertiary> {
let Doc = "Floating point arithmetic multiply/add operation. fmad(m,a,b) = m "
"* a + b.";
diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
index badd5aabd6432..f0f1bbabb6b23 100644
--- a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
@@ -230,6 +230,14 @@ static StructType *getSplitDoubleType(LLVMContext &Context) {
return StructType::create({Int32Ty, Int32Ty}, "dx.types.splitdouble");
}
+static StructType *getBinaryWithCarryType(LLVMContext &Context) {
+ if (auto *ST = StructType::getTypeByName(Context, "dx.types.i32c"))
+ return ST;
+ Type *Int32Ty = Type::getInt32Ty(Context);
+ Type *Int1Ty = Type::getInt1Ty(Context);
+ return StructType::create({Int32Ty, Int1Ty}, "dx.types.i32c");
+}
+
static Type *getTypeFromOpParamType(OpParamType Kind, LLVMContext &Ctx,
Type *OverloadTy) {
switch (Kind) {
@@ -273,6 +281,8 @@ static Type *getTypeFromOpParamType(OpParamType Kind, LLVMContext &Ctx,
return getResPropsType(Ctx);
case OpParamType::SplitDoubleTy:
return getSplitDoubleType(Ctx);
+ case OpParamType::BinaryWithCarryTy:
+ return getBinaryWithCarryType(Ctx);
}
llvm_unreachable("Invalid parameter kind");
return nullptr;
@@ -539,6 +549,10 @@ StructType *DXILOpBuilder::getSplitDoubleType(LLVMContext &Context) {
return ::getSplitDoubleType(Context);
}
+StructType *DXILOpBuilder::getBinaryWithCarryType(LLVMContext &Context) {
+ return ::getBinaryWithCarryType(Context);
+}
+
StructType *DXILOpBuilder::getHandleType() {
return ::getHandleType(IRB.getContext());
}
diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.h b/llvm/lib/Target/DirectX/DXILOpBuilder.h
index df5a0240870f4..8e13b87a2be10 100644
--- a/llvm/lib/Target/DirectX/DXILOpBuilder.h
+++ b/llvm/lib/Target/DirectX/DXILOpBuilder.h
@@ -53,6 +53,9 @@ class DXILOpBuilder {
/// Get the `%dx.types.splitdouble` type.
StructType *getSplitDoubleType(LLVMContext &Context);
+ /// Get the `%dx.types.i32c` type.
+ StructType *getBinaryWithCarryType(LLVMContext &Context);
+
/// Get the `%dx.types.Handle` type.
StructType *getHandleType();
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index 0c245c1a43d31..c9e3d7e284963 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -359,17 +359,16 @@ class OpLowerer {
return lowerToBindAndAnnotateHandle(F);
}
- Error replaceSplitDoubleCallUsages(CallInst *Intrin, CallInst *Op) {
+ Error replaceAggregateTypeOfCallUsages(CallInst *Intrin, CallInst *Op) {
for (Use &U : make_early_inc_range(Intrin->uses())) {
if (auto *EVI = dyn_cast<ExtractValueInst>(U.getUser())) {
-
- if (EVI->getNumIndices() != 1)
- return createStringError(std::errc::invalid_argument,
- "Splitdouble has only 2 elements");
EVI->setOperand(0, Op);
+ } else if (auto *IVI = dyn_cast<InsertValueInst>(U.getUser())) {
+ IVI->setOperand(0, Op);
} else {
return make_error<StringError>(
- "Splitdouble use is not ExtractValueInst",
+ (Intrin->getCalledFunction()->getName() +
+ " use is not a ExtractValueInst or InsertValueInst"),
inconvertibleErrorCode());
}
}
@@ -821,7 +820,16 @@ class OpLowerer {
F, OpCode::SplitDouble,
OpBuilder.getSplitDoubleType(M.getContext()),
[&](CallInst *CI, CallInst *Op) {
- return replaceSplitDoubleCallUsages(CI, Op);
+ return replaceAggregateTypeOfCallUsages(CI, Op);
+ });
+ break;
+ // TODO: this can be removed when
+ // https://github.com/llvm/llvm-project/issues/113192 is fixed
+ case Intrinsic::uadd_with_overflow:
+ HasErrors |= replaceFunctionWithNamedStructOp(
+ F, OpCode::UAddc, OpBuilder.getBinaryWithCarryType(M.getContext()),
+ [&](CallInst *CI, CallInst *Op) {
+ return replaceAggregateTypeOfCallUsages(CI, Op);
});
break;
case Intrinsic::ctpop:
diff --git a/llvm/test/CodeGen/DirectX/UAddc.ll b/llvm/test/CodeGen/DirectX/UAddc.ll
new file mode 100644
index 0000000000000..abafa40bf2306
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/UAddc.ll
@@ -0,0 +1,40 @@
+; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+
+; CHECK: %dx.types.i32c = type { i32, i1 }
+
+define noundef i32 @test_UAddc(i32 noundef %a, i32 noundef %b) {
+; CHECK-LABEL: define noundef i32 @test_UAddc(
+; CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) {
+; CHECK-NEXT: [[UAddc:%.*]] = call %dx.types.i32c @dx.op.binaryWithCarryOrBorrow.i32(i32 44, i32 [[A]], i32 [[B]])
+; CHECK-NEXT: [[Carry:%.*]] = extractvalue %dx.types.i32c [[UAddc]], 1
+; CHECK-NEXT: [[Sum:%.*]] = extractvalue %dx.types.i32c [[UAddc]], 0
+; CHECK-NEXT: [[CarryZExt:%.*]] = zext i1 [[Carry]] to i32
+; CHECK-NEXT: [[Result:%.*]] = add i32 [[Sum]], [[CarryZExt]]
+; CHECK-NEXT: ret i32 [[Result]]
+;
+ %uaddc = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
+ %carry = extractvalue { i32, i1 } %uaddc, 1
+ %sum = extractvalue { i32, i1 } %uaddc, 0
+ %carry_zext = zext i1 %carry to i32
+ %result = add i32 %sum, %carry_zext
+ ret i32 %result
+}
+
+
+define noundef i32 @test_UAddc_insert(i32 noundef %a, i32 noundef %b) {
+; CHECK-LABEL: define noundef i32 @test_UAddc_insert(
+; CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) {
+; CHECK-NEXT: [[UAddc:%.*]] = call %dx.types.i32c @dx.op.binaryWithCarryOrBorrow.i32(i32 44, i32 [[A]], i32 [[B]])
+; CHECK-NEXT: insertvalue %dx.types.i32c [[UAddc]], i32 [[A]], 0
+; CHECK-NEXT: [[Result:%.*]] = extractvalue %dx.types.i32c [[UAddc]], 0
+; CHECK-NEXT: ret i32 [[Result]]
+;
+ %uaddc = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
+ insertvalue { i32, i1 } %uaddc, i32 %a, 0
+ %result = extractvalue { i32, i1 } %uaddc, 0
+ ret i32 %result
+}
+
+declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32)
+; CHECK: declare %dx.types.i32c @dx.op.binaryWithCarryOrBorrow.i32(i32, i32, i32)
+
diff --git a/llvm/test/CodeGen/DirectX/UAddc_errors.ll b/llvm/test/CodeGen/DirectX/UAddc_errors.ll
new file mode 100644
index 0000000000000..72c676db9bba4
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/UAddc_errors.ll
@@ -0,0 +1,30 @@
+; We use llc for this test so that we don't abort after the first error.
+; RUN: not llc %s -o /dev/null 2>&1 | FileCheck %s
+
+target triple = "dxil-pc-shadermodel6.3-library"
+
+; DXIL operation UAddc only supports i32. Other integer types are unsupported.
+; CHECK: error:
+; CHECK-SAME: in function uaddc_i16
+; CHECK-SAME: Cannot create UAddc operation: Invalid overload type
+
+define noundef i16 @uaddc_i16(i16 noundef %a, i16 noundef %b) "hlsl.export" {
+ %uaddc = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 %a, i16 %b)
+ %carry = extractvalue { i16, i1 } %uaddc, 1
+ %sum = extractvalue { i16, i1 } %uaddc, 0
+ %carry_zext = zext i1 %carry to i16
+ %result = add i16 %sum, %carry_zext
+ ret i16 %result
+}
+
+; CHECK: error:
+; CHECK-SAME: in function uaddc_return
+; CHECK-SAME: llvm.uadd.with.overflow.i32 use is not a ExtractValueInst or InsertValueInst
+
+define noundef { i32, i1 } @uaddc_return(i32 noundef %a, i32 noundef %b) "hlsl.export" {
+ %uaddc = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
+ ret { i32, i1 } %uaddc
+}
+
+declare { i16, i1 } @llvm.uadd.with.overflow.i16(i16, i16)
+
>From 45afa2f3806b2f869d02aa68e2e1d8dddc6cba3b Mon Sep 17 00:00:00 2001
From: Icohedron <cheung.deric at gmail.com>
Date: Tue, 11 Feb 2025 22:56:23 +0000
Subject: [PATCH 02/13] Make AddUint64 use llvm.uadd.with.overflow.v2i32
When the input args are of type uint4, AddUint64 now uses the v2i32 variant of llvm.uadd.with.overflow to sum the low words of both args.
---
clang/lib/CodeGen/CGBuiltin.cpp | 65 +++++++++++-------
.../test/CodeGenHLSL/builtins/AddUint64.hlsl | 67 ++++++++-----------
llvm/test/CodeGen/DirectX/UAddc.ll | 60 +++++++++++++----
3 files changed, 115 insertions(+), 77 deletions(-)
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 5322b38458b26..0fe8cf5179b53 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -19461,31 +19461,50 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
4)) &&
"input vectors must have 2 or 4 elements each");
- llvm::Value *Result = PoisonValue::get(OpA->getType());
uint64_t NumElements =
E->getArg(0)->getType()->castAs<VectorType>()->getNumElements();
- for (uint64_t i = 0; i < NumElements / 2; ++i) {
-
- // Obtain low and high words of inputs A and B
- llvm::Value *LowA = Builder.CreateExtractElement(OpA, 2 * i + 0);
- llvm::Value *HighA = Builder.CreateExtractElement(OpA, 2 * i + 1);
- llvm::Value *LowB = Builder.CreateExtractElement(OpB, 2 * i + 0);
- llvm::Value *HighB = Builder.CreateExtractElement(OpB, 2 * i + 1);
-
- // Use an uadd_with_overflow to compute the sum of low words and obtain a
- // carry value
- llvm::Value *Carry;
- llvm::Value *LowSum = EmitOverflowIntrinsic(
- *this, llvm::Intrinsic::uadd_with_overflow, LowA, LowB, Carry);
- llvm::Value *ZExtCarry = Builder.CreateZExt(Carry, HighA->getType());
-
- // Sum the high words and the carry
- llvm::Value *HighSum = Builder.CreateAdd(HighA, HighB);
- llvm::Value *HighSumPlusCarry = Builder.CreateAdd(HighSum, ZExtCarry);
-
- // Insert the low and high word sums into the result vector
- Result = Builder.CreateInsertElement(Result, LowSum, 2 * i + 0);
- Result = Builder.CreateInsertElement(Result, HighSumPlusCarry, 2 * i + 1,
+
+ llvm::Value *Result = PoisonValue::get(OpA->getType());
+ llvm::Value *LowA;
+ llvm::Value *HighA;
+ llvm::Value *LowB;
+ llvm::Value *HighB;
+
+ // Obtain low and high words of inputs A and B
+ if (NumElements == 2) {
+ LowA = Builder.CreateExtractElement(OpA, (uint64_t)0, "LowA");
+ HighA = Builder.CreateExtractElement(OpA, (uint64_t)1, "HighA");
+ LowB = Builder.CreateExtractElement(OpB, (uint64_t)0, "LowB");
+ HighB = Builder.CreateExtractElement(OpB, (uint64_t)1, "HighB");
+ } else {
+ LowA = Builder.CreateShuffleVector(OpA, ArrayRef<int>{0, 2}, "LowA");
+ HighA = Builder.CreateShuffleVector(OpA, ArrayRef<int>{1, 3}, "HighA");
+ LowB = Builder.CreateShuffleVector(OpB, ArrayRef<int>{0, 2}, "LowB");
+ HighB = Builder.CreateShuffleVector(OpB, ArrayRef<int>{1, 3}, "HighB");
+ }
+
+ // Use an uadd_with_overflow to compute the sum of low words and obtain a
+ // carry value
+ llvm::Value *Carry;
+ llvm::Value *LowSum = EmitOverflowIntrinsic(
+ *this, llvm::Intrinsic::uadd_with_overflow, LowA, LowB, Carry);
+ llvm::Value *ZExtCarry =
+ Builder.CreateZExt(Carry, HighA->getType(), "CarryZExt");
+
+ // Sum the high words and the carry
+ llvm::Value *HighSum = Builder.CreateAdd(HighA, HighB, "HighSum");
+ llvm::Value *HighSumPlusCarry =
+ Builder.CreateAdd(HighSum, ZExtCarry, "HighSumPlusCarry");
+
+ // Insert the low and high word sums into the result vector
+ if (NumElements == 2) {
+ Result = Builder.CreateInsertElement(Result, LowSum, (uint64_t)0,
+ "hlsl.AddUint64.upto0");
+ Result = Builder.CreateInsertElement(Result, HighSumPlusCarry,
+ (uint64_t)1, "hlsl.AddUint64");
+ } else { /* NumElements == 4 */
+ Result = Builder.CreateShuffleVector(LowSum, HighSumPlusCarry,
+ ArrayRef<int>{0, 2, 1, 3},
"hlsl.AddUint64");
}
return Result;
diff --git a/clang/test/CodeGenHLSL/builtins/AddUint64.hlsl b/clang/test/CodeGenHLSL/builtins/AddUint64.hlsl
index 4141aef69323d..e1832bdbbf33f 100644
--- a/clang/test/CodeGenHLSL/builtins/AddUint64.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/AddUint64.hlsl
@@ -11,20 +11,20 @@
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x i32>, align 8
// CHECK-NEXT: store <2 x i32> [[A]], ptr [[A_ADDR]], align 8
// CHECK-NEXT: store <2 x i32> [[B]], ptr [[B_ADDR]], align 8
-// CHECK-NEXT: [[A_LOAD:%.*]] = load <2 x i32>, ptr [[A_ADDR]], align 8
-// CHECK-NEXT: [[B_LOAD:%.*]] = load <2 x i32>, ptr [[B_ADDR]], align 8
-// CHECK-NEXT: [[LowA:%.*]] = extractelement <2 x i32> [[A_LOAD]], i64 0
-// CHECK-NEXT: [[HighA:%.*]] = extractelement <2 x i32> [[A_LOAD]], i64 1
-// CHECK-NEXT: [[LowB:%.*]] = extractelement <2 x i32> [[B_LOAD]], i64 0
-// CHECK-NEXT: [[HighB:%.*]] = extractelement <2 x i32> [[B_LOAD]], i64 1
-// CHECK-NEXT: [[UAddc:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[LowA]], i32 [[LowB]])
-// CHECK-NEXT: [[Carry:%.*]] = extractvalue { i32, i1 } [[UAddc]], 1
-// CHECK-NEXT: [[LowSum:%.*]] = extractvalue { i32, i1 } [[UAddc]], 0
-// CHECK-NEXT: [[CarryZExt:%.*]] = zext i1 [[Carry]] to i32
-// CHECK-NEXT: [[HighSum:%.*]] = add i32 [[HighA]], [[HighB]]
-// CHECK-NEXT: [[HighSumPlusCarry:%.*]] = add i32 [[HighSum]], [[CarryZExt]]
-// CHECK-NEXT: [[HLSL_ADDUINT64_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[LowSum]], i64 0
-// CHECK-NEXT: [[HLSL_ADDUINT64:%.*]] = insertelement <2 x i32> [[HLSL_ADDUINT64_UPTO0]], i32 [[HighSumPlusCarry]], i64 1
+// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[A_ADDR]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[B_ADDR]], align 8
+// CHECK-NEXT: [[LOWA:%.*]] = extractelement <2 x i32> [[TMP0]], i64 0
+// CHECK-NEXT: [[HIGHA:%.*]] = extractelement <2 x i32> [[TMP0]], i64 1
+// CHECK-NEXT: [[LOWB:%.*]] = extractelement <2 x i32> [[TMP1]], i64 0
+// CHECK-NEXT: [[HIGHB:%.*]] = extractelement <2 x i32> [[TMP1]], i64 1
+// CHECK-NEXT: [[TMP2:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[LOWA]], i32 [[LOWB]])
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0
+// CHECK-NEXT: [[CARRYZEXT:%.*]] = zext i1 [[TMP3]] to i32
+// CHECK-NEXT: [[HIGHSUM:%.*]] = add i32 [[HIGHA]], [[HIGHB]]
+// CHECK-NEXT: [[HIGHSUMPLUSCARRY:%.*]] = add i32 [[HIGHSUM]], [[CARRYZEXT]]
+// CHECK-NEXT: [[HLSL_ADDUINT64_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[TMP4]], i64 0
+// CHECK-NEXT: [[HLSL_ADDUINT64:%.*]] = insertelement <2 x i32> [[HLSL_ADDUINT64_UPTO0]], i32 [[HIGHSUMPLUSCARRY]], i64 1
// CHECK-NEXT: ret <2 x i32> [[HLSL_ADDUINT64]]
//
uint2 test_AddUint64_uint2(uint2 a, uint2 b) {
@@ -38,32 +38,19 @@ uint2 test_AddUint64_uint2(uint2 a, uint2 b) {
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x i32>, align 16
// CHECK-NEXT: store <4 x i32> [[A]], ptr [[A_ADDR]], align 16
// CHECK-NEXT: store <4 x i32> [[B]], ptr [[B_ADDR]], align 16
-// CHECK-NEXT: [[A_LOAD:%.*]] = load <4 x i32>, ptr [[A_ADDR]], align 16
-// CHECK-NEXT: [[B_LOAD:%.*]] = load <4 x i32>, ptr [[B_ADDR]], align 16
-// CHECK-NEXT: [[LowA:%.*]] = extractelement <4 x i32> [[A_LOAD]], i64 0
-// CHECK-NEXT: [[HighA:%.*]] = extractelement <4 x i32> [[A_LOAD]], i64 1
-// CHECK-NEXT: [[LowB:%.*]] = extractelement <4 x i32> [[B_LOAD]], i64 0
-// CHECK-NEXT: [[HighB:%.*]] = extractelement <4 x i32> [[B_LOAD]], i64 1
-// CHECK-NEXT: [[UAddc:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[LowA]], i32 [[LowB]])
-// CHECK-NEXT: [[Carry:%.*]] = extractvalue { i32, i1 } [[UAddc]], 1
-// CHECK-NEXT: [[LowSum:%.*]] = extractvalue { i32, i1 } [[UAddc]], 0
-// CHECK-NEXT: [[CarryZExt:%.*]] = zext i1 [[Carry]] to i32
-// CHECK-NEXT: [[HighSum:%.*]] = add i32 [[HighA]], [[HighB]]
-// CHECK-NEXT: [[HighSumPlusCarry:%.*]] = add i32 [[HighSum]], [[CarryZExt]]
-// CHECK-NEXT: [[HLSL_ADDUINT64_UPTO0:%.*]] = insertelement <4 x i32> poison, i32 [[LowSum]], i64 0
-// CHECK-NEXT: [[HLSL_ADDUINT64_UPTO1:%.*]] = insertelement <4 x i32> [[HLSL_ADDUINT64_UPTO0]], i32 [[HighSumPlusCarry]], i64 1
-// CHECK-NEXT: [[LowA1:%.*]] = extractelement <4 x i32> [[A_LOAD]], i64 2
-// CHECK-NEXT: [[HighA1:%.*]] = extractelement <4 x i32> [[A_LOAD]], i64 3
-// CHECK-NEXT: [[LowB1:%.*]] = extractelement <4 x i32> [[B_LOAD]], i64 2
-// CHECK-NEXT: [[HighB1:%.*]] = extractelement <4 x i32> [[B_LOAD]], i64 3
-// CHECK-NEXT: [[UAddc1:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[LowA1]], i32 [[LowB1]])
-// CHECK-NEXT: [[Carry1:%.*]] = extractvalue { i32, i1 } [[UAddc1]], 1
-// CHECK-NEXT: [[LowSum1:%.*]] = extractvalue { i32, i1 } [[UAddc1]], 0
-// CHECK-NEXT: [[CarryZExt1:%.*]] = zext i1 [[Carry1]] to i32
-// CHECK-NEXT: [[HighSum1:%.*]] = add i32 [[HighA1]], [[HighB1]]
-// CHECK-NEXT: [[HighSumPlusCarry1:%.*]] = add i32 [[HighSum1]], [[CarryZExt1]]
-// CHECK-NEXT: [[HLSL_ADDUINT64_UPTO2:%.*]] = insertelement <4 x i32> [[HLSL_ADDUINT64_UPTO1]], i32 [[LowSum1]], i64 2
-// CHECK-NEXT: [[HLSL_ADDUINT64:%.*]] = insertelement <4 x i32> [[HLSL_ADDUINT64_UPTO2]], i32 [[HighSumPlusCarry1]], i64 3
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[A_ADDR]], align 16
+// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[B_ADDR]], align 16
+// CHECK-NEXT: [[LOWA:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT: [[HIGHA:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT: [[LOWB:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT: [[HIGHB:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT: [[TMP2:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[LOWA]], <2 x i32> [[LOWB]])
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <2 x i32>, <2 x i1> } [[TMP2]], 1
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i1> } [[TMP2]], 0
+// CHECK-NEXT: [[CARRYZEXT:%.*]] = zext <2 x i1> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[HIGHSUM:%.*]] = add <2 x i32> [[HIGHA]], [[HIGHB]]
+// CHECK-NEXT: [[HIGHSUMPLUSCARRY:%.*]] = add <2 x i32> [[HIGHSUM]], [[CARRYZEXT]]
+// CHECK-NEXT: [[HLSL_ADDUINT64:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[HIGHSUMPLUSCARRY]], <4 x i32> <i32 0, i32 2, i32 1, i32 3>
// CHECK-NEXT: ret <4 x i32> [[HLSL_ADDUINT64]]
//
uint4 test_AddUint64_uint4(uint4 a, uint4 b) {
diff --git a/llvm/test/CodeGen/DirectX/UAddc.ll b/llvm/test/CodeGen/DirectX/UAddc.ll
index abafa40bf2306..c16a3f6a5b5fe 100644
--- a/llvm/test/CodeGen/DirectX/UAddc.ll
+++ b/llvm/test/CodeGen/DirectX/UAddc.ll
@@ -1,17 +1,18 @@
-; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
; CHECK: %dx.types.i32c = type { i32, i1 }
define noundef i32 @test_UAddc(i32 noundef %a, i32 noundef %b) {
; CHECK-LABEL: define noundef i32 @test_UAddc(
; CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) {
-; CHECK-NEXT: [[UAddc:%.*]] = call %dx.types.i32c @dx.op.binaryWithCarryOrBorrow.i32(i32 44, i32 [[A]], i32 [[B]])
-; CHECK-NEXT: [[Carry:%.*]] = extractvalue %dx.types.i32c [[UAddc]], 1
-; CHECK-NEXT: [[Sum:%.*]] = extractvalue %dx.types.i32c [[UAddc]], 0
-; CHECK-NEXT: [[CarryZExt:%.*]] = zext i1 [[Carry]] to i32
-; CHECK-NEXT: [[Result:%.*]] = add i32 [[Sum]], [[CarryZExt]]
-; CHECK-NEXT: ret i32 [[Result]]
-;
+; CHECK-NEXT: [[UADDC1:%.*]] = call [[DX_TYPES_I32C:%.*]] @[[DX_OP_BINARYWITHCARRYORBORROW_I32:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 44, i32 [[A]], i32 [[B]]) #[[ATTR0:[0-9]+]]
+; CHECK-NEXT: [[CARRY:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC1]], 1
+; CHECK-NEXT: [[SUM:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC1]], 0
+; CHECK-NEXT: [[CARRY_ZEXT:%.*]] = zext i1 [[CARRY]] to i32
+; CHECK-NEXT: [[RESULT:%.*]] = add i32 [[SUM]], [[CARRY_ZEXT]]
+; CHECK-NEXT: ret i32 [[RESULT]]
+;
%uaddc = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
%carry = extractvalue { i32, i1 } %uaddc, 1
%sum = extractvalue { i32, i1 } %uaddc, 0
@@ -20,15 +21,47 @@ define noundef i32 @test_UAddc(i32 noundef %a, i32 noundef %b) {
ret i32 %result
}
+define noundef <2 x i32> @test_UAddc_vec2(<2 x i32> noundef %a, <2 x i32> noundef %b) {
+; CHECK-LABEL: define noundef <2 x i32> @test_UAddc_vec2(
+; CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) {
+; CHECK-NEXT: [[A_I0:%.*]] = extractelement <2 x i32> [[A]], i64 0
+; CHECK-NEXT: [[B_I0:%.*]] = extractelement <2 x i32> [[B]], i64 0
+; CHECK-NEXT: [[UADDC_I09:%.*]] = call [[DX_TYPES_I32C:%.*]] @[[DX_OP_BINARYWITHCARRYORBORROW_I32]](i32 44, i32 [[A_I0]], i32 [[B_I0]]) #[[ATTR0]]
+; CHECK-NEXT: [[A_I1:%.*]] = extractelement <2 x i32> [[A]], i64 1
+; CHECK-NEXT: [[B_I1:%.*]] = extractelement <2 x i32> [[B]], i64 1
+; CHECK-NEXT: [[UADDC_I18:%.*]] = call [[DX_TYPES_I32C]] @[[DX_OP_BINARYWITHCARRYORBORROW_I32]](i32 44, i32 [[A_I1]], i32 [[B_I1]]) #[[ATTR0]]
+; CHECK-NEXT: [[CARRY_ELEM1:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC_I09]], 1
+; CHECK-NEXT: [[CARRY_ELEM11:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC_I18]], 1
+; CHECK-NEXT: [[CARRY_UPTO0:%.*]] = insertelement <2 x i1> poison, i1 [[CARRY_ELEM1]], i64 0
+; CHECK-NEXT: [[CARRY:%.*]] = insertelement <2 x i1> [[CARRY_UPTO0]], i1 [[CARRY_ELEM11]], i64 1
+; CHECK-NEXT: [[CARRY_I0:%.*]] = extractelement <2 x i1> [[CARRY]], i64 0
+; CHECK-NEXT: [[CARRY_I1:%.*]] = extractelement <2 x i1> [[CARRY]], i64 1
+; CHECK-NEXT: [[SUM_ELEM0:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC_I09]], 0
+; CHECK-NEXT: [[SUM_ELEM02:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC_I18]], 0
+; CHECK-NEXT: [[CARRY_ZEXT_I0:%.*]] = zext i1 [[CARRY_I0]] to i32
+; CHECK-NEXT: [[CARRY_ZEXT_I1:%.*]] = zext i1 [[CARRY_I1]] to i32
+; CHECK-NEXT: [[RESULT_I0:%.*]] = add i32 [[SUM_ELEM0]], [[CARRY_ZEXT_I0]]
+; CHECK-NEXT: [[RESULT_I1:%.*]] = add i32 [[SUM_ELEM02]], [[CARRY_ZEXT_I1]]
+; CHECK-NEXT: [[RESULT_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RESULT_I0]], i64 0
+; CHECK-NEXT: [[RESULT:%.*]] = insertelement <2 x i32> [[RESULT_UPTO0]], i32 [[RESULT_I1]], i64 1
+; CHECK-NEXT: ret <2 x i32> [[RESULT]]
+;
+ %uaddc = call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> %a, <2 x i32> %b)
+ %carry = extractvalue { <2 x i32>, <2 x i1> } %uaddc, 1
+ %sum = extractvalue { <2 x i32>, <2 x i1> } %uaddc, 0
+ %carry_zext = zext <2 x i1> %carry to <2 x i32>
+ %result = add <2 x i32> %sum, %carry_zext
+ ret <2 x i32> %result
+}
define noundef i32 @test_UAddc_insert(i32 noundef %a, i32 noundef %b) {
; CHECK-LABEL: define noundef i32 @test_UAddc_insert(
; CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) {
-; CHECK-NEXT: [[UAddc:%.*]] = call %dx.types.i32c @dx.op.binaryWithCarryOrBorrow.i32(i32 44, i32 [[A]], i32 [[B]])
-; CHECK-NEXT: insertvalue %dx.types.i32c [[UAddc]], i32 [[A]], 0
-; CHECK-NEXT: [[Result:%.*]] = extractvalue %dx.types.i32c [[UAddc]], 0
-; CHECK-NEXT: ret i32 [[Result]]
-;
+; CHECK-NEXT: [[UADDC1:%.*]] = call [[DX_TYPES_I32C:%.*]] @[[DX_OP_BINARYWITHCARRYORBORROW_I32]](i32 44, i32 [[A]], i32 [[B]]) #[[ATTR0]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[DX_TYPES_I32C]] [[UADDC1]], i32 [[A]], 0
+; CHECK-NEXT: [[RESULT:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC1]], 0
+; CHECK-NEXT: ret i32 [[RESULT]]
+;
%uaddc = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
insertvalue { i32, i1 } %uaddc, i32 %a, 0
%result = extractvalue { i32, i1 } %uaddc, 0
@@ -36,5 +69,4 @@ define noundef i32 @test_UAddc_insert(i32 noundef %a, i32 noundef %b) {
}
declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32)
-; CHECK: declare %dx.types.i32c @dx.op.binaryWithCarryOrBorrow.i32(i32, i32, i32)
>From 7714dc5686059f827d53b83d9472f02934aa88de Mon Sep 17 00:00:00 2001
From: Icohedron <cheung.deric at gmail.com>
Date: Fri, 14 Feb 2025 22:31:14 +0000
Subject: [PATCH 03/13] Report name of function called in source
---
clang/lib/Sema/SemaHLSL.cpp | 2 +-
clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 99eb5360ec356..5773329101f27 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -2257,7 +2257,7 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
auto *VTy = TheCall->getArg(0)->getType()->getAs<VectorType>();
if (!VTy) {
SemaRef.Diag(TheCall->getBeginLoc(), diag::err_vec_builtin_non_vector)
- << "AddUint64" << /*all*/ 1;
+ << TheCall->getDirectCallee() << /*all*/ 1;
return true;
}
diff --git a/clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl
index ec9d026bb6fe7..9dd2aa1457f8f 100644
--- a/clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl
@@ -22,7 +22,7 @@ uint2 test_bad_num_arg_elements(uint3 a, uint3 b) {
uint2 test_scalar_arg_type(uint a) {
return __builtin_hlsl_adduint64(a, a);
- // expected-error@-1 {{all arguments to AddUint64 must be vectors}}
+ // expected-error@-1 {{all arguments to '__builtin_hlsl_adduint64' must be vectors}}
}
uint2 test_signed_integer_args(int2 a, int2 b) {
>From 9a08afa8d74e2f056e2b15bd8893291fc2ff8afa Mon Sep 17 00:00:00 2001
From: Icohedron <cheung.deric at gmail.com>
Date: Sat, 15 Feb 2025 01:06:41 +0000
Subject: [PATCH 04/13] Add a check for integer bit count. Replace vector size
check with bit count check
---
.../clang/Basic/DiagnosticSemaKinds.td | 7 +++++--
clang/lib/Sema/SemaHLSL.cpp | 21 +++++++++++++++----
.../SemaHLSL/BuiltIns/AddUint64-errors.hlsl | 7 ++++++-
3 files changed, 28 insertions(+), 7 deletions(-)
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index e78339ee924ff..ef912fd86a021 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -10655,10 +10655,13 @@ def err_second_argument_to_cwsc_not_pointer : Error<
def err_vector_incorrect_num_elements : Error<
"%select{too many|too few}0 elements in vector %select{initialization|operand}3 (expected %1 elements, have %2)">;
-def err_invalid_even_odd_vector_element_count : Error<
- "invalid element count of %0 in vector %select{initialization|operand}4 (expected an %select{even|odd}3 element count in the range of %1 and %2)">;
def err_altivec_empty_initializer : Error<"expected initializer">;
+def err_vector_incorrect_bit_count : Error<
+ "incorrect number of bits in vector operand (expected %select{|a multiple of}0 %1 bits, have %2)">;
+def err_integer_incorrect_bit_count : Error<
+ "incorrect number of bits in integer (expected %0 bits, have %1)">;
+
def err_invalid_neon_type_code : Error<
"incompatible constant for this __builtin_neon function">;
def err_argument_invalid_range : Error<
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 5773329101f27..90085177e5cbc 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -21,6 +21,7 @@
#include "clang/AST/Type.h"
#include "clang/AST/TypeLoc.h"
#include "clang/Basic/Builtins.h"
+#include "clang/Basic/DiagnosticFrontend.h"
#include "clang/Basic/DiagnosticSema.h"
#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/LLVM.h"
@@ -36,6 +37,7 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/DXILABI.h"
#include "llvm/Support/ErrorHandling.h"
@@ -2261,12 +2263,23 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
return true;
}
- // ensure both args have 2 elements, or both args have 4 elements
+ // ensure arg integers are 32-bits
+ uint64_t ElementBitCount = getASTContext()
+ .getTypeSizeInChars(VTy->getElementType())
+ .getQuantity() *
+ 8;
+ if (ElementBitCount != 32) {
+ SemaRef.Diag(TheCall->getBeginLoc(),
+ diag::err_integer_incorrect_bit_count)
+ << 32 << ElementBitCount;
+ return true;
+ }
+
+ // ensure both args are vectors of total bit size of a multiple of 64
int NumElementsArg = VTy->getNumElements();
if (NumElementsArg != 2 && NumElementsArg != 4) {
- SemaRef.Diag(TheCall->getBeginLoc(),
- diag::err_invalid_even_odd_vector_element_count)
- << NumElementsArg << 2 << 4 << /*even*/ 0 << /*operand*/ 1;
+ SemaRef.Diag(TheCall->getBeginLoc(), diag::err_vector_incorrect_bit_count)
+ << 1 /*a multiple of*/ << 64 << NumElementsArg * ElementBitCount;
return true;
}
diff --git a/clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl
index 9dd2aa1457f8f..1f9e92da90ca5 100644
--- a/clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl
@@ -17,7 +17,7 @@ uint2 test_mismatched_arg_types(uint2 a, uint4 b) {
uint2 test_bad_num_arg_elements(uint3 a, uint3 b) {
return __builtin_hlsl_adduint64(a, b);
- // expected-error@-1 {{invalid element count of 3 in vector operand (expected an even element count in the range of 2 and 4)}}
+ // expected-error@-1 {{incorrect number of bits in vector operand (expected a multiple of 64 bits, have 96)}}
}
uint2 test_scalar_arg_type(uint a) {
@@ -25,6 +25,11 @@ uint2 test_scalar_arg_type(uint a) {
// expected-error@-1 {{all arguments to '__builtin_hlsl_adduint64' must be vectors}}
}
+uint2 test_uint64_args(uint16_t2 a) {
+ return __builtin_hlsl_adduint64(a, a);
+ // expected-error@-1 {{incorrect number of bits in integer (expected 32 bits, have 16)}}
+}
+
uint2 test_signed_integer_args(int2 a, int2 b) {
return __builtin_hlsl_adduint64(a, b);
// expected-error@-1 {{passing 'int2' (aka 'vector<int, 2>') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(unsigned int)))) unsigned int' (vector of 2 'unsigned int' values)}}
>From ae4629795d69b6b5aa1a2f3f667618c50895dfee Mon Sep 17 00:00:00 2001
From: Icohedron <cheung.deric at gmail.com>
Date: Tue, 18 Feb 2025 17:11:04 +0000
Subject: [PATCH 05/13] Remove unnecessary includes
---
clang/lib/Sema/SemaHLSL.cpp | 2 --
1 file changed, 2 deletions(-)
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 90085177e5cbc..5b4f08705670b 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -21,7 +21,6 @@
#include "clang/AST/Type.h"
#include "clang/AST/TypeLoc.h"
#include "clang/Basic/Builtins.h"
-#include "clang/Basic/DiagnosticFrontend.h"
#include "clang/Basic/DiagnosticSema.h"
#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/LLVM.h"
@@ -37,7 +36,6 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/DXILABI.h"
#include "llvm/Support/ErrorHandling.h"
>From 4e017e47ff17aae239f1921608300f65a7e50e7f Mon Sep 17 00:00:00 2001
From: Icohedron <cheung.deric at gmail.com>
Date: Wed, 19 Feb 2025 21:50:21 +0000
Subject: [PATCH 06/13] Simplify asserts and return early for clarity
Addresses Justin Bogner's PR comments regarding assertions and the return.
Co-authored-by: Justin Bogner <mail at justinbogner.com>
---
clang/lib/CodeGen/CGBuiltin.cpp | 38 +++++++++++++--------------------
1 file changed, 15 insertions(+), 23 deletions(-)
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 0fe8cf5179b53..97b4d18e39216 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -19448,23 +19448,15 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
case Builtin::BI__builtin_hlsl_adduint64: {
Value *OpA = EmitScalarExpr(E->getArg(0));
Value *OpB = EmitScalarExpr(E->getArg(1));
- assert(E->getArg(0)->getType()->hasIntegerRepresentation() &&
- E->getArg(1)->getType()->hasIntegerRepresentation() &&
+ QualType Arg0Ty = E->getArg(0)->getType();
+ uint64_t NumElements = Arg0Ty->castAs<VectorType>()->getNumElements();
+ assert(Arg0Ty == E->getArg(1)->getType() &&
+ "AddUint64 operand types must match");
+ assert(Arg0Ty->hasIntegerRepresentation() &&
"AddUint64 operands must have an integer representation");
- assert(((E->getArg(0)->getType()->castAs<VectorType>()->getNumElements() ==
- 2 &&
- E->getArg(1)->getType()->castAs<VectorType>()->getNumElements() ==
- 2) ||
- (E->getArg(0)->getType()->castAs<VectorType>()->getNumElements() ==
- 4 &&
- E->getArg(1)->getType()->castAs<VectorType>()->getNumElements() ==
- 4)) &&
- "input vectors must have 2 or 4 elements each");
-
- uint64_t NumElements =
- E->getArg(0)->getType()->castAs<VectorType>()->getNumElements();
+ assert((NumElements == 2 || NumElements == 4) &&
+ "AddUint64 operands must have 2 or 4 elements");
- llvm::Value *Result = PoisonValue::get(OpA->getType());
llvm::Value *LowA;
llvm::Value *HighA;
llvm::Value *LowB;
@@ -19496,17 +19488,17 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
llvm::Value *HighSumPlusCarry =
Builder.CreateAdd(HighSum, ZExtCarry, "HighSumPlusCarry");
- // Insert the low and high word sums into the result vector
- if (NumElements == 2) {
- Result = Builder.CreateInsertElement(Result, LowSum, (uint64_t)0,
- "hlsl.AddUint64.upto0");
- Result = Builder.CreateInsertElement(Result, HighSumPlusCarry,
- (uint64_t)1, "hlsl.AddUint64");
- } else { /* NumElements == 4 */
- Result = Builder.CreateShuffleVector(LowSum, HighSumPlusCarry,
+ if (NumElements == 4) {
+ return Builder.CreateShuffleVector(LowSum, HighSumPlusCarry,
ArrayRef<int>{0, 2, 1, 3},
"hlsl.AddUint64");
}
+
+ llvm::Value *Result = PoisonValue::get(OpA->getType());
+ Result = Builder.CreateInsertElement(Result, LowSum, (uint64_t)0,
+ "hlsl.AddUint64.upto0");
+ Result = Builder.CreateInsertElement(Result, HighSumPlusCarry, (uint64_t)1,
+ "hlsl.AddUint64");
return Result;
}
case Builtin::BI__builtin_hlsl_resource_getpointer: {
>From 3e832cf8b376dd9efd1cefd0f40ac93569b5a5fc Mon Sep 17 00:00:00 2001
From: Icohedron <cheung.deric at gmail.com>
Date: Wed, 19 Feb 2025 22:38:12 +0000
Subject: [PATCH 07/13] Apply clang-format to CGBuiltin.cpp
---
clang/lib/CodeGen/CGBuiltin.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 97b4d18e39216..86d9d29e5fa75 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -19490,8 +19490,8 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
if (NumElements == 4) {
return Builder.CreateShuffleVector(LowSum, HighSumPlusCarry,
- ArrayRef<int>{0, 2, 1, 3},
- "hlsl.AddUint64");
+ ArrayRef<int>{0, 2, 1, 3},
+ "hlsl.AddUint64");
}
llvm::Value *Result = PoisonValue::get(OpA->getType());
>From ff09962d1fdb4345264348db1702f737da26be52 Mon Sep 17 00:00:00 2001
From: Icohedron <cheung.deric at gmail.com>
Date: Tue, 25 Feb 2025 02:20:49 +0000
Subject: [PATCH 08/13] Reimplement UAddc with simplified named struct handling
---
llvm/lib/Target/DirectX/DXIL.td | 4 +---
llvm/lib/Target/DirectX/DXILOpBuilder.cpp | 4 ----
llvm/lib/Target/DirectX/DXILOpBuilder.h | 3 ---
llvm/lib/Target/DirectX/DXILOpLowering.cpp | 9 ---------
llvm/test/CodeGen/DirectX/UAddc_errors.ll | 2 +-
5 files changed, 2 insertions(+), 20 deletions(-)
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 469b577801a96..008d68b92782e 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -741,9 +741,7 @@ def UMin : DXILOp<40, binary> {
def UAddc : DXILOp<44, binaryWithCarryOrBorrow > {
let Doc = "Unsigned 32-bit integer arithmetic add with carry. uaddc(a,b) = (a+b, a+b overflowed ? 1 : 0)";
- // TODO: This `let intrinsics = ...` line may be uncommented when
- // https://github.com/llvm/llvm-project/issues/113192 is fixed
- // let intrinsics = [IntrinSelect<int_uadd_with_overflow>];
+ let intrinsics = [IntrinSelect<int_uadd_with_overflow>];
let arguments = [OverloadTy, OverloadTy];
let result = BinaryWithCarryTy;
let overloads = [Overloads<DXIL1_0, [Int32Ty]>];
diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
index 3aa35b27a343e..8710bd642a5c3 100644
--- a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
@@ -545,10 +545,6 @@ StructType *DXILOpBuilder::getResRetType(Type *ElementTy) {
return ::getResRetType(ElementTy);
}
-StructType *DXILOpBuilder::getBinaryWithCarryType(LLVMContext &Context) {
- return ::getBinaryWithCarryType(Context);
-}
-
StructType *DXILOpBuilder::getHandleType() {
return ::getHandleType(IRB.getContext());
}
diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.h b/llvm/lib/Target/DirectX/DXILOpBuilder.h
index 973025d82828d..5fe9f4429a494 100644
--- a/llvm/lib/Target/DirectX/DXILOpBuilder.h
+++ b/llvm/lib/Target/DirectX/DXILOpBuilder.h
@@ -50,9 +50,6 @@ class DXILOpBuilder {
/// Get a `%dx.types.ResRet` type with the given element type.
StructType *getResRetType(Type *ElementTy);
- /// Get the `%dx.types.i32c` type.
- StructType *getBinaryWithCarryType(LLVMContext &Context);
-
/// Get the `%dx.types.Handle` type.
StructType *getHandleType();
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index c56e165b41440..43a607b750cb9 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -818,15 +818,6 @@ class OpLowerer {
case Intrinsic::dx_resource_updatecounter:
HasErrors |= lowerUpdateCounter(F);
break;
- // TODO: this can be removed when
- // https://github.com/llvm/llvm-project/issues/113192 is fixed
- case Intrinsic::uadd_with_overflow:
- HasErrors |= replaceFunctionWithNamedStructOp(
- F, OpCode::UAddc, OpBuilder.getBinaryWithCarryType(M.getContext()),
- [&](CallInst *CI, CallInst *Op) {
- return replaceAggregateTypeOfCallUsages(CI, Op);
- });
- break;
case Intrinsic::ctpop:
HasErrors |= lowerCtpopToCountBits(F);
break;
diff --git a/llvm/test/CodeGen/DirectX/UAddc_errors.ll b/llvm/test/CodeGen/DirectX/UAddc_errors.ll
index 72c676db9bba4..0c6964a09c953 100644
--- a/llvm/test/CodeGen/DirectX/UAddc_errors.ll
+++ b/llvm/test/CodeGen/DirectX/UAddc_errors.ll
@@ -19,7 +19,7 @@ define noundef i16 @uaddc_i16(i16 noundef %a, i16 noundef %b) "hlsl.export" {
; CHECK: error:
; CHECK-SAME: in function uaddc_return
-; CHECK-SAME: llvm.uadd.with.overflow.i32 use is not a ExtractValueInst or InsertValueInst
+; CHECK-SAME: DXIL ops that return structs may only be used by insert- and extractvalue
define noundef { i32, i1 } @uaddc_return(i32 noundef %a, i32 noundef %b) "hlsl.export" {
%uaddc = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
>From 9f73e67794bbed9e7c4883c50cf8a6de770cca8c Mon Sep 17 00:00:00 2001
From: Deric Cheung <cheung.deric at gmail.com>
Date: Mon, 24 Feb 2025 19:20:42 -0800
Subject: [PATCH 09/13] Remove unused function
'replaceAggregateTypeOfCallUsages'
---
llvm/lib/Target/DirectX/DXILOpLowering.cpp | 19 -------------------
1 file changed, 19 deletions(-)
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index 43a607b750cb9..83cc4b18824c7 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -364,25 +364,6 @@ class OpLowerer {
return lowerToBindAndAnnotateHandle(F);
}
- Error replaceAggregateTypeOfCallUsages(CallInst *Intrin, CallInst *Op) {
- for (Use &U : make_early_inc_range(Intrin->uses())) {
- if (auto *EVI = dyn_cast<ExtractValueInst>(U.getUser())) {
- EVI->setOperand(0, Op);
- } else if (auto *IVI = dyn_cast<InsertValueInst>(U.getUser())) {
- IVI->setOperand(0, Op);
- } else {
- return make_error<StringError>(
- (Intrin->getCalledFunction()->getName() +
- " use is not a ExtractValueInst or InsertValueInst"),
- inconvertibleErrorCode());
- }
- }
-
- Intrin->eraseFromParent();
-
- return Error::success();
- }
-
/// Replace uses of \c Intrin with the values in the `dx.ResRet` of \c Op.
/// Since we expect to be post-scalarization, make an effort to avoid vectors.
Error replaceResRetUses(CallInst *Intrin, CallInst *Op, bool HasCheckBit) {
>From 72b404b2a213be0d1d4ca6af67e36ddcd9da7c9b Mon Sep 17 00:00:00 2001
From: Icohedron <cheung.deric at gmail.com>
Date: Tue, 25 Feb 2025 03:35:59 +0000
Subject: [PATCH 10/13] Edit UAddc.ll CHECK lines for readability
---
llvm/test/CodeGen/DirectX/UAddc.ll | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/test/CodeGen/DirectX/UAddc.ll b/llvm/test/CodeGen/DirectX/UAddc.ll
index c16a3f6a5b5fe..073b23874b877 100644
--- a/llvm/test/CodeGen/DirectX/UAddc.ll
+++ b/llvm/test/CodeGen/DirectX/UAddc.ll
@@ -6,7 +6,7 @@
define noundef i32 @test_UAddc(i32 noundef %a, i32 noundef %b) {
; CHECK-LABEL: define noundef i32 @test_UAddc(
; CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) {
-; CHECK-NEXT: [[UADDC1:%.*]] = call [[DX_TYPES_I32C:%.*]] @[[DX_OP_BINARYWITHCARRYORBORROW_I32:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 44, i32 [[A]], i32 [[B]]) #[[ATTR0:[0-9]+]]
+; CHECK-NEXT: [[UADDC1:%.*]] = call [[DX_TYPES_I32C:%dx\.types\.i32c]] @dx.op.binaryWithCarryOrBorrow.i32(i32 44, i32 [[A]], i32 [[B]]) #[[ATTR0:[0-9]+]]
; CHECK-NEXT: [[CARRY:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC1]], 1
; CHECK-NEXT: [[SUM:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC1]], 0
; CHECK-NEXT: [[CARRY_ZEXT:%.*]] = zext i1 [[CARRY]] to i32
@@ -26,10 +26,10 @@ define noundef <2 x i32> @test_UAddc_vec2(<2 x i32> noundef %a, <2 x i32> nounde
; CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) {
; CHECK-NEXT: [[A_I0:%.*]] = extractelement <2 x i32> [[A]], i64 0
; CHECK-NEXT: [[B_I0:%.*]] = extractelement <2 x i32> [[B]], i64 0
-; CHECK-NEXT: [[UADDC_I09:%.*]] = call [[DX_TYPES_I32C:%.*]] @[[DX_OP_BINARYWITHCARRYORBORROW_I32]](i32 44, i32 [[A_I0]], i32 [[B_I0]]) #[[ATTR0]]
+; CHECK-NEXT: [[UADDC_I09:%.*]] = call [[DX_TYPES_I32C:%dx\.types\.i32c]] @dx.op.binaryWithCarryOrBorrow.i32(i32 44, i32 [[A_I0]], i32 [[B_I0]]) #[[ATTR0]]
; CHECK-NEXT: [[A_I1:%.*]] = extractelement <2 x i32> [[A]], i64 1
; CHECK-NEXT: [[B_I1:%.*]] = extractelement <2 x i32> [[B]], i64 1
-; CHECK-NEXT: [[UADDC_I18:%.*]] = call [[DX_TYPES_I32C]] @[[DX_OP_BINARYWITHCARRYORBORROW_I32]](i32 44, i32 [[A_I1]], i32 [[B_I1]]) #[[ATTR0]]
+; CHECK-NEXT: [[UADDC_I18:%.*]] = call [[DX_TYPES_I32C]] @dx.op.binaryWithCarryOrBorrow.i32(i32 44, i32 [[A_I1]], i32 [[B_I1]]) #[[ATTR0]]
; CHECK-NEXT: [[CARRY_ELEM1:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC_I09]], 1
; CHECK-NEXT: [[CARRY_ELEM11:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC_I18]], 1
; CHECK-NEXT: [[CARRY_UPTO0:%.*]] = insertelement <2 x i1> poison, i1 [[CARRY_ELEM1]], i64 0
@@ -57,7 +57,7 @@ define noundef <2 x i32> @test_UAddc_vec2(<2 x i32> noundef %a, <2 x i32> nounde
define noundef i32 @test_UAddc_insert(i32 noundef %a, i32 noundef %b) {
; CHECK-LABEL: define noundef i32 @test_UAddc_insert(
; CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) {
-; CHECK-NEXT: [[UADDC1:%.*]] = call [[DX_TYPES_I32C:%.*]] @[[DX_OP_BINARYWITHCARRYORBORROW_I32]](i32 44, i32 [[A]], i32 [[B]]) #[[ATTR0]]
+; CHECK-NEXT: [[UADDC1:%.*]] = call [[DX_TYPES_I32C:%dx\.types\.i32c]] @dx.op.binaryWithCarryOrBorrow.i32(i32 44, i32 [[A]], i32 [[B]]) #[[ATTR0]]
; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[DX_TYPES_I32C]] [[UADDC1]], i32 [[A]], 0
; CHECK-NEXT: [[RESULT:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC1]], 0
; CHECK-NEXT: ret i32 [[RESULT]]
>From be292074c45876ca3c9bc78b37846d6ff2e8dcd7 Mon Sep 17 00:00:00 2001
From: Icohedron <cheung.deric at gmail.com>
Date: Tue, 25 Feb 2025 17:54:49 +0000
Subject: [PATCH 11/13] Remove line saying test was generated. Improve variable
names
---
llvm/test/CodeGen/DirectX/UAddc.ll | 31 +++++++++++++++---------------
1 file changed, 15 insertions(+), 16 deletions(-)
diff --git a/llvm/test/CodeGen/DirectX/UAddc.ll b/llvm/test/CodeGen/DirectX/UAddc.ll
index 073b23874b877..70a6ea00344cb 100644
--- a/llvm/test/CodeGen/DirectX/UAddc.ll
+++ b/llvm/test/CodeGen/DirectX/UAddc.ll
@@ -1,4 +1,3 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
; CHECK: %dx.types.i32c = type { i32, i1 }
@@ -6,9 +5,9 @@
define noundef i32 @test_UAddc(i32 noundef %a, i32 noundef %b) {
; CHECK-LABEL: define noundef i32 @test_UAddc(
; CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) {
-; CHECK-NEXT: [[UADDC1:%.*]] = call [[DX_TYPES_I32C:%dx\.types\.i32c]] @dx.op.binaryWithCarryOrBorrow.i32(i32 44, i32 [[A]], i32 [[B]]) #[[ATTR0:[0-9]+]]
-; CHECK-NEXT: [[CARRY:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC1]], 1
-; CHECK-NEXT: [[SUM:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC1]], 0
+; CHECK-NEXT: [[UADDC:%.*]] = call [[DX_TYPES_I32C:%dx\.types\.i32c]] @dx.op.binaryWithCarryOrBorrow.i32(i32 44, i32 [[A]], i32 [[B]]) #[[ATTR0:[0-9]+]]
+; CHECK-NEXT: [[CARRY:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC]], 1
+; CHECK-NEXT: [[SUM:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC]], 0
; CHECK-NEXT: [[CARRY_ZEXT:%.*]] = zext i1 [[CARRY]] to i32
; CHECK-NEXT: [[RESULT:%.*]] = add i32 [[SUM]], [[CARRY_ZEXT]]
; CHECK-NEXT: ret i32 [[RESULT]]
@@ -26,22 +25,22 @@ define noundef <2 x i32> @test_UAddc_vec2(<2 x i32> noundef %a, <2 x i32> nounde
; CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) {
; CHECK-NEXT: [[A_I0:%.*]] = extractelement <2 x i32> [[A]], i64 0
; CHECK-NEXT: [[B_I0:%.*]] = extractelement <2 x i32> [[B]], i64 0
-; CHECK-NEXT: [[UADDC_I09:%.*]] = call [[DX_TYPES_I32C:%dx\.types\.i32c]] @dx.op.binaryWithCarryOrBorrow.i32(i32 44, i32 [[A_I0]], i32 [[B_I0]]) #[[ATTR0]]
+; CHECK-NEXT: [[UADDC_I0:%.*]] = call [[DX_TYPES_I32C:%dx\.types\.i32c]] @dx.op.binaryWithCarryOrBorrow.i32(i32 44, i32 [[A_I0]], i32 [[B_I0]]) #[[ATTR0]]
; CHECK-NEXT: [[A_I1:%.*]] = extractelement <2 x i32> [[A]], i64 1
; CHECK-NEXT: [[B_I1:%.*]] = extractelement <2 x i32> [[B]], i64 1
-; CHECK-NEXT: [[UADDC_I18:%.*]] = call [[DX_TYPES_I32C]] @dx.op.binaryWithCarryOrBorrow.i32(i32 44, i32 [[A_I1]], i32 [[B_I1]]) #[[ATTR0]]
-; CHECK-NEXT: [[CARRY_ELEM1:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC_I09]], 1
-; CHECK-NEXT: [[CARRY_ELEM11:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC_I18]], 1
-; CHECK-NEXT: [[CARRY_UPTO0:%.*]] = insertelement <2 x i1> poison, i1 [[CARRY_ELEM1]], i64 0
-; CHECK-NEXT: [[CARRY:%.*]] = insertelement <2 x i1> [[CARRY_UPTO0]], i1 [[CARRY_ELEM11]], i64 1
+; CHECK-NEXT: [[UADDC_I1:%.*]] = call [[DX_TYPES_I32C]] @dx.op.binaryWithCarryOrBorrow.i32(i32 44, i32 [[A_I1]], i32 [[B_I1]]) #[[ATTR0]]
+; CHECK-NEXT: [[CARRY_ELEM0:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC_I0]], 1
+; CHECK-NEXT: [[CARRY_ELEM1:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC_I1]], 1
+; CHECK-NEXT: [[CARRY_UPTO0:%.*]] = insertelement <2 x i1> poison, i1 [[CARRY_ELEM0]], i64 0
+; CHECK-NEXT: [[CARRY:%.*]] = insertelement <2 x i1> [[CARRY_UPTO0]], i1 [[CARRY_ELEM1]], i64 1
; CHECK-NEXT: [[CARRY_I0:%.*]] = extractelement <2 x i1> [[CARRY]], i64 0
; CHECK-NEXT: [[CARRY_I1:%.*]] = extractelement <2 x i1> [[CARRY]], i64 1
-; CHECK-NEXT: [[SUM_ELEM0:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC_I09]], 0
-; CHECK-NEXT: [[SUM_ELEM02:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC_I18]], 0
+; CHECK-NEXT: [[SUM_ELEM0:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC_I0]], 0
+; CHECK-NEXT: [[SUM_ELEM1:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC_I1]], 0
; CHECK-NEXT: [[CARRY_ZEXT_I0:%.*]] = zext i1 [[CARRY_I0]] to i32
; CHECK-NEXT: [[CARRY_ZEXT_I1:%.*]] = zext i1 [[CARRY_I1]] to i32
; CHECK-NEXT: [[RESULT_I0:%.*]] = add i32 [[SUM_ELEM0]], [[CARRY_ZEXT_I0]]
-; CHECK-NEXT: [[RESULT_I1:%.*]] = add i32 [[SUM_ELEM02]], [[CARRY_ZEXT_I1]]
+; CHECK-NEXT: [[RESULT_I1:%.*]] = add i32 [[SUM_ELEM1]], [[CARRY_ZEXT_I1]]
; CHECK-NEXT: [[RESULT_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RESULT_I0]], i64 0
; CHECK-NEXT: [[RESULT:%.*]] = insertelement <2 x i32> [[RESULT_UPTO0]], i32 [[RESULT_I1]], i64 1
; CHECK-NEXT: ret <2 x i32> [[RESULT]]
@@ -57,9 +56,9 @@ define noundef <2 x i32> @test_UAddc_vec2(<2 x i32> noundef %a, <2 x i32> nounde
define noundef i32 @test_UAddc_insert(i32 noundef %a, i32 noundef %b) {
; CHECK-LABEL: define noundef i32 @test_UAddc_insert(
; CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) {
-; CHECK-NEXT: [[UADDC1:%.*]] = call [[DX_TYPES_I32C:%dx\.types\.i32c]] @dx.op.binaryWithCarryOrBorrow.i32(i32 44, i32 [[A]], i32 [[B]]) #[[ATTR0]]
-; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[DX_TYPES_I32C]] [[UADDC1]], i32 [[A]], 0
-; CHECK-NEXT: [[RESULT:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC1]], 0
+; CHECK-NEXT: [[UADDC:%.*]] = call [[DX_TYPES_I32C:%dx\.types\.i32c]] @dx.op.binaryWithCarryOrBorrow.i32(i32 44, i32 [[A]], i32 [[B]]) #[[ATTR0]]
+; CHECK-NEXT: [[UNUSED:%.*]] = insertvalue [[DX_TYPES_I32C]] [[UADDC]], i32 [[A]], 0
+; CHECK-NEXT: [[RESULT:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC]], 0
; CHECK-NEXT: ret i32 [[RESULT]]
;
%uaddc = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
>From 62d95a45cb78091f9b71e22021a94485f3a07121 Mon Sep 17 00:00:00 2001
From: Icohedron <cheung.deric at gmail.com>
Date: Tue, 25 Feb 2025 22:13:21 +0000
Subject: [PATCH 12/13] Add AddUint64 SPIRV HLSL intrinsic test
---
.../SPIRV/hlsl-intrinsics/AddUint64.ll | 88 +++++++++++++++++++
1 file changed, 88 insertions(+)
create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-intrinsics/AddUint64.ll
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/AddUint64.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/AddUint64.ll
new file mode 100644
index 0000000000000..6521699a242ed
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/AddUint64.ll
@@ -0,0 +1,88 @@
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; Code here is an excerpt of clang/test/CodeGenHLSL/builtins/AddUint64.hlsl compiled for spirv using the following command
+; clang -cc1 -finclude-default-header -triple spirv-unknown-vulkan-compute clang/test/CodeGenHLSL/builtins/AddUint64.hlsl -emit-llvm -disable-llvm-passes -o llvm/test/CodeGen/SPIRV/hlsl-intrinsics/AddUint64.ll
+
+; CHECK-DAG: %[[#int_32:]] = OpTypeInt 32 0
+; CHECK-DAG: %[[#vec2_int_32:]] = OpTypeVector %[[#int_32]] 2
+; CHECK-DAG: %[[#bool:]] = OpTypeBool
+; CHECK-DAG: %[[#const_i32_1:]] = OpConstant %[[#int_32]] 1
+; CHECK-DAG: %[[#struct_i32_i32:]] = OpTypeStruct %[[#int_32]] %[[#int_32]]
+; CHECK-DAG: %[[#func_v2i32_v2i32_v2i32:]] = OpTypeFunction %[[#vec2_int_32]] %[[#vec2_int_32]] %[[#vec2_int_32]]
+; CHECK-DAG: %[[#const_i32_0:]] = OpConstant %[[#int_32]] 0
+; CHECK-DAG: %[[#undef_v2i32:]] = OpUndef %[[#vec2_int_32]]
+; CHECK-DAG: %[[#vec4_int_32:]] = OpTypeVector %[[#int_32]] 4
+; CHECK-DAG: %[[#vec2_bool:]] = OpTypeVector %[[#bool]] 2
+; CHECK-DAG: %[[#const_v2i32_0_0:]] = OpConstantComposite %[[#vec2_int_32]] %[[#const_i32_0]] %[[#const_i32_0]]
+; CHECK-DAG: %[[#const_v2i32_1_1:]] = OpConstantComposite %[[#vec2_int_32]] %[[#const_i32_1]] %[[#const_i32_1]]
+; CHECK-DAG: %[[#struct_v2i32_v2i32:]] = OpTypeStruct %[[#vec2_int_32]] %[[#vec2_int_32]]
+; CHECK-DAG: %[[#func_v4i32_v4i32_v4i32:]] = OpTypeFunction %[[#vec4_int_32]] %[[#vec4_int_32]] %[[#vec4_int_32]]
+; CHECK-DAG: %[[#undef_v4i32:]] = OpUndef %[[#vec4_int_32]]
+
+
+define spir_func <2 x i32> @test_AddUint64_uint2(<2 x i32> %a, <2 x i32> %b) {
+entry:
+; CHECK: %[[#a:]] = OpFunctionParameter %[[#vec2_int_32]]
+; CHECK: %[[#b:]] = OpFunctionParameter %[[#vec2_int_32]]
+; CHECK: %[[#a_low:]] = OpCompositeExtract %[[#int_32]] %[[#a]] 0
+; CHECK: %[[#a_high:]] = OpCompositeExtract %[[#int_32]] %[[#a]] 1
+; CHECK: %[[#b_low:]] = OpCompositeExtract %[[#int_32]] %[[#b]] 0
+; CHECK: %[[#b_high:]] = OpCompositeExtract %[[#int_32]] %[[#b]] 1
+; CHECK: %[[#iaddcarry:]] = OpIAddCarry %[[#struct_i32_i32]] %[[#a_low]] %[[#b_low]]
+; CHECK: %[[#lowsum:]] = OpCompositeExtract %[[#int_32]] %[[#iaddcarry]] 0
+; CHECK: %[[#carry:]] = OpCompositeExtract %[[#int_32]] %[[#iaddcarry]] 1
+; CHECK: %[[#carry_ne0:]] = OpINotEqual %[[#bool]] %[[#carry]] %[[#const_i32_0]]
+; CHECK: %[[#select_1_or_0:]] = OpSelect %[[#int_32]] %[[#carry_ne0]] %[[#const_i32_1]] %[[#const_i32_0]]
+; CHECK: %[[#highsum:]] = OpIAdd %[[#int_32]] %[[#a_high]] %[[#b_high]]
+; CHECK: %[[#highsumpluscarry:]] = OpIAdd %[[#int_32]] %[[#highsum]] %[[#select_1_or_0]]
+; CHECK: %[[#adduint64_upto0:]] = OpCompositeInsert %[[#vec2_int_32]] %[[#lowsum]] %[[#undef_v2i32]] 0
+; CHECK: %[[#adduint64:]] = OpCompositeInsert %[[#vec2_int_32]] %[[#highsumpluscarry]] %[[#adduint64_upto0]] 1
+; CHECK: OpReturnValue %[[#adduint64]]
+;
+ %LowA = extractelement <2 x i32> %a, i64 0
+ %HighA = extractelement <2 x i32> %a, i64 1
+ %LowB = extractelement <2 x i32> %b, i64 0
+ %HighB = extractelement <2 x i32> %b, i64 1
+ %3 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %LowA, i32 %LowB)
+ %4 = extractvalue { i32, i1 } %3, 1
+ %5 = extractvalue { i32, i1 } %3, 0
+ %CarryZExt = zext i1 %4 to i32
+ %HighSum = add i32 %HighA, %HighB
+ %HighSumPlusCarry = add i32 %HighSum, %CarryZExt
+ %hlsl.AddUint64.upto0 = insertelement <2 x i32> poison, i32 %5, i64 0
+ %hlsl.AddUint64 = insertelement <2 x i32> %hlsl.AddUint64.upto0, i32 %HighSumPlusCarry, i64 1
+ ret <2 x i32> %hlsl.AddUint64
+}
+
+define spir_func <4 x i32> @test_AddUint64_uint4(<4 x i32> %a, <4 x i32> %b) #0 {
+entry:
+; CHECK: %[[#a:]] = OpFunctionParameter %[[#vec4_int_32]]
+; CHECK: %[[#b:]] = OpFunctionParameter %[[#vec4_int_32]]
+; CHECK: %[[#a_low:]] = OpVectorShuffle %[[#vec2_int_32]] %[[#a]] %[[#undef_v4i32]] 0 2
+; CHECK: %[[#a_high:]] = OpVectorShuffle %[[#vec2_int_32]] %[[#a]] %[[#undef_v4i32]] 1 3
+; CHECK: %[[#b_low:]] = OpVectorShuffle %[[#vec2_int_32]] %[[#b]] %[[#undef_v4i32]] 0 2
+; CHECK: %[[#b_high:]] = OpVectorShuffle %[[#vec2_int_32]] %[[#b]] %[[#undef_v4i32]] 1 3
+; CHECK: %[[#iaddcarry:]] = OpIAddCarry %[[#struct_v2i32_v2i32]] %[[#a_low]] %[[#b_low]]
+; CHECK: %[[#lowsum:]] = OpCompositeExtract %[[#vec2_int_32]] %[[#iaddcarry]] 0
+; CHECK: %[[#carry:]] = OpCompositeExtract %[[#vec2_int_32]] %[[#iaddcarry]] 1
+; CHECK: %[[#carry_ne0:]] = OpINotEqual %[[#vec2_bool]] %[[#carry]] %[[#const_v2i32_0_0]]
+; CHECK: %[[#select_1_or_0:]] = OpSelect %[[#vec2_int_32]] %[[#carry_ne0]] %[[#const_v2i32_1_1]] %[[#const_v2i32_0_0]]
+; CHECK: %[[#highsum:]] = OpIAdd %[[#vec2_int_32]] %[[#a_high]] %[[#b_high]]
+; CHECK: %[[#highsumpluscarry:]] = OpIAdd %[[#vec2_int_32]] %[[#highsum]] %[[#select_1_or_0]]
+; CHECK: %[[#adduint64:]] = OpVectorShuffle %[[#vec4_int_32]] %[[#lowsum]] %[[#highsumpluscarry]] 0 2 1 3
+; CHECK: OpReturnValue %[[#adduint64]]
+;
+ %LowA = shufflevector <4 x i32> %a, <4 x i32> poison, <2 x i32> <i32 0, i32 2>
+ %HighA = shufflevector <4 x i32> %a, <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+ %LowB = shufflevector <4 x i32> %b, <4 x i32> poison, <2 x i32> <i32 0, i32 2>
+ %HighB = shufflevector <4 x i32> %b, <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+ %3 = call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> %LowA, <2 x i32> %LowB)
+ %4 = extractvalue { <2 x i32>, <2 x i1> } %3, 1
+ %5 = extractvalue { <2 x i32>, <2 x i1> } %3, 0
+ %CarryZExt = zext <2 x i1> %4 to <2 x i32>
+ %HighSum = add <2 x i32> %HighA, %HighB
+ %HighSumPlusCarry = add <2 x i32> %HighSum, %CarryZExt
+ %hlsl.AddUint64 = shufflevector <2 x i32> %5, <2 x i32> %HighSumPlusCarry, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+ ret <4 x i32> %hlsl.AddUint64
+}
>From fd356da6731ab1d69750d108c721a3be5190a180 Mon Sep 17 00:00:00 2001
From: Icohedron <cheung.deric at gmail.com>
Date: Wed, 26 Feb 2025 17:04:59 +0000
Subject: [PATCH 13/13] Clarify the UAddc.ll test with additional comments on
its usage
---
llvm/test/CodeGen/DirectX/UAddc.ll | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/llvm/test/CodeGen/DirectX/UAddc.ll b/llvm/test/CodeGen/DirectX/UAddc.ll
index 70a6ea00344cb..4b46b56b455f6 100644
--- a/llvm/test/CodeGen/DirectX/UAddc.ll
+++ b/llvm/test/CodeGen/DirectX/UAddc.ll
@@ -1,11 +1,14 @@
; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
-; CHECK: %dx.types.i32c = type { i32, i1 }
+; This test exercises the lowering of the intrinsic @llvm.uadd.with.overflow.i32 to the UAddc DXIL op
+; CHECK-DAG: [[DX_TYPES_I32C:%dx\.types\.i32c]] = type { i32, i1 }
+
+; NOTE: The uint2 overload of AddUint64 HLSL uses @llvm.uadd.with.overflow.i32, resulting in one UAddc op
define noundef i32 @test_UAddc(i32 noundef %a, i32 noundef %b) {
; CHECK-LABEL: define noundef i32 @test_UAddc(
; CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) {
-; CHECK-NEXT: [[UADDC:%.*]] = call [[DX_TYPES_I32C:%dx\.types\.i32c]] @dx.op.binaryWithCarryOrBorrow.i32(i32 44, i32 [[A]], i32 [[B]]) #[[ATTR0:[0-9]+]]
+; CHECK-NEXT: [[UADDC:%.*]] = call [[DX_TYPES_I32C]] @dx.op.binaryWithCarryOrBorrow.i32(i32 44, i32 [[A]], i32 [[B]]) #[[ATTR0:[0-9]+]]
; CHECK-NEXT: [[CARRY:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC]], 1
; CHECK-NEXT: [[SUM:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC]], 0
; CHECK-NEXT: [[CARRY_ZEXT:%.*]] = zext i1 [[CARRY]] to i32
@@ -20,12 +23,13 @@ define noundef i32 @test_UAddc(i32 noundef %a, i32 noundef %b) {
ret i32 %result
}
+; NOTE: The uint4 overload of AddUint64 HLSL uses @llvm.uadd.with.overflow.v2i32, resulting in two UAddc ops after scalarization
define noundef <2 x i32> @test_UAddc_vec2(<2 x i32> noundef %a, <2 x i32> noundef %b) {
; CHECK-LABEL: define noundef <2 x i32> @test_UAddc_vec2(
; CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) {
; CHECK-NEXT: [[A_I0:%.*]] = extractelement <2 x i32> [[A]], i64 0
; CHECK-NEXT: [[B_I0:%.*]] = extractelement <2 x i32> [[B]], i64 0
-; CHECK-NEXT: [[UADDC_I0:%.*]] = call [[DX_TYPES_I32C:%dx\.types\.i32c]] @dx.op.binaryWithCarryOrBorrow.i32(i32 44, i32 [[A_I0]], i32 [[B_I0]]) #[[ATTR0]]
+; CHECK-NEXT: [[UADDC_I0:%.*]] = call [[DX_TYPES_I32C]] @dx.op.binaryWithCarryOrBorrow.i32(i32 44, i32 [[A_I0]], i32 [[B_I0]]) #[[ATTR0]]
; CHECK-NEXT: [[A_I1:%.*]] = extractelement <2 x i32> [[A]], i64 1
; CHECK-NEXT: [[B_I1:%.*]] = extractelement <2 x i32> [[B]], i64 1
; CHECK-NEXT: [[UADDC_I1:%.*]] = call [[DX_TYPES_I32C]] @dx.op.binaryWithCarryOrBorrow.i32(i32 44, i32 [[A_I1]], i32 [[B_I1]]) #[[ATTR0]]
@@ -56,7 +60,7 @@ define noundef <2 x i32> @test_UAddc_vec2(<2 x i32> noundef %a, <2 x i32> nounde
define noundef i32 @test_UAddc_insert(i32 noundef %a, i32 noundef %b) {
; CHECK-LABEL: define noundef i32 @test_UAddc_insert(
; CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) {
-; CHECK-NEXT: [[UADDC:%.*]] = call [[DX_TYPES_I32C:%dx\.types\.i32c]] @dx.op.binaryWithCarryOrBorrow.i32(i32 44, i32 [[A]], i32 [[B]]) #[[ATTR0]]
+; CHECK-NEXT: [[UADDC:%.*]] = call [[DX_TYPES_I32C]] @dx.op.binaryWithCarryOrBorrow.i32(i32 44, i32 [[A]], i32 [[B]]) #[[ATTR0]]
; CHECK-NEXT: [[UNUSED:%.*]] = insertvalue [[DX_TYPES_I32C]] [[UADDC]], i32 [[A]], 0
; CHECK-NEXT: [[RESULT:%.*]] = extractvalue [[DX_TYPES_I32C]] [[UADDC]], 0
; CHECK-NEXT: ret i32 [[RESULT]]
More information about the llvm-commits
mailing list