[clang] [llvm] [HLSL] [DXIL] Implement the `AddUint64` HLSL function and the `UAddc` DXIL op (PR #125319)
Deric Cheung via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 31 16:04:01 PST 2025
https://github.com/Icohedron created https://github.com/llvm/llvm-project/pull/125319
This PR addresses #99205
- Implements the HLSL intrinsic `AddUint64` used to perform unsigned 64-bit integer addition by using pairs of unsigned 32-bit integers instead of native 64-bit types
- The LLVM intrinsic `uadd_with_overflow` is used in the implementation of `AddUint64` in `CGBuiltin.cpp`
- The DXIL op `UAddc` was defined in `DXIL.td`, and a lowering of the LLVM intrinsic `uadd_with_overflow` to the `UAddc` DXIL op was implemented in `DXILOpLowering.cpp`
Notes:
- `__builtin_addc` was not able to be used to implement `AddUint64` in `hlsl_intrinsics.h` because its `CarryOut` argument is a pointer, and pointers are not supported in HLSL
- A lowering of the LLVM intrinsic `uadd_with_overflow` to SPIR-V [already exists](https://github.com/llvm/llvm-project/blob/main/llvm/test/CodeGen/SPIRV/llvm-intrinsics/uadd.with.overflow.ll)
- When lowering the LLVM intrinsic `uadd_with_overflow` to the `UAddc` DXIL op, the anonymous struct type `{ i32, i1 }` is replaced with a named struct type `%dx.types.i32c`. This aspect of the implementation may be changed when issue #113192 gets addressed
>From 1e194fdf6dc731276cd867501708b348e3bbc97c Mon Sep 17 00:00:00 2001
From: Icohedron <cheung.deric at gmail.com>
Date: Mon, 27 Jan 2025 11:18:09 -0800
Subject: [PATCH 1/2] Implement AddUint64 HLSL codegen and sema
---
clang/include/clang/Basic/Builtins.td | 6 ++
.../clang/Basic/DiagnosticSemaKinds.td | 2 +-
clang/lib/CodeGen/CGBuiltin.cpp | 45 ++++++++++++
clang/lib/Headers/hlsl/hlsl_intrinsics.h | 21 ++++++
clang/lib/Sema/SemaHLSL.cpp | 49 +++++++++++++
.../test/CodeGenHLSL/builtins/AddUint64.hlsl | 71 +++++++++++++++++++
.../SemaHLSL/BuiltIns/AddUint64-errors.hlsl | 46 ++++++++++++
7 files changed, 239 insertions(+), 1 deletion(-)
create mode 100644 clang/test/CodeGenHLSL/builtins/AddUint64.hlsl
create mode 100644 clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 60c360d4a9e075..ffa60bf98aea06 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -4753,6 +4753,12 @@ def GetDeviceSideMangledName : LangBuiltin<"CUDA_LANG"> {
}
// HLSL
+def HLSLAddUint64: LangBuiltin<"HLSL_LANG"> {
+ let Spellings = ["__builtin_hlsl_adduint64"];
+ let Attributes = [NoThrow, Const];
+ let Prototype = "void(...)";
+}
+
def HLSLResourceGetPointer : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_resource_getpointer"];
let Attributes = [NoThrow];
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 774e5484cfa0e7..2d7d306a207417 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -10623,7 +10623,7 @@ def err_second_argument_to_cwsc_not_pointer : Error<
"second argument to __builtin_call_with_static_chain must be of pointer type">;
def err_vector_incorrect_num_elements : Error<
- "%select{too many|too few}0 elements in vector %select{initialization|operand}3 (expected %1 elements, have %2)">;
+ "%select{too many|too few|incorrect number of}0 elements in vector %select{initialization|operand}3 (expected %1 elements, have %2)">;
def err_altivec_empty_initializer : Error<"expected initializer">;
def err_invalid_neon_type_code : Error<
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 26bccccdc5e36e..3a2c74f39afa78 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -19105,6 +19105,51 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
return nullptr;
switch (BuiltinID) {
+ case Builtin::BI__builtin_hlsl_adduint64: {
+ Value *OpA = EmitScalarExpr(E->getArg(0));
+ Value *OpB = EmitScalarExpr(E->getArg(1));
+ assert(E->getArg(0)->getType()->hasIntegerRepresentation() &&
+ E->getArg(1)->getType()->hasIntegerRepresentation() &&
+ "AddUint64 operands must have an integer representation");
+ assert(((E->getArg(0)->getType()->castAs<VectorType>()->getNumElements() ==
+ 2 &&
+ E->getArg(1)->getType()->castAs<VectorType>()->getNumElements() ==
+ 2) ||
+ (E->getArg(0)->getType()->castAs<VectorType>()->getNumElements() ==
+ 4 &&
+ E->getArg(1)->getType()->castAs<VectorType>()->getNumElements() ==
+ 4)) &&
+ "input vectors must have 2 or 4 elements each");
+
+ llvm::Value *Result = PoisonValue::get(OpA->getType());
+ uint64_t NumElements =
+ E->getArg(0)->getType()->castAs<VectorType>()->getNumElements();
+ for (uint64_t i = 0; i < NumElements / 2; ++i) {
+
+ // Obtain low and high words of inputs A and B
+ llvm::Value *LowA = Builder.CreateExtractElement(OpA, 2 * i + 0);
+ llvm::Value *HighA = Builder.CreateExtractElement(OpA, 2 * i + 1);
+ llvm::Value *LowB = Builder.CreateExtractElement(OpB, 2 * i + 0);
+ llvm::Value *HighB = Builder.CreateExtractElement(OpB, 2 * i + 1);
+
+ // Use an uadd_with_overflow to compute the sum of low words and obtain a
+ // carry value
+ llvm::Value *Carry;
+ llvm::Value *LowSum = EmitOverflowIntrinsic(
+ *this, llvm::Intrinsic::uadd_with_overflow, LowA, LowB, Carry);
+ llvm::Value *ZExtCarry = Builder.CreateZExt(Carry, HighA->getType());
+
+ // Sum the high words and the carry
+ llvm::Value *HighSum = Builder.CreateAdd(HighA, HighB);
+ llvm::Value *HighSumPlusCarry = Builder.CreateAdd(HighSum, ZExtCarry);
+
+ // Insert the low and high word sums into the result vector
+ Result = Builder.CreateInsertElement(Result, LowSum, 2 * i + 0);
+ Result = Builder.CreateInsertElement(Result, HighSumPlusCarry, 2 * i + 1,
+ "hlsl.AddUint64");
+ }
+ return Result;
+ }
case Builtin::BI__builtin_hlsl_resource_getpointer: {
Value *HandleOp = EmitScalarExpr(E->getArg(0));
Value *IndexOp = EmitScalarExpr(E->getArg(1));
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index d1e4eb08aa7646..8043fcbc87c479 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -138,6 +138,27 @@ _HLSL_BUILTIN_ALIAS(__builtin_elementwise_acos)
float4 acos(float4);
//===----------------------------------------------------------------------===//
+// AddUint64 builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn T AddUint64(T a, T b)
+/// \brief Implements unsigned 64-bit integer addition using pairs of unsigned
+/// 32-bit integers.
+/// \param x [in] The first unsigned 32-bit integer pair(s)
+/// \param y [in] The second unsigned 32-bit integer pair(s)
+///
+/// This function takes one or two pairs (low, high) of unsigned 32-bit integer
+/// values and returns pairs (low, high) of unsigned 32-bit integer
+/// values representing the result of unsigned 64-bit integer addition.
+
+_HLSL_AVAILABILITY(shadermodel, 6.0)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_adduint64)
+uint32_t2 AddUint64(uint32_t2, uint32_t2);
+_HLSL_AVAILABILITY(shadermodel, 6.0)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_adduint64)
+uint32_t4 AddUint64(uint32_t4, uint32_t4);
+
+// //===----------------------------------------------------------------------===//
// all builtins
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index f26469e6a2f1d7..9b1110f441013a 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -21,6 +21,7 @@
#include "clang/AST/Type.h"
#include "clang/AST/TypeLoc.h"
#include "clang/Basic/Builtins.h"
+#include "clang/Basic/DiagnosticParse.h"
#include "clang/Basic/DiagnosticSema.h"
#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/LLVM.h"
@@ -2023,6 +2024,18 @@ static bool CheckAllArgsHaveFloatRepresentation(Sema *S, CallExpr *TheCall) {
checkAllFloatTypes);
}
+static bool CheckUnsignedIntegerRepresentation(Sema *S, CallExpr *TheCall) {
+ auto checkUnsignedInteger = [](clang::QualType PassedType) -> bool {
+ clang::QualType BaseType =
+ PassedType->isVectorType()
+ ? PassedType->getAs<clang::VectorType>()->getElementType()
+ : PassedType;
+ return !BaseType->isUnsignedIntegerType();
+ };
+ return CheckAllArgTypesAreCorrect(S, TheCall, S->Context.UnsignedIntTy,
+ checkUnsignedInteger);
+}
+
static bool CheckFloatOrHalfRepresentations(Sema *S, CallExpr *TheCall) {
auto checkFloatorHalf = [](clang::QualType PassedType) -> bool {
clang::QualType BaseType =
@@ -2214,6 +2227,42 @@ static bool CheckResourceHandle(
// returning an ExprError
bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
switch (BuiltinID) {
+ case Builtin::BI__builtin_hlsl_adduint64: {
+ if (SemaRef.checkArgCount(TheCall, 2))
+ return true;
+ if (CheckVectorElementCallArgs(&SemaRef, TheCall))
+ return true;
+ if (CheckUnsignedIntegerRepresentation(&SemaRef, TheCall))
+ return true;
+
+ // CheckVectorElementCallArgs(...) guarantees both args are the same type.
+ assert(TheCall->getArg(0)->getType() == TheCall->getArg(1)->getType() &&
+ "Both args must be of the same type");
+
+ // ensure both args are vectors
+ auto *VTy = TheCall->getArg(0)->getType()->getAs<VectorType>();
+ if (!VTy) {
+ SemaRef.Diag(TheCall->getBeginLoc(),
+ diag::err_vector_incorrect_num_elements)
+ << 2 << "2 or 4" << 1 << /*operand*/ 1;
+ return true;
+ }
+
+ // ensure both args have 2 elements, or both args have 4 elements
+ int NumElementsArg1 = VTy->getNumElements();
+ if (NumElementsArg1 != 2 && NumElementsArg1 != 4) {
+ SemaRef.Diag(TheCall->getBeginLoc(),
+ diag::err_vector_incorrect_num_elements)
+ << 2 << "2 or 4" << NumElementsArg1 << /*operand*/ 1;
+ return true;
+ }
+
+ ExprResult A = TheCall->getArg(0);
+ QualType ArgTyA = A.get()->getType();
+ // return type is the same as the input type
+ TheCall->setType(ArgTyA);
+ break;
+ }
case Builtin::BI__builtin_hlsl_resource_getpointer: {
if (SemaRef.checkArgCount(TheCall, 2) ||
CheckResourceHandle(&SemaRef, TheCall, 0) ||
diff --git a/clang/test/CodeGenHLSL/builtins/AddUint64.hlsl b/clang/test/CodeGenHLSL/builtins/AddUint64.hlsl
new file mode 100644
index 00000000000000..4141aef69323df
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/AddUint64.hlsl
@@ -0,0 +1,71 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library %s \
+// RUN: -emit-llvm -disable-llvm-passes -o - | \
+// RUN: FileCheck %s --check-prefixes=CHECK
+
+
+// CHECK-LABEL: define noundef <2 x i32> @_Z20test_AddUint64_uint2Dv2_jS_(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x i32>, align 8
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x i32>, align 8
+// CHECK-NEXT: store <2 x i32> [[A]], ptr [[A_ADDR]], align 8
+// CHECK-NEXT: store <2 x i32> [[B]], ptr [[B_ADDR]], align 8
+// CHECK-NEXT: [[A_LOAD:%.*]] = load <2 x i32>, ptr [[A_ADDR]], align 8
+// CHECK-NEXT: [[B_LOAD:%.*]] = load <2 x i32>, ptr [[B_ADDR]], align 8
+// CHECK-NEXT: [[LowA:%.*]] = extractelement <2 x i32> [[A_LOAD]], i64 0
+// CHECK-NEXT: [[HighA:%.*]] = extractelement <2 x i32> [[A_LOAD]], i64 1
+// CHECK-NEXT: [[LowB:%.*]] = extractelement <2 x i32> [[B_LOAD]], i64 0
+// CHECK-NEXT: [[HighB:%.*]] = extractelement <2 x i32> [[B_LOAD]], i64 1
+// CHECK-NEXT: [[UAddc:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[LowA]], i32 [[LowB]])
+// CHECK-NEXT: [[Carry:%.*]] = extractvalue { i32, i1 } [[UAddc]], 1
+// CHECK-NEXT: [[LowSum:%.*]] = extractvalue { i32, i1 } [[UAddc]], 0
+// CHECK-NEXT: [[CarryZExt:%.*]] = zext i1 [[Carry]] to i32
+// CHECK-NEXT: [[HighSum:%.*]] = add i32 [[HighA]], [[HighB]]
+// CHECK-NEXT: [[HighSumPlusCarry:%.*]] = add i32 [[HighSum]], [[CarryZExt]]
+// CHECK-NEXT: [[HLSL_ADDUINT64_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[LowSum]], i64 0
+// CHECK-NEXT: [[HLSL_ADDUINT64:%.*]] = insertelement <2 x i32> [[HLSL_ADDUINT64_UPTO0]], i32 [[HighSumPlusCarry]], i64 1
+// CHECK-NEXT: ret <2 x i32> [[HLSL_ADDUINT64]]
+//
+uint2 test_AddUint64_uint2(uint2 a, uint2 b) {
+ return AddUint64(a, b);
+}
+
+// CHECK-LABEL: define noundef <4 x i32> @_Z20test_AddUint64_uint4Dv4_jS_(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x i32>, align 16
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x i32>, align 16
+// CHECK-NEXT: store <4 x i32> [[A]], ptr [[A_ADDR]], align 16
+// CHECK-NEXT: store <4 x i32> [[B]], ptr [[B_ADDR]], align 16
+// CHECK-NEXT: [[A_LOAD:%.*]] = load <4 x i32>, ptr [[A_ADDR]], align 16
+// CHECK-NEXT: [[B_LOAD:%.*]] = load <4 x i32>, ptr [[B_ADDR]], align 16
+// CHECK-NEXT: [[LowA:%.*]] = extractelement <4 x i32> [[A_LOAD]], i64 0
+// CHECK-NEXT: [[HighA:%.*]] = extractelement <4 x i32> [[A_LOAD]], i64 1
+// CHECK-NEXT: [[LowB:%.*]] = extractelement <4 x i32> [[B_LOAD]], i64 0
+// CHECK-NEXT: [[HighB:%.*]] = extractelement <4 x i32> [[B_LOAD]], i64 1
+// CHECK-NEXT: [[UAddc:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[LowA]], i32 [[LowB]])
+// CHECK-NEXT: [[Carry:%.*]] = extractvalue { i32, i1 } [[UAddc]], 1
+// CHECK-NEXT: [[LowSum:%.*]] = extractvalue { i32, i1 } [[UAddc]], 0
+// CHECK-NEXT: [[CarryZExt:%.*]] = zext i1 [[Carry]] to i32
+// CHECK-NEXT: [[HighSum:%.*]] = add i32 [[HighA]], [[HighB]]
+// CHECK-NEXT: [[HighSumPlusCarry:%.*]] = add i32 [[HighSum]], [[CarryZExt]]
+// CHECK-NEXT: [[HLSL_ADDUINT64_UPTO0:%.*]] = insertelement <4 x i32> poison, i32 [[LowSum]], i64 0
+// CHECK-NEXT: [[HLSL_ADDUINT64_UPTO1:%.*]] = insertelement <4 x i32> [[HLSL_ADDUINT64_UPTO0]], i32 [[HighSumPlusCarry]], i64 1
+// CHECK-NEXT: [[LowA1:%.*]] = extractelement <4 x i32> [[A_LOAD]], i64 2
+// CHECK-NEXT: [[HighA1:%.*]] = extractelement <4 x i32> [[A_LOAD]], i64 3
+// CHECK-NEXT: [[LowB1:%.*]] = extractelement <4 x i32> [[B_LOAD]], i64 2
+// CHECK-NEXT: [[HighB1:%.*]] = extractelement <4 x i32> [[B_LOAD]], i64 3
+// CHECK-NEXT: [[UAddc1:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[LowA1]], i32 [[LowB1]])
+// CHECK-NEXT: [[Carry1:%.*]] = extractvalue { i32, i1 } [[UAddc1]], 1
+// CHECK-NEXT: [[LowSum1:%.*]] = extractvalue { i32, i1 } [[UAddc1]], 0
+// CHECK-NEXT: [[CarryZExt1:%.*]] = zext i1 [[Carry1]] to i32
+// CHECK-NEXT: [[HighSum1:%.*]] = add i32 [[HighA1]], [[HighB1]]
+// CHECK-NEXT: [[HighSumPlusCarry1:%.*]] = add i32 [[HighSum1]], [[CarryZExt1]]
+// CHECK-NEXT: [[HLSL_ADDUINT64_UPTO2:%.*]] = insertelement <4 x i32> [[HLSL_ADDUINT64_UPTO1]], i32 [[LowSum1]], i64 2
+// CHECK-NEXT: [[HLSL_ADDUINT64:%.*]] = insertelement <4 x i32> [[HLSL_ADDUINT64_UPTO2]], i32 [[HighSumPlusCarry1]], i64 3
+// CHECK-NEXT: ret <4 x i32> [[HLSL_ADDUINT64]]
+//
+uint4 test_AddUint64_uint4(uint4 a, uint4 b) {
+ return AddUint64(a, b);
+}
diff --git a/clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl
new file mode 100644
index 00000000000000..4d3d4f48b21eb7
--- /dev/null
+++ b/clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl
@@ -0,0 +1,46 @@
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify
+
+uint2 test_too_few_arg() {
+ return __builtin_hlsl_adduint64();
+ // expected-error at -1 {{too few arguments to function call, expected 2, have 0}}
+}
+
+uint4 test_too_many_arg(uint4 a) {
+ return __builtin_hlsl_adduint64(a, a, a);
+ // expected-error at -1 {{too many arguments to function call, expected 2, have 3}}
+}
+
+uint2 test_mismatched_arg_types(uint2 a, uint4 b) {
+ return __builtin_hlsl_adduint64(a, b);
+ // expected-error at -1 {{all arguments to '__builtin_hlsl_adduint64' must have the same type}}
+}
+
+uint2 test_too_many_arg_elements(uint3 a, uint3 b) {
+ return __builtin_hlsl_adduint64(a, b);
+ // expected-error at -1 {{incorrect number of elements in vector operand (expected 2 or 4 elements, have 3)}}
+}
+
+uint4 test_too_few_arg_elements(uint3 a, uint3 b) {
+ return __builtin_hlsl_adduint64(a, b);
+ // expected-error at -1 {{incorrect number of elements in vector operand (expected 2 or 4 elements, have 3)}}
+}
+
+uint2 test_scalar_arg_type(uint a) {
+ return __builtin_hlsl_adduint64(a, a);
+ // expected-error at -1 {{incorrect number of elements in vector operand (expected 2 or 4 elements, have 1)}}
+}
+
+uint2 test_signed_integer_args(int2 a, int2 b) {
+ return __builtin_hlsl_adduint64(a, b);
+// expected-error at -1 {{passing 'int2' (aka 'vector<int, 2>') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(unsigned int)))) unsigned int' (vector of 2 'unsigned int' values)}}
+}
+
+struct S {
+ uint2 a;
+};
+
+uint2 test_incorrect_arg_type(S a) {
+ return __builtin_hlsl_adduint64(a, a);
+ // expected-error at -1 {{passing 'S' to parameter of incompatible type 'unsigned int'}}
+}
+
>From 2bc239b3e7fce57899c2b54089005a73e8aff1c5 Mon Sep 17 00:00:00 2001
From: Icohedron <cheung.deric at gmail.com>
Date: Thu, 30 Jan 2025 16:08:09 -0800
Subject: [PATCH 2/2] Implement the UAddc DXIL op to lower AddUint64 to DXIL
---
llvm/lib/Target/DirectX/DXIL.td | 13 +++++++++++
llvm/lib/Target/DirectX/DXILOpBuilder.cpp | 14 ++++++++++++
llvm/lib/Target/DirectX/DXILOpBuilder.h | 3 +++
llvm/lib/Target/DirectX/DXILOpLowering.cpp | 19 +++++++++++-----
llvm/test/CodeGen/DirectX/UAddc.ll | 25 ++++++++++++++++++++++
llvm/test/CodeGen/DirectX/UAddc_errors.ll | 17 +++++++++++++++
6 files changed, 86 insertions(+), 5 deletions(-)
create mode 100644 llvm/test/CodeGen/DirectX/UAddc.ll
create mode 100644 llvm/test/CodeGen/DirectX/UAddc_errors.ll
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index d099bb395449da..ec31d27a1ed7a0 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -50,6 +50,7 @@ def HandleTy : DXILOpParamType;
def ResBindTy : DXILOpParamType;
def ResPropsTy : DXILOpParamType;
def SplitDoubleTy : DXILOpParamType;
+def BinaryWithCarryTy : DXILOpParamType;
class DXILOpClass;
@@ -738,6 +739,18 @@ def UMin : DXILOp<40, binary> {
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
}
+def UAddc : DXILOp<44, binaryWithCarryOrBorrow > {
+ let Doc = "Unsigned 32-bit integer arithmetic add with carry. uaddc(a,b) = (a+b, a+b overflowed ? 1 : 0)";
+ // TODO: This `let intrinsics = ...` line may be uncommented when
+ // https://github.com/llvm/llvm-project/issues/113192 is fixed
+ // let intrinsics = [IntrinSelect<int_uadd_with_overflow>];
+ let arguments = [OverloadTy, OverloadTy];
+ let result = BinaryWithCarryTy;
+ let overloads = [Overloads<DXIL1_0, [Int32Ty]>];
+ let stages = [Stages<DXIL1_0, [all_stages]>];
+ let attributes = [Attributes<DXIL1_0, [ReadNone]>];
+}
+
def FMad : DXILOp<46, tertiary> {
let Doc = "Floating point arithmetic multiply/add operation. fmad(m,a,b) = m "
"* a + b.";
diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
index badd5aabd6432d..af309663c6044d 100644
--- a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
@@ -230,6 +230,14 @@ static StructType *getSplitDoubleType(LLVMContext &Context) {
return StructType::create({Int32Ty, Int32Ty}, "dx.types.splitdouble");
}
+static StructType *getBinaryWithCarryType(LLVMContext &Context) {
+ if (auto *ST = StructType::getTypeByName(Context, "dx.types.i32c"))
+ return ST;
+ Type *Int32Ty = Type::getInt32Ty(Context);
+ Type *Int1Ty= Type::getInt1Ty(Context);
+ return StructType::create({Int32Ty, Int1Ty}, "dx.types.i32c");
+}
+
static Type *getTypeFromOpParamType(OpParamType Kind, LLVMContext &Ctx,
Type *OverloadTy) {
switch (Kind) {
@@ -273,6 +281,8 @@ static Type *getTypeFromOpParamType(OpParamType Kind, LLVMContext &Ctx,
return getResPropsType(Ctx);
case OpParamType::SplitDoubleTy:
return getSplitDoubleType(Ctx);
+ case OpParamType::BinaryWithCarryTy:
+ return getBinaryWithCarryType(Ctx);
}
llvm_unreachable("Invalid parameter kind");
return nullptr;
@@ -539,6 +549,10 @@ StructType *DXILOpBuilder::getSplitDoubleType(LLVMContext &Context) {
return ::getSplitDoubleType(Context);
}
+StructType *DXILOpBuilder::getBinaryWithCarryType(LLVMContext &Context) {
+ return ::getBinaryWithCarryType(Context);
+}
+
StructType *DXILOpBuilder::getHandleType() {
return ::getHandleType(IRB.getContext());
}
diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.h b/llvm/lib/Target/DirectX/DXILOpBuilder.h
index df5a0240870f4a..8e13b87a2be10a 100644
--- a/llvm/lib/Target/DirectX/DXILOpBuilder.h
+++ b/llvm/lib/Target/DirectX/DXILOpBuilder.h
@@ -53,6 +53,9 @@ class DXILOpBuilder {
/// Get the `%dx.types.splitdouble` type.
StructType *getSplitDoubleType(LLVMContext &Context);
+ /// Get the `%dx.types.i32c` type.
+ StructType *getBinaryWithCarryType(LLVMContext &Context);
+
/// Get the `%dx.types.Handle` type.
StructType *getHandleType();
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index 0c245c1a43d31c..d639430ba5d31d 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -359,17 +359,16 @@ class OpLowerer {
return lowerToBindAndAnnotateHandle(F);
}
- Error replaceSplitDoubleCallUsages(CallInst *Intrin, CallInst *Op) {
+ Error replaceExtractElementTypeOfCallUsages(CallInst *Intrin, CallInst *Op) {
for (Use &U : make_early_inc_range(Intrin->uses())) {
if (auto *EVI = dyn_cast<ExtractValueInst>(U.getUser())) {
if (EVI->getNumIndices() != 1)
- return createStringError(std::errc::invalid_argument,
- "Splitdouble has only 2 elements");
+ return createStringError(std::errc::invalid_argument, (std::string(Intrin->getOpcodeName()) + " has only 2 elements").c_str());
EVI->setOperand(0, Op);
} else {
return make_error<StringError>(
- "Splitdouble use is not ExtractValueInst",
+ (std::string(Intrin->getOpcodeName()) + " use is not ExtractValueInst").c_str(),
inconvertibleErrorCode());
}
}
@@ -821,7 +820,17 @@ class OpLowerer {
F, OpCode::SplitDouble,
OpBuilder.getSplitDoubleType(M.getContext()),
[&](CallInst *CI, CallInst *Op) {
- return replaceSplitDoubleCallUsages(CI, Op);
+ return replaceExtractElementTypeOfCallUsages(CI, Op);
+ });
+ break;
+ // TODO: this can be removed when
+ // https://github.com/llvm/llvm-project/issues/113192 is fixed
+ case Intrinsic::uadd_with_overflow:
+ HasErrors |= replaceFunctionWithNamedStructOp(
+ F, OpCode::UAddc,
+ OpBuilder.getBinaryWithCarryType(M.getContext()),
+ [&](CallInst *CI, CallInst *Op) {
+ return replaceExtractElementTypeOfCallUsages(CI, Op);
});
break;
case Intrinsic::ctpop:
diff --git a/llvm/test/CodeGen/DirectX/UAddc.ll b/llvm/test/CodeGen/DirectX/UAddc.ll
new file mode 100644
index 00000000000000..ed1222694b6926
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/UAddc.ll
@@ -0,0 +1,25 @@
+; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+
+; CHECK: %dx.types.i32c = type { i32, i1 }
+
+define noundef i32 @test_UAddc(i32 noundef %a, i32 noundef %b) {
+; CHECK-LABEL: define noundef i32 @test_UAddc(
+; CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) {
+; CHECK-NEXT: [[UAddc:%.*]] = call %dx.types.i32c @dx.op.binaryWithCarryOrBorrow.i32(i32 44, i32 [[A]], i32 [[B]])
+; CHECK-NEXT: [[Carry:%.*]] = extractvalue %dx.types.i32c [[UAddc]], 1
+; CHECK-NEXT: [[Sum:%.*]] = extractvalue %dx.types.i32c [[UAddc]], 0
+; CHECK-NEXT: [[CarryZExt:%.*]] = zext i1 [[Carry]] to i32
+; CHECK-NEXT: [[Result:%.*]] = add i32 [[Sum]], [[CarryZExt]]
+; CHECK-NEXT: ret i32 [[Result]]
+;
+ %uaddc = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
+ %carry = extractvalue { i32, i1 } %uaddc, 1
+ %sum = extractvalue { i32, i1 } %uaddc, 0
+ %carry_zext = zext i1 %carry to i32
+ %result = add i32 %sum, %carry_zext
+ ret i32 %result
+}
+
+declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32)
+; CHECK: declare %dx.types.i32c @dx.op.binaryWithCarryOrBorrow.i32(i32, i32, i32)
+
diff --git a/llvm/test/CodeGen/DirectX/UAddc_errors.ll b/llvm/test/CodeGen/DirectX/UAddc_errors.ll
new file mode 100644
index 00000000000000..a643952febf977
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/UAddc_errors.ll
@@ -0,0 +1,17 @@
+; RUN: not opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s
+
+; DXIL operation UAddc only supports i32. Other integer types are unsupported.
+; CHECK: in function uaddc_i16
+; CHECK-SAME: Cannot create UAddc operation: Invalid overload type
+
+define noundef i16 @uaddc_i16(i16 noundef %a, i16 noundef %b) {
+ %uaddc = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 %a, i16 %b)
+ %carry = extractvalue { i16, i1 } %uaddc, 1
+ %sum = extractvalue { i16, i1 } %uaddc, 0
+ %carry_zext = zext i1 %carry to i16
+ %result = add i16 %sum, %carry_zext
+ ret i16 %result
+}
+
+declare { i16, i1 } @llvm.uadd.with.overflow.i16(i16, i16)
+
More information about the llvm-commits
mailing list