[clang] [llvm] [HLSL] Implement dot2add intrinsic (PR #131237)
Sumit Agarwal via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 20 15:25:16 PDT 2025
https://github.com/sumitsays updated https://github.com/llvm/llvm-project/pull/131237
>From 6d5c4053c90975d64e378e52779dab9c3ffb64cd Mon Sep 17 00:00:00 2001
From: Sumit Agarwal <sumitagarwal at microsoft.com>
Date: Thu, 13 Mar 2025 16:02:32 -0700
Subject: [PATCH 1/7] dot2add working for dxil without sema check
---
clang/include/clang/Basic/Builtins.td | 6 +
clang/lib/CodeGen/CGBuiltin.cpp | 10 +
clang/lib/CodeGen/CGHLSLRuntime.h | 1 +
clang/lib/Headers/hlsl/hlsl_intrinsics.h | 366 +++++++++++++++++++++
llvm/include/llvm/IR/IntrinsicsDirectX.td | 4 +
llvm/include/llvm/IR/IntrinsicsSPIRV.td | 5 +
llvm/lib/Target/DirectX/DXIL.td | 11 +
llvm/lib/Target/DirectX/DXILOpLowering.cpp | 33 ++
8 files changed, 436 insertions(+)
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 72a5e495c4059..8a0237171f7df 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -4891,6 +4891,12 @@ def HLSLDotProduct : LangBuiltin<"HLSL_LANG"> {
let Prototype = "void(...)";
}
+def HLSLDot2Add : LangBuiltin<"HLSL_LANG"> {
+ let Spellings = ["__builtin_hlsl_dot2add"];
+ let Attributes = [NoThrow, Const];
+ let Prototype = "void(...)";
+}
+
def HLSLDot4AddI8Packed : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_dot4add_i8packed"];
let Attributes = [NoThrow, Const];
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index c126f88b9e3a5..8c0f42e0c85f7 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -19681,6 +19681,16 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
getDotProductIntrinsic(CGM.getHLSLRuntime(), VecTy0->getElementType()),
ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.dot");
}
+ case Builtin::BI__builtin_hlsl_dot2add: {
+ Value *A = EmitScalarExpr(E->getArg(0));
+ Value *B = EmitScalarExpr(E->getArg(1));
+ Value *C = EmitScalarExpr(E->getArg(2));
+
+ Intrinsic::ID ID = CGM.getHLSLRuntime().getDot2AddIntrinsic();
+ return Builder.CreateIntrinsic(
+ /*ReturnType=*/C->getType(), ID, ArrayRef<Value *>{A, B, C}, nullptr,
+ "hlsl.dot2add");
+ }
case Builtin::BI__builtin_hlsl_dot4add_i8packed: {
Value *A = EmitScalarExpr(E->getArg(0));
Value *B = EmitScalarExpr(E->getArg(1));
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index 23f187b24284f..5ba69819e5431 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -99,6 +99,7 @@ class CGHLSLRuntime {
GENERATE_HLSL_INTRINSIC_FUNCTION(FDot, fdot)
GENERATE_HLSL_INTRINSIC_FUNCTION(SDot, sdot)
GENERATE_HLSL_INTRINSIC_FUNCTION(UDot, udot)
+ GENERATE_HLSL_INTRINSIC_FUNCTION(Dot2Add, dot2add)
GENERATE_HLSL_INTRINSIC_FUNCTION(Dot4AddI8Packed, dot4add_i8packed)
GENERATE_HLSL_INTRINSIC_FUNCTION(Dot4AddU8Packed, dot4add_u8packed)
GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveAllTrue, wave_all)
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index 5459cbeb34fd0..38422229e018d 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -118,6 +118,372 @@ const inline float distance(__detail::HLSL_FIXED_VECTOR<float, N> X,
}
//===----------------------------------------------------------------------===//
+<<<<<<< Updated upstream
+=======
+// dot product builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn K dot(T X, T Y)
+/// \brief Return the dot product (a scalar value) of \a X and \a Y.
+/// \param X The X input value.
+/// \param Y The Y input value.
+
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
+half dot(half, half);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
+half dot(half2, half2);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
+half dot(half3, half3);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
+half dot(half4, half4);
+
+#ifdef __HLSL_ENABLE_16_BIT
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
+int16_t dot(int16_t, int16_t);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
+int16_t dot(int16_t2, int16_t2);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
+int16_t dot(int16_t3, int16_t3);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
+int16_t dot(int16_t4, int16_t4);
+
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
+uint16_t dot(uint16_t, uint16_t);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
+uint16_t dot(uint16_t2, uint16_t2);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
+uint16_t dot(uint16_t3, uint16_t3);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
+uint16_t dot(uint16_t4, uint16_t4);
+#endif
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
+float dot(float, float);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
+float dot(float2, float2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
+float dot(float3, float3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
+float dot(float4, float4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
+double dot(double, double);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
+int dot(int, int);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
+int dot(int2, int2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
+int dot(int3, int3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
+int dot(int4, int4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
+uint dot(uint, uint);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
+uint dot(uint2, uint2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
+uint dot(uint3, uint3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
+uint dot(uint4, uint4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
+int64_t dot(int64_t, int64_t);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
+int64_t dot(int64_t2, int64_t2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
+int64_t dot(int64_t3, int64_t3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
+int64_t dot(int64_t4, int64_t4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
+uint64_t dot(uint64_t, uint64_t);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
+uint64_t dot(uint64_t2, uint64_t2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
+uint64_t dot(uint64_t3, uint64_t3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
+uint64_t dot(uint64_t4, uint64_t4);
+
+//===----------------------------------------------------------------------===//
+// dot4add builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn int dot4add_i8packed(uint A, uint B, int C)
+
+_HLSL_AVAILABILITY(shadermodel, 6.4)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot4add_i8packed)
+int dot4add_i8packed(uint, uint, int);
+
+/// \fn uint dot4add_u8packed(uint A, uint B, uint C)
+
+_HLSL_AVAILABILITY(shadermodel, 6.4)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot4add_u8packed)
+uint dot4add_u8packed(uint, uint, uint);
+
+//===----------------------------------------------------------------------===//
+// dot2add builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn float dot2add(half2 a, half2 b, float c)
+
+_HLSL_AVAILABILITY(shadermodel, 6.4)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot2add)
+float dot2add(half2, half2, float);
+
+//===----------------------------------------------------------------------===//
+// exp builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn T exp(T x)
+/// \brief Returns the base-e exponential, or \a e**x, of the specified value.
+/// \param x The specified input value.
+///
+/// The return value is the base-e exponential of the \a x parameter.
+
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp)
+half exp(half);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp)
+half2 exp(half2);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp)
+half3 exp(half3);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp)
+half4 exp(half4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp)
+float exp(float);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp)
+float2 exp(float2);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp)
+float3 exp(float3);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp)
+float4 exp(float4);
+
+//===----------------------------------------------------------------------===//
+// exp2 builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn T exp2(T x)
+/// \brief Returns the base 2 exponential, or \a 2**x, of the specified value.
+/// \param x The specified input value.
+///
+/// The base 2 exponential of the \a x parameter.
+
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2)
+half exp2(half);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2)
+half2 exp2(half2);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2)
+half3 exp2(half3);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2)
+half4 exp2(half4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2)
+float exp2(float);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2)
+float2 exp2(float2);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2)
+float3 exp2(float3);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2)
+float4 exp2(float4);
+
+//===----------------------------------------------------------------------===//
+// firstbithigh builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn T firstbithigh(T Val)
+/// \brief Returns the location of the first set bit starting from the highest
+/// order bit and working downward, per component.
+/// \param Val the input value.
+
+#ifdef __HLSL_ENABLE_16_BIT
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
+uint firstbithigh(int16_t);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
+uint2 firstbithigh(int16_t2);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
+uint3 firstbithigh(int16_t3);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
+uint4 firstbithigh(int16_t4);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
+uint firstbithigh(uint16_t);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
+uint2 firstbithigh(uint16_t2);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
+uint3 firstbithigh(uint16_t3);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
+uint4 firstbithigh(uint16_t4);
+#endif
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
+uint firstbithigh(int);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
+uint2 firstbithigh(int2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
+uint3 firstbithigh(int3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
+uint4 firstbithigh(int4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
+uint firstbithigh(uint);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
+uint2 firstbithigh(uint2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
+uint3 firstbithigh(uint3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
+uint4 firstbithigh(uint4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
+uint firstbithigh(int64_t);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
+uint2 firstbithigh(int64_t2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
+uint3 firstbithigh(int64_t3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
+uint4 firstbithigh(int64_t4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
+uint firstbithigh(uint64_t);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
+uint2 firstbithigh(uint64_t2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
+uint3 firstbithigh(uint64_t3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
+uint4 firstbithigh(uint64_t4);
+
+//===----------------------------------------------------------------------===//
+// firstbitlow builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn T firstbitlow(T Val)
+/// \brief Returns the location of the first set bit starting from the lowest
+/// order bit and working upward, per component.
+/// \param Val the input value.
+
+#ifdef __HLSL_ENABLE_16_BIT
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint firstbitlow(int16_t);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint2 firstbitlow(int16_t2);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint3 firstbitlow(int16_t3);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint4 firstbitlow(int16_t4);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint firstbitlow(uint16_t);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint2 firstbitlow(uint16_t2);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint3 firstbitlow(uint16_t3);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint4 firstbitlow(uint16_t4);
+#endif
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint firstbitlow(int);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint2 firstbitlow(int2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint3 firstbitlow(int3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint4 firstbitlow(int4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint firstbitlow(uint);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint2 firstbitlow(uint2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint3 firstbitlow(uint3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint4 firstbitlow(uint4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint firstbitlow(int64_t);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint2 firstbitlow(int64_t2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint3 firstbitlow(int64_t3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint4 firstbitlow(int64_t4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint firstbitlow(uint64_t);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint2 firstbitlow(uint64_t2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint3 firstbitlow(uint64_t3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
+uint4 firstbitlow(uint64_t4);
+
+//===----------------------------------------------------------------------===//
+// floor builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn T floor(T Val)
+/// \brief Returns the largest integer that is less than or equal to the input
+/// value, \a Val.
+/// \param Val The input value.
+
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor)
+half floor(half);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor)
+half2 floor(half2);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor)
+half3 floor(half3);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor)
+half4 floor(half4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor)
+float floor(float);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor)
+float2 floor(float2);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor)
+float3 floor(float3);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor)
+float4 floor(float4);
+
+//===----------------------------------------------------------------------===//
+>>>>>>> Stashed changes
// fmod builtins
//===----------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index ead7286f4311c..775d325feeb14 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -100,6 +100,10 @@ def int_dx_udot :
DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
[llvm_anyint_ty, LLVMScalarOrSameVectorWidth<0, LLVMVectorElementType<0>>],
[IntrNoMem, Commutative] >;
+def int_dx_dot2add :
+ DefaultAttrsIntrinsic<[llvm_float_ty],
+ [llvm_anyfloat_ty, LLVMMatchType<0>, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
def int_dx_dot4add_i8packed : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_dx_dot4add_u8packed : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
index df3e137c80980..240c36513f0b6 100644
--- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td
+++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
@@ -87,6 +87,11 @@ let TargetPrefix = "spv" in {
DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
[llvm_anyint_ty, LLVMScalarOrSameVectorWidth<0, LLVMVectorElementType<0>>],
[IntrNoMem, Commutative] >;
+
+ def int_spv_dot2add : DefaultAttrsIntrinsic<[llvm_float_ty],
+ [llvm_anyfloat_ty, LLVMMatchType<0>, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+
def int_spv_dot4add_i8packed : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_spv_dot4add_u8packed : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_spv_wave_active_countbits : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>;
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index ebe1d876d58b1..193b592a525a0 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -1098,6 +1098,17 @@ def RawBufferStore : DXILOp<140, rawBufferStore> {
let stages = [Stages<DXIL1_2, [all_stages]>];
}
+def Dot2AddHalf : DXILOp<162, dot2AddHalf> {
+ let Doc = "dot product of 2 vectors of half having size = 2, returns "
+ "float";
+ let intrinsics = [IntrinSelect<int_dx_dot2add>];
+ let arguments = [FloatTy, HalfTy, HalfTy, HalfTy, HalfTy];
+ let result = FloatTy;
+ let overloads = [Overloads<DXIL1_0, []>];
+ let stages = [Stages<DXIL1_0, [all_stages]>];
+ let attributes = [Attributes<DXIL1_0, [ReadNone]>];
+}
+
def Dot4AddI8Packed : DXILOp<163, dot4AddPacked> {
let Doc = "signed dot product of 4 x i8 vectors packed into i32, with "
"accumulate to i32";
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index dff9f3e03079e..10a726aba5ff3 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -75,6 +75,27 @@ static SmallVector<Value *> argVectorFlatten(CallInst *Orig,
return NewOperands;
}
+static SmallVector<Value *> argVectorFlattenExcludeLastElement(CallInst *Orig,
+ IRBuilder<> &Builder) {
+ // Note: arg[NumOperands-1] is a pointer and is not needed by our flattening.
+ unsigned NumOperands = Orig->getNumOperands() - 2;
+ assert(NumOperands > 0);
+ Value *Arg0 = Orig->getOperand(0);
+ [[maybe_unused]] auto *VecArg0 = dyn_cast<FixedVectorType>(Arg0->getType());
+ assert(VecArg0);
+ SmallVector<Value *> NewOperands = populateOperands(Arg0, Builder);
+ for (unsigned I = 1; I < NumOperands; ++I) {
+ Value *Arg = Orig->getOperand(I);
+ [[maybe_unused]] auto *VecArg = dyn_cast<FixedVectorType>(Arg->getType());
+ assert(VecArg);
+ assert(VecArg0->getElementType() == VecArg->getElementType());
+ assert(VecArg0->getNumElements() == VecArg->getNumElements());
+ auto NextOperandList = populateOperands(Arg, Builder);
+ NewOperands.append(NextOperandList.begin(), NextOperandList.end());
+ }
+ return NewOperands;
+}
+
namespace {
class OpLowerer {
Module &M;
@@ -168,6 +189,18 @@ class OpLowerer {
}
} else if (IsVectorArgExpansion) {
Args = argVectorFlatten(CI, OpBuilder.getIRB());
+ } else if (F.getIntrinsicID() == Intrinsic::dx_dot2add) {
+ unsigned NumOperands = CI->getNumOperands() - 1;
+ assert(NumOperands > 0);
+ Value *LastArg = CI->getOperand(NumOperands - 1);
+
+ Args.push_back(LastArg);
+
+ //dbgs() << "Value of LastArg" << LastArg->getName() << "\n";
+
+
+ //Args = populateOperands(LastArg, OpBuilder.getIRB());
+ Args.append(argVectorFlattenExcludeLastElement(CI, OpBuilder.getIRB()));
} else {
Args.append(CI->arg_begin(), CI->arg_end());
}
>From 3e265ffc7ba3486027e63105acf2064699e58807 Mon Sep 17 00:00:00 2001
From: Sumit Agarwal <sumitagarwal at microsoft.com>
Date: Tue, 18 Mar 2025 17:33:30 -0700
Subject: [PATCH 2/7] WIP
---
.../lib/Headers/hlsl/hlsl_alias_intrinsics.h | 10 +
clang/lib/Headers/hlsl/hlsl_intrinsics.h | 366 ------------------
clang/test/CodeGenHLSL/builtins/dot2add.hlsl | 15 +
llvm/test/CodeGen/DirectX/dot2add.ll | 8 +
4 files changed, 33 insertions(+), 366 deletions(-)
create mode 100644 clang/test/CodeGenHLSL/builtins/dot2add.hlsl
create mode 100644 llvm/test/CodeGen/DirectX/dot2add.ll
diff --git a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
index 62054b368691d..b7b9369b3ace9 100644
--- a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
@@ -982,6 +982,16 @@ uint64_t dot(uint64_t3, uint64_t3);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
uint64_t dot(uint64_t4, uint64_t4);
+//===----------------------------------------------------------------------===//
+// dot2add builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn float dot2add(half2 a, half2 b, float c)
+
+_HLSL_AVAILABILITY(shadermodel, 6.4)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot2add)
+float dot2add(half2, half2, float);
+
//===----------------------------------------------------------------------===//
// dot4add builtins
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index 38422229e018d..5459cbeb34fd0 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -118,372 +118,6 @@ const inline float distance(__detail::HLSL_FIXED_VECTOR<float, N> X,
}
//===----------------------------------------------------------------------===//
-<<<<<<< Updated upstream
-=======
-// dot product builtins
-//===----------------------------------------------------------------------===//
-
-/// \fn K dot(T X, T Y)
-/// \brief Return the dot product (a scalar value) of \a X and \a Y.
-/// \param X The X input value.
-/// \param Y The Y input value.
-
-_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
-half dot(half, half);
-_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
-half dot(half2, half2);
-_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
-half dot(half3, half3);
-_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
-half dot(half4, half4);
-
-#ifdef __HLSL_ENABLE_16_BIT
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
-int16_t dot(int16_t, int16_t);
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
-int16_t dot(int16_t2, int16_t2);
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
-int16_t dot(int16_t3, int16_t3);
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
-int16_t dot(int16_t4, int16_t4);
-
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
-uint16_t dot(uint16_t, uint16_t);
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
-uint16_t dot(uint16_t2, uint16_t2);
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
-uint16_t dot(uint16_t3, uint16_t3);
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
-uint16_t dot(uint16_t4, uint16_t4);
-#endif
-
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
-float dot(float, float);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
-float dot(float2, float2);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
-float dot(float3, float3);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
-float dot(float4, float4);
-
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
-double dot(double, double);
-
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
-int dot(int, int);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
-int dot(int2, int2);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
-int dot(int3, int3);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
-int dot(int4, int4);
-
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
-uint dot(uint, uint);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
-uint dot(uint2, uint2);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
-uint dot(uint3, uint3);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
-uint dot(uint4, uint4);
-
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
-int64_t dot(int64_t, int64_t);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
-int64_t dot(int64_t2, int64_t2);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
-int64_t dot(int64_t3, int64_t3);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
-int64_t dot(int64_t4, int64_t4);
-
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
-uint64_t dot(uint64_t, uint64_t);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
-uint64_t dot(uint64_t2, uint64_t2);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
-uint64_t dot(uint64_t3, uint64_t3);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
-uint64_t dot(uint64_t4, uint64_t4);
-
-//===----------------------------------------------------------------------===//
-// dot4add builtins
-//===----------------------------------------------------------------------===//
-
-/// \fn int dot4add_i8packed(uint A, uint B, int C)
-
-_HLSL_AVAILABILITY(shadermodel, 6.4)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot4add_i8packed)
-int dot4add_i8packed(uint, uint, int);
-
-/// \fn uint dot4add_u8packed(uint A, uint B, uint C)
-
-_HLSL_AVAILABILITY(shadermodel, 6.4)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot4add_u8packed)
-uint dot4add_u8packed(uint, uint, uint);
-
-//===----------------------------------------------------------------------===//
-// dot2add builtins
-//===----------------------------------------------------------------------===//
-
-/// \fn float dot2add(half2 a, half2 b, float c)
-
-_HLSL_AVAILABILITY(shadermodel, 6.4)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot2add)
-float dot2add(half2, half2, float);
-
-//===----------------------------------------------------------------------===//
-// exp builtins
-//===----------------------------------------------------------------------===//
-
-/// \fn T exp(T x)
-/// \brief Returns the base-e exponential, or \a e**x, of the specified value.
-/// \param x The specified input value.
-///
-/// The return value is the base-e exponential of the \a x parameter.
-
-_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp)
-half exp(half);
-_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp)
-half2 exp(half2);
-_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp)
-half3 exp(half3);
-_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp)
-half4 exp(half4);
-
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp)
-float exp(float);
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp)
-float2 exp(float2);
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp)
-float3 exp(float3);
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp)
-float4 exp(float4);
-
-//===----------------------------------------------------------------------===//
-// exp2 builtins
-//===----------------------------------------------------------------------===//
-
-/// \fn T exp2(T x)
-/// \brief Returns the base 2 exponential, or \a 2**x, of the specified value.
-/// \param x The specified input value.
-///
-/// The base 2 exponential of the \a x parameter.
-
-_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2)
-half exp2(half);
-_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2)
-half2 exp2(half2);
-_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2)
-half3 exp2(half3);
-_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2)
-half4 exp2(half4);
-
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2)
-float exp2(float);
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2)
-float2 exp2(float2);
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2)
-float3 exp2(float3);
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2)
-float4 exp2(float4);
-
-//===----------------------------------------------------------------------===//
-// firstbithigh builtins
-//===----------------------------------------------------------------------===//
-
-/// \fn T firstbithigh(T Val)
-/// \brief Returns the location of the first set bit starting from the highest
-/// order bit and working downward, per component.
-/// \param Val the input value.
-
-#ifdef __HLSL_ENABLE_16_BIT
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint firstbithigh(int16_t);
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint2 firstbithigh(int16_t2);
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint3 firstbithigh(int16_t3);
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint4 firstbithigh(int16_t4);
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint firstbithigh(uint16_t);
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint2 firstbithigh(uint16_t2);
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint3 firstbithigh(uint16_t3);
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint4 firstbithigh(uint16_t4);
-#endif
-
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint firstbithigh(int);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint2 firstbithigh(int2);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint3 firstbithigh(int3);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint4 firstbithigh(int4);
-
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint firstbithigh(uint);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint2 firstbithigh(uint2);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint3 firstbithigh(uint3);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint4 firstbithigh(uint4);
-
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint firstbithigh(int64_t);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint2 firstbithigh(int64_t2);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint3 firstbithigh(int64_t3);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint4 firstbithigh(int64_t4);
-
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint firstbithigh(uint64_t);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint2 firstbithigh(uint64_t2);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint3 firstbithigh(uint64_t3);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint4 firstbithigh(uint64_t4);
-
-//===----------------------------------------------------------------------===//
-// firstbitlow builtins
-//===----------------------------------------------------------------------===//
-
-/// \fn T firstbitlow(T Val)
-/// \brief Returns the location of the first set bit starting from the lowest
-/// order bit and working upward, per component.
-/// \param Val the input value.
-
-#ifdef __HLSL_ENABLE_16_BIT
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
-uint firstbitlow(int16_t);
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
-uint2 firstbitlow(int16_t2);
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
-uint3 firstbitlow(int16_t3);
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
-uint4 firstbitlow(int16_t4);
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
-uint firstbitlow(uint16_t);
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
-uint2 firstbitlow(uint16_t2);
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
-uint3 firstbitlow(uint16_t3);
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
-uint4 firstbitlow(uint16_t4);
-#endif
-
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
-uint firstbitlow(int);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
-uint2 firstbitlow(int2);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
-uint3 firstbitlow(int3);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
-uint4 firstbitlow(int4);
-
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
-uint firstbitlow(uint);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
-uint2 firstbitlow(uint2);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
-uint3 firstbitlow(uint3);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
-uint4 firstbitlow(uint4);
-
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
-uint firstbitlow(int64_t);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
-uint2 firstbitlow(int64_t2);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
-uint3 firstbitlow(int64_t3);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
-uint4 firstbitlow(int64_t4);
-
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
-uint firstbitlow(uint64_t);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
-uint2 firstbitlow(uint64_t2);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
-uint3 firstbitlow(uint64_t3);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
-uint4 firstbitlow(uint64_t4);
-
-//===----------------------------------------------------------------------===//
-// floor builtins
-//===----------------------------------------------------------------------===//
-
-/// \fn T floor(T Val)
-/// \brief Returns the largest integer that is less than or equal to the input
-/// value, \a Val.
-/// \param Val The input value.
-
-_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor)
-half floor(half);
-_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor)
-half2 floor(half2);
-_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor)
-half3 floor(half3);
-_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor)
-half4 floor(half4);
-
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor)
-float floor(float);
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor)
-float2 floor(float2);
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor)
-float3 floor(float3);
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor)
-float4 floor(float4);
-
-//===----------------------------------------------------------------------===//
->>>>>>> Stashed changes
// fmod builtins
//===----------------------------------------------------------------------===//
diff --git a/clang/test/CodeGenHLSL/builtins/dot2add.hlsl b/clang/test/CodeGenHLSL/builtins/dot2add.hlsl
new file mode 100644
index 0000000000000..07e438063c29d
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/dot2add.hlsl
@@ -0,0 +1,15 @@
+// RUN: %clang_cc1 -finclude-default-header -triple \
+// RUN: dxil-pc-shadermodel6.3-compute %s -emit-llvm -disable-llvm-passes -o - | \
+// RUN: FileCheck %s -DTARGET=dx
+// RUN: %clang_cc1 -finclude-default-header -triple \
+// RUN: spirv-pc-vulkan-compute %s -emit-llvm -disable-llvm-passes -o - | \
+// RUN: FileCheck %s -DTARGET=spv
+
+// Test basic lowering to runtime function call.
+
+// CHECK-LABEL: test
+float test(half2 p1, half2 p2, float p3) {
+ return dot2add(p1, p2, p3);
+}
+
+// CHECK: declare [[TY]] @llvm.[[TARGET]].dot4add.i8packed([[TY]], [[TY]], [[TY]])
diff --git a/llvm/test/CodeGen/DirectX/dot2add.ll b/llvm/test/CodeGen/DirectX/dot2add.ll
new file mode 100644
index 0000000000000..b1019c36b56e8
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/dot2add.ll
@@ -0,0 +1,8 @@
+; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-compute %s | FileCheck %s
+
+define noundef float @dot2add_simple(<2 x half> noundef %a, <2 x half> noundef %b, float %c) {
+entry:
+; CHECK: call float @dx.op.dot2AddHalf(i32 162, float %c, half %0, half %1, half %2, half %3)
+ %ret = call float @llvm.dx.dot2add(<2 x half> %a, <2 x half> %b, float %c)
+ ret float %ret
+}
\ No newline at end of file
>From 7baee968d04f37b63072994f60cb20185dadb11a Mon Sep 17 00:00:00 2001
From: Sumit Agarwal <sumitagarwal at microsoft.com>
Date: Wed, 19 Mar 2025 02:53:09 -0700
Subject: [PATCH 3/7] WIP
---
clang/test/CodeGenHLSL/builtins/dot2add.hlsl | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/clang/test/CodeGenHLSL/builtins/dot2add.hlsl b/clang/test/CodeGenHLSL/builtins/dot2add.hlsl
index 07e438063c29d..ee7cb79bb26c4 100644
--- a/clang/test/CodeGenHLSL/builtins/dot2add.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/dot2add.hlsl
@@ -1,14 +1,13 @@
// RUN: %clang_cc1 -finclude-default-header -triple \
// RUN: dxil-pc-shadermodel6.3-compute %s -emit-llvm -disable-llvm-passes -o - | \
// RUN: FileCheck %s -DTARGET=dx
-// RUN: %clang_cc1 -finclude-default-header -triple \
-// RUN: spirv-pc-vulkan-compute %s -emit-llvm -disable-llvm-passes -o - | \
-// RUN: FileCheck %s -DTARGET=spv
// Test basic lowering to runtime function call.
// CHECK-LABEL: test
float test(half2 p1, half2 p2, float p3) {
+ // CHECK-DXIL: %hlsl.dot2add = call reassoc nnan ninf nsz arcp afn float @llvm.dx.dot2add.v2f32(<2 x float> %0, <2 x float> %1, float %2)
+ // CHECK: ret float %hlsl.dot2add
return dot2add(p1, p2, p3);
}
>From 0c5a6af1cc975cb4253f61d54622b40a62d3c373 Mon Sep 17 00:00:00 2001
From: Sumit Agarwal <sumitagarwal at microsoft.com>
Date: Wed, 19 Mar 2025 19:30:56 -0700
Subject: [PATCH 4/7] WIP: Mostly wrote everything.
---
clang/lib/CodeGen/CGBuiltin.cpp | 7 ++-
clang/lib/CodeGen/CGHLSLRuntime.h | 1 -
.../lib/Headers/hlsl/hlsl_alias_intrinsics.h | 10 -----
.../lib/Headers/hlsl/hlsl_intrinsic_helpers.h | 8 ++++
clang/lib/Headers/hlsl/hlsl_intrinsics.h | 12 +++++
clang/lib/Sema/SemaHLSL.cpp | 45 +++++++++++++++----
.../SemaHLSL/BuiltIns/Dot2Add-errors.hlsl | 11 +++++
llvm/include/llvm/IR/IntrinsicsSPIRV.td | 5 ---
llvm/lib/Target/DirectX/DXILOpLowering.cpp | 22 ++++++---
9 files changed, 90 insertions(+), 31 deletions(-)
create mode 100644 clang/test/SemaHLSL/BuiltIns/Dot2Add-errors.hlsl
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 8c0f42e0c85f7..b3d9db5be7d8d 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -19682,11 +19682,16 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.dot");
}
case Builtin::BI__builtin_hlsl_dot2add: {
+ llvm::Triple::ArchType Arch = CGM.getTarget().getTriple().getArch();
+ if (Arch != llvm::Triple::dxil) {
+ llvm_unreachable("Intrinsic dot2add can be executed as a builtin only on dxil");
+ }
Value *A = EmitScalarExpr(E->getArg(0));
Value *B = EmitScalarExpr(E->getArg(1));
Value *C = EmitScalarExpr(E->getArg(2));
- Intrinsic::ID ID = CGM.getHLSLRuntime().getDot2AddIntrinsic();
+ //llvm::Intrinsic::dx_##IntrinsicPostfix
+ Intrinsic::ID ID = llvm ::Intrinsic::dx_dot2add;
return Builder.CreateIntrinsic(
/*ReturnType=*/C->getType(), ID, ArrayRef<Value *>{A, B, C}, nullptr,
"hlsl.dot2add");
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index 5ba69819e5431..23f187b24284f 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -99,7 +99,6 @@ class CGHLSLRuntime {
GENERATE_HLSL_INTRINSIC_FUNCTION(FDot, fdot)
GENERATE_HLSL_INTRINSIC_FUNCTION(SDot, sdot)
GENERATE_HLSL_INTRINSIC_FUNCTION(UDot, udot)
- GENERATE_HLSL_INTRINSIC_FUNCTION(Dot2Add, dot2add)
GENERATE_HLSL_INTRINSIC_FUNCTION(Dot4AddI8Packed, dot4add_i8packed)
GENERATE_HLSL_INTRINSIC_FUNCTION(Dot4AddU8Packed, dot4add_u8packed)
GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveAllTrue, wave_all)
diff --git a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
index b7b9369b3ace9..62054b368691d 100644
--- a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
@@ -982,16 +982,6 @@ uint64_t dot(uint64_t3, uint64_t3);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
uint64_t dot(uint64_t4, uint64_t4);
-//===----------------------------------------------------------------------===//
-// dot2add builtins
-//===----------------------------------------------------------------------===//
-
-/// \fn float dot2add(half2 a, half2 b, float c)
-
-_HLSL_AVAILABILITY(shadermodel, 6.4)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot2add)
-float dot2add(half2, half2, float);
-
//===----------------------------------------------------------------------===//
// dot4add builtins
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h
index 5f7c047dbf340..46653d7b295b2 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h
@@ -45,6 +45,14 @@ distance_vec_impl(vector<T, N> X, vector<T, N> Y) {
return length_vec_impl(X - Y);
}
+constexpr float dot2add_impl(half2 a, half2 b, float c) {
+#if defined(__DIRECTX__)
+ return __builtin_hlsl_dot2add(a, b, c);
+#else
+ return dot(a, b) + c;
+#endif
+}
+
template <typename T> constexpr T reflect_impl(T I, T N) {
return I - 2 * N * I * N;
}
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index 5459cbeb34fd0..b1c1335ce3328 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -117,6 +117,18 @@ const inline float distance(__detail::HLSL_FIXED_VECTOR<float, N> X,
return __detail::distance_vec_impl(X, Y);
}
+//===----------------------------------------------------------------------===//
+// dot2add builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn float dot2add(half2 a, half2 b, float c)
+/// \brief Dot product of 2 vector of type half and add a float scalar value.
+
+_HLSL_AVAILABILITY(shadermodel, 6.4)
+const inline float dot2add(half2 a, half2 b, float c) {
+ return __detail::dot2add_impl(a, b, c);
+}
+
//===----------------------------------------------------------------------===//
// fmod builtins
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 36de110e75e8a..98dc1b3fe4e77 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -1989,7 +1989,7 @@ void SemaHLSL::diagnoseAvailabilityViolations(TranslationUnitDecl *TU) {
}
// Helper function for CheckHLSLBuiltinFunctionCall
-static bool CheckVectorElementCallArgs(Sema *S, CallExpr *TheCall) {
+static bool CheckVectorElementCallArgs(Sema *S, CallExpr *TheCall, unsigned NumArgs) {
assert(TheCall->getNumArgs() > 1);
ExprResult A = TheCall->getArg(0);
@@ -1999,7 +1999,7 @@ static bool CheckVectorElementCallArgs(Sema *S, CallExpr *TheCall) {
SourceLocation BuiltinLoc = TheCall->getBeginLoc();
bool AllBArgAreVectors = true;
- for (unsigned i = 1; i < TheCall->getNumArgs(); ++i) {
+ for (unsigned i = 1; i < NumArgs; ++i) {
ExprResult B = TheCall->getArg(i);
QualType ArgTyB = B.get()->getType();
auto *VecTyB = ArgTyB->getAs<VectorType>();
@@ -2049,6 +2049,10 @@ static bool CheckVectorElementCallArgs(Sema *S, CallExpr *TheCall) {
return false;
}
+static bool CheckVectorElementCallArgs(Sema *S, CallExpr *TheCall) {
+ return CheckVectorElementCallArgs(S, TheCall, TheCall->getNumArgs());
+}
+
static bool CheckAllArgsHaveSameType(Sema *S, CallExpr *TheCall) {
assert(TheCall->getNumArgs() > 1);
QualType ArgTy0 = TheCall->getArg(0)->getType();
@@ -2091,10 +2095,10 @@ static bool CheckArgTypeIsCorrect(
return false;
}
-static bool CheckAllArgTypesAreCorrect(
- Sema *S, CallExpr *TheCall, QualType ExpectedType,
+static bool CheckArgTypesAreCorrect(
+ Sema *S, CallExpr *TheCall, unsigned NumArgs, QualType ExpectedType,
llvm::function_ref<bool(clang::QualType PassedType)> Check) {
- for (unsigned i = 0; i < TheCall->getNumArgs(); ++i) {
+ for (unsigned i = 0; i < NumArgs; ++i) {
Expr *Arg = TheCall->getArg(i);
if (CheckArgTypeIsCorrect(S, Arg, ExpectedType, Check)) {
return true;
@@ -2103,6 +2107,13 @@ static bool CheckAllArgTypesAreCorrect(
return false;
}
+static bool CheckAllArgTypesAreCorrect(
+ Sema *S, CallExpr *TheCall, QualType ExpectedType,
+ llvm::function_ref<bool(clang::QualType PassedType)> Check) {
+ return CheckArgTypesAreCorrect(S, TheCall, TheCall->getNumArgs(),
+ ExpectedType, Check);
+}
+
static bool CheckAllArgsHaveFloatRepresentation(Sema *S, CallExpr *TheCall) {
auto checkAllFloatTypes = [](clang::QualType PassedType) -> bool {
return !PassedType->hasFloatingRepresentation();
@@ -2146,15 +2157,17 @@ static bool CheckModifiableLValue(Sema *S, CallExpr *TheCall,
return true;
}
-static bool CheckNoDoubleVectors(Sema *S, CallExpr *TheCall) {
+static bool CheckNoDoubleVectors(Sema *S, CallExpr *TheCall,
+ unsigned NumArgs, QualType ExpectedType) {
auto checkDoubleVector = [](clang::QualType PassedType) -> bool {
if (const auto *VecTy = PassedType->getAs<VectorType>())
return VecTy->getElementType()->isDoubleType();
return false;
};
- return CheckAllArgTypesAreCorrect(S, TheCall, S->Context.FloatTy,
- checkDoubleVector);
+ return CheckArgTypesAreCorrect(S, TheCall, NumArgs,
+ ExpectedType, checkDoubleVector);
}
+
static bool CheckFloatingOrIntRepresentation(Sema *S, CallExpr *TheCall) {
auto checkAllSignedTypes = [](clang::QualType PassedType) -> bool {
return !PassedType->hasIntegerRepresentation() &&
@@ -2468,7 +2481,21 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
return true;
if (SemaRef.BuiltinVectorToScalarMath(TheCall))
return true;
- if (CheckNoDoubleVectors(&SemaRef, TheCall))
+ if (CheckNoDoubleVectors(&SemaRef, TheCall,
+ TheCall->getNumArgs(), SemaRef.Context.FloatTy))
+ return true;
+ break;
+ }
+ case Builtin::BI__builtin_hlsl_dot2add: {
+ if (SemaRef.checkArgCount(TheCall, 3))
+ return true;
+ if (CheckVectorElementCallArgs(&SemaRef, TheCall, TheCall->getNumArgs() - 1))
+ return true;
+ if (CheckArgTypeMatches(&SemaRef, TheCall->getArg(2), SemaRef.getASTContext().FloatTy))
+ return true;
+ if (CheckNoDoubleVectors(&SemaRef, TheCall,
+ TheCall->getNumArgs() - 1,
+ SemaRef.Context.HalfTy))
return true;
break;
}
diff --git a/clang/test/SemaHLSL/BuiltIns/Dot2Add-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/Dot2Add-errors.hlsl
new file mode 100644
index 0000000000000..61282a319dafd
--- /dev/null
+++ b/clang/test/SemaHLSL/BuiltIns/Dot2Add-errors.hlsl
@@ -0,0 +1,11 @@
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -emit-llvm-only -disable-llvm-passes -verify
+
+bool test_too_few_arg() {
+ return __builtin_hlsl_dot2add();
+ // expected-error at -1 {{too few arguments to function call, expected 3, have 0}}
+}
+
+bool test_too_many_arg(half2 p1, half2 p2, float p3) {
+ return __builtin_hlsl_dot2add(p1, p2, p3, p1);
+ // expected-error at -1 {{too many arguments to function call, expected 3, have 4}}
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
index 240c36513f0b6..df3e137c80980 100644
--- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td
+++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
@@ -87,11 +87,6 @@ let TargetPrefix = "spv" in {
DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
[llvm_anyint_ty, LLVMScalarOrSameVectorWidth<0, LLVMVectorElementType<0>>],
[IntrNoMem, Commutative] >;
-
- def int_spv_dot2add : DefaultAttrsIntrinsic<[llvm_float_ty],
- [llvm_anyfloat_ty, LLVMMatchType<0>, llvm_float_ty],
- [IntrNoMem, Commutative]>;
-
def int_spv_dot4add_i8packed : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_spv_dot4add_u8packed : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_spv_wave_active_countbits : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>;
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index 10a726aba5ff3..f7ed0c5071d75 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -55,9 +55,8 @@ static SmallVector<Value *> populateOperands(Value *Arg, IRBuilder<> &Builder) {
}
static SmallVector<Value *> argVectorFlatten(CallInst *Orig,
- IRBuilder<> &Builder) {
- // Note: arg[NumOperands-1] is a pointer and is not needed by our flattening.
- unsigned NumOperands = Orig->getNumOperands() - 1;
+ IRBuilder<> &Builder,
+ unsigned NumOperands) {
assert(NumOperands > 0);
Value *Arg0 = Orig->getOperand(0);
[[maybe_unused]] auto *VecArg0 = dyn_cast<FixedVectorType>(Arg0->getType());
@@ -75,6 +74,12 @@ static SmallVector<Value *> argVectorFlatten(CallInst *Orig,
return NewOperands;
}
+static SmallVector<Value *> argVectorFlatten(CallInst *Orig,
+ IRBuilder<> &Builder) {
+ // Note: arg[NumOperands-1] is a pointer and is not needed by our flattening.
+ return argVectorFlatten(Orig, Builder, Orig->getNumOperands() - 1);
+}
+/*
static SmallVector<Value *> argVectorFlattenExcludeLastElement(CallInst *Orig,
IRBuilder<> &Builder) {
// Note: arg[NumOperands-1] is a pointer and is not needed by our flattening.
@@ -95,7 +100,7 @@ static SmallVector<Value *> argVectorFlattenExcludeLastElement(CallInst *Orig,
}
return NewOperands;
}
-
+*/
namespace {
class OpLowerer {
Module &M;
@@ -190,7 +195,13 @@ class OpLowerer {
} else if (IsVectorArgExpansion) {
Args = argVectorFlatten(CI, OpBuilder.getIRB());
} else if (F.getIntrinsicID() == Intrinsic::dx_dot2add) {
- unsigned NumOperands = CI->getNumOperands() - 1;
+ // arg[NumOperands-1] is a pointer and is not needed by our flattening.
+ // arg[NumOperands-2] also does not need to be flattened because it is a scalar.
+ unsigned NumOperands = CI->getNumOperands() - 2;
+ Args.push_back(CI->getArgOperand(NumOperands));
+ Args.append(argVectorFlatten(CI, OpBuilder.getIRB(), NumOperands));
+
+ /*unsigned NumOperands = CI->getNumOperands() - 1;
assert(NumOperands > 0);
Value *LastArg = CI->getOperand(NumOperands - 1);
@@ -201,6 +212,7 @@ class OpLowerer {
//Args = populateOperands(LastArg, OpBuilder.getIRB());
Args.append(argVectorFlattenExcludeLastElement(CI, OpBuilder.getIRB()));
+ */
} else {
Args.append(CI->arg_begin(), CI->arg_end());
}
>From 9599ba2646edf73c4efef0f3fe02b07577374276 Mon Sep 17 00:00:00 2001
From: Sumit Agarwal <sumitagarwal at microsoft.com>
Date: Thu, 20 Mar 2025 13:22:33 -0700
Subject: [PATCH 5/7] End to end working code for dot2add
---
clang/include/clang/Basic/Builtins.td | 4 ++--
clang/lib/Sema/SemaHLSL.cpp | 2 ++
clang/test/CodeGenHLSL/builtins/dot2add.hlsl | 17 ++++++++++-------
3 files changed, 14 insertions(+), 9 deletions(-)
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 8a0237171f7df..0357f02ead1c0 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -4893,8 +4893,8 @@ def HLSLDotProduct : LangBuiltin<"HLSL_LANG"> {
def HLSLDot2Add : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_dot2add"];
- let Attributes = [NoThrow, Const];
- let Prototype = "void(...)";
+ let Attributes = [NoThrow, Const, CustomTypeChecking];
+ let Prototype = "float(...)";
}
def HLSLDot4AddI8Packed : LangBuiltin<"HLSL_LANG"> {
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 98dc1b3fe4e77..1283ecb82b5f4 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -2487,8 +2487,10 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
break;
}
case Builtin::BI__builtin_hlsl_dot2add: {
+ // Check number of arguments should be 3
if (SemaRef.checkArgCount(TheCall, 3))
return true;
+ // Check first two arguments should be vectors of same length
if (CheckVectorElementCallArgs(&SemaRef, TheCall, TheCall->getNumArgs() - 1))
return true;
if (CheckArgTypeMatches(&SemaRef, TheCall->getArg(2), SemaRef.getASTContext().FloatTy))
diff --git a/clang/test/CodeGenHLSL/builtins/dot2add.hlsl b/clang/test/CodeGenHLSL/builtins/dot2add.hlsl
index ee7cb79bb26c4..ce325327a01b5 100644
--- a/clang/test/CodeGenHLSL/builtins/dot2add.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/dot2add.hlsl
@@ -1,14 +1,17 @@
// RUN: %clang_cc1 -finclude-default-header -triple \
// RUN: dxil-pc-shadermodel6.3-compute %s -emit-llvm -disable-llvm-passes -o - | \
-// RUN: FileCheck %s -DTARGET=dx
+// RUN: FileCheck %s --check-prefixes=CHECK,CHECK-DXIL
+// RUN: %clang_cc1 -finclude-default-header -triple \
+// RUN: spirv-pc-vulkan-compute %s -emit-llvm -disable-llvm-passes -o - | \
+// RUN: FileCheck %s --check-prefixes=CHECK,CHECK-SPIRV
// Test basic lowering to runtime function call.
-// CHECK-LABEL: test
float test(half2 p1, half2 p2, float p3) {
- // CHECK-DXIL: %hlsl.dot2add = call reassoc nnan ninf nsz arcp afn float @llvm.dx.dot2add.v2f32(<2 x float> %0, <2 x float> %1, float %2)
- // CHECK: ret float %hlsl.dot2add
+ // CHECK-SPIRV: %[[MUL:.*]] = call {{.*}} float @llvm.spv.fdot.v2f32(<2 x float> %1, <2 x float> %2)
+ // CHECK-SPIRV: %[[C:.*]] = load float, ptr %c.addr, align 4
+ // CHECK-SPIRV: %[[RES:.*]] = fadd {{.*}} float %[[MUL]], %[[C]]
+ // CHECK-DXIL: %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add.v2f32(<2 x float> %0, <2 x float> %1, float %2)
+ // CHECK: ret float %[[RES]]
return dot2add(p1, p2, p3);
-}
-
-// CHECK: declare [[TY]] @llvm.[[TARGET]].dot4add.i8packed([[TY]], [[TY]], [[TY]])
+}
\ No newline at end of file
>From 2c9cee530b1abac5fdd9309fa05d60940e9b7cc7 Mon Sep 17 00:00:00 2001
From: Sumit Agarwal <sumitagarwal at microsoft.com>
Date: Thu, 20 Mar 2025 15:06:37 -0700
Subject: [PATCH 6/7] Update SemaCheck and set return type and make void as
return type in Builtin
---
clang/include/clang/Basic/Builtins.td | 2 +-
clang/lib/Sema/SemaHLSL.cpp | 24 ++++++++++++++++++------
2 files changed, 19 insertions(+), 7 deletions(-)
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 0357f02ead1c0..76ab463ca0ed6 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -4894,7 +4894,7 @@ def HLSLDotProduct : LangBuiltin<"HLSL_LANG"> {
def HLSLDot2Add : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_dot2add"];
let Attributes = [NoThrow, Const, CustomTypeChecking];
- let Prototype = "float(...)";
+ let Prototype = "void(...)";
}
def HLSLDot4AddI8Packed : LangBuiltin<"HLSL_LANG"> {
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 1283ecb82b5f4..399371c4ae2f6 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -2490,15 +2490,27 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
// Check number of arguments should be 3
if (SemaRef.checkArgCount(TheCall, 3))
return true;
- // Check first two arguments should be vectors of same length
- if (CheckVectorElementCallArgs(&SemaRef, TheCall, TheCall->getNumArgs() - 1))
+
+ // Check first two arguments are vector of length 2 with half data type
+ auto checkHalfVectorOfSize2 = [](clang::QualType PassedType) -> bool {
+ if (const auto *VecTy = PassedType->getAs<VectorType>())
+ return !(VecTy->getNumElements() == 2 &&
+ VecTy->getElementType()->isHalfType());
return true;
- if (CheckArgTypeMatches(&SemaRef, TheCall->getArg(2), SemaRef.getASTContext().FloatTy))
+ };
+ if(CheckArgTypeIsCorrect(&SemaRef, TheCall->getArg(0),
+ SemaRef.getASTContext().HalfTy,
+ checkHalfVectorOfSize2))
return true;
- if (CheckNoDoubleVectors(&SemaRef, TheCall,
- TheCall->getNumArgs() - 1,
- SemaRef.Context.HalfTy))
+ if(CheckArgTypeIsCorrect(&SemaRef, TheCall->getArg(1),
+ SemaRef.getASTContext().HalfTy,
+ checkHalfVectorOfSize2))
+ return true;
+
+ // Check third argument is a float
+ if (CheckArgTypeMatches(&SemaRef, TheCall->getArg(2), SemaRef.getASTContext().FloatTy))
return true;
+ TheCall->setType(TheCall->getArg(2)->getType());
break;
}
case Builtin::BI__builtin_hlsl_elementwise_firstbithigh:
>From 2e4fe63bfb8a6f88cc5bf621df550b6501a89115 Mon Sep 17 00:00:00 2001
From: Sumit Agarwal <sumitagarwal at microsoft.com>
Date: Thu, 20 Mar 2025 15:25:01 -0700
Subject: [PATCH 7/7] Revert not required change
---
clang/lib/Sema/SemaHLSL.cpp | 32 +++++++++-----------------------
1 file changed, 9 insertions(+), 23 deletions(-)
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 399371c4ae2f6..9cd7a0dd7aad9 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -1989,7 +1989,7 @@ void SemaHLSL::diagnoseAvailabilityViolations(TranslationUnitDecl *TU) {
}
// Helper function for CheckHLSLBuiltinFunctionCall
-static bool CheckVectorElementCallArgs(Sema *S, CallExpr *TheCall, unsigned NumArgs) {
+static bool CheckVectorElementCallArgs(Sema *S, CallExpr *TheCall) {
assert(TheCall->getNumArgs() > 1);
ExprResult A = TheCall->getArg(0);
@@ -1999,7 +1999,7 @@ static bool CheckVectorElementCallArgs(Sema *S, CallExpr *TheCall, unsigned NumA
SourceLocation BuiltinLoc = TheCall->getBeginLoc();
bool AllBArgAreVectors = true;
- for (unsigned i = 1; i < NumArgs; ++i) {
+ for (unsigned i = 1; i < TheCall->getNumArgs(); ++i) {
ExprResult B = TheCall->getArg(i);
QualType ArgTyB = B.get()->getType();
auto *VecTyB = ArgTyB->getAs<VectorType>();
@@ -2049,10 +2049,6 @@ static bool CheckVectorElementCallArgs(Sema *S, CallExpr *TheCall, unsigned NumA
return false;
}
-static bool CheckVectorElementCallArgs(Sema *S, CallExpr *TheCall) {
- return CheckVectorElementCallArgs(S, TheCall, TheCall->getNumArgs());
-}
-
static bool CheckAllArgsHaveSameType(Sema *S, CallExpr *TheCall) {
assert(TheCall->getNumArgs() > 1);
QualType ArgTy0 = TheCall->getArg(0)->getType();
@@ -2095,10 +2091,10 @@ static bool CheckArgTypeIsCorrect(
return false;
}
-static bool CheckArgTypesAreCorrect(
- Sema *S, CallExpr *TheCall, unsigned NumArgs, QualType ExpectedType,
+static bool CheckAllArgTypesAreCorrect(
+ Sema *S, CallExpr *TheCall, QualType ExpectedType,
llvm::function_ref<bool(clang::QualType PassedType)> Check) {
- for (unsigned i = 0; i < NumArgs; ++i) {
+ for (unsigned i = 0; i < TheCall->getNumArgs(); ++i) {
Expr *Arg = TheCall->getArg(i);
if (CheckArgTypeIsCorrect(S, Arg, ExpectedType, Check)) {
return true;
@@ -2107,13 +2103,6 @@ static bool CheckArgTypesAreCorrect(
return false;
}
-static bool CheckAllArgTypesAreCorrect(
- Sema *S, CallExpr *TheCall, QualType ExpectedType,
- llvm::function_ref<bool(clang::QualType PassedType)> Check) {
- return CheckArgTypesAreCorrect(S, TheCall, TheCall->getNumArgs(),
- ExpectedType, Check);
-}
-
static bool CheckAllArgsHaveFloatRepresentation(Sema *S, CallExpr *TheCall) {
auto checkAllFloatTypes = [](clang::QualType PassedType) -> bool {
return !PassedType->hasFloatingRepresentation();
@@ -2157,17 +2146,15 @@ static bool CheckModifiableLValue(Sema *S, CallExpr *TheCall,
return true;
}
-static bool CheckNoDoubleVectors(Sema *S, CallExpr *TheCall,
- unsigned NumArgs, QualType ExpectedType) {
+static bool CheckNoDoubleVectors(Sema *S, CallExpr *TheCall) {
auto checkDoubleVector = [](clang::QualType PassedType) -> bool {
if (const auto *VecTy = PassedType->getAs<VectorType>())
return VecTy->getElementType()->isDoubleType();
return false;
};
- return CheckArgTypesAreCorrect(S, TheCall, NumArgs,
- ExpectedType, checkDoubleVector);
+ return CheckAllArgTypesAreCorrect(S, TheCall, S->Context.FloatTy,
+ checkDoubleVector);
}
-
static bool CheckFloatingOrIntRepresentation(Sema *S, CallExpr *TheCall) {
auto checkAllSignedTypes = [](clang::QualType PassedType) -> bool {
return !PassedType->hasIntegerRepresentation() &&
@@ -2481,8 +2468,7 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
return true;
if (SemaRef.BuiltinVectorToScalarMath(TheCall))
return true;
- if (CheckNoDoubleVectors(&SemaRef, TheCall,
- TheCall->getNumArgs(), SemaRef.Context.FloatTy))
+ if (CheckNoDoubleVectors(&SemaRef, TheCall))
return true;
break;
}
More information about the llvm-commits
mailing list