[clang] [llvm] [HLSL] Implement a header only distance intrinsic (PR #117240)
Farzon Lotfi via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 25 10:54:15 PST 2024
https://github.com/farzonl updated https://github.com/llvm/llvm-project/pull/117240
>From 49182ef1e88b8cb74aa9136dd88f1139c72a94b4 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Thu, 21 Nov 2024 14:46:31 -0500
Subject: [PATCH 1/6] [HLSL] Implement a header only distance intrinsic
---
clang/lib/Headers/hlsl/hlsl_detail.h | 8 ++
clang/lib/Headers/hlsl/hlsl_intrinsics.h | 39 ++++++++++
clang/test/CodeGenHLSL/builtins/distance.hlsl | 76 +++++++++++++++++++
.../SemaHLSL/BuiltIns/distance-errors.hlsl | 33 ++++++++
4 files changed, 156 insertions(+)
create mode 100644 clang/test/CodeGenHLSL/builtins/distance.hlsl
create mode 100644 clang/test/SemaHLSL/BuiltIns/distance-errors.hlsl
diff --git a/clang/lib/Headers/hlsl/hlsl_detail.h b/clang/lib/Headers/hlsl/hlsl_detail.h
index 8d5fd941331531..99cb3fa4a6d2e8 100644
--- a/clang/lib/Headers/hlsl/hlsl_detail.h
+++ b/clang/lib/Headers/hlsl/hlsl_detail.h
@@ -13,6 +13,14 @@ namespace hlsl {
namespace __detail {
+template <typename T, typename U> struct is_same {
+ static const bool value = false;
+};
+
+template <typename T> struct is_same<T, T> {
+ static const bool value = true;
+};
+
template <bool B, typename T> struct enable_if {};
template <typename T> struct enable_if<true, T> {
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index a3e0b5c65a6f52..bd66c73db27ee9 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -871,6 +871,45 @@ float3 degrees(float3);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_degrees)
float4 degrees(float4);
+//===----------------------------------------------------------------------===//
+// distance builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn K distance(T X, T Y)
+/// \brief Returns a distance scalar between two vectors of \a X and \a Y.
+/// \param X The X input value.
+/// \param Y The Y input value.
+
+template <typename T>
+constexpr __detail::enable_if_t<
+ __detail::is_same<float, T>::value || __detail::is_same<half, T>::value, T>
+distance_impl(T X, T Y) {
+ return __builtin_elementwise_abs(X - Y);
+}
+
+template <typename T, int N>
+constexpr __detail::enable_if_t<
+ __detail::is_same<float, T>::value || __detail::is_same<half, T>::value, T>
+distance_vec_impl(vector<T, N> X, vector<T, N> Y) {
+ return __builtin_hlsl_length(X - Y);
+}
+
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+const inline half distance(half X, half Y) { return distance_impl(X, Y); }
+
+template <int N>
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+const inline half distance(vector<half, N> X, vector<half, N> Y) {
+ return distance_vec_impl(X, Y);
+}
+
+const inline float distance(float X, float Y) { return distance_impl(X, Y); }
+
+template <int N>
+const inline float distance(vector<float, N> X, vector<float, N> Y) {
+ return distance_vec_impl(X, Y);
+}
+
//===----------------------------------------------------------------------===//
// dot product builtins
//===----------------------------------------------------------------------===//
diff --git a/clang/test/CodeGenHLSL/builtins/distance.hlsl b/clang/test/CodeGenHLSL/builtins/distance.hlsl
new file mode 100644
index 00000000000000..2ff2947ac49095
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/distance.hlsl
@@ -0,0 +1,76 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN: -emit-llvm -O1 -o - | FileCheck %s
+
+// CHECK-LABEL: define noundef half @_Z18test_distance_halfDhDh(
+// CHECK-SAME: half noundef [[X:%.*]], half noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = fsub half [[X]], [[Y]]
+// CHECK-NEXT: [[ELT_ABS_I:%.*]] = tail call noundef half @llvm.fabs.f16(half [[SUB_I]])
+// CHECK-NEXT: ret half [[ELT_ABS_I]]
+//
+half test_distance_half(half X, half Y) { return distance(X, Y); }
+
+// CHECK-LABEL: define noundef half @_Z19test_distance_half2Dv2_DhS_(
+// CHECK-SAME: <2 x half> noundef [[X:%.*]], <2 x half> noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = fsub <2 x half> [[X]], [[Y]]
+// CHECK-NEXT: [[HLSL_LENGTH_I:%.*]] = tail call noundef half @llvm.dx.length.v2f16(<2 x half> [[SUB_I]])
+// CHECK-NEXT: ret half [[HLSL_LENGTH_I]]
+//
+half test_distance_half2(half2 X, half2 Y) { return distance(X, Y); }
+
+// CHECK-LABEL: define noundef half @_Z19test_distance_half3Dv3_DhS_(
+// CHECK-SAME: <3 x half> noundef [[X:%.*]], <3 x half> noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = fsub <3 x half> [[X]], [[Y]]
+// CHECK-NEXT: [[HLSL_LENGTH_I:%.*]] = tail call noundef half @llvm.dx.length.v3f16(<3 x half> [[SUB_I]])
+// CHECK-NEXT: ret half [[HLSL_LENGTH_I]]
+//
+half test_distance_half3(half3 X, half3 Y) { return distance(X, Y); }
+
+// CHECK-LABEL: define noundef half @_Z19test_distance_half4Dv4_DhS_(
+// CHECK-SAME: <4 x half> noundef [[X:%.*]], <4 x half> noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = fsub <4 x half> [[X]], [[Y]]
+// CHECK-NEXT: [[HLSL_LENGTH_I:%.*]] = tail call noundef half @llvm.dx.length.v4f16(<4 x half> [[SUB_I]])
+// CHECK-NEXT: ret half [[HLSL_LENGTH_I]]
+//
+half test_distance_half4(half4 X, half4 Y) { return distance(X, Y); }
+
+// CHECK-LABEL: define noundef float @_Z19test_distance_floatff(
+// CHECK-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = fsub float [[X]], [[Y]]
+// CHECK-NEXT: [[ELT_ABS_I:%.*]] = tail call noundef float @llvm.fabs.f32(float [[SUB_I]])
+// CHECK-NEXT: ret float [[ELT_ABS_I]]
+//
+float test_distance_float(float X, float Y) { return distance(X, Y); }
+
+// CHECK-LABEL: define noundef float @_Z20test_distance_float2Dv2_fS_(
+// CHECK-SAME: <2 x float> noundef [[X:%.*]], <2 x float> noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = fsub <2 x float> [[X]], [[Y]]
+// CHECK-NEXT: [[HLSL_LENGTH_I:%.*]] = tail call noundef float @llvm.dx.length.v2f32(<2 x float> [[SUB_I]])
+// CHECK-NEXT: ret float [[HLSL_LENGTH_I]]
+//
+float test_distance_float2(float2 X, float2 Y) { return distance(X, Y); }
+
+// CHECK-LABEL: define noundef float @_Z20test_distance_float3Dv3_fS_(
+// CHECK-SAME: <3 x float> noundef [[X:%.*]], <3 x float> noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = fsub <3 x float> [[X]], [[Y]]
+// CHECK-NEXT: [[HLSL_LENGTH_I:%.*]] = tail call noundef float @llvm.dx.length.v3f32(<3 x float> [[SUB_I]])
+// CHECK-NEXT: ret float [[HLSL_LENGTH_I]]
+//
+float test_distance_float3(float3 X, float3 Y) { return distance(X, Y); }
+
+// CHECK-LABEL: define noundef float @_Z20test_distance_float4Dv4_fS_(
+// CHECK-SAME: <4 x float> noundef [[X:%.*]], <4 x float> noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[SUB_I:%.*]] = fsub <4 x float> [[X]], [[Y]]
+// CHECK-NEXT: [[HLSL_LENGTH_I:%.*]] = tail call noundef float @llvm.dx.length.v4f32(<4 x float> [[SUB_I]])
+// CHECK-NEXT: ret float [[HLSL_LENGTH_I]]
+//
+float test_distance_float4(float4 X, float4 Y) { return distance(X, Y); }
diff --git a/clang/test/SemaHLSL/BuiltIns/distance-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/distance-errors.hlsl
new file mode 100644
index 00000000000000..e996bf5d2cb7c5
--- /dev/null
+++ b/clang/test/SemaHLSL/BuiltIns/distance-errors.hlsl
@@ -0,0 +1,33 @@
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify
+
+float test_no_second_arg(float2 p0) {
+ return distance(p0);
+ // expected-error@-1 {{no matching function for call to 'distance'}}
+ // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires 2 arguments, but 1 was provided}}
+ // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires 2 arguments, but 1 was provided}}
+ // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires 2 arguments, but 1 was provided}}
+ // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires 2 arguments, but 1 was provided}}
+}
+
+float test_too_many_arg(float2 p0) {
+ return distance(p0, p0, p0);
+ // expected-error@-1 {{no matching function for call to 'distance'}}
+ // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires 2 arguments, but 3 were provided}}
+ // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires 2 arguments, but 3 were provided}}
+ // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires 2 arguments, but 3 were provided}}
+ // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires 2 arguments, but 3 were provided}}
+}
+
+float test_double_inputs(double p0, double p1) {
+ return distance(p0, p1);
+ // expected-error@-1 {{call to 'distance' is ambiguous}}
+ // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
+ // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
+}
+
+float test_int_inputs(int p0, int p1) {
+ return distance(p0, p1);
+ // expected-error@-1 {{call to 'distance' is ambiguous}}
+ // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
+ // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
+}
>From 29751cbc663294a101c460a61234a17bea8743fa Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Thu, 21 Nov 2024 18:36:30 -0500
Subject: [PATCH 2/6] move distance_impl to details namespace
---
clang/lib/Headers/hlsl/hlsl_detail.h | 12 +++++++++++
clang/lib/Headers/hlsl/hlsl_intrinsics.h | 26 ++++++++----------------
2 files changed, 20 insertions(+), 18 deletions(-)
diff --git a/clang/lib/Headers/hlsl/hlsl_detail.h b/clang/lib/Headers/hlsl/hlsl_detail.h
index 99cb3fa4a6d2e8..f54f0037cbd066 100644
--- a/clang/lib/Headers/hlsl/hlsl_detail.h
+++ b/clang/lib/Headers/hlsl/hlsl_detail.h
@@ -41,6 +41,18 @@ constexpr enable_if_t<sizeof(U) == sizeof(T), U> bit_cast(T F) {
return __builtin_bit_cast(U, F);
}
+template <typename T>
+constexpr enable_if_t<is_same<float, T>::value || is_same<half, T>::value, T>
+distance_impl(T X, T Y) {
+ return __builtin_elementwise_abs(X - Y);
+}
+
+template <typename T, int N>
+constexpr enable_if_t<is_same<float, T>::value || is_same<half, T>::value, T>
+distance_vec_impl(vector<T, N> X, vector<T, N> Y) {
+ return __builtin_hlsl_length(X - Y);
+}
+
} // namespace __detail
} // namespace hlsl
#endif //_HLSL_HLSL_DETAILS_H_
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index bd66c73db27ee9..a7301aa8d8ce72 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -880,34 +880,24 @@ float4 degrees(float4);
/// \param X The X input value.
/// \param Y The Y input value.
-template <typename T>
-constexpr __detail::enable_if_t<
- __detail::is_same<float, T>::value || __detail::is_same<half, T>::value, T>
-distance_impl(T X, T Y) {
- return __builtin_elementwise_abs(X - Y);
-}
-
-template <typename T, int N>
-constexpr __detail::enable_if_t<
- __detail::is_same<float, T>::value || __detail::is_same<half, T>::value, T>
-distance_vec_impl(vector<T, N> X, vector<T, N> Y) {
- return __builtin_hlsl_length(X - Y);
-}
-
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
-const inline half distance(half X, half Y) { return distance_impl(X, Y); }
+const inline half distance(half X, half Y) {
+ return __detail::distance_impl(X, Y);
+}
template <int N>
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
const inline half distance(vector<half, N> X, vector<half, N> Y) {
- return distance_vec_impl(X, Y);
+ return __detail::distance_vec_impl(X, Y);
}
-const inline float distance(float X, float Y) { return distance_impl(X, Y); }
+const inline float distance(float X, float Y) {
+ return __detail::distance_impl(X, Y);
+}
template <int N>
const inline float distance(vector<float, N> X, vector<float, N> Y) {
- return distance_vec_impl(X, Y);
+ return __detail::distance_vec_impl(X, Y);
}
//===----------------------------------------------------------------------===//
>From 1d439edbd45541672f7ebeb92d1d2fa107010022 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Thu, 21 Nov 2024 21:01:15 -0500
Subject: [PATCH 3/6] [HLSL] Move length to the header
---
clang/include/clang/Basic/Builtins.td | 12 +-
clang/lib/CodeGen/CGBuiltin.cpp | 29 +--
clang/lib/Headers/hlsl/hlsl_detail.h | 22 ++-
clang/lib/Headers/hlsl/hlsl_intrinsics.h | 33 ++--
clang/lib/Sema/SemaHLSL.cpp | 5 +-
clang/test/CodeGenHLSL/builtins/distance.hlsl | 36 ++--
clang/test/CodeGenHLSL/builtins/length.hlsl | 184 +++++++++++-------
.../test/SemaHLSL/BuiltIns/length-errors.hlsl | 51 +++--
llvm/include/llvm/IR/IntrinsicsDirectX.td | 1 -
.../Target/DirectX/DXILIntrinsicExpansion.cpp | 69 ++++---
llvm/test/CodeGen/DirectX/length.ll | 178 +++++++++--------
llvm/test/CodeGen/DirectX/length_error.ll | 10 -
12 files changed, 367 insertions(+), 263 deletions(-)
delete mode 100644 llvm/test/CodeGen/DirectX/length_error.ll
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 83c90b3d6e681b..82acd520c227e9 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -4756,6 +4756,12 @@ def HLSLAsDouble : LangBuiltin<"HLSL_LANG"> {
let Prototype = "void(...)";
}
+def HLSLReduceAdd : LangBuiltin<"HLSL_LANG"> {
+ let Spellings = ["__builtin_hlsl_reduce_add"];
+ let Attributes = [NoThrow, Const, Constexpr];
+ let Prototype = "void(...)";
+}
+
def HLSLWaveActiveAnyTrue : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_wave_active_any_true"];
let Attributes = [NoThrow, Const];
@@ -4840,12 +4846,6 @@ def HLSLIsinf : LangBuiltin<"HLSL_LANG"> {
let Prototype = "void(...)";
}
-def HLSLLength : LangBuiltin<"HLSL_LANG"> {
- let Spellings = ["__builtin_hlsl_length"];
- let Attributes = [NoThrow, Const];
- let Prototype = "void(...)";
-}
-
def HLSLLerp : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_lerp"];
let Attributes = [NoThrow, Const];
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 8f754953d28998..b81d1ba8f6ecce 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -19174,20 +19174,6 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
/*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getLerpIntrinsic(),
ArrayRef<Value *>{X, Y, S}, nullptr, "hlsl.lerp");
}
- case Builtin::BI__builtin_hlsl_length: {
- Value *X = EmitScalarExpr(E->getArg(0));
-
- assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
- "length operand must have a float representation");
- // if the operand is a scalar, we can use the fabs llvm intrinsic directly
- if (!E->getArg(0)->getType()->isVectorType())
- return EmitFAbs(*this, X);
-
- return Builder.CreateIntrinsic(
- /*ReturnType=*/X->getType()->getScalarType(),
- CGM.getHLSLRuntime().getLengthIntrinsic(), ArrayRef<Value *>{X},
- nullptr, "hlsl.length");
- }
case Builtin::BI__builtin_hlsl_normalize: {
Value *X = EmitScalarExpr(E->getArg(0));
@@ -19289,6 +19275,21 @@ case Builtin::BI__builtin_hlsl_elementwise_isinf: {
CGM.getHLSLRuntime().getSaturateIntrinsic(), ArrayRef<Value *>{Op0},
nullptr, "hlsl.saturate");
}
+ case Builtin::BI__builtin_hlsl_reduce_add: {
+ Value *X = EmitScalarExpr(E->getArg(0));
+ auto EltTy = X->getType()->getScalarType();
+ if (E->getArg(0)->getType()->hasFloatingRepresentation()) {
+ Value *Seed = ConstantFP::get(EltTy, 0);
+ return Builder.CreateIntrinsic(
+ /*ReturnType=*/EltTy, llvm::Intrinsic::vector_reduce_fadd,
+ ArrayRef<Value *>{Seed, X}, nullptr, "rdx.fadd");
+ } else {
+ assert(E->getArg(0)->getType()->hasIntegerRepresentation());
+ return Builder.CreateIntrinsic(
+ /*ReturnType=*/EltTy, llvm::Intrinsic::vector_reduce_add,
+ ArrayRef<Value *>{X}, nullptr, "rdx.add");
+ }
+ }
case Builtin::BI__builtin_hlsl_select: {
Value *OpCond = EmitScalarExpr(E->getArg(0));
RValue RValTrue = EmitAnyExpr(E->getArg(1));
diff --git a/clang/lib/Headers/hlsl/hlsl_detail.h b/clang/lib/Headers/hlsl/hlsl_detail.h
index f54f0037cbd066..85ed08a1af06e9 100644
--- a/clang/lib/Headers/hlsl/hlsl_detail.h
+++ b/clang/lib/Headers/hlsl/hlsl_detail.h
@@ -41,16 +41,34 @@ constexpr enable_if_t<sizeof(U) == sizeof(T), U> bit_cast(T F) {
return __builtin_bit_cast(U, F);
}
+template <typename T>
+constexpr enable_if_t<is_same<float, T>::value || is_same<half, T>::value, T>
+length_impl(T X) {
+ return __builtin_elementwise_abs(X);
+}
+
+template <typename T, int N>
+constexpr enable_if_t<is_same<float, T>::value || is_same<half, T>::value, T>
+length_vec_impl(vector<T, N> X) {
+ vector<T, N> XSquared = X * X;
+ T XSquaredSum = __builtin_hlsl_reduce_add(XSquared);
+ /*T XSquaredSum = 0;
+ for(int I = 0; I < N; I++) {
+ XSquaredSum += XSquared[I];
+ }*/
+ return __builtin_elementwise_sqrt(XSquaredSum);
+}
+
template <typename T>
constexpr enable_if_t<is_same<float, T>::value || is_same<half, T>::value, T>
distance_impl(T X, T Y) {
- return __builtin_elementwise_abs(X - Y);
+ return length_impl(X - Y);
}
template <typename T, int N>
constexpr enable_if_t<is_same<float, T>::value || is_same<half, T>::value, T>
distance_vec_impl(vector<T, N> X, vector<T, N> Y) {
- return __builtin_hlsl_length(X - Y);
+ return length_vec_impl(X - Y);
}
} // namespace __detail
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index a7301aa8d8ce72..588335d06ab17f 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -1327,26 +1327,19 @@ float4 lerp(float4, float4, float4);
/// Length is based on the following formula: sqrt(x[0]^2 + x[1]^2 + ...).
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
-half length(half);
-_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
-half length(half2);
-_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
-half length(half3);
-_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
-half length(half4);
-
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
-float length(float);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
-float length(float2);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
-float length(float3);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
-float length(float4);
+const inline half length(half X) { return __detail::length_impl(X); }
+
+template <int N>
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+const inline half length(vector<half, N> X) {
+ return __detail::length_vec_impl(X);
+}
+
+const inline float length(float X) { return __detail::length_impl(X); }
+
+template <int N> const inline float length(vector<float, N> X) {
+ return __detail::length_vec_impl(X);
+}
//===----------------------------------------------------------------------===//
// log builtins
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 2bc93e4ec1181f..de2ff83d63fee4 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -2030,12 +2030,9 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
return true;
break;
}
- case Builtin::BI__builtin_hlsl_length: {
- if (CheckFloatOrHalfRepresentations(&SemaRef, TheCall))
- return true;
+ case Builtin::BI__builtin_hlsl_reduce_add: {
if (SemaRef.checkArgCount(TheCall, 1))
return true;
-
ExprResult A = TheCall->getArg(0);
QualType ArgTyA = A.get()->getType();
QualType RetTy;
diff --git a/clang/test/CodeGenHLSL/builtins/distance.hlsl b/clang/test/CodeGenHLSL/builtins/distance.hlsl
index 2ff2947ac49095..ea7988122ae6d4 100644
--- a/clang/test/CodeGenHLSL/builtins/distance.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/distance.hlsl
@@ -16,8 +16,10 @@ half test_distance_half(half X, half Y) { return distance(X, Y); }
// CHECK-SAME: <2 x half> noundef [[X:%.*]], <2 x half> noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[SUB_I:%.*]] = fsub <2 x half> [[X]], [[Y]]
-// CHECK-NEXT: [[HLSL_LENGTH_I:%.*]] = tail call noundef half @llvm.dx.length.v2f16(<2 x half> [[SUB_I]])
-// CHECK-NEXT: ret half [[HLSL_LENGTH_I]]
+// CHECK-NEXT: [[MUL_I:%.*]] = fmul <2 x half> [[SUB_I]], [[SUB_I]]
+// CHECK-NEXT: [[RDX_FADD_I:%.*]] = tail call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> [[MUL_I]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef half @llvm.sqrt.f16(half [[RDX_FADD_I]])
+// CHECK-NEXT: ret half [[TMP0]]
//
half test_distance_half2(half2 X, half2 Y) { return distance(X, Y); }
@@ -25,8 +27,10 @@ half test_distance_half2(half2 X, half2 Y) { return distance(X, Y); }
// CHECK-SAME: <3 x half> noundef [[X:%.*]], <3 x half> noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[SUB_I:%.*]] = fsub <3 x half> [[X]], [[Y]]
-// CHECK-NEXT: [[HLSL_LENGTH_I:%.*]] = tail call noundef half @llvm.dx.length.v3f16(<3 x half> [[SUB_I]])
-// CHECK-NEXT: ret half [[HLSL_LENGTH_I]]
+// CHECK-NEXT: [[MUL_I:%.*]] = fmul <3 x half> [[SUB_I]], [[SUB_I]]
+// CHECK-NEXT: [[RDX_FADD_I:%.*]] = tail call half @llvm.vector.reduce.fadd.v3f16(half 0xH0000, <3 x half> [[MUL_I]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef half @llvm.sqrt.f16(half [[RDX_FADD_I]])
+// CHECK-NEXT: ret half [[TMP0]]
//
half test_distance_half3(half3 X, half3 Y) { return distance(X, Y); }
@@ -34,8 +38,10 @@ half test_distance_half3(half3 X, half3 Y) { return distance(X, Y); }
// CHECK-SAME: <4 x half> noundef [[X:%.*]], <4 x half> noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[SUB_I:%.*]] = fsub <4 x half> [[X]], [[Y]]
-// CHECK-NEXT: [[HLSL_LENGTH_I:%.*]] = tail call noundef half @llvm.dx.length.v4f16(<4 x half> [[SUB_I]])
-// CHECK-NEXT: ret half [[HLSL_LENGTH_I]]
+// CHECK-NEXT: [[MUL_I:%.*]] = fmul <4 x half> [[SUB_I]], [[SUB_I]]
+// CHECK-NEXT: [[RDX_FADD_I:%.*]] = tail call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> [[MUL_I]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef half @llvm.sqrt.f16(half [[RDX_FADD_I]])
+// CHECK-NEXT: ret half [[TMP0]]
//
half test_distance_half4(half4 X, half4 Y) { return distance(X, Y); }
@@ -52,8 +58,10 @@ float test_distance_float(float X, float Y) { return distance(X, Y); }
// CHECK-SAME: <2 x float> noundef [[X:%.*]], <2 x float> noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[SUB_I:%.*]] = fsub <2 x float> [[X]], [[Y]]
-// CHECK-NEXT: [[HLSL_LENGTH_I:%.*]] = tail call noundef float @llvm.dx.length.v2f32(<2 x float> [[SUB_I]])
-// CHECK-NEXT: ret float [[HLSL_LENGTH_I]]
+// CHECK-NEXT: [[MUL_I:%.*]] = fmul <2 x float> [[SUB_I]], [[SUB_I]]
+// CHECK-NEXT: [[RDX_FADD_I:%.*]] = tail call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> [[MUL_I]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[RDX_FADD_I]])
+// CHECK-NEXT: ret float [[TMP0]]
//
float test_distance_float2(float2 X, float2 Y) { return distance(X, Y); }
@@ -61,8 +69,10 @@ float test_distance_float2(float2 X, float2 Y) { return distance(X, Y); }
// CHECK-SAME: <3 x float> noundef [[X:%.*]], <3 x float> noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[SUB_I:%.*]] = fsub <3 x float> [[X]], [[Y]]
-// CHECK-NEXT: [[HLSL_LENGTH_I:%.*]] = tail call noundef float @llvm.dx.length.v3f32(<3 x float> [[SUB_I]])
-// CHECK-NEXT: ret float [[HLSL_LENGTH_I]]
+// CHECK-NEXT: [[MUL_I:%.*]] = fmul <3 x float> [[SUB_I]], [[SUB_I]]
+// CHECK-NEXT: [[RDX_FADD_I:%.*]] = tail call float @llvm.vector.reduce.fadd.v3f32(float 0.000000e+00, <3 x float> [[MUL_I]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[RDX_FADD_I]])
+// CHECK-NEXT: ret float [[TMP0]]
//
float test_distance_float3(float3 X, float3 Y) { return distance(X, Y); }
@@ -70,7 +80,9 @@ float test_distance_float3(float3 X, float3 Y) { return distance(X, Y); }
// CHECK-SAME: <4 x float> noundef [[X:%.*]], <4 x float> noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[SUB_I:%.*]] = fsub <4 x float> [[X]], [[Y]]
-// CHECK-NEXT: [[HLSL_LENGTH_I:%.*]] = tail call noundef float @llvm.dx.length.v4f32(<4 x float> [[SUB_I]])
-// CHECK-NEXT: ret float [[HLSL_LENGTH_I]]
+// CHECK-NEXT: [[MUL_I:%.*]] = fmul <4 x float> [[SUB_I]], [[SUB_I]]
+// CHECK-NEXT: [[RDX_FADD_I:%.*]] = tail call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[MUL_I]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[RDX_FADD_I]])
+// CHECK-NEXT: ret float [[TMP0]]
//
float test_distance_float4(float4 X, float4 Y) { return distance(X, Y); }
diff --git a/clang/test/CodeGenHLSL/builtins/length.hlsl b/clang/test/CodeGenHLSL/builtins/length.hlsl
index 1c23b0df04df98..af9713bfd7628c 100644
--- a/clang/test/CodeGenHLSL/builtins/length.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/length.hlsl
@@ -1,73 +1,111 @@
-// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
-// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
-// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
-// RUN: --check-prefixes=CHECK,NATIVE_HALF
-// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
-// RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
-// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF
-
-// NATIVE_HALF: define noundef half @
-// NATIVE_HALF: call half @llvm.fabs.f16(half
-// NO_HALF: call float @llvm.fabs.f32(float
-// NATIVE_HALF: ret half
-// NO_HALF: ret float
-half test_length_half(half p0)
-{
- return length(p0);
-}
-// NATIVE_HALF: define noundef half @
-// NATIVE_HALF: %hlsl.length = call half @llvm.dx.length.v2f16
-// NO_HALF: %hlsl.length = call float @llvm.dx.length.v2f32(
-// NATIVE_HALF: ret half %hlsl.length
-// NO_HALF: ret float %hlsl.length
-half test_length_half2(half2 p0)
-{
- return length(p0);
-}
-// NATIVE_HALF: define noundef half @
-// NATIVE_HALF: %hlsl.length = call half @llvm.dx.length.v3f16
-// NO_HALF: %hlsl.length = call float @llvm.dx.length.v3f32(
-// NATIVE_HALF: ret half %hlsl.length
-// NO_HALF: ret float %hlsl.length
-half test_length_half3(half3 p0)
-{
- return length(p0);
-}
-// NATIVE_HALF: define noundef half @
-// NATIVE_HALF: %hlsl.length = call half @llvm.dx.length.v4f16
-// NO_HALF: %hlsl.length = call float @llvm.dx.length.v4f32(
-// NATIVE_HALF: ret half %hlsl.length
-// NO_HALF: ret float %hlsl.length
-half test_length_half4(half4 p0)
-{
- return length(p0);
-}
-
-// CHECK: define noundef float @
-// CHECK: call float @llvm.fabs.f32(float
-// CHECK: ret float
-float test_length_float(float p0)
-{
- return length(p0);
-}
-// CHECK: define noundef float @
-// CHECK: %hlsl.length = call float @llvm.dx.length.v2f32(
-// CHECK: ret float %hlsl.length
-float test_length_float2(float2 p0)
-{
- return length(p0);
-}
-// CHECK: define noundef float @
-// CHECK: %hlsl.length = call float @llvm.dx.length.v3f32(
-// CHECK: ret float %hlsl.length
-float test_length_float3(float3 p0)
-{
- return length(p0);
-}
-// CHECK: define noundef float @
-// CHECK: %hlsl.length = call float @llvm.dx.length.v4f32(
-// CHECK: ret float %hlsl.length
-float test_length_float4(float4 p0)
-{
- return length(p0);
-}
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN: -emit-llvm -O1 -o - | FileCheck %s
+
+
+// CHECK-LABEL: define noundef half @_Z16test_length_halfDh(
+// CHECK-SAME: half noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ELT_ABS_I:%.*]] = tail call noundef half @llvm.fabs.f16(half [[P0]])
+// CHECK-NEXT: ret half [[ELT_ABS_I]]
+//
+half test_length_half(half p0)
+{
+ return length(p0);
+}
+
+// CHECK-LABEL: define noundef half @_Z17test_length_half2Dv2_Dh(
+// CHECK-SAME: <2 x half> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = fmul <2 x half> [[P0]], [[P0]]
+// CHECK-NEXT: [[RDX_FADD_I:%.*]] = tail call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> [[MUL_I]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef half @llvm.sqrt.f16(half [[RDX_FADD_I]])
+// CHECK-NEXT: ret half [[TMP0]]
+//
+half test_length_half2(half2 p0)
+{
+ return length(p0);
+}
+
+// CHECK-LABEL: define noundef half @_Z17test_length_half3Dv3_Dh(
+// CHECK-SAME: <3 x half> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = fmul <3 x half> [[P0]], [[P0]]
+// CHECK-NEXT: [[RDX_FADD_I:%.*]] = tail call half @llvm.vector.reduce.fadd.v3f16(half 0xH0000, <3 x half> [[MUL_I]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef half @llvm.sqrt.f16(half [[RDX_FADD_I]])
+// CHECK-NEXT: ret half [[TMP0]]
+//
+half test_length_half3(half3 p0)
+{
+ return length(p0);
+}
+
+// CHECK-LABEL: define noundef half @_Z17test_length_half4Dv4_Dh(
+// CHECK-SAME: <4 x half> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = fmul <4 x half> [[P0]], [[P0]]
+// CHECK-NEXT: [[RDX_FADD_I:%.*]] = tail call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> [[MUL_I]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef half @llvm.sqrt.f16(half [[RDX_FADD_I]])
+// CHECK-NEXT: ret half [[TMP0]]
+//
+half test_length_half4(half4 p0)
+{
+ return length(p0);
+}
+
+
+// CHECK-LABEL: define noundef float @_Z17test_length_floatf(
+// CHECK-SAME: float noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ELT_ABS_I:%.*]] = tail call noundef float @llvm.fabs.f32(float [[P0]])
+// CHECK-NEXT: ret float [[ELT_ABS_I]]
+//
+float test_length_float(float p0)
+{
+ return length(p0);
+}
+
+// CHECK-LABEL: define noundef float @_Z18test_length_float2Dv2_f(
+// CHECK-SAME: <2 x float> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = fmul <2 x float> [[P0]], [[P0]]
+// CHECK-NEXT: [[RDX_FADD_I:%.*]] = tail call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> [[MUL_I]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[RDX_FADD_I]])
+// CHECK-NEXT: ret float [[TMP0]]
+//
+float test_length_float2(float2 p0)
+{
+ return length(p0);
+}
+
+// CHECK-LABEL: define noundef float @_Z18test_length_float3Dv3_f(
+// CHECK-SAME: <3 x float> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = fmul <3 x float> [[P0]], [[P0]]
+// CHECK-NEXT: [[RDX_FADD_I:%.*]] = tail call float @llvm.vector.reduce.fadd.v3f32(float 0.000000e+00, <3 x float> [[MUL_I]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[RDX_FADD_I]])
+// CHECK-NEXT: ret float [[TMP0]]
+//
+float test_length_float3(float3 p0)
+{
+ return length(p0);
+}
+
+// CHECK-LABEL: define noundef float @_Z18test_length_float4Dv4_f(
+// CHECK-SAME: <4 x float> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = fmul <4 x float> [[P0]], [[P0]]
+// CHECK-NEXT: [[RDX_FADD_I:%.*]] = tail call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[MUL_I]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[RDX_FADD_I]])
+// CHECK-NEXT: ret float [[TMP0]]
+//
+loat3 p0)
+{
+ return length(p0);
+}
+
+float test_length_float4(float4 p0)
+{
+ return length(p0);
+}
diff --git a/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl
index 281faada6f5e94..a191f0419fbbaf 100644
--- a/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl
@@ -1,32 +1,53 @@
-// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -disable-llvm-passes -verify -verify-ignore-unexpected
-
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify
void test_too_few_arg()
{
- return __builtin_hlsl_length();
- // expected-error at -1 {{too few arguments to function call, expected 1, have 0}}
+ return length();
+ // expected-error at -1 {{no matching function for call to 'length'}}
+ // expected-note at hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires single argument 'X', but no arguments were provided}}
+ // expected-note at hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires single argument 'X', but no arguments were provided}}
+ // expected-note at hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires single argument 'X', but no arguments were provided}}
+ // expected-note at hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires single argument 'X', but no arguments were provided}}
}
void test_too_many_arg(float2 p0)
{
- return __builtin_hlsl_length(p0, p0);
- // expected-error at -1 {{too many arguments to function call, expected 1, have 2}}
+ return length(p0, p0);
+ // expected-error at -1 {{no matching function for call to 'length'}}
+ // expected-note at hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires single argument 'X', but 2 arguments were provided}}
+ // expected-note at hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires single argument 'X', but 2 arguments were provided}}
+ // expected-note at hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires single argument 'X', but 2 arguments were provided}}
+ // expected-note at hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires single argument 'X', but 2 arguments were provided}}
+}
+
+float double_to_float_type(double p0) {
+ return length(p0);
+ // expected-error at -1 {{call to 'length' is ambiguous}}
+ // expected-note at hlsl/hlsl_intrinsics.h:* {{candidate function}}
+ // expected-note at hlsl/hlsl_intrinsics.h:* {{candidate function}}
}
-bool builtin_bool_to_float_type_promotion(bool p1)
+
+float bool_to_float_type_promotion(bool p1)
{
- return __builtin_hlsl_length(p1);
- // expected-error at -1 {passing 'bool' to parameter of incompatible type 'float'}}
+ return length(p1);
+ // expected-error at -1 {{call to 'length' is ambiguous}}
+ // expected-note at hlsl/hlsl_intrinsics.h:* {{candidate function}}
+ // expected-note at hlsl/hlsl_intrinsics.h:* {{candidate function}}
}
-bool builtin_length_int_to_float_promotion(int p1)
+float length_int_to_float_promotion(int p1)
{
- return __builtin_hlsl_length(p1);
- // expected-error at -1 {{passing 'int' to parameter of incompatible type 'float'}}
+ return length(p1);
+ // expected-error at -1 {{call to 'length' is ambiguous}}
+ // expected-note at hlsl/hlsl_intrinsics.h:* {{candidate function}}
+ // expected-note at hlsl/hlsl_intrinsics.h:* {{candidate function}}
}
-bool2 builtin_length_int2_to_float2_promotion(int2 p1)
+float2 length_int2_to_float2_promotion(int2 p1)
{
- return __builtin_hlsl_length(p1);
- // expected-error at -1 {{passing 'int2' (aka 'vector<int, 2>') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(float)))) float' (vector of 2 'float' values)}}
+ return length(p1);
+ // expected-error at -1 {{call to 'length' is ambiguous}}
+ // expected-note at hlsl/hlsl_intrinsics.h:* {{candidate function}}
+ // expected-note at hlsl/hlsl_intrinsics.h:* {{candidate function}}
}
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index dad60a2535cf4d..e5ac98430f5ced 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -89,7 +89,6 @@ def int_dx_isinf : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1
def int_dx_lerp : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>,LLVMMatchType<0>],
[IntrNoMem]>;
-def int_dx_length : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], [llvm_anyfloat_ty], [IntrNoMem]>;
def int_dx_imad : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
def int_dx_umad : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
def int_dx_normalize : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty], [IntrNoMem]>;
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index d2bfca1fada559..37c20459fa4bf4 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -59,7 +59,6 @@ static bool isIntrinsicExpansion(Function &F) {
case Intrinsic::dx_nclamp:
case Intrinsic::dx_degrees:
case Intrinsic::dx_lerp:
- case Intrinsic::dx_length:
case Intrinsic::dx_normalize:
case Intrinsic::dx_fdot:
case Intrinsic::dx_sdot:
@@ -67,11 +66,44 @@ static bool isIntrinsicExpansion(Function &F) {
case Intrinsic::dx_sign:
case Intrinsic::dx_step:
case Intrinsic::dx_radians:
+ case Intrinsic::vector_reduce_add:
+ case Intrinsic::vector_reduce_fadd:
return true;
}
return false;
}
+static Value *expandVecReduceFAdd(CallInst *Orig) {
+ Value *Sum = Orig->getOperand(0);
+ Value *X = Orig->getOperand(1);
+ IRBuilder<> Builder(Orig);
+ Type *Ty = X->getType();
+ auto *XVec = dyn_cast<FixedVectorType>(Ty);
+ unsigned XVecSize = XVec->getNumElements();
+
+ for (unsigned I = 0; I < XVecSize; I++) {
+ Value *Elt = Builder.CreateExtractElement(X, I);
+ Sum = Builder.CreateFAdd(Sum, Elt);
+ }
+ return Sum;
+}
+
+static Value *expandVecReduceAdd(CallInst *Orig) {
+ Value *X = Orig->getOperand(0);
+ IRBuilder<> Builder(Orig);
+ Type *Ty = X->getType();
+ Type *EltTy = Ty->getScalarType();
+ auto *XVec = dyn_cast<FixedVectorType>(Ty);
+ unsigned XVecSize = XVec->getNumElements();
+
+ Value *Sum = ConstantInt::get(EltTy, 0);
+ for (unsigned I = 0; I < XVecSize; I++) {
+ Value *Elt = Builder.CreateExtractElement(X, I);
+ Sum = Builder.CreateAdd(Sum, Elt);
+ }
+ return Sum;
+}
+
static Value *expandAbs(CallInst *Orig) {
Value *X = Orig->getOperand(0);
IRBuilder<> Builder(Orig);
@@ -261,32 +293,6 @@ static Value *expandAnyOrAllIntrinsic(CallInst *Orig,
return Result;
}
-static Value *expandLengthIntrinsic(CallInst *Orig) {
- Value *X = Orig->getOperand(0);
- IRBuilder<> Builder(Orig);
- Type *Ty = X->getType();
- Type *EltTy = Ty->getScalarType();
-
- // Though dx.length does work on scalar type, we can optimize it to just emit
- // fabs, in CGBuiltin.cpp. We shouldn't see a scalar type here because
- // CGBuiltin.cpp should have emitted a fabs call.
- Value *Elt = Builder.CreateExtractElement(X, (uint64_t)0);
- auto *XVec = dyn_cast<FixedVectorType>(Ty);
- unsigned XVecSize = XVec->getNumElements();
- if (!(Ty->isVectorTy() && XVecSize > 1))
- report_fatal_error(Twine("Invalid input type for length intrinsic"),
- /* gen_crash_diag=*/false);
-
- Value *Sum = Builder.CreateFMul(Elt, Elt);
- for (unsigned I = 1; I < XVecSize; I++) {
- Elt = Builder.CreateExtractElement(X, I);
- Value *Mul = Builder.CreateFMul(Elt, Elt);
- Sum = Builder.CreateFAdd(Sum, Mul);
- }
- return Builder.CreateIntrinsic(EltTy, Intrinsic::sqrt, ArrayRef<Value *>{Sum},
- nullptr, "elt.sqrt");
-}
-
static Value *expandLerpIntrinsic(CallInst *Orig) {
Value *X = Orig->getOperand(0);
Value *Y = Orig->getOperand(1);
@@ -558,9 +564,6 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
case Intrinsic::dx_lerp:
Result = expandLerpIntrinsic(Orig);
break;
- case Intrinsic::dx_length:
- Result = expandLengthIntrinsic(Orig);
- break;
case Intrinsic::dx_normalize:
Result = expandNormalizeIntrinsic(Orig);
break;
@@ -580,6 +583,12 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
case Intrinsic::dx_radians:
Result = expandRadiansIntrinsic(Orig);
break;
+ case Intrinsic::vector_reduce_add:
+ Result = expandVecReduceAdd(Orig);
+ break;
+ case Intrinsic::vector_reduce_fadd:
+ Result = expandVecReduceFAdd(Orig);
+ break;
}
if (Result) {
Orig->replaceAllUsesWith(Result);
diff --git a/llvm/test/CodeGen/DirectX/length.ll b/llvm/test/CodeGen/DirectX/length.ll
index fc5868a7f6e82c..8df5aca6e10df3 100644
--- a/llvm/test/CodeGen/DirectX/length.ll
+++ b/llvm/test/CodeGen/DirectX/length.ll
@@ -3,114 +3,140 @@
; Make sure dxil operation function calls for length are generated for half/float.
-declare half @llvm.fabs.f16(half)
-declare half @llvm.dx.length.v2f16(<2 x half>)
-declare half @llvm.dx.length.v3f16(<3 x half>)
-declare half @llvm.dx.length.v4f16(<4 x half>)
-
-declare float @llvm.fabs.f32(float)
-declare float @llvm.dx.length.v2f32(<2 x float>)
-declare float @llvm.dx.length.v3f32(<3 x float>)
-declare float @llvm.dx.length.v4f32(<4 x float>)
-
define noundef half @test_length_half2(<2 x half> noundef %p0) {
+; CHECK-LABEL: define noundef half @test_length_half2(
+; CHECK-SAME: <2 x half> noundef [[P0:%.*]]) {
+; CHECK: [[ENTRY:.*:]]
+; CHECK: [[MUL_I:%.*]] = fmul <2 x half> [[P0]], [[P0]]
+; CHECK: [[TMP0:%.*]] = extractelement <2 x half> [[MUL_I]], i64 0
+; CHECK: [[TMP1:%.*]] = fadd half 0xH0000, [[TMP0]]
+; CHECK: [[TMP2:%.*]] = extractelement <2 x half> [[MUL_I]], i64 1
+; CHECK: [[TMP3:%.*]] = fadd half [[TMP1]], [[TMP2]]
+; EXPCHECK: [[HLSL_LENGTH:%.*]] = call half @llvm.sqrt.f16(half [[TMP3]])
+; DOPCHECK: [[HLSL_LENGTH:%.*]] = call half @dx.op.unary.f16(i32 24, half [[TMP3]])
+; CHECK: ret half [[HLSL_LENGTH]]
+;
entry:
- ; CHECK: extractelement <2 x half> %{{.*}}, i64 0
- ; CHECK: fmul half %{{.*}}, %{{.*}}
- ; CHECK: extractelement <2 x half> %{{.*}}, i64 1
- ; CHECK: fmul half %{{.*}}, %{{.*}}
- ; CHECK: fadd half %{{.*}}, %{{.*}}
- ; EXPCHECK: call half @llvm.sqrt.f16(half %{{.*}})
- ; DOPCHECK: call half @dx.op.unary.f16(i32 24, half %{{.*}})
- %hlsl.length = call half @llvm.dx.length.v2f16(<2 x half> %p0)
+ %mul.i = fmul <2 x half> %p0, %p0
+ %rdx.fadd.i = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> %mul.i)
+ %hlsl.length = call half @llvm.sqrt.f16(half %rdx.fadd.i)
ret half %hlsl.length
}
define noundef half @test_length_half3(<3 x half> noundef %p0) {
+; CHECK-LABEL: define noundef half @test_length_half3(
+; CHECK-SAME: <3 x half> noundef [[P0:%.*]]) {
+; CHECK: [[ENTRY:.*:]]
+; CHECK: [[MUL_I:%.*]] = fmul <3 x half> [[P0]], [[P0]]
+; CHECK: [[TMP0:%.*]] = extractelement <3 x half> [[MUL_I]], i64 0
+; CHECK: [[TMP1:%.*]] = fadd half 0xH0000, [[TMP0]]
+; CHECK: [[TMP2:%.*]] = extractelement <3 x half> [[MUL_I]], i64 1
+; CHECK: [[TMP3:%.*]] = fadd half [[TMP1]], [[TMP2]]
+; CHECK: [[TMP4:%.*]] = extractelement <3 x half> [[MUL_I]], i64 2
+; CHECK: [[TMP5:%.*]] = fadd half [[TMP3]], [[TMP4]]
+; EXPCHECK: [[HLSL_LENGTH:%.*]] = call half @llvm.sqrt.f16(half [[TMP5]])
+; DOPCHECK: [[HLSL_LENGTH:%.*]] = call half @dx.op.unary.f16(i32 24, half [[TMP5]])
+; CHECK: ret half [[HLSL_LENGTH]]
+;
entry:
- ; CHECK: extractelement <3 x half> %{{.*}}, i64 0
- ; CHECK: fmul half %{{.*}}, %{{.*}}
- ; CHECK: extractelement <3 x half> %{{.*}}, i64 1
- ; CHECK: fmul half %{{.*}}, %{{.*}}
- ; CHECK: fadd half %{{.*}}, %{{.*}}
- ; CHECK: extractelement <3 x half> %{{.*}}, i64 2
- ; CHECK: fmul half %{{.*}}, %{{.*}}
- ; CHECK: fadd half %{{.*}}, %{{.*}}
- ; EXPCHECK: call half @llvm.sqrt.f16(half %{{.*}})
- ; DOPCHECK: call half @dx.op.unary.f16(i32 24, half %{{.*}})
- %hlsl.length = call half @llvm.dx.length.v3f16(<3 x half> %p0)
+ %mul.i = fmul <3 x half> %p0, %p0
+ %rdx.fadd.i = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <3 x half> %mul.i)
+ %hlsl.length = call half @llvm.sqrt.f16(half %rdx.fadd.i)
ret half %hlsl.length
}
define noundef half @test_length_half4(<4 x half> noundef %p0) {
+; CHECK-LABEL: define noundef half @test_length_half4(
+; CHECK-SAME: <4 x half> noundef [[P0:%.*]]) {
+; CHECK: [[ENTRY:.*:]]
+; CHECK: [[MUL_I:%.*]] = fmul <4 x half> [[P0]], [[P0]]
+; CHECK: [[TMP0:%.*]] = extractelement <4 x half> [[MUL_I]], i64 0
+; CHECK: [[TMP1:%.*]] = fadd half 0xH0000, [[TMP0]]
+; CHECK: [[TMP2:%.*]] = extractelement <4 x half> [[MUL_I]], i64 1
+; CHECK: [[TMP3:%.*]] = fadd half [[TMP1]], [[TMP2]]
+; CHECK: [[TMP4:%.*]] = extractelement <4 x half> [[MUL_I]], i64 2
+; CHECK: [[TMP5:%.*]] = fadd half [[TMP3]], [[TMP4]]
+; CHECK: [[TMP6:%.*]] = extractelement <4 x half> [[MUL_I]], i64 3
+; CHECK: [[TMP7:%.*]] = fadd half [[TMP5]], [[TMP6]]
+; EXPCHECK: [[HLSL_LENGTH:%.*]] = call half @llvm.sqrt.f16(half [[TMP7]])
+; DOPCHECK: [[HLSL_LENGTH:%.*]] = call half @dx.op.unary.f16(i32 24, half [[TMP7]])
+; CHECK: ret half [[HLSL_LENGTH]]
+;
entry:
- ; CHECK: extractelement <4 x half> %{{.*}}, i64 0
- ; CHECK: fmul half %{{.*}}, %{{.*}}
- ; CHECK: extractelement <4 x half> %{{.*}}, i64 1
- ; CHECK: fmul half %{{.*}}, %{{.*}}
- ; CHECK: fadd half %{{.*}}, %{{.*}}
- ; CHECK: extractelement <4 x half> %{{.*}}, i64 2
- ; CHECK: fmul half %{{.*}}, %{{.*}}
- ; CHECK: fadd half %{{.*}}, %{{.*}}
- ; CHECK: extractelement <4 x half> %{{.*}}, i64 3
- ; CHECK: fmul half %{{.*}}, %{{.*}}
- ; CHECK: fadd half %{{.*}}, %{{.*}}
- ; EXPCHECK: call half @llvm.sqrt.f16(half %{{.*}})
- ; DOPCHECK: call half @dx.op.unary.f16(i32 24, half %{{.*}})
- %hlsl.length = call half @llvm.dx.length.v4f16(<4 x half> %p0)
+ %mul.i = fmul <4 x half> %p0, %p0
+ %rdx.fadd.i = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <4 x half> %mul.i)
+ %hlsl.length = call half @llvm.sqrt.f16(half %rdx.fadd.i)
ret half %hlsl.length
}
define noundef float @test_length_float2(<2 x float> noundef %p0) {
+; CHECK-LABEL: define noundef float @test_length_float2(
+; CHECK-SAME: <2 x float> noundef [[P0:%.*]]) {
+; CHECK: [[ENTRY:.*:]]
+; CHECK: [[MUL_I:%.*]] = fmul <2 x float> [[P0]], [[P0]]
+; CHECK: [[TMP0:%.*]] = extractelement <2 x float> [[MUL_I]], i64 0
+; CHECK: [[TMP1:%.*]] = fadd float 0.000000e+00, [[TMP0]]
+; CHECK: [[TMP2:%.*]] = extractelement <2 x float> [[MUL_I]], i64 1
+; CHECK: [[TMP3:%.*]] = fadd float [[TMP1]], [[TMP2]]
+; EXPCHECK: [[HLSL_LENGTH:%.*]] = call float @llvm.sqrt.f32(float [[TMP3]])
+; DOPCHECK: [[HLSL_LENGTH:%.*]] = call float @dx.op.unary.f32(i32 24, float [[TMP3]])
+; CHECK: ret float [[HLSL_LENGTH]]
+;
entry:
- ; CHECK: extractelement <2 x float> %{{.*}}, i64 0
- ; CHECK: fmul float %{{.*}}, %{{.*}}
- ; CHECK: extractelement <2 x float> %{{.*}}, i64 1
- ; CHECK: fmul float %{{.*}}, %{{.*}}
- ; CHECK: fadd float %{{.*}}, %{{.*}}
- ; EXPCHECK: call float @llvm.sqrt.f32(float %{{.*}})
- ; DOPCHECK: call float @dx.op.unary.f32(i32 24, float %{{.*}})
- %hlsl.length = call float @llvm.dx.length.v2f32(<2 x float> %p0)
+ %mul.i = fmul <2 x float> %p0, %p0
+ %rdx.fadd.i = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> %mul.i)
+ %hlsl.length = call float @llvm.sqrt.f32(float %rdx.fadd.i)
ret float %hlsl.length
}
define noundef float @test_length_float3(<3 x float> noundef %p0) {
+; CHECK-LABEL: define noundef float @test_length_float3(
+; CHECK-SAME: <3 x float> noundef [[P0:%.*]]) {
+; CHECK: [[ENTRY:.*:]]
+; CHECK: [[MUL_I:%.*]] = fmul <3 x float> [[P0]], [[P0]]
+; CHECK: [[TMP0:%.*]] = extractelement <3 x float> [[MUL_I]], i64 0
+; CHECK: [[TMP1:%.*]] = fadd float 0.000000e+00, [[TMP0]]
+; CHECK: [[TMP2:%.*]] = extractelement <3 x float> [[MUL_I]], i64 1
+; CHECK: [[TMP3:%.*]] = fadd float [[TMP1]], [[TMP2]]
+; CHECK: [[TMP4:%.*]] = extractelement <3 x float> [[MUL_I]], i64 2
+; CHECK: [[TMP5:%.*]] = fadd float [[TMP3]], [[TMP4]]
+; EXPCHECK: [[HLSL_LENGTH:%.*]] = call float @llvm.sqrt.f32(float [[TMP5]])
+; DOPCHECK: [[HLSL_LENGTH:%.*]] = call float @dx.op.unary.f32(i32 24, float [[TMP5]])
+; CHECK: ret float [[HLSL_LENGTH]]
+;
entry:
- ; CHECK: extractelement <3 x float> %{{.*}}, i64 0
- ; CHECK: fmul float %{{.*}}, %{{.*}}
- ; CHECK: extractelement <3 x float> %{{.*}}, i64 1
- ; CHECK: fmul float %{{.*}}, %{{.*}}
- ; CHECK: fadd float %{{.*}}, %{{.*}}
- ; CHECK: extractelement <3 x float> %{{.*}}, i64 2
- ; CHECK: fmul float %{{.*}}, %{{.*}}
- ; CHECK: fadd float %{{.*}}, %{{.*}}
- ; EXPCHECK: call float @llvm.sqrt.f32(float %{{.*}})
- ; DOPCHECK: call float @dx.op.unary.f32(i32 24, float %{{.*}})
- %hlsl.length = call float @llvm.dx.length.v3f32(<3 x float> %p0)
+ %mul.i = fmul <3 x float> %p0, %p0
+ %rdx.fadd.i = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <3 x float> %mul.i)
+ %hlsl.length = call float @llvm.sqrt.f32(float %rdx.fadd.i)
ret float %hlsl.length
}
define noundef float @test_length_float4(<4 x float> noundef %p0) {
+; CHECK-LABEL: define noundef float @test_length_float4(
+; CHECK-SAME: <4 x float> noundef [[P0:%.*]]) {
+; CHECK: [[ENTRY:.*:]]
+; CHECK: [[MUL_I:%.*]] = fmul <4 x float> [[P0]], [[P0]]
+; CHECK: [[TMP0:%.*]] = extractelement <4 x float> [[MUL_I]], i64 0
+; CHECK: [[TMP1:%.*]] = fadd float 0.000000e+00, [[TMP0]]
+; CHECK: [[TMP2:%.*]] = extractelement <4 x float> [[MUL_I]], i64 1
+; CHECK: [[TMP3:%.*]] = fadd float [[TMP1]], [[TMP2]]
+; CHECK: [[TMP4:%.*]] = extractelement <4 x float> [[MUL_I]], i64 2
+; CHECK: [[TMP5:%.*]] = fadd float [[TMP3]], [[TMP4]]
+; CHECK: [[TMP6:%.*]] = extractelement <4 x float> [[MUL_I]], i64 3
+; CHECK: [[TMP7:%.*]] = fadd float [[TMP5]], [[TMP6]]
+; EXPCHECK: [[HLSL_LENGTH:%.*]] = call float @llvm.sqrt.f32(float [[TMP7]])
+; DOPCHECK: [[HLSL_LENGTH:%.*]] = call float @dx.op.unary.f32(i32 24, float [[TMP7]])
+; CHECK: ret float [[HLSL_LENGTH]]
+;
entry:
- ; CHECK: extractelement <4 x float> %{{.*}}, i64 0
- ; CHECK: fmul float %{{.*}}, %{{.*}}
- ; CHECK: extractelement <4 x float> %{{.*}}, i64 1
- ; CHECK: fmul float %{{.*}}, %{{.*}}
- ; CHECK: fadd float %{{.*}}, %{{.*}}
- ; CHECK: extractelement <4 x float> %{{.*}}, i64 2
- ; CHECK: fmul float %{{.*}}, %{{.*}}
- ; CHECK: fadd float %{{.*}}, %{{.*}}
- ; CHECK: extractelement <4 x float> %{{.*}}, i64 3
- ; CHECK: fmul float %{{.*}}, %{{.*}}
- ; CHECK: fadd float %{{.*}}, %{{.*}}
- ; EXPCHECK: call float @llvm.sqrt.f32(float %{{.*}})
- ; DOPCHECK: call float @dx.op.unary.f32(i32 24, float %{{.*}})
- %hlsl.length = call float @llvm.dx.length.v4f32(<4 x float> %p0)
+ %mul.i = fmul <4 x float> %p0, %p0
+ %rdx.fadd.i = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <4 x float> %mul.i)
+ %hlsl.length = call float @llvm.sqrt.f32(float %rdx.fadd.i)
ret float %hlsl.length
}
diff --git a/llvm/test/CodeGen/DirectX/length_error.ll b/llvm/test/CodeGen/DirectX/length_error.ll
deleted file mode 100644
index 143b41fc506e1d..00000000000000
--- a/llvm/test/CodeGen/DirectX/length_error.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: not opt -S -dxil-intrinsic-expansion -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s
-
-; DXIL operation length does not support double overload type
-; CHECK: Cannot create Sqrt operation: Invalid overload type
-
-define noundef double @test_length_double2(<2 x double> noundef %p0) {
-entry:
- %hlsl.length = call double @llvm.dx.length.v2f32(<2 x double> %p0)
- ret double %hlsl.length
-}
>From c95a9ccd9c61980a95cda99446d38b13440f3e54 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Thu, 21 Nov 2024 21:27:51 -0500
Subject: [PATCH 4/6] add a small optimization to remove the redundant add of zero
---
clang/lib/CodeGen/CGHLSLRuntime.h | 1 -
clang/test/CodeGenHLSL/builtins/length.hlsl | 11 +-
.../Target/DirectX/DXILIntrinsicExpansion.cpp | 12 +-
llvm/test/CodeGen/DirectX/length.ll | 239 +++++++++++-------
4 files changed, 164 insertions(+), 99 deletions(-)
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index a8e0ed42b79a35..8001e8a4c957e0 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -77,7 +77,6 @@ class CGHLSLRuntime {
GENERATE_HLSL_INTRINSIC_FUNCTION(Cross, cross)
GENERATE_HLSL_INTRINSIC_FUNCTION(Degrees, degrees)
GENERATE_HLSL_INTRINSIC_FUNCTION(Frac, frac)
- GENERATE_HLSL_INTRINSIC_FUNCTION(Length, length)
GENERATE_HLSL_INTRINSIC_FUNCTION(Lerp, lerp)
GENERATE_HLSL_INTRINSIC_FUNCTION(Normalize, normalize)
GENERATE_HLSL_INTRINSIC_FUNCTION(Rsqrt, rsqrt)
diff --git a/clang/test/CodeGenHLSL/builtins/length.hlsl b/clang/test/CodeGenHLSL/builtins/length.hlsl
index af9713bfd7628c..2dad0e18a4a888 100644
--- a/clang/test/CodeGenHLSL/builtins/length.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/length.hlsl
@@ -1,7 +1,7 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
-// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
-// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
-// RUN: -emit-llvm -O1 -o - | FileCheck %s
+// RUN: %clang_cc1 -finclude-default-header -triple \
+// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN: -emit-llvm -O1 -o - | FileCheck %s
// CHECK-LABEL: define noundef half @_Z16test_length_halfDh(
@@ -100,11 +100,6 @@ float test_length_float3(float3 p0)
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[RDX_FADD_I]])
// CHECK-NEXT: ret float [[TMP0]]
//
-loat3 p0)
-{
- return length(p0);
-}
-
float test_length_float4(float4 p0)
{
return length(p0);
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index 37c20459fa4bf4..eb89f6a2f95da7 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -28,6 +28,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include <cstdint>
#define DEBUG_TYPE "dxil-intrinsic-expansion"
@@ -74,14 +75,15 @@ static bool isIntrinsicExpansion(Function &F) {
}
static Value *expandVecReduceFAdd(CallInst *Orig) {
- Value *Sum = Orig->getOperand(0);
+ // Note: vector_reduce_fadd first argument is a starting value
+ // Our use doesn't need it, so ignoring argument zero.
Value *X = Orig->getOperand(1);
IRBuilder<> Builder(Orig);
Type *Ty = X->getType();
auto *XVec = dyn_cast<FixedVectorType>(Ty);
unsigned XVecSize = XVec->getNumElements();
-
- for (unsigned I = 0; I < XVecSize; I++) {
+ Value *Sum = Builder.CreateExtractElement(X, static_cast<uint64_t>(0));
+ for (unsigned I = 1; I < XVecSize; I++) {
Value *Elt = Builder.CreateExtractElement(X, I);
Sum = Builder.CreateFAdd(Sum, Elt);
}
@@ -96,8 +98,8 @@ static Value *expandVecReduceAdd(CallInst *Orig) {
auto *XVec = dyn_cast<FixedVectorType>(Ty);
unsigned XVecSize = XVec->getNumElements();
- Value *Sum = ConstantInt::get(EltTy, 0);
- for (unsigned I = 0; I < XVecSize; I++) {
+ Value *Sum = Builder.CreateExtractElement(X, static_cast<uint64_t>(0));
+ for (unsigned I = 1; I < XVecSize; I++) {
Value *Elt = Builder.CreateExtractElement(X, I);
Sum = Builder.CreateAdd(Sum, Elt);
}
diff --git a/llvm/test/CodeGen/DirectX/length.ll b/llvm/test/CodeGen/DirectX/length.ll
index 8df5aca6e10df3..3feea03321b838 100644
--- a/llvm/test/CodeGen/DirectX/length.ll
+++ b/llvm/test/CodeGen/DirectX/length.ll
@@ -1,23 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -dxil-intrinsic-expansion < %s | FileCheck %s --check-prefixes=CHECK,EXPCHECK
-; RUN: opt -S -dxil-intrinsic-expansion -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library < %s | FileCheck %s --check-prefixes=CHECK,DOPCHECK
+; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library < %s | FileCheck %s --check-prefixes=CHECK,DOPCHECK
; Make sure dxil operation function calls for length are generated for half/float.
define noundef half @test_length_half2(<2 x half> noundef %p0) {
-; CHECK-LABEL: define noundef half @test_length_half2(
-; CHECK-SAME: <2 x half> noundef [[P0:%.*]]) {
-; CHECK: [[ENTRY:.*:]]
-; CHECK: [[MUL_I:%.*]] = fmul <2 x half> [[P0]], [[P0]]
-; CHECK: [[TMP0:%.*]] = extractelement <2 x half> [[MUL_I]], i64 0
-; CHECK: [[TMP1:%.*]] = fadd half 0xH0000, [[TMP0]]
-; CHECK: [[TMP2:%.*]] = extractelement <2 x half> [[MUL_I]], i64 1
-; CHECK: [[TMP3:%.*]] = fadd half [[TMP1]], [[TMP2]]
-; EXPCHECK: [[HLSL_LENGTH:%.*]] = call half @llvm.sqrt.f16(half [[TMP3]])
-; DOPCHECK: [[HLSL_LENGTH:%.*]] = call half @dx.op.unary.f16(i32 24, half [[TMP3]])
-; CHECK: ret half [[HLSL_LENGTH]]
+; EXPCHECK-LABEL: define noundef half @test_length_half2(
+; EXPCHECK-SAME: <2 x half> noundef [[P0:%.*]]) {
+; EXPCHECK-NEXT: [[ENTRY:.*:]]
+; EXPCHECK-NEXT: [[MUL_I:%.*]] = fmul <2 x half> [[P0]], [[P0]]
+; EXPCHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x half> [[MUL_I]], i64 0
+; EXPCHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x half> [[MUL_I]], i64 1
+; EXPCHECK-NEXT: [[TMP2:%.*]] = fadd half [[TMP0]], [[TMP1]]
+; EXPCHECK-NEXT: [[HLSL_LENGTH:%.*]] = call half @llvm.sqrt.f16(half [[TMP2]])
+; EXPCHECK-NEXT: ret half [[HLSL_LENGTH]]
+;
+; DOPCHECK-LABEL: define noundef half @test_length_half2(
+; DOPCHECK-SAME: <2 x half> noundef [[P0:%.*]]) {
+; DOPCHECK-NEXT: [[ENTRY:.*:]]
+; DOPCHECK-NEXT: [[P0_I0:%.*]] = extractelement <2 x half> [[P0]], i64 0
+; DOPCHECK-NEXT: [[MUL_I_I0:%.*]] = fmul half [[P0_I0]], [[P0_I0]]
+; DOPCHECK-NEXT: [[P0_I1:%.*]] = extractelement <2 x half> [[P0]], i64 1
+; DOPCHECK-NEXT: [[MUL_I_I1:%.*]] = fmul half [[P0_I1]], [[P0_I1]]
+; DOPCHECK-NEXT: [[TMP0:%.*]] = fadd half [[MUL_I_I0]], [[MUL_I_I1]]
+; DOPCHECK-NEXT: [[HLSL_LENGTH1:%.*]] = call half @dx.op.unary.f16(i32 24, half [[TMP0]])
+; DOPCHECK-NEXT: ret half [[HLSL_LENGTH1]]
;
entry:
-
%mul.i = fmul <2 x half> %p0, %p0
%rdx.fadd.i = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> %mul.i)
%hlsl.length = call half @llvm.sqrt.f16(half %rdx.fadd.i)
@@ -25,22 +34,33 @@ entry:
}
define noundef half @test_length_half3(<3 x half> noundef %p0) {
-; CHECK-LABEL: define noundef half @test_length_half3(
-; CHECK-SAME: <3 x half> noundef [[P0:%.*]]) {
-; CHECK: [[ENTRY:.*:]]
-; CHECK: [[MUL_I:%.*]] = fmul <3 x half> [[P0]], [[P0]]
-; CHECK: [[TMP0:%.*]] = extractelement <3 x half> [[MUL_I]], i64 0
-; CHECK: [[TMP1:%.*]] = fadd half 0xH0000, [[TMP0]]
-; CHECK: [[TMP2:%.*]] = extractelement <3 x half> [[MUL_I]], i64 1
-; CHECK: [[TMP3:%.*]] = fadd half [[TMP1]], [[TMP2]]
-; CHECK: [[TMP4:%.*]] = extractelement <3 x half> [[MUL_I]], i64 2
-; CHECK: [[TMP5:%.*]] = fadd half [[TMP3]], [[TMP4]]
-; EXPCHECK: [[HLSL_LENGTH:%.*]] = call half @llvm.sqrt.f16(half [[TMP5]])
-; DOPCHECK: [[HLSL_LENGTH:%.*]] = call half @dx.op.unary.f16(i32 24, half [[TMP5]])
-; CHECK: ret half [[HLSL_LENGTH]]
+; EXPCHECK-LABEL: define noundef half @test_length_half3(
+; EXPCHECK-SAME: <3 x half> noundef [[P0:%.*]]) {
+; EXPCHECK-NEXT: [[ENTRY:.*:]]
+; EXPCHECK-NEXT: [[MUL_I:%.*]] = fmul <3 x half> [[P0]], [[P0]]
+; EXPCHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x half> [[MUL_I]], i64 0
+; EXPCHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x half> [[MUL_I]], i64 1
+; EXPCHECK-NEXT: [[TMP2:%.*]] = fadd half [[TMP0]], [[TMP1]]
+; EXPCHECK-NEXT: [[TMP3:%.*]] = extractelement <3 x half> [[MUL_I]], i64 2
+; EXPCHECK-NEXT: [[TMP4:%.*]] = fadd half [[TMP2]], [[TMP3]]
+; EXPCHECK-NEXT: [[HLSL_LENGTH:%.*]] = call half @llvm.sqrt.f16(half [[TMP4]])
+; EXPCHECK-NEXT: ret half [[HLSL_LENGTH]]
+;
+; DOPCHECK-LABEL: define noundef half @test_length_half3(
+; DOPCHECK-SAME: <3 x half> noundef [[P0:%.*]]) {
+; DOPCHECK-NEXT: [[ENTRY:.*:]]
+; DOPCHECK-NEXT: [[P0_I0:%.*]] = extractelement <3 x half> [[P0]], i64 0
+; DOPCHECK-NEXT: [[MUL_I_I0:%.*]] = fmul half [[P0_I0]], [[P0_I0]]
+; DOPCHECK-NEXT: [[P0_I1:%.*]] = extractelement <3 x half> [[P0]], i64 1
+; DOPCHECK-NEXT: [[MUL_I_I1:%.*]] = fmul half [[P0_I1]], [[P0_I1]]
+; DOPCHECK-NEXT: [[P0_I2:%.*]] = extractelement <3 x half> [[P0]], i64 2
+; DOPCHECK-NEXT: [[MUL_I_I2:%.*]] = fmul half [[P0_I2]], [[P0_I2]]
+; DOPCHECK-NEXT: [[TMP0:%.*]] = fadd half [[MUL_I_I0]], [[MUL_I_I1]]
+; DOPCHECK-NEXT: [[TMP1:%.*]] = fadd half [[TMP0]], [[MUL_I_I2]]
+; DOPCHECK-NEXT: [[HLSL_LENGTH1:%.*]] = call half @dx.op.unary.f16(i32 24, half [[TMP1]])
+; DOPCHECK-NEXT: ret half [[HLSL_LENGTH1]]
;
entry:
-
%mul.i = fmul <3 x half> %p0, %p0
%rdx.fadd.i = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <3 x half> %mul.i)
%hlsl.length = call half @llvm.sqrt.f16(half %rdx.fadd.i)
@@ -48,24 +68,38 @@ entry:
}
define noundef half @test_length_half4(<4 x half> noundef %p0) {
-; CHECK-LABEL: define noundef half @test_length_half4(
-; CHECK-SAME: <4 x half> noundef [[P0:%.*]]) {
-; CHECK: [[ENTRY:.*:]]
-; CHECK: [[MUL_I:%.*]] = fmul <4 x half> [[P0]], [[P0]]
-; CHECK: [[TMP0:%.*]] = extractelement <4 x half> [[MUL_I]], i64 0
-; CHECK: [[TMP1:%.*]] = fadd half 0xH0000, [[TMP0]]
-; CHECK: [[TMP2:%.*]] = extractelement <4 x half> [[MUL_I]], i64 1
-; CHECK: [[TMP3:%.*]] = fadd half [[TMP1]], [[TMP2]]
-; CHECK: [[TMP4:%.*]] = extractelement <4 x half> [[MUL_I]], i64 2
-; CHECK: [[TMP5:%.*]] = fadd half [[TMP3]], [[TMP4]]
-; CHECK: [[TMP6:%.*]] = extractelement <4 x half> [[MUL_I]], i64 3
-; CHECK: [[TMP7:%.*]] = fadd half [[TMP5]], [[TMP6]]
-; EXPCHECK: [[HLSL_LENGTH:%.*]] = call half @llvm.sqrt.f16(half [[TMP7]])
-; DOPCHECK: [[HLSL_LENGTH:%.*]] = call half @dx.op.unary.f16(i32 24, half [[TMP7]])
-; CHECK: ret half [[HLSL_LENGTH]]
+; EXPCHECK-LABEL: define noundef half @test_length_half4(
+; EXPCHECK-SAME: <4 x half> noundef [[P0:%.*]]) {
+; EXPCHECK-NEXT: [[ENTRY:.*:]]
+; EXPCHECK-NEXT: [[MUL_I:%.*]] = fmul <4 x half> [[P0]], [[P0]]
+; EXPCHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x half> [[MUL_I]], i64 0
+; EXPCHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x half> [[MUL_I]], i64 1
+; EXPCHECK-NEXT: [[TMP2:%.*]] = fadd half [[TMP0]], [[TMP1]]
+; EXPCHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x half> [[MUL_I]], i64 2
+; EXPCHECK-NEXT: [[TMP4:%.*]] = fadd half [[TMP2]], [[TMP3]]
+; EXPCHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x half> [[MUL_I]], i64 3
+; EXPCHECK-NEXT: [[TMP6:%.*]] = fadd half [[TMP4]], [[TMP5]]
+; EXPCHECK-NEXT: [[HLSL_LENGTH:%.*]] = call half @llvm.sqrt.f16(half [[TMP6]])
+; EXPCHECK-NEXT: ret half [[HLSL_LENGTH]]
+;
+; DOPCHECK-LABEL: define noundef half @test_length_half4(
+; DOPCHECK-SAME: <4 x half> noundef [[P0:%.*]]) {
+; DOPCHECK-NEXT: [[ENTRY:.*:]]
+; DOPCHECK-NEXT: [[P0_I0:%.*]] = extractelement <4 x half> [[P0]], i64 0
+; DOPCHECK-NEXT: [[MUL_I_I0:%.*]] = fmul half [[P0_I0]], [[P0_I0]]
+; DOPCHECK-NEXT: [[P0_I1:%.*]] = extractelement <4 x half> [[P0]], i64 1
+; DOPCHECK-NEXT: [[MUL_I_I1:%.*]] = fmul half [[P0_I1]], [[P0_I1]]
+; DOPCHECK-NEXT: [[P0_I2:%.*]] = extractelement <4 x half> [[P0]], i64 2
+; DOPCHECK-NEXT: [[MUL_I_I2:%.*]] = fmul half [[P0_I2]], [[P0_I2]]
+; DOPCHECK-NEXT: [[P0_I3:%.*]] = extractelement <4 x half> [[P0]], i64 3
+; DOPCHECK-NEXT: [[MUL_I_I3:%.*]] = fmul half [[P0_I3]], [[P0_I3]]
+; DOPCHECK-NEXT: [[TMP0:%.*]] = fadd half [[MUL_I_I0]], [[MUL_I_I1]]
+; DOPCHECK-NEXT: [[TMP1:%.*]] = fadd half [[TMP0]], [[MUL_I_I2]]
+; DOPCHECK-NEXT: [[TMP2:%.*]] = fadd half [[TMP1]], [[MUL_I_I3]]
+; DOPCHECK-NEXT: [[HLSL_LENGTH1:%.*]] = call half @dx.op.unary.f16(i32 24, half [[TMP2]])
+; DOPCHECK-NEXT: ret half [[HLSL_LENGTH1]]
;
entry:
-
%mul.i = fmul <4 x half> %p0, %p0
%rdx.fadd.i = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <4 x half> %mul.i)
%hlsl.length = call half @llvm.sqrt.f16(half %rdx.fadd.i)
@@ -73,20 +107,28 @@ entry:
}
define noundef float @test_length_float2(<2 x float> noundef %p0) {
-; CHECK-LABEL: define noundef float @test_length_float2(
-; CHECK-SAME: <2 x float> noundef [[P0:%.*]]) {
-; CHECK: [[ENTRY:.*:]]
-; CHECK: [[MUL_I:%.*]] = fmul <2 x float> [[P0]], [[P0]]
-; CHECK: [[TMP0:%.*]] = extractelement <2 x float> [[MUL_I]], i64 0
-; CHECK: [[TMP1:%.*]] = fadd float 0.000000e+00, [[TMP0]]
-; CHECK: [[TMP2:%.*]] = extractelement <2 x float> [[MUL_I]], i64 1
-; CHECK: [[TMP3:%.*]] = fadd float [[TMP1]], [[TMP2]]
-; EXPCHECK: [[HLSL_LENGTH:%.*]] = call float @llvm.sqrt.f32(float [[TMP3]])
-; DOPCHECK: [[HLSL_LENGTH:%.*]] = call float @dx.op.unary.f32(i32 24, float [[TMP3]])
-; CHECK: ret float [[HLSL_LENGTH]]
+; EXPCHECK-LABEL: define noundef float @test_length_float2(
+; EXPCHECK-SAME: <2 x float> noundef [[P0:%.*]]) {
+; EXPCHECK-NEXT: [[ENTRY:.*:]]
+; EXPCHECK-NEXT: [[MUL_I:%.*]] = fmul <2 x float> [[P0]], [[P0]]
+; EXPCHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x float> [[MUL_I]], i64 0
+; EXPCHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[MUL_I]], i64 1
+; EXPCHECK-NEXT: [[TMP2:%.*]] = fadd float [[TMP0]], [[TMP1]]
+; EXPCHECK-NEXT: [[HLSL_LENGTH:%.*]] = call float @llvm.sqrt.f32(float [[TMP2]])
+; EXPCHECK-NEXT: ret float [[HLSL_LENGTH]]
+;
+; DOPCHECK-LABEL: define noundef float @test_length_float2(
+; DOPCHECK-SAME: <2 x float> noundef [[P0:%.*]]) {
+; DOPCHECK-NEXT: [[ENTRY:.*:]]
+; DOPCHECK-NEXT: [[P0_I0:%.*]] = extractelement <2 x float> [[P0]], i64 0
+; DOPCHECK-NEXT: [[MUL_I_I0:%.*]] = fmul float [[P0_I0]], [[P0_I0]]
+; DOPCHECK-NEXT: [[P0_I1:%.*]] = extractelement <2 x float> [[P0]], i64 1
+; DOPCHECK-NEXT: [[MUL_I_I1:%.*]] = fmul float [[P0_I1]], [[P0_I1]]
+; DOPCHECK-NEXT: [[TMP0:%.*]] = fadd float [[MUL_I_I0]], [[MUL_I_I1]]
+; DOPCHECK-NEXT: [[HLSL_LENGTH1:%.*]] = call float @dx.op.unary.f32(i32 24, float [[TMP0]])
+; DOPCHECK-NEXT: ret float [[HLSL_LENGTH1]]
;
entry:
-
%mul.i = fmul <2 x float> %p0, %p0
%rdx.fadd.i = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> %mul.i)
%hlsl.length = call float @llvm.sqrt.f32(float %rdx.fadd.i)
@@ -94,22 +136,33 @@ entry:
}
define noundef float @test_length_float3(<3 x float> noundef %p0) {
-; CHECK-LABEL: define noundef float @test_length_float3(
-; CHECK-SAME: <3 x float> noundef [[P0:%.*]]) {
-; CHECK: [[ENTRY:.*:]]
-; CHECK: [[MUL_I:%.*]] = fmul <3 x float> [[P0]], [[P0]]
-; CHECK: [[TMP0:%.*]] = extractelement <3 x float> [[MUL_I]], i64 0
-; CHECK: [[TMP1:%.*]] = fadd float 0.000000e+00, [[TMP0]]
-; CHECK: [[TMP2:%.*]] = extractelement <3 x float> [[MUL_I]], i64 1
-; CHECK: [[TMP3:%.*]] = fadd float [[TMP1]], [[TMP2]]
-; CHECK: [[TMP4:%.*]] = extractelement <3 x float> [[MUL_I]], i64 2
-; CHECK: [[TMP5:%.*]] = fadd float [[TMP3]], [[TMP4]]
-; EXPCHECK: [[HLSL_LENGTH:%.*]] = call float @llvm.sqrt.f32(float [[TMP5]])
-; DOPCHECK: [[HLSL_LENGTH:%.*]] = call float @dx.op.unary.f32(i32 24, float [[TMP5]])
-; CHECK: ret float [[HLSL_LENGTH]]
+; EXPCHECK-LABEL: define noundef float @test_length_float3(
+; EXPCHECK-SAME: <3 x float> noundef [[P0:%.*]]) {
+; EXPCHECK-NEXT: [[ENTRY:.*:]]
+; EXPCHECK-NEXT: [[MUL_I:%.*]] = fmul <3 x float> [[P0]], [[P0]]
+; EXPCHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x float> [[MUL_I]], i64 0
+; EXPCHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x float> [[MUL_I]], i64 1
+; EXPCHECK-NEXT: [[TMP2:%.*]] = fadd float [[TMP0]], [[TMP1]]
+; EXPCHECK-NEXT: [[TMP3:%.*]] = extractelement <3 x float> [[MUL_I]], i64 2
+; EXPCHECK-NEXT: [[TMP4:%.*]] = fadd float [[TMP2]], [[TMP3]]
+; EXPCHECK-NEXT: [[HLSL_LENGTH:%.*]] = call float @llvm.sqrt.f32(float [[TMP4]])
+; EXPCHECK-NEXT: ret float [[HLSL_LENGTH]]
+;
+; DOPCHECK-LABEL: define noundef float @test_length_float3(
+; DOPCHECK-SAME: <3 x float> noundef [[P0:%.*]]) {
+; DOPCHECK-NEXT: [[ENTRY:.*:]]
+; DOPCHECK-NEXT: [[P0_I0:%.*]] = extractelement <3 x float> [[P0]], i64 0
+; DOPCHECK-NEXT: [[MUL_I_I0:%.*]] = fmul float [[P0_I0]], [[P0_I0]]
+; DOPCHECK-NEXT: [[P0_I1:%.*]] = extractelement <3 x float> [[P0]], i64 1
+; DOPCHECK-NEXT: [[MUL_I_I1:%.*]] = fmul float [[P0_I1]], [[P0_I1]]
+; DOPCHECK-NEXT: [[P0_I2:%.*]] = extractelement <3 x float> [[P0]], i64 2
+; DOPCHECK-NEXT: [[MUL_I_I2:%.*]] = fmul float [[P0_I2]], [[P0_I2]]
+; DOPCHECK-NEXT: [[TMP0:%.*]] = fadd float [[MUL_I_I0]], [[MUL_I_I1]]
+; DOPCHECK-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], [[MUL_I_I2]]
+; DOPCHECK-NEXT: [[HLSL_LENGTH1:%.*]] = call float @dx.op.unary.f32(i32 24, float [[TMP1]])
+; DOPCHECK-NEXT: ret float [[HLSL_LENGTH1]]
;
entry:
-
%mul.i = fmul <3 x float> %p0, %p0
%rdx.fadd.i = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <3 x float> %mul.i)
%hlsl.length = call float @llvm.sqrt.f32(float %rdx.fadd.i)
@@ -117,26 +170,42 @@ entry:
}
define noundef float @test_length_float4(<4 x float> noundef %p0) {
-; CHECK-LABEL: define noundef float @test_length_float4(
-; CHECK-SAME: <4 x float> noundef [[P0:%.*]]) {
-; CHECK: [[ENTRY:.*:]]
-; CHECK: [[MUL_I:%.*]] = fmul <4 x float> [[P0]], [[P0]]
-; CHECK: [[TMP0:%.*]] = extractelement <4 x float> [[MUL_I]], i64 0
-; CHECK: [[TMP1:%.*]] = fadd float 0.000000e+00, [[TMP0]]
-; CHECK: [[TMP2:%.*]] = extractelement <4 x float> [[MUL_I]], i64 1
-; CHECK: [[TMP3:%.*]] = fadd float [[TMP1]], [[TMP2]]
-; CHECK: [[TMP4:%.*]] = extractelement <4 x float> [[MUL_I]], i64 2
-; CHECK: [[TMP5:%.*]] = fadd float [[TMP3]], [[TMP4]]
-; CHECK: [[TMP6:%.*]] = extractelement <4 x float> [[MUL_I]], i64 3
-; CHECK: [[TMP7:%.*]] = fadd float [[TMP5]], [[TMP6]]
-; EXPCHECK: [[HLSL_LENGTH:%.*]] = call float @llvm.sqrt.f32(float [[TMP7]])
-; DOPCHECK: [[HLSL_LENGTH:%.*]] = call float @dx.op.unary.f32(i32 24, float [[TMP7]])
-; CHECK: ret float [[HLSL_LENGTH]]
+; EXPCHECK-LABEL: define noundef float @test_length_float4(
+; EXPCHECK-SAME: <4 x float> noundef [[P0:%.*]]) {
+; EXPCHECK-NEXT: [[ENTRY:.*:]]
+; EXPCHECK-NEXT: [[MUL_I:%.*]] = fmul <4 x float> [[P0]], [[P0]]
+; EXPCHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[MUL_I]], i64 0
+; EXPCHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[MUL_I]], i64 1
+; EXPCHECK-NEXT: [[TMP2:%.*]] = fadd float [[TMP0]], [[TMP1]]
+; EXPCHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[MUL_I]], i64 2
+; EXPCHECK-NEXT: [[TMP4:%.*]] = fadd float [[TMP2]], [[TMP3]]
+; EXPCHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[MUL_I]], i64 3
+; EXPCHECK-NEXT: [[TMP6:%.*]] = fadd float [[TMP4]], [[TMP5]]
+; EXPCHECK-NEXT: [[HLSL_LENGTH:%.*]] = call float @llvm.sqrt.f32(float [[TMP6]])
+; EXPCHECK-NEXT: ret float [[HLSL_LENGTH]]
+;
+; DOPCHECK-LABEL: define noundef float @test_length_float4(
+; DOPCHECK-SAME: <4 x float> noundef [[P0:%.*]]) {
+; DOPCHECK-NEXT: [[ENTRY:.*:]]
+; DOPCHECK-NEXT: [[P0_I0:%.*]] = extractelement <4 x float> [[P0]], i64 0
+; DOPCHECK-NEXT: [[MUL_I_I0:%.*]] = fmul float [[P0_I0]], [[P0_I0]]
+; DOPCHECK-NEXT: [[P0_I1:%.*]] = extractelement <4 x float> [[P0]], i64 1
+; DOPCHECK-NEXT: [[MUL_I_I1:%.*]] = fmul float [[P0_I1]], [[P0_I1]]
+; DOPCHECK-NEXT: [[P0_I2:%.*]] = extractelement <4 x float> [[P0]], i64 2
+; DOPCHECK-NEXT: [[MUL_I_I2:%.*]] = fmul float [[P0_I2]], [[P0_I2]]
+; DOPCHECK-NEXT: [[P0_I3:%.*]] = extractelement <4 x float> [[P0]], i64 3
+; DOPCHECK-NEXT: [[MUL_I_I3:%.*]] = fmul float [[P0_I3]], [[P0_I3]]
+; DOPCHECK-NEXT: [[TMP0:%.*]] = fadd float [[MUL_I_I0]], [[MUL_I_I1]]
+; DOPCHECK-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], [[MUL_I_I2]]
+; DOPCHECK-NEXT: [[TMP2:%.*]] = fadd float [[TMP1]], [[MUL_I_I3]]
+; DOPCHECK-NEXT: [[HLSL_LENGTH1:%.*]] = call float @dx.op.unary.f32(i32 24, float [[TMP2]])
+; DOPCHECK-NEXT: ret float [[HLSL_LENGTH1]]
;
entry:
-
%mul.i = fmul <4 x float> %p0, %p0
%rdx.fadd.i = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <4 x float> %mul.i)
%hlsl.length = call float @llvm.sqrt.f32(float %rdx.fadd.i)
ret float %hlsl.length
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
>From 160e0743286c66a4caaf9ba7b578921fc1db7e2a Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Sat, 23 Nov 2024 04:08:27 -0500
Subject: [PATCH 5/6] add a target lowering pass through
---
clang/include/clang/Basic/Builtins.td | 12 +
clang/lib/CodeGen/CGBuiltin.cpp | 25 ++
clang/lib/CodeGen/CGHLSLRuntime.h | 88 ++++--
clang/lib/Headers/hlsl/hlsl_detail.h | 38 ++-
clang/lib/Headers/hlsl/hlsl_intrinsics.h | 13 +-
clang/lib/Sema/SemaHLSL.cpp | 50 ++-
clang/test/CodeGenHLSL/builtins/distance.hlsl | 53 ++++
clang/test/CodeGenHLSL/builtins/length.hlsl | 57 +++-
llvm/include/llvm/IR/IntrinsicsSPIRV.td | 1 +
.../Target/DirectX/DXILIntrinsicExpansion.cpp | 1 -
.../Target/SPIRV/SPIRVInstructionSelector.cpp | 2 +
.../DirectX/length_invalid_intrinsic_error.ll | 10 -
.../length_invalid_intrinsic_error_scalar.ll | 10 -
.../test/CodeGen/DirectX/vector_reduce_add.ll | 293 ++++++++++++++++++
.../CodeGen/SPIRV/hlsl-intrinsics/cross.ll | 8 +-
.../CodeGen/SPIRV/hlsl-intrinsics/distance.ll | 33 ++
.../CodeGen/SPIRV/hlsl-intrinsics/length.ll | 14 +-
17 files changed, 621 insertions(+), 87 deletions(-)
delete mode 100644 llvm/test/CodeGen/DirectX/length_invalid_intrinsic_error.ll
delete mode 100644 llvm/test/CodeGen/DirectX/length_invalid_intrinsic_error_scalar.ll
create mode 100644 llvm/test/CodeGen/DirectX/vector_reduce_add.ll
create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-intrinsics/distance.ll
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 82acd520c227e9..bbe7adc05e1baa 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -4810,6 +4810,12 @@ def HLSLDegrees : LangBuiltin<"HLSL_LANG"> {
let Prototype = "void(...)";
}
+def HLSLDistance : LangBuiltin<"HLSL_LANG"> {
+ let Spellings = ["__builtin_hlsl_distance"];
+ let Attributes = [NoThrow, Const];
+ let Prototype = "void(...)";
+}
+
def HLSLDotProduct : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_dot"];
let Attributes = [NoThrow, Const];
@@ -4846,6 +4852,12 @@ def HLSLIsinf : LangBuiltin<"HLSL_LANG"> {
let Prototype = "void(...)";
}
+def HLSLLength : LangBuiltin<"HLSL_LANG"> {
+ let Spellings = ["__builtin_hlsl_length"];
+ let Attributes = [NoThrow, Const];
+ let Prototype = "void(...)";
+}
+
def HLSLLerp : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_lerp"];
let Attributes = [NoThrow, Const];
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index b81d1ba8f6ecce..134b80822a206c 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -19097,6 +19097,20 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
/*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getCrossIntrinsic(),
ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.cross");
}
+ case Builtin::BI__builtin_hlsl_distance: {
+ Value *X = EmitScalarExpr(E->getArg(0));
+ Value *Y = EmitScalarExpr(E->getArg(1));
+ assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
+ E->getArg(1)->getType()->hasFloatingRepresentation() &&
+ "Distance operands must have a float representation");
+ assert(E->getArg(0)->getType()->isVectorType() &&
+ E->getArg(1)->getType()->isVectorType() &&
+ "Distance operands must be a vector");
+ return Builder.CreateIntrinsic(
+ /*ReturnType=*/X->getType()->getScalarType(),
+ CGM.getHLSLRuntime().getDistanceIntrinsic(), ArrayRef<Value *>{X, Y},
+ nullptr, "hlsl.distance");
+ }
case Builtin::BI__builtin_hlsl_dot: {
Value *Op0 = EmitScalarExpr(E->getArg(0));
Value *Op1 = EmitScalarExpr(E->getArg(1));
@@ -19174,6 +19188,17 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
/*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getLerpIntrinsic(),
ArrayRef<Value *>{X, Y, S}, nullptr, "hlsl.lerp");
}
+ case Builtin::BI__builtin_hlsl_length: {
+ Value *X = EmitScalarExpr(E->getArg(0));
+ assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
+ "length operand must have a float representation");
+ assert(E->getArg(0)->getType()->isVectorType() &&
+ "length operand must be a vector");
+ return Builder.CreateIntrinsic(
+ /*ReturnType=*/X->getType()->getScalarType(),
+ CGM.getHLSLRuntime().getLengthIntrinsic(), ArrayRef<Value *>{X},
+ nullptr, "hlsl.length");
+ }
case Builtin::BI__builtin_hlsl_normalize: {
Value *X = EmitScalarExpr(E->getArg(0));
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index 8001e8a4c957e0..34a06583789976 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -30,22 +30,36 @@
#include <optional>
#include <vector>
+#define GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(FunctionName, \
+ IntrinsicPostfix) \
+ GENERATE_HLSL_INTRINSIC_FUNCTION(FunctionName, IntrinsicPostfix, 1, 1)
+
// A function generator macro for picking the right intrinsic
// for the target backend
-#define GENERATE_HLSL_INTRINSIC_FUNCTION(FunctionName, IntrinsicPostfix) \
+#define GENERATE_HLSL_INTRINSIC_FUNCTION(FunctionName, IntrinsicPostfix, \
+ IncludeDXIL, IncludeSPIRV) \
llvm::Intrinsic::ID get##FunctionName##Intrinsic() { \
llvm::Triple::ArchType Arch = getArch(); \
switch (Arch) { \
- case llvm::Triple::dxil: \
- return llvm::Intrinsic::dx_##IntrinsicPostfix; \
- case llvm::Triple::spirv: \
- return llvm::Intrinsic::spv_##IntrinsicPostfix; \
+ /* Include DXIL case only if IncludeDXIL is true */ \
+ IF_INCLUDE(IncludeDXIL, case llvm::Triple::dxil \
+ : return llvm::Intrinsic::dx_##IntrinsicPostfix;) \
+ /* Include SPIRV case only if IncludeSPIRV is true */ \
+ IF_INCLUDE(IncludeSPIRV, case llvm::Triple::spirv \
+ : return llvm::Intrinsic::spv_##IntrinsicPostfix;) \
+ \
default: \
llvm_unreachable("Intrinsic " #IntrinsicPostfix \
" not supported by target architecture"); \
} \
}
+#define IF_INCLUDE(Condition, Code) IF_INCLUDE_IMPL(Condition, Code)
+#define IF_INCLUDE_IMPL(Condition, Code) IF_INCLUDE_##Condition(Code)
+
+#define IF_INCLUDE_1(Code) Code
+#define IF_INCLUDE_0(Code)
+
namespace llvm {
class GlobalVariable;
class Function;
@@ -72,35 +86,41 @@ class CGHLSLRuntime {
// Start of reserved area for HLSL intrinsic getters.
//===----------------------------------------------------------------------===//
- GENERATE_HLSL_INTRINSIC_FUNCTION(All, all)
- GENERATE_HLSL_INTRINSIC_FUNCTION(Any, any)
- GENERATE_HLSL_INTRINSIC_FUNCTION(Cross, cross)
- GENERATE_HLSL_INTRINSIC_FUNCTION(Degrees, degrees)
- GENERATE_HLSL_INTRINSIC_FUNCTION(Frac, frac)
- GENERATE_HLSL_INTRINSIC_FUNCTION(Lerp, lerp)
- GENERATE_HLSL_INTRINSIC_FUNCTION(Normalize, normalize)
- GENERATE_HLSL_INTRINSIC_FUNCTION(Rsqrt, rsqrt)
- GENERATE_HLSL_INTRINSIC_FUNCTION(Saturate, saturate)
- GENERATE_HLSL_INTRINSIC_FUNCTION(Sign, sign)
- GENERATE_HLSL_INTRINSIC_FUNCTION(Step, step)
- GENERATE_HLSL_INTRINSIC_FUNCTION(Radians, radians)
- GENERATE_HLSL_INTRINSIC_FUNCTION(ThreadId, thread_id)
- GENERATE_HLSL_INTRINSIC_FUNCTION(FDot, fdot)
- GENERATE_HLSL_INTRINSIC_FUNCTION(SDot, sdot)
- GENERATE_HLSL_INTRINSIC_FUNCTION(UDot, udot)
- GENERATE_HLSL_INTRINSIC_FUNCTION(Dot4AddI8Packed, dot4add_i8packed)
- GENERATE_HLSL_INTRINSIC_FUNCTION(Dot4AddU8Packed, dot4add_u8packed)
- GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveAnyTrue, wave_any)
- GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveCountBits, wave_active_countbits)
- GENERATE_HLSL_INTRINSIC_FUNCTION(WaveIsFirstLane, wave_is_first_lane)
- GENERATE_HLSL_INTRINSIC_FUNCTION(WaveReadLaneAt, wave_readlane)
- GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitUHigh, firstbituhigh)
- GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitSHigh, firstbitshigh)
- GENERATE_HLSL_INTRINSIC_FUNCTION(NClamp, nclamp)
- GENERATE_HLSL_INTRINSIC_FUNCTION(SClamp, sclamp)
- GENERATE_HLSL_INTRINSIC_FUNCTION(UClamp, uclamp)
-
- GENERATE_HLSL_INTRINSIC_FUNCTION(CreateHandleFromBinding, handle_fromBinding)
+ GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(All, all)
+ GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(Any, any)
+ GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(Cross, cross)
+ GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(Degrees, degrees)
+ GENERATE_HLSL_INTRINSIC_FUNCTION(Distance, distance, /*IncludeDXIL*/ 0,
+ /*IncludeSPIRV*/ 1)
+ GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(Frac, frac)
+ GENERATE_HLSL_INTRINSIC_FUNCTION(Length, length, /*IncludeDXIL*/ 0,
+ /*IncludeSPIRV*/ 1)
+ GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(Lerp, lerp)
+ GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(Normalize, normalize)
+ GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(Rsqrt, rsqrt)
+ GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(Saturate, saturate)
+ GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(Sign, sign)
+ GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(Step, step)
+ GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(Radians, radians)
+ GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(ThreadId, thread_id)
+ GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(FDot, fdot)
+ GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(SDot, sdot)
+ GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(UDot, udot)
+ GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(Dot4AddI8Packed, dot4add_i8packed)
+ GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(Dot4AddU8Packed, dot4add_u8packed)
+ GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(WaveActiveAnyTrue, wave_any)
+ GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(WaveActiveCountBits,
+ wave_active_countbits)
+ GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(WaveIsFirstLane, wave_is_first_lane)
+ GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(WaveReadLaneAt, wave_readlane)
+ GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(FirstBitUHigh, firstbituhigh)
+ GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(FirstBitSHigh, firstbitshigh)
+ GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(NClamp, nclamp)
+ GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(SClamp, sclamp)
+ GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(UClamp, uclamp)
+
+ GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(CreateHandleFromBinding,
+ handle_fromBinding)
//===----------------------------------------------------------------------===//
// End of reserved area for HLSL intrinsic getters.
diff --git a/clang/lib/Headers/hlsl/hlsl_detail.h b/clang/lib/Headers/hlsl/hlsl_detail.h
index 85ed08a1af06e9..e2a5f85b04bb47 100644
--- a/clang/lib/Headers/hlsl/hlsl_detail.h
+++ b/clang/lib/Headers/hlsl/hlsl_detail.h
@@ -9,6 +9,34 @@
#ifndef _HLSL_HLSL_DETAILS_H_
#define _HLSL_HLSL_DETAILS_H_
+#if __is_target_arch(dxil)
+#define IS_ARCH_DXIL 1
+#else
+#define IS_ARCH_DXIL 0
+#endif
+
+#if __is_target_arch(spirv)
+#define IS_ARCH_SPIRV 1
+#else
+#define IS_ARCH_SPIRV 0
+#endif
+
+#define ARCH_CONDITION(arch) \
+ if (IS_ARCH_##arch) \
+ return true;
+
+// Note: a listed arch bypasses the generic header implementation in favor
+// of its target builtin; only a single arch argument is expanded so far.
+#define EXPAND_ARCH_CONDITIONS(arch) \
+ ARCH_CONDITION(arch) \
+ /* Add more architectures as needed */
+
+#define DEFINE_TARGET_LOWERING(function_name, ...) \
+ constexpr bool Has##function_name##Lowering() { \
+ EXPAND_ARCH_CONDITIONS(__VA_ARGS__) \
+ return false; /* Default case if no match */ \
+ }
+
namespace hlsl {
namespace __detail {
@@ -41,6 +69,7 @@ constexpr enable_if_t<sizeof(U) == sizeof(T), U> bit_cast(T F) {
return __builtin_bit_cast(U, F);
}
+DEFINE_TARGET_LOWERING(Length, SPIRV)
template <typename T>
constexpr enable_if_t<is_same<float, T>::value || is_same<half, T>::value, T>
length_impl(T X) {
@@ -50,15 +79,14 @@ length_impl(T X) {
template <typename T, int N>
constexpr enable_if_t<is_same<float, T>::value || is_same<half, T>::value, T>
length_vec_impl(vector<T, N> X) {
+ if (HasLengthLowering())
+ return __builtin_hlsl_length(X);
vector<T, N> XSquared = X * X;
T XSquaredSum = __builtin_hlsl_reduce_add(XSquared);
- /*T XSquaredSum = 0;
- for(int I = 0; I < N; I++) {
- XSquaredSum += XSquared[I];
- }*/
return __builtin_elementwise_sqrt(XSquaredSum);
}
+DEFINE_TARGET_LOWERING(Distance, SPIRV)
template <typename T>
constexpr enable_if_t<is_same<float, T>::value || is_same<half, T>::value, T>
distance_impl(T X, T Y) {
@@ -68,6 +96,8 @@ distance_impl(T X, T Y) {
template <typename T, int N>
constexpr enable_if_t<is_same<float, T>::value || is_same<half, T>::value, T>
distance_vec_impl(vector<T, N> X, vector<T, N> Y) {
+ if (HasDistanceLowering())
+ return __builtin_hlsl_distance(X, Y);
return length_vec_impl(X - Y);
}
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index 588335d06ab17f..f51e5d4afbd6c9 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -879,22 +879,21 @@ float4 degrees(float4);
/// \brief Returns a distance scalar between two vectors of \a X and \a Y.
/// \param X The X input value.
/// \param Y The Y input value.
-
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
const inline half distance(half X, half Y) {
return __detail::distance_impl(X, Y);
}
+const inline float distance(float X, float Y) {
+ return __detail::distance_impl(X, Y);
+}
+
template <int N>
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
const inline half distance(vector<half, N> X, vector<half, N> Y) {
return __detail::distance_vec_impl(X, Y);
}
-const inline float distance(float X, float Y) {
- return __detail::distance_impl(X, Y);
-}
-
template <int N>
const inline float distance(vector<float, N> X, vector<float, N> Y) {
return __detail::distance_vec_impl(X, Y);
@@ -1325,9 +1324,9 @@ float4 lerp(float4, float4, float4);
/// \param x [in] The vector of floats, or a scalar float.
///
/// Length is based on the following formula: sqrt(x[0]^2 + x[1]^2 + ...).
-
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
const inline half length(half X) { return __detail::length_impl(X); }
+const inline float length(float X) { return __detail::length_impl(X); }
template <int N>
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
@@ -1335,8 +1334,6 @@ const inline half length(vector<half, N> X) {
return __detail::length_vec_impl(X);
}
-const inline float length(float X) { return __detail::length_impl(X); }
-
template <int N> const inline float length(vector<float, N> X) {
return __detail::length_vec_impl(X);
}
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index de2ff83d63fee4..d108e6e130b01d 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -1943,6 +1943,32 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
TheCall->setType(ArgTyA);
break;
}
+ case Builtin::BI__builtin_hlsl_distance: {
+ // Semantic checks for __builtin_hlsl_distance(X, Y). On success the
+ // call's type is set to the element type of the first (vector) operand.
+ if (CheckFloatOrHalfRepresentations(&SemaRef, TheCall))
+ return true;
+ if (SemaRef.checkArgCount(TheCall, 2))
+ return true;
+ if (CheckVectorElementCallArgs(&SemaRef, TheCall))
+ return true;
+ ExprResult A = TheCall->getArg(0);
+ QualType ArgTyA = A.get()->getType();
+ auto *VTy = ArgTyA->getAs<VectorType>();
+ // getAs<VectorType>() is null for a scalar operand. Diagnose that case
+ // the same way __builtin_hlsl_length does instead of dereferencing null.
+ if (VTy == nullptr) {
+ SemaRef.Diag(A.get()->getBeginLoc(),
+ diag::err_typecheck_convert_incompatible)
+ << ArgTyA
+ << SemaRef.Context.getVectorType(ArgTyA, 2, VectorKind::Generic) << 1
+ << 0 << 0;
+ return true;
+ }
+ QualType RetTy = VTy->getElementType();
+ TheCall->setType(RetTy);
+ break;
+ }
case Builtin::BI__builtin_hlsl_dot: {
if (SemaRef.checkArgCount(TheCall, 2))
return true;
@@ -2030,18 +2044,23 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
return true;
break;
}
- case Builtin::BI__builtin_hlsl_reduce_add: {
+ case Builtin::BI__builtin_hlsl_length: {
+ if (CheckFloatOrHalfRepresentations(&SemaRef, TheCall))
+ return true;
if (SemaRef.checkArgCount(TheCall, 1))
return true;
ExprResult A = TheCall->getArg(0);
QualType ArgTyA = A.get()->getType();
- QualType RetTy;
-
- if (auto *VTy = ArgTyA->getAs<VectorType>())
- RetTy = VTy->getElementType();
- else
- RetTy = TheCall->getArg(0)->getType();
-
+ auto *VTy = ArgTyA->getAs<VectorType>();
+ if (VTy == nullptr) {
+ SemaRef.Diag(A.get()->getBeginLoc(),
+ diag::err_typecheck_convert_incompatible)
+ << ArgTyA
+ << SemaRef.Context.getVectorType(ArgTyA, 2, VectorKind::Generic) << 1
+ << 0 << 0;
+ return true;
+ }
+ QualType RetTy = VTy->getElementType();
TheCall->setType(RetTy);
break;
}
@@ -2068,6 +2087,21 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
TheCall->setType(ArgTyA);
break;
}
+ case Builtin::BI__builtin_hlsl_reduce_add: {
+ if (SemaRef.checkArgCount(TheCall, 1))
+ return true;
+ ExprResult A = TheCall->getArg(0);
+ QualType ArgTyA = A.get()->getType();
+ QualType RetTy;
+
+ if (auto *VTy = ArgTyA->getAs<VectorType>())
+ RetTy = VTy->getElementType();
+ else
+ RetTy = TheCall->getArg(0)->getType();
+
+ TheCall->setType(RetTy);
+ break;
+ }
case Builtin::BI__builtin_hlsl_elementwise_sign: {
if (CheckFloatingOrIntRepresentation(&SemaRef, TheCall))
return true;
diff --git a/clang/test/CodeGenHLSL/builtins/distance.hlsl b/clang/test/CodeGenHLSL/builtins/distance.hlsl
index ea7988122ae6d4..99b95114e51d29 100644
--- a/clang/test/CodeGenHLSL/builtins/distance.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/distance.hlsl
@@ -2,6 +2,9 @@
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
// RUN: -emit-llvm -O1 -o - | FileCheck %s
+// RUN: %clang_cc1 -finclude-default-header -triple \
+// RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \
+// RUN: -emit-llvm -O1 -o - | FileCheck %s --check-prefix=SPVCHECK
// CHECK-LABEL: define noundef half @_Z18test_distance_halfDhDh(
// CHECK-SAME: half noundef [[X:%.*]], half noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
@@ -10,6 +13,13 @@
// CHECK-NEXT: [[ELT_ABS_I:%.*]] = tail call noundef half @llvm.fabs.f16(half [[SUB_I]])
// CHECK-NEXT: ret half [[ELT_ABS_I]]
//
+// SPVCHECK-LABEL: define spir_func noundef half @_Z18test_distance_halfDhDh(
+// SPVCHECK-SAME: half noundef [[X:%.*]], half noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// SPVCHECK-NEXT: [[ENTRY:.*:]]
+// SPVCHECK-NEXT: [[SUB_I:%.*]] = fsub half [[X]], [[Y]]
+// SPVCHECK-NEXT: [[ELT_ABS_I:%.*]] = tail call noundef half @llvm.fabs.f16(half [[SUB_I]])
+// SPVCHECK-NEXT: ret half [[ELT_ABS_I]]
+//
half test_distance_half(half X, half Y) { return distance(X, Y); }
// CHECK-LABEL: define noundef half @_Z19test_distance_half2Dv2_DhS_(
@@ -21,6 +31,12 @@ half test_distance_half(half X, half Y) { return distance(X, Y); }
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef half @llvm.sqrt.f16(half [[RDX_FADD_I]])
// CHECK-NEXT: ret half [[TMP0]]
//
+// SPVCHECK-LABEL: define spir_func noundef half @_Z19test_distance_half2Dv2_DhS_(
+// SPVCHECK-SAME: <2 x half> noundef [[X:%.*]], <2 x half> noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// SPVCHECK-NEXT: [[ENTRY:.*:]]
+// SPVCHECK-NEXT: [[HLSL_DISTANCE_I:%.*]] = tail call noundef half @llvm.spv.distance.v2f16(<2 x half> [[X]], <2 x half> [[Y]])
+// SPVCHECK-NEXT: ret half [[HLSL_DISTANCE_I]]
+//
half test_distance_half2(half2 X, half2 Y) { return distance(X, Y); }
// CHECK-LABEL: define noundef half @_Z19test_distance_half3Dv3_DhS_(
@@ -32,6 +48,12 @@ half test_distance_half2(half2 X, half2 Y) { return distance(X, Y); }
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef half @llvm.sqrt.f16(half [[RDX_FADD_I]])
// CHECK-NEXT: ret half [[TMP0]]
//
+// SPVCHECK-LABEL: define spir_func noundef half @_Z19test_distance_half3Dv3_DhS_(
+// SPVCHECK-SAME: <3 x half> noundef [[X:%.*]], <3 x half> noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// SPVCHECK-NEXT: [[ENTRY:.*:]]
+// SPVCHECK-NEXT: [[HLSL_DISTANCE_I:%.*]] = tail call noundef half @llvm.spv.distance.v3f16(<3 x half> [[X]], <3 x half> [[Y]])
+// SPVCHECK-NEXT: ret half [[HLSL_DISTANCE_I]]
+//
half test_distance_half3(half3 X, half3 Y) { return distance(X, Y); }
// CHECK-LABEL: define noundef half @_Z19test_distance_half4Dv4_DhS_(
@@ -43,6 +65,12 @@ half test_distance_half3(half3 X, half3 Y) { return distance(X, Y); }
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef half @llvm.sqrt.f16(half [[RDX_FADD_I]])
// CHECK-NEXT: ret half [[TMP0]]
//
+// SPVCHECK-LABEL: define spir_func noundef half @_Z19test_distance_half4Dv4_DhS_(
+// SPVCHECK-SAME: <4 x half> noundef [[X:%.*]], <4 x half> noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// SPVCHECK-NEXT: [[ENTRY:.*:]]
+// SPVCHECK-NEXT: [[HLSL_DISTANCE_I:%.*]] = tail call noundef half @llvm.spv.distance.v4f16(<4 x half> [[X]], <4 x half> [[Y]])
+// SPVCHECK-NEXT: ret half [[HLSL_DISTANCE_I]]
+//
half test_distance_half4(half4 X, half4 Y) { return distance(X, Y); }
// CHECK-LABEL: define noundef float @_Z19test_distance_floatff(
@@ -52,6 +80,13 @@ half test_distance_half4(half4 X, half4 Y) { return distance(X, Y); }
// CHECK-NEXT: [[ELT_ABS_I:%.*]] = tail call noundef float @llvm.fabs.f32(float [[SUB_I]])
// CHECK-NEXT: ret float [[ELT_ABS_I]]
//
+// SPVCHECK-LABEL: define spir_func noundef float @_Z19test_distance_floatff(
+// SPVCHECK-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// SPVCHECK-NEXT: [[ENTRY:.*:]]
+// SPVCHECK-NEXT: [[SUB_I:%.*]] = fsub float [[X]], [[Y]]
+// SPVCHECK-NEXT: [[ELT_ABS_I:%.*]] = tail call noundef float @llvm.fabs.f32(float [[SUB_I]])
+// SPVCHECK-NEXT: ret float [[ELT_ABS_I]]
+//
float test_distance_float(float X, float Y) { return distance(X, Y); }
// CHECK-LABEL: define noundef float @_Z20test_distance_float2Dv2_fS_(
@@ -63,6 +98,12 @@ float test_distance_float(float X, float Y) { return distance(X, Y); }
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[RDX_FADD_I]])
// CHECK-NEXT: ret float [[TMP0]]
//
+// SPVCHECK-LABEL: define spir_func noundef float @_Z20test_distance_float2Dv2_fS_(
+// SPVCHECK-SAME: <2 x float> noundef [[X:%.*]], <2 x float> noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// SPVCHECK-NEXT: [[ENTRY:.*:]]
+// SPVCHECK-NEXT: [[HLSL_DISTANCE_I:%.*]] = tail call noundef float @llvm.spv.distance.v2f32(<2 x float> [[X]], <2 x float> [[Y]])
+// SPVCHECK-NEXT: ret float [[HLSL_DISTANCE_I]]
+//
float test_distance_float2(float2 X, float2 Y) { return distance(X, Y); }
// CHECK-LABEL: define noundef float @_Z20test_distance_float3Dv3_fS_(
@@ -74,6 +115,12 @@ float test_distance_float2(float2 X, float2 Y) { return distance(X, Y); }
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[RDX_FADD_I]])
// CHECK-NEXT: ret float [[TMP0]]
//
+// SPVCHECK-LABEL: define spir_func noundef float @_Z20test_distance_float3Dv3_fS_(
+// SPVCHECK-SAME: <3 x float> noundef [[X:%.*]], <3 x float> noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// SPVCHECK-NEXT: [[ENTRY:.*:]]
+// SPVCHECK-NEXT: [[HLSL_DISTANCE_I:%.*]] = tail call noundef float @llvm.spv.distance.v3f32(<3 x float> [[X]], <3 x float> [[Y]])
+// SPVCHECK-NEXT: ret float [[HLSL_DISTANCE_I]]
+//
float test_distance_float3(float3 X, float3 Y) { return distance(X, Y); }
// CHECK-LABEL: define noundef float @_Z20test_distance_float4Dv4_fS_(
@@ -85,4 +132,10 @@ float test_distance_float3(float3 X, float3 Y) { return distance(X, Y); }
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[RDX_FADD_I]])
// CHECK-NEXT: ret float [[TMP0]]
//
+// SPVCHECK-LABEL: define spir_func noundef float @_Z20test_distance_float4Dv4_fS_(
+// SPVCHECK-SAME: <4 x float> noundef [[X:%.*]], <4 x float> noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// SPVCHECK-NEXT: [[ENTRY:.*:]]
+// SPVCHECK-NEXT: [[HLSL_DISTANCE_I:%.*]] = tail call noundef float @llvm.spv.distance.v4f32(<4 x float> [[X]], <4 x float> [[Y]])
+// SPVCHECK-NEXT: ret float [[HLSL_DISTANCE_I]]
+//
float test_distance_float4(float4 X, float4 Y) { return distance(X, Y); }
diff --git a/clang/test/CodeGenHLSL/builtins/length.hlsl b/clang/test/CodeGenHLSL/builtins/length.hlsl
index 2dad0e18a4a888..fe80c37df6434f 100644
--- a/clang/test/CodeGenHLSL/builtins/length.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/length.hlsl
@@ -2,6 +2,9 @@
// RUN: %clang_cc1 -finclude-default-header -triple \
// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
// RUN: -emit-llvm -O1 -o - | FileCheck %s
+// RUN: %clang_cc1 -finclude-default-header -triple \
+// RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \
+// RUN: -emit-llvm -O1 -o - | FileCheck %s --check-prefix=SPVCHECK
// CHECK-LABEL: define noundef half @_Z16test_length_halfDh(
@@ -10,6 +13,12 @@
// CHECK-NEXT: [[ELT_ABS_I:%.*]] = tail call noundef half @llvm.fabs.f16(half [[P0]])
// CHECK-NEXT: ret half [[ELT_ABS_I]]
//
+// SPVCHECK-LABEL: define spir_func noundef half @_Z16test_length_halfDh(
+// SPVCHECK-SAME: half noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// SPVCHECK-NEXT: [[ENTRY:.*:]]
+// SPVCHECK-NEXT: [[ELT_ABS_I:%.*]] = tail call noundef half @llvm.fabs.f16(half [[P0]])
+// SPVCHECK-NEXT: ret half [[ELT_ABS_I]]
+//
half test_length_half(half p0)
{
return length(p0);
@@ -23,6 +32,12 @@ half test_length_half(half p0)
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef half @llvm.sqrt.f16(half [[RDX_FADD_I]])
// CHECK-NEXT: ret half [[TMP0]]
//
+// SPVCHECK-LABEL: define spir_func noundef half @_Z17test_length_half2Dv2_Dh(
+// SPVCHECK-SAME: <2 x half> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// SPVCHECK-NEXT: [[ENTRY:.*:]]
+// SPVCHECK-NEXT: [[HLSL_LENGTH_I:%.*]] = tail call noundef half @llvm.spv.length.v2f16(<2 x half> [[P0]])
+// SPVCHECK-NEXT: ret half [[HLSL_LENGTH_I]]
+//
half test_length_half2(half2 p0)
{
return length(p0);
@@ -36,6 +51,12 @@ half test_length_half2(half2 p0)
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef half @llvm.sqrt.f16(half [[RDX_FADD_I]])
// CHECK-NEXT: ret half [[TMP0]]
//
+// SPVCHECK-LABEL: define spir_func noundef half @_Z17test_length_half3Dv3_Dh(
+// SPVCHECK-SAME: <3 x half> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// SPVCHECK-NEXT: [[ENTRY:.*:]]
+// SPVCHECK-NEXT: [[HLSL_LENGTH_I:%.*]] = tail call noundef half @llvm.spv.length.v3f16(<3 x half> [[P0]])
+// SPVCHECK-NEXT: ret half [[HLSL_LENGTH_I]]
+//
half test_length_half3(half3 p0)
{
return length(p0);
@@ -49,6 +70,12 @@ half test_length_half3(half3 p0)
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef half @llvm.sqrt.f16(half [[RDX_FADD_I]])
// CHECK-NEXT: ret half [[TMP0]]
//
+// SPVCHECK-LABEL: define spir_func noundef half @_Z17test_length_half4Dv4_Dh(
+// SPVCHECK-SAME: <4 x half> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// SPVCHECK-NEXT: [[ENTRY:.*:]]
+// SPVCHECK-NEXT: [[HLSL_LENGTH_I:%.*]] = tail call noundef half @llvm.spv.length.v4f16(<4 x half> [[P0]])
+// SPVCHECK-NEXT: ret half [[HLSL_LENGTH_I]]
+//
half test_length_half4(half4 p0)
{
return length(p0);
@@ -61,6 +88,12 @@ half test_length_half4(half4 p0)
// CHECK-NEXT: [[ELT_ABS_I:%.*]] = tail call noundef float @llvm.fabs.f32(float [[P0]])
// CHECK-NEXT: ret float [[ELT_ABS_I]]
//
+// SPVCHECK-LABEL: define spir_func noundef float @_Z17test_length_floatf(
+// SPVCHECK-SAME: float noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// SPVCHECK-NEXT: [[ENTRY:.*:]]
+// SPVCHECK-NEXT: [[ELT_ABS_I:%.*]] = tail call noundef float @llvm.fabs.f32(float [[P0]])
+// SPVCHECK-NEXT: ret float [[ELT_ABS_I]]
+//
float test_length_float(float p0)
{
return length(p0);
@@ -74,6 +107,12 @@ float test_length_float(float p0)
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[RDX_FADD_I]])
// CHECK-NEXT: ret float [[TMP0]]
//
+// SPVCHECK-LABEL: define spir_func noundef float @_Z18test_length_float2Dv2_f(
+// SPVCHECK-SAME: <2 x float> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// SPVCHECK-NEXT: [[ENTRY:.*:]]
+// SPVCHECK-NEXT: [[HLSL_LENGTH_I:%.*]] = tail call noundef float @llvm.spv.length.v2f32(<2 x float> [[P0]])
+// SPVCHECK-NEXT: ret float [[HLSL_LENGTH_I]]
+//
float test_length_float2(float2 p0)
{
return length(p0);
@@ -87,6 +126,12 @@ float test_length_float2(float2 p0)
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[RDX_FADD_I]])
// CHECK-NEXT: ret float [[TMP0]]
//
+// SPVCHECK-LABEL: define spir_func noundef float @_Z18test_length_float3Dv3_f(
+// SPVCHECK-SAME: <3 x float> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// SPVCHECK-NEXT: [[ENTRY:.*:]]
+// SPVCHECK-NEXT: [[HLSL_LENGTH_I:%.*]] = tail call noundef float @llvm.spv.length.v3f32(<3 x float> [[P0]])
+// SPVCHECK-NEXT: ret float [[HLSL_LENGTH_I]]
+//
float test_length_float3(float3 p0)
{
return length(p0);
@@ -98,9 +143,17 @@ float test_length_float3(float3 p0)
// CHECK-NEXT: [[MUL_I:%.*]] = fmul <4 x float> [[P0]], [[P0]]
// CHECK-NEXT: [[RDX_FADD_I:%.*]] = tail call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[MUL_I]])
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[RDX_FADD_I]])
-// CHECK-NEXT: ret float [[TMP0]]
+// CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP0]], [[TMP0]]
+// CHECK-NEXT: ret float [[ADD]]
+//
+// SPVCHECK-LABEL: define spir_func noundef float @_Z18test_length_float4Dv4_f(
+// SPVCHECK-SAME: <4 x float> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// SPVCHECK-NEXT: [[ENTRY:.*:]]
+// SPVCHECK-NEXT: [[HLSL_LENGTH_I:%.*]] = tail call noundef float @llvm.spv.length.v4f32(<4 x float> [[P0]])
+// SPVCHECK-NEXT: [[ADD:%.*]] = fadd float [[HLSL_LENGTH_I]], [[HLSL_LENGTH_I]]
+// SPVCHECK-NEXT: ret float [[ADD]]
//
float test_length_float4(float4 p0)
{
- return length(p0);
+ return length(p0) + length(p0);
}
diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
index e1157085832866..9e8977634a5812 100644
--- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td
+++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
@@ -63,6 +63,7 @@ let TargetPrefix = "spv" in {
def int_spv_any : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty], [IntrNoMem]>;
def int_spv_cross : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
def int_spv_degrees : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty], [IntrNoMem]>;
+ def int_spv_distance : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>], [IntrNoMem]>;
def int_spv_frac : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty], [IntrNoMem]>;
def int_spv_lerp : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>,LLVMMatchType<0>],
[IntrNoMem] >;
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index eb89f6a2f95da7..204dd9dfb85b26 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -94,7 +94,6 @@ static Value *expandVecReduceAdd(CallInst *Orig) {
Value *X = Orig->getOperand(0);
IRBuilder<> Builder(Orig);
Type *Ty = X->getType();
- Type *EltTy = Ty->getScalarType();
auto *XVec = dyn_cast<FixedVectorType>(Ty);
unsigned XVecSize = XVec->getNumElements();
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 9905691d412bf8..dc8ead88f59a42 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -2796,6 +2796,8 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
return selectAny(ResVReg, ResType, I);
case Intrinsic::spv_cross:
return selectExtInst(ResVReg, ResType, I, CL::cross, GL::Cross);
+ case Intrinsic::spv_distance:
+ return selectExtInst(ResVReg, ResType, I, CL::distance, GL::Distance);
case Intrinsic::spv_lerp:
return selectExtInst(ResVReg, ResType, I, CL::mix, GL::FMix);
case Intrinsic::spv_length:
diff --git a/llvm/test/CodeGen/DirectX/length_invalid_intrinsic_error.ll b/llvm/test/CodeGen/DirectX/length_invalid_intrinsic_error.ll
deleted file mode 100644
index f722de2f9029e7..00000000000000
--- a/llvm/test/CodeGen/DirectX/length_invalid_intrinsic_error.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: not opt -S -dxil-intrinsic-expansion -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s
-
-; DXIL operation length does not support 1-element vector types.
-; CHECK: LLVM ERROR: Invalid input type for length intrinsic
-
-define noundef float @test_length_float(<1 x float> noundef %p0) {
-entry:
- %hlsl.length = call float @llvm.dx.length.v1f32(<1 x float> %p0)
- ret float %hlsl.length
-}
diff --git a/llvm/test/CodeGen/DirectX/length_invalid_intrinsic_error_scalar.ll b/llvm/test/CodeGen/DirectX/length_invalid_intrinsic_error_scalar.ll
deleted file mode 100644
index ac3a0513eb6b27..00000000000000
--- a/llvm/test/CodeGen/DirectX/length_invalid_intrinsic_error_scalar.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: not opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s
-
-; DXIL operation length does not support scalar types
-; CHECK: error: invalid intrinsic signature
-
-define noundef float @test_length_float(float noundef %p0) {
-entry:
- %hlsl.length = call float @llvm.dx.length.f32(float %p0)
- ret float %hlsl.length
-}
diff --git a/llvm/test/CodeGen/DirectX/vector_reduce_add.ll b/llvm/test/CodeGen/DirectX/vector_reduce_add.ll
new file mode 100644
index 00000000000000..e7000027fc6e43
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/vector_reduce_add.ll
@@ -0,0 +1,293 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -dxil-intrinsic-expansion < %s | FileCheck %s
+
+; Make sure dxil operation function calls for llvm.vector.reduce.fadd and llvm.vector.reduce.add are generated.
+
+define noundef half @test_length_half2(<2 x half> noundef %p0) {
+; CHECK-LABEL: define noundef half @test_length_half2(
+; CHECK-SAME: <2 x half> noundef [[P0:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x half> [[P0]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x half> [[P0]], i64 1
+; CHECK-NEXT: [[TMP2:%.*]] = fadd half [[TMP0]], [[TMP1]]
+; CHECK-NEXT: ret half [[TMP2]]
+;
+entry:
+ %rdx.fadd = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> %p0)
+ ret half %rdx.fadd
+}
+
+define noundef half @test_length_half3(<3 x half> noundef %p0) {
+; CHECK-LABEL: define noundef half @test_length_half3(
+; CHECK-SAME: <3 x half> noundef [[P0:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x half> [[P0]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x half> [[P0]], i64 1
+; CHECK-NEXT: [[TMP2:%.*]] = fadd half [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <3 x half> [[P0]], i64 2
+; CHECK-NEXT: [[TMP4:%.*]] = fadd half [[TMP2]], [[TMP3]]
+; CHECK-NEXT: ret half [[TMP4]]
+;
+entry:
+ %rdx.fadd = call half @llvm.vector.reduce.fadd.v3f16(half 0xH0000, <3 x half> %p0)
+ ret half %rdx.fadd
+}
+
+define noundef half @test_length_half4(<4 x half> noundef %p0) {
+; CHECK-LABEL: define noundef half @test_length_half4(
+; CHECK-SAME: <4 x half> noundef [[P0:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x half> [[P0]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x half> [[P0]], i64 1
+; CHECK-NEXT: [[TMP2:%.*]] = fadd half [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x half> [[P0]], i64 2
+; CHECK-NEXT: [[TMP4:%.*]] = fadd half [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x half> [[P0]], i64 3
+; CHECK-NEXT: [[TMP6:%.*]] = fadd half [[TMP4]], [[TMP5]]
+; CHECK-NEXT: ret half [[TMP6]]
+;
+entry:
+ %rdx.fadd = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> %p0)
+ ret half %rdx.fadd
+}
+
+define noundef float @test_length_float2(<2 x float> noundef %p0) {
+; CHECK-LABEL: define noundef float @test_length_float2(
+; CHECK-SAME: <2 x float> noundef [[P0:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x float> [[P0]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[P0]], i64 1
+; CHECK-NEXT: [[TMP2:%.*]] = fadd float [[TMP0]], [[TMP1]]
+; CHECK-NEXT: ret float [[TMP2]]
+;
+entry:
+ %rdx.fadd = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> %p0)
+ ret float %rdx.fadd
+}
+
+define noundef float @test_length_float3(<3 x float> noundef %p0) {
+; CHECK-LABEL: define noundef float @test_length_float3(
+; CHECK-SAME: <3 x float> noundef [[P0:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x float> [[P0]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x float> [[P0]], i64 1
+; CHECK-NEXT: [[TMP2:%.*]] = fadd float [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <3 x float> [[P0]], i64 2
+; CHECK-NEXT: [[TMP4:%.*]] = fadd float [[TMP2]], [[TMP3]]
+; CHECK-NEXT: ret float [[TMP4]]
+;
+entry:
+ %rdx.fadd = call float @llvm.vector.reduce.fadd.v3f32(float 0.000000e+00, <3 x float> %p0)
+ ret float %rdx.fadd
+}
+
+define noundef float @test_length_float4(<4 x float> noundef %p0) {
+; CHECK-LABEL: define noundef float @test_length_float4(
+; CHECK-SAME: <4 x float> noundef [[P0:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[P0]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[P0]], i64 1
+; CHECK-NEXT: [[TMP2:%.*]] = fadd float [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[P0]], i64 2
+; CHECK-NEXT: [[TMP4:%.*]] = fadd float [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[P0]], i64 3
+; CHECK-NEXT: [[TMP6:%.*]] = fadd float [[TMP4]], [[TMP5]]
+; CHECK-NEXT: ret float [[TMP6]]
+;
+entry:
+ %rdx.fadd = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> %p0)
+ ret float %rdx.fadd
+}
+
+define noundef double @test_length_double2(<2 x double> noundef %p0) {
+; CHECK-LABEL: define noundef double @test_length_double2(
+; CHECK-SAME: <2 x double> noundef [[P0:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x double> [[P0]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[P0]], i64 1
+; CHECK-NEXT: [[TMP2:%.*]] = fadd double [[TMP0]], [[TMP1]]
+; CHECK-NEXT: ret double [[TMP2]]
+;
+entry:
+ %rdx.fadd = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> %p0)
+ ret double %rdx.fadd
+}
+
+define noundef double @test_length_double3(<3 x double> noundef %p0) {
+; CHECK-LABEL: define noundef double @test_length_double3(
+; CHECK-SAME: <3 x double> noundef [[P0:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x double> [[P0]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x double> [[P0]], i64 1
+; CHECK-NEXT: [[TMP2:%.*]] = fadd double [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <3 x double> [[P0]], i64 2
+; CHECK-NEXT: [[TMP4:%.*]] = fadd double [[TMP2]], [[TMP3]]
+; CHECK-NEXT: ret double [[TMP4]]
+;
+entry:
+ %rdx.fadd = call double @llvm.vector.reduce.fadd.v3f64(double 0.000000e+00, <3 x double> %p0)
+ ret double %rdx.fadd
+}
+
+define noundef double @test_length_double4(<4 x double> noundef %p0) {
+; CHECK-LABEL: define noundef double @test_length_double4(
+; CHECK-SAME: <4 x double> noundef [[P0:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x double> [[P0]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x double> [[P0]], i64 1
+; CHECK-NEXT: [[TMP2:%.*]] = fadd double [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x double> [[P0]], i64 2
+; CHECK-NEXT: [[TMP4:%.*]] = fadd double [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x double> [[P0]], i64 3
+; CHECK-NEXT: [[TMP6:%.*]] = fadd double [[TMP4]], [[TMP5]]
+; CHECK-NEXT: ret double [[TMP6]]
+;
+entry:
+ %rdx.fadd = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> %p0)
+ ret double %rdx.fadd
+}
+
+define noundef i16 @test_length_short2(<2 x i16> noundef %p0) {
+; CHECK-LABEL: define noundef i16 @test_length_short2(
+; CHECK-SAME: <2 x i16> noundef [[P0:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i16> [[P0]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i16> [[P0]], i64 1
+; CHECK-NEXT: [[TMP2:%.*]] = add i16 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: ret i16 [[TMP2]]
+;
+entry:
+ %rdx.add = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %p0)
+ ret i16 %rdx.add
+}
+
+define noundef i16 @test_length_short3(<3 x i16> noundef %p0) {
+; CHECK-LABEL: define noundef i16 @test_length_short3(
+; CHECK-SAME: <3 x i16> noundef [[P0:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x i16> [[P0]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x i16> [[P0]], i64 1
+; CHECK-NEXT: [[TMP2:%.*]] = add i16 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <3 x i16> [[P0]], i64 2
+; CHECK-NEXT: [[TMP4:%.*]] = add i16 [[TMP2]], [[TMP3]]
+; CHECK-NEXT: ret i16 [[TMP4]]
+;
+entry:
+ %rdx.fadd = call i16 @llvm.vector.reduce.add.v3i16(<3 x i16> %p0)
+ ret i16 %rdx.fadd
+}
+
+define noundef i16 @test_length_short4(<4 x i16> noundef %p0) {
+; CHECK-LABEL: define noundef i16 @test_length_short4(
+; CHECK-SAME: <4 x i16> noundef [[P0:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i16> [[P0]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i16> [[P0]], i64 1
+; CHECK-NEXT: [[TMP2:%.*]] = add i16 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i16> [[P0]], i64 2
+; CHECK-NEXT: [[TMP4:%.*]] = add i16 [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i16> [[P0]], i64 3
+; CHECK-NEXT: [[TMP6:%.*]] = add i16 [[TMP4]], [[TMP5]]
+; CHECK-NEXT: ret i16 [[TMP6]]
+;
+entry:
+ %rdx.fadd = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %p0)
+ ret i16 %rdx.fadd
+}
+
+define noundef i32 @test_length_int2(<2 x i32> noundef %p0) {
+; CHECK-LABEL: define noundef i32 @test_length_int2(
+; CHECK-SAME: <2 x i32> noundef [[P0:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i32> [[P0]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[P0]], i64 1
+; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+entry:
+ %rdx.add = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %p0)
+ ret i32 %rdx.add
+}
+
+define noundef i32 @test_length_int3(<3 x i32> noundef %p0) {
+; CHECK-LABEL: define noundef i32 @test_length_int3(
+; CHECK-SAME: <3 x i32> noundef [[P0:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x i32> [[P0]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x i32> [[P0]], i64 1
+; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <3 x i32> [[P0]], i64 2
+; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT: ret i32 [[TMP4]]
+;
+entry:
+ %rdx.fadd = call i32 @llvm.vector.reduce.add.v3i32(<3 x i32> %p0)
+ ret i32 %rdx.fadd
+}
+
+define noundef i32 @test_length_int4(<4 x i32> noundef %p0) {
+; CHECK-LABEL: define noundef i32 @test_length_int4(
+; CHECK-SAME: <4 x i32> noundef [[P0:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i32> [[P0]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[P0]], i64 1
+; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[P0]], i64 2
+; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[P0]], i64 3
+; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP4]], [[TMP5]]
+; CHECK-NEXT: ret i32 [[TMP6]]
+;
+entry:
+ %rdx.fadd = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %p0)
+ ret i32 %rdx.fadd
+}
+
+define noundef i64 @test_length_int64_2(<2 x i64> noundef %p0) {
+; CHECK-LABEL: define noundef i64 @test_length_int64_2(
+; CHECK-SAME: <2 x i64> noundef [[P0:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[P0]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[P0]], i64 1
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: ret i64 [[TMP2]]
+;
+entry:
+ %rdx.add = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %p0)
+ ret i64 %rdx.add
+}
+
+define noundef i64 @test_length_int64_3(<3 x i64> noundef %p0) {
+; CHECK-LABEL: define noundef i64 @test_length_int64_3(
+; CHECK-SAME: <3 x i64> noundef [[P0:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x i64> [[P0]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x i64> [[P0]], i64 1
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <3 x i64> [[P0]], i64 2
+; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP2]], [[TMP3]]
+; CHECK-NEXT: ret i64 [[TMP4]]
+;
+entry:
+ %rdx.fadd = call i64 @llvm.vector.reduce.add.v3i64(<3 x i64> %p0)
+ ret i64 %rdx.fadd
+}
+
+define noundef i64 @test_length_int64_4(<4 x i64> noundef %p0) {
+; CHECK-LABEL: define noundef i64 @test_length_int64_4(
+; CHECK-SAME: <4 x i64> noundef [[P0:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i64> [[P0]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i64> [[P0]], i64 1
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i64> [[P0]], i64 2
+; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[P0]], i64 3
+; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP4]], [[TMP5]]
+; CHECK-NEXT: ret i64 [[TMP6]]
+;
+entry:
+ %rdx.fadd = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %p0)
+ ret i64 %rdx.fadd
+}
+
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/cross.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/cross.ll
index 2e0eb8c429ac27..b1625c07111e44 100644
--- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/cross.ll
+++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/cross.ll
@@ -15,7 +15,7 @@ entry:
; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec3_float_16]]
; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#vec3_float_16]]
; CHECK: %[[#]] = OpExtInst %[[#vec3_float_16]] %[[#op_ext_glsl]] Cross %[[#arg0]] %[[#arg1]]
- %hlsl.cross = call <3 x half> @llvm.spv.cross.v4f16(<3 x half> %a, <3 x half> %b)
+ %hlsl.cross = call <3 x half> @llvm.spv.cross.v3f16(<3 x half> %a, <3 x half> %b)
ret <3 x half> %hlsl.cross
}
@@ -25,9 +25,9 @@ entry:
; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec3_float_32]]
; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#vec3_float_32]]
; CHECK: %[[#]] = OpExtInst %[[#vec3_float_32]] %[[#op_ext_glsl]] Cross %[[#arg0]] %[[#arg1]]
- %hlsl.cross = call <3 x float> @llvm.spv.cross.v4f32(<3 x float> %a, <3 x float> %b)
+ %hlsl.cross = call <3 x float> @llvm.spv.cross.v3f32(<3 x float> %a, <3 x float> %b)
ret <3 x float> %hlsl.cross
}
-declare <3 x half> @llvm.spv.cross.v4f16(<3 x half>, <3 x half>)
-declare <3 x float> @llvm.spv.cross.v4f32(<3 x float>, <3 x float>)
+declare <3 x half> @llvm.spv.cross.v3f16(<3 x half>, <3 x half>)
+declare <3 x float> @llvm.spv.cross.v3f32(<3 x float>, <3 x float>)
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/distance.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/distance.ll
new file mode 100644
index 00000000000000..848b0905675549
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/distance.ll
@@ -0,0 +1,33 @@
+; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; Make sure SPIRV operation function calls for distance are lowered correctly.
+
+; CHECK-DAG: %[[#op_ext_glsl:]] = OpExtInstImport "GLSL.std.450"
+; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16
+; CHECK-DAG: %[[#vec4_float_16:]] = OpTypeVector %[[#float_16]] 4
+; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32
+; CHECK-DAG: %[[#vec4_float_32:]] = OpTypeVector %[[#float_32]] 4
+
+define noundef half @distance_half4(<4 x half> noundef %a, <4 x half> noundef %b) {
+entry:
+ ; CHECK: %[[#]] = OpFunction %[[#float_16]] None %[[#]]
+ ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_16]]
+ ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#vec4_float_16]]
+ ; CHECK: %[[#]] = OpExtInst %[[#float_16]] %[[#op_ext_glsl]] Distance %[[#arg0]] %[[#arg1]]
+ %hlsl.distance = call half @llvm.spv.distance.f16(<4 x half> %a, <4 x half> %b)
+ ret half %hlsl.distance
+}
+
+define noundef float @distance_float4(<4 x float> noundef %a, <4 x float> noundef %b) {
+entry:
+ ; CHECK: %[[#]] = OpFunction %[[#float_32]] None %[[#]]
+ ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_32]]
+ ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#vec4_float_32]]
+ ; CHECK: %[[#]] = OpExtInst %[[#float_32]] %[[#op_ext_glsl]] Distance %[[#arg0]] %[[#arg1]]
+ %hlsl.distance = call float @llvm.spv.distance.f32(<4 x float> %a, <4 x float> %b)
+ ret float %hlsl.distance
+}
+
+declare half @llvm.spv.distance.f16(<4 x half>, <4 x half>)
+declare float @llvm.spv.distance.f32(<4 x float>, <4 x float>)
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/length.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/length.ll
index b4a9d8e0664b7e..1ac862b79a3fac 100644
--- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/length.ll
+++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/length.ll
@@ -11,19 +11,21 @@
define noundef half @length_half4(<4 x half> noundef %a) {
entry:
- ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
+ ; CHECK: %[[#]] = OpFunction %[[#float_16]] None %[[#]]
+ ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_16]]
; CHECK: %[[#]] = OpExtInst %[[#float_16]] %[[#op_ext_glsl]] Length %[[#arg0]]
- %hlsl.length = call half @llvm.spv.length.v4f16(<4 x half> %a)
+ %hlsl.length = call half @llvm.spv.length.f16(<4 x half> %a)
ret half %hlsl.length
}
define noundef float @length_float4(<4 x float> noundef %a) {
entry:
- ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
+ ; CHECK: %[[#]] = OpFunction %[[#float_32]] None %[[#]]
+ ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_32]]
; CHECK: %[[#]] = OpExtInst %[[#float_32]] %[[#op_ext_glsl]] Length %[[#arg0]]
- %hlsl.length = call float @llvm.spv.length.v4f32(<4 x float> %a)
+ %hlsl.length = call float @llvm.spv.length.f32(<4 x float> %a)
ret float %hlsl.length
}
-declare half @llvm.spv.length.v4f16(<4 x half>)
-declare float @llvm.spv.length.v4f32(<4 x float>)
+declare half @llvm.spv.length.f16(<4 x half>)
+declare float @llvm.spv.length.f32(<4 x float>)
>From 7aff4b81674e6715458af253be2c0a5b478d210a Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Mon, 25 Nov 2024 13:53:50 -0500
Subject: [PATCH 6/6] move length and distance to a target builtin
---
clang/include/clang/Basic/Builtins.td | 12 -------
clang/include/clang/Basic/BuiltinsSPIRV.td | 21 ++++++++++++
clang/include/clang/Basic/CMakeLists.txt | 4 +++
clang/include/clang/Basic/TargetBuiltins.h | 10 ++++++
clang/lib/Basic/Targets/SPIR.cpp | 13 ++++++++
clang/lib/Basic/Targets/SPIR.h | 2 +-
clang/lib/CodeGen/CGBuiltin.cpp | 4 +--
clang/lib/Headers/hlsl/hlsl_detail.h | 37 +++-------------------
clang/lib/Sema/SemaHLSL.cpp | 5 +--
9 files changed, 59 insertions(+), 49 deletions(-)
create mode 100644 clang/include/clang/Basic/BuiltinsSPIRV.td
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index bbe7adc05e1baa..82acd520c227e9 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -4810,12 +4810,6 @@ def HLSLDegrees : LangBuiltin<"HLSL_LANG"> {
let Prototype = "void(...)";
}
-def HLSLDistance : LangBuiltin<"HLSL_LANG"> {
- let Spellings = ["__builtin_hlsl_distance"];
- let Attributes = [NoThrow, Const];
- let Prototype = "void(...)";
-}
-
def HLSLDotProduct : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_dot"];
let Attributes = [NoThrow, Const];
@@ -4852,12 +4846,6 @@ def HLSLIsinf : LangBuiltin<"HLSL_LANG"> {
let Prototype = "void(...)";
}
-def HLSLLength : LangBuiltin<"HLSL_LANG"> {
- let Spellings = ["__builtin_hlsl_length"];
- let Attributes = [NoThrow, Const];
- let Prototype = "void(...)";
-}
-
def HLSLLerp : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_lerp"];
let Attributes = [NoThrow, Const];
diff --git a/clang/include/clang/Basic/BuiltinsSPIRV.td b/clang/include/clang/Basic/BuiltinsSPIRV.td
new file mode 100644
index 00000000000000..3afe786a4f7546
--- /dev/null
+++ b/clang/include/clang/Basic/BuiltinsSPIRV.td
@@ -0,0 +1,21 @@
+//===--- BuiltinsSPIRV.td - SPIRV Builtin function database ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+include "clang/Basic/BuiltinsBase.td"
+
+def HLSLDistance : LangBuiltin<"HLSL_LANG"> {
+ let Spellings = ["__builtin_hlsl_distance"];
+ let Attributes = [NoThrow, Const];
+ let Prototype = "void(...)";
+}
+
+def HLSLLength : LangBuiltin<"HLSL_LANG"> {
+ let Spellings = ["__builtin_hlsl_length"];
+ let Attributes = [NoThrow, Const];
+ let Prototype = "void(...)";
+}
\ No newline at end of file
diff --git a/clang/include/clang/Basic/CMakeLists.txt b/clang/include/clang/Basic/CMakeLists.txt
index 76ac3367e23a66..a53f537fc171a9 100644
--- a/clang/include/clang/Basic/CMakeLists.txt
+++ b/clang/include/clang/Basic/CMakeLists.txt
@@ -60,6 +60,10 @@ clang_tablegen(BuiltinsRISCV.inc -gen-clang-builtins
SOURCE BuiltinsRISCV.td
TARGET ClangBuiltinsRISCV)
+clang_tablegen(BuiltinsSPIRV.inc -gen-clang-builtins
+ SOURCE BuiltinsSPIRV.td
+ TARGET ClangBuiltinsSPIRV)
+
clang_tablegen(BuiltinsX86.inc -gen-clang-builtins
SOURCE BuiltinsX86.td
TARGET ClangBuiltinsX86)
diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h
index 89ebf5758a5b55..ca0fbfecd43ff2 100644
--- a/clang/include/clang/Basic/TargetBuiltins.h
+++ b/clang/include/clang/Basic/TargetBuiltins.h
@@ -119,6 +119,16 @@ namespace clang {
};
}
+ /// SPIRV builtins
+ namespace SPIRV {
+ enum {
+ LastTIBuiltin = clang::Builtin::FirstTSBuiltin - 1,
+ #define BUILTIN(ID, TYPE, ATTRS) BI##ID,
+ #include "clang/Basic/BuiltinsSPIRV.inc"
+ LastTSBuiltin
+ };
+ } // namespace SPIRV
+
/// X86 builtins
namespace X86 {
enum {
diff --git a/clang/lib/Basic/Targets/SPIR.cpp b/clang/lib/Basic/Targets/SPIR.cpp
index 040303983594f8..a87c923b2904c1 100644
--- a/clang/lib/Basic/Targets/SPIR.cpp
+++ b/clang/lib/Basic/Targets/SPIR.cpp
@@ -14,10 +14,23 @@
#include "AMDGPU.h"
#include "Targets.h"
#include "llvm/TargetParser/TargetParser.h"
+#include "clang/Basic/MacroBuilder.h"
+#include "clang/Basic/TargetBuiltins.h"
using namespace clang;
using namespace clang::targets;
+static constexpr Builtin::Info BuiltinInfo[] = {
+#define BUILTIN(ID, TYPE, ATTRS) \
+ {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
+#include "clang/Basic/BuiltinsSPIRV.inc"
+};
+
+ArrayRef<Builtin::Info> SPIRVTargetInfo::getTargetBuiltins() const {
+ return llvm::ArrayRef(BuiltinInfo,
+ clang::SPIRV::LastTSBuiltin - Builtin::FirstTSBuiltin);
+}
+
void SPIRTargetInfo::getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
DefineStd(Builder, "SPIR", Opts);
diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h
index 85e4bd920d8535..5a328b9ceeb1d1 100644
--- a/clang/lib/Basic/Targets/SPIR.h
+++ b/clang/lib/Basic/Targets/SPIR.h
@@ -313,7 +313,7 @@ class LLVM_LIBRARY_VISIBILITY SPIRVTargetInfo : public BaseSPIRVTargetInfo {
resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-"
"v256:256-v512:512-v1024:1024-n8:16:32:64-G1");
}
-
+ ArrayRef<Builtin::Info> getTargetBuiltins() const override;
void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const override;
};
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 134b80822a206c..8c263a94b47642 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -19097,7 +19097,7 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
/*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getCrossIntrinsic(),
ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.cross");
}
- case Builtin::BI__builtin_hlsl_distance: {
+ case SPIRV::BI__builtin_hlsl_distance: {
Value *X = EmitScalarExpr(E->getArg(0));
Value *Y = EmitScalarExpr(E->getArg(1));
assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
@@ -19188,7 +19188,7 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
/*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getLerpIntrinsic(),
ArrayRef<Value *>{X, Y, S}, nullptr, "hlsl.lerp");
}
- case Builtin::BI__builtin_hlsl_length: {
+ case SPIRV::BI__builtin_hlsl_length: {
Value *X = EmitScalarExpr(E->getArg(0));
assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
"length operand must have a float representation");
diff --git a/clang/lib/Headers/hlsl/hlsl_detail.h b/clang/lib/Headers/hlsl/hlsl_detail.h
index e2a5f85b04bb47..4c71a8da3d6a6c 100644
--- a/clang/lib/Headers/hlsl/hlsl_detail.h
+++ b/clang/lib/Headers/hlsl/hlsl_detail.h
@@ -9,34 +9,6 @@
#ifndef _HLSL_HLSL_DETAILS_H_
#define _HLSL_HLSL_DETAILS_H_
-#if __is_target_arch(dxil)
-#define IS_ARCH_DXIL 1
-#else
-#define IS_ARCH_DXIL 0
-#endif
-
-#if __is_target_arch(spirv)
-#define IS_ARCH_SPIRV 1
-#else
-#define IS_ARCH_SPIRV 0
-#endif
-
-#define ARCH_CONDITION(arch) \
- if (IS_ARCH_##arch) \
- return true;
-
-// Note: arch is used to bypass
-// the generic implementation
-#define EXPAND_ARCH_CONDITIONS(arch) \
- ARCH_CONDITION(arch) \
- /* Add more architectures as needed */
-
-#define DEFINE_TARGET_LOWERING(function_name, ...) \
- constexpr bool Has##function_name##Lowering() { \
- EXPAND_ARCH_CONDITIONS(__VA_ARGS__) \
- return false; /* Default case if no match */ \
- }
-
namespace hlsl {
namespace __detail {
@@ -69,7 +41,6 @@ constexpr enable_if_t<sizeof(U) == sizeof(T), U> bit_cast(T F) {
return __builtin_bit_cast(U, F);
}
-DEFINE_TARGET_LOWERING(Length, SPIRV)
template <typename T>
constexpr enable_if_t<is_same<float, T>::value || is_same<half, T>::value, T>
length_impl(T X) {
@@ -79,14 +50,15 @@ length_impl(T X) {
template <typename T, int N>
constexpr enable_if_t<is_same<float, T>::value || is_same<half, T>::value, T>
length_vec_impl(vector<T, N> X) {
- if (HasLengthLowering())
+ #if (__has_builtin(__builtin_hlsl_length))
return __builtin_hlsl_length(X);
+ #endif
vector<T, N> XSquared = X * X;
T XSquaredSum = __builtin_hlsl_reduce_add(XSquared);
return __builtin_elementwise_sqrt(XSquaredSum);
}
-DEFINE_TARGET_LOWERING(Distance, SPIRV)
+
template <typename T>
constexpr enable_if_t<is_same<float, T>::value || is_same<half, T>::value, T>
distance_impl(T X, T Y) {
@@ -96,8 +68,9 @@ distance_impl(T X, T Y) {
template <typename T, int N>
constexpr enable_if_t<is_same<float, T>::value || is_same<half, T>::value, T>
distance_vec_impl(vector<T, N> X, vector<T, N> Y) {
- if (HasDistanceLowering())
+ #if (__has_builtin(__builtin_hlsl_distance))
return __builtin_hlsl_distance(X, Y);
+ #endif
return length_vec_impl(X - Y);
}
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index d108e6e130b01d..0fffebaeb6a0bb 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -23,6 +23,7 @@
#include "clang/Basic/DiagnosticSema.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/TargetBuiltins.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Sema/Initialization.h"
#include "clang/Sema/ParsedAttr.h"
@@ -1943,7 +1944,7 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
TheCall->setType(ArgTyA);
break;
}
- case Builtin::BI__builtin_hlsl_distance: {
+ case SPIRV::BI__builtin_hlsl_distance: {
if (CheckFloatOrHalfRepresentations(&SemaRef, TheCall))
return true;
if (SemaRef.checkArgCount(TheCall, 2))
@@ -2044,7 +2045,7 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
return true;
break;
}
- case Builtin::BI__builtin_hlsl_length: {
+ case SPIRV::BI__builtin_hlsl_length: {
if (CheckFloatOrHalfRepresentations(&SemaRef, TheCall))
return true;
if (SemaRef.checkArgCount(TheCall, 1))
More information about the llvm-commits
mailing list