[clang] [llvm] [HLSL] Implement a header only distance intrinsic (PR #117240)

Mon Nov 25 06:44:50 PST 2024

https://github.com/farzonl updated https://github.com/llvm/llvm-project/pull/117240

>From 49182ef1e88b8cb74aa9136dd88f1139c72a94b4 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Thu, 21 Nov 2024 14:46:31 -0500
Subject: [PATCH 1/5] [HLSL] Implement a header only distance intrinsic

---
 clang/lib/Headers/hlsl/hlsl_detail.h          |  8 ++
 clang/lib/Headers/hlsl/hlsl_intrinsics.h      | 39 ++++++++++
 clang/test/CodeGenHLSL/builtins/distance.hlsl | 76 +++++++++++++++++++
 .../SemaHLSL/BuiltIns/distance-errors.hlsl    | 33 ++++++++
 4 files changed, 156 insertions(+)
 create mode 100644 clang/test/CodeGenHLSL/builtins/distance.hlsl
 create mode 100644 clang/test/SemaHLSL/BuiltIns/distance-errors.hlsl

diff --git a/clang/lib/Headers/hlsl/hlsl_detail.h b/clang/lib/Headers/hlsl/hlsl_detail.h
index 8d5fd941331531..99cb3fa4a6d2e8 100644
--- a/clang/lib/Headers/hlsl/hlsl_detail.h
+++ b/clang/lib/Headers/hlsl/hlsl_detail.h
@@ -13,6 +13,14 @@ namespace hlsl {
 
 namespace __detail {
 
+template <typename T, typename U> struct is_same {
+  static const bool value = false;
+};
+
+template <typename T> struct is_same<T, T> {
+  static const bool value = true;
+};
+
 template <bool B, typename T> struct enable_if {};
 
 template <typename T> struct enable_if<true, T> {
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index a3e0b5c65a6f52..bd66c73db27ee9 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -871,6 +871,45 @@ float3 degrees(float3);
 _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_degrees)
 float4 degrees(float4);
 
+//===----------------------------------------------------------------------===//
+// distance builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn K distance(T X, T Y)
+/// \brief Returns a distance scalar between two vectors of \a X and \a Y.
+/// \param X The X input value.
+/// \param Y The Y input value.
+
+template <typename T>
+constexpr __detail::enable_if_t<
+    __detail::is_same<float, T>::value || __detail::is_same<half, T>::value, T>
+distance_impl(T X, T Y) {
+  return __builtin_elementwise_abs(X - Y);
+}
+
+template <typename T, int N>
+constexpr __detail::enable_if_t<
+    __detail::is_same<float, T>::value || __detail::is_same<half, T>::value, T>
+distance_vec_impl(vector<T, N> X, vector<T, N> Y) {
+  return __builtin_hlsl_length(X - Y);
+}
+
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+const inline half distance(half X, half Y) { return distance_impl(X, Y); }
+
+template <int N>
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+const inline half distance(vector<half, N> X, vector<half, N> Y) {
+  return distance_vec_impl(X, Y);
+}
+
+const inline float distance(float X, float Y) { return distance_impl(X, Y); }
+
+template <int N>
+const inline float distance(vector<float, N> X, vector<float, N> Y) {
+  return distance_vec_impl(X, Y);
+}
+
 //===----------------------------------------------------------------------===//
 // dot product builtins
 //===----------------------------------------------------------------------===//
diff --git a/clang/test/CodeGenHLSL/builtins/distance.hlsl b/clang/test/CodeGenHLSL/builtins/distance.hlsl
new file mode 100644
index 00000000000000..2ff2947ac49095
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/distance.hlsl
@@ -0,0 +1,76 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN:   -emit-llvm -O1 -o - | FileCheck %s
+
+// CHECK-LABEL: define noundef half @_Z18test_distance_halfDhDh(
+// CHECK-SAME: half noundef [[X:%.*]], half noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[SUB_I:%.*]] = fsub half [[X]], [[Y]]
+// CHECK-NEXT:    [[ELT_ABS_I:%.*]] = tail call noundef half @llvm.fabs.f16(half [[SUB_I]])
+// CHECK-NEXT:    ret half [[ELT_ABS_I]]
+//
+half test_distance_half(half X, half Y) { return distance(X, Y); }
+
+// CHECK-LABEL: define noundef half @_Z19test_distance_half2Dv2_DhS_(
+// CHECK-SAME: <2 x half> noundef [[X:%.*]], <2 x half> noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[SUB_I:%.*]] = fsub <2 x half> [[X]], [[Y]]
+// CHECK-NEXT:    [[HLSL_LENGTH_I:%.*]] = tail call noundef half @llvm.dx.length.v2f16(<2 x half> [[SUB_I]])
+// CHECK-NEXT:    ret half [[HLSL_LENGTH_I]]
+//
+half test_distance_half2(half2 X, half2 Y) { return distance(X, Y); }
+
+// CHECK-LABEL: define noundef half @_Z19test_distance_half3Dv3_DhS_(
+// CHECK-SAME: <3 x half> noundef [[X:%.*]], <3 x half> noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[SUB_I:%.*]] = fsub <3 x half> [[X]], [[Y]]
+// CHECK-NEXT:    [[HLSL_LENGTH_I:%.*]] = tail call noundef half @llvm.dx.length.v3f16(<3 x half> [[SUB_I]])
+// CHECK-NEXT:    ret half [[HLSL_LENGTH_I]]
+//
+half test_distance_half3(half3 X, half3 Y) { return distance(X, Y); }
+
+// CHECK-LABEL: define noundef half @_Z19test_distance_half4Dv4_DhS_(
+// CHECK-SAME: <4 x half> noundef [[X:%.*]], <4 x half> noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[SUB_I:%.*]] = fsub <4 x half> [[X]], [[Y]]
+// CHECK-NEXT:    [[HLSL_LENGTH_I:%.*]] = tail call noundef half @llvm.dx.length.v4f16(<4 x half> [[SUB_I]])
+// CHECK-NEXT:    ret half [[HLSL_LENGTH_I]]
+//
+half test_distance_half4(half4 X, half4 Y) { return distance(X, Y); }
+
+// CHECK-LABEL: define noundef float @_Z19test_distance_floatff(
+// CHECK-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[SUB_I:%.*]] = fsub float [[X]], [[Y]]
+// CHECK-NEXT:    [[ELT_ABS_I:%.*]] = tail call noundef float @llvm.fabs.f32(float [[SUB_I]])
+// CHECK-NEXT:    ret float [[ELT_ABS_I]]
+//
+float test_distance_float(float X, float Y) { return distance(X, Y); }
+
+// CHECK-LABEL: define noundef float @_Z20test_distance_float2Dv2_fS_(
+// CHECK-SAME: <2 x float> noundef [[X:%.*]], <2 x float> noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[SUB_I:%.*]] = fsub <2 x float> [[X]], [[Y]]
+// CHECK-NEXT:    [[HLSL_LENGTH_I:%.*]] = tail call noundef float @llvm.dx.length.v2f32(<2 x float> [[SUB_I]])
+// CHECK-NEXT:    ret float [[HLSL_LENGTH_I]]
+//
+float test_distance_float2(float2 X, float2 Y) { return distance(X, Y); }
+
+// CHECK-LABEL: define noundef float @_Z20test_distance_float3Dv3_fS_(
+// CHECK-SAME: <3 x float> noundef [[X:%.*]], <3 x float> noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[SUB_I:%.*]] = fsub <3 x float> [[X]], [[Y]]
+// CHECK-NEXT:    [[HLSL_LENGTH_I:%.*]] = tail call noundef float @llvm.dx.length.v3f32(<3 x float> [[SUB_I]])
+// CHECK-NEXT:    ret float [[HLSL_LENGTH_I]]
+//
+float test_distance_float3(float3 X, float3 Y) { return distance(X, Y); }
+
+// CHECK-LABEL: define noundef float @_Z20test_distance_float4Dv4_fS_(
+// CHECK-SAME: <4 x float> noundef [[X:%.*]], <4 x float> noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[SUB_I:%.*]] = fsub <4 x float> [[X]], [[Y]]
+// CHECK-NEXT:    [[HLSL_LENGTH_I:%.*]] = tail call noundef float @llvm.dx.length.v4f32(<4 x float> [[SUB_I]])
+// CHECK-NEXT:    ret float [[HLSL_LENGTH_I]]
+//
+float test_distance_float4(float4 X, float4 Y) { return distance(X, Y); }
diff --git a/clang/test/SemaHLSL/BuiltIns/distance-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/distance-errors.hlsl
new file mode 100644
index 00000000000000..e996bf5d2cb7c5
--- /dev/null
+++ b/clang/test/SemaHLSL/BuiltIns/distance-errors.hlsl
@@ -0,0 +1,33 @@
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify
+
+float test_no_second_arg(float2 p0) {
+  return distance(p0);
+  // expected-error at -1 {{no matching function for call to 'distance'}}
+  // expected-note at hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires 2 arguments, but 1 was provided}}
+  // expected-note at hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires 2 arguments, but 1 was provided}}
+  // expected-note at hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires 2 arguments, but 1 was provided}}
+  // expected-note at hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires 2 arguments, but 1 was provided}}
+}
+
+float test_too_many_arg(float2 p0) {
+  return distance(p0, p0, p0);
+  // expected-error at -1 {{no matching function for call to 'distance'}}
+  // expected-note at hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires 2 arguments, but 3 were provided}}
+  // expected-note at hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires 2 arguments, but 3 were provided}}
+  // expected-note at hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires 2 arguments, but 3 were provided}}
+  // expected-note at hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires 2 arguments, but 3 were provided}}
+}
+
+float test_double_inputs(double p0, double p1) {
+  return distance(p0, p1);
+  // expected-error at -1  {{call to 'distance' is ambiguous}}
+  // expected-note at hlsl/hlsl_intrinsics.h:* {{candidate function}}
+  // expected-note at hlsl/hlsl_intrinsics.h:* {{candidate function}}
+}
+
+float test_int_inputs(int p0, int p1) {
+  return distance(p0, p1);
+  // expected-error at -1  {{call to 'distance' is ambiguous}}
+  // expected-note at hlsl/hlsl_intrinsics.h:* {{candidate function}}
+  // expected-note at hlsl/hlsl_intrinsics.h:* {{candidate function}}
+}

>From 29751cbc663294a101c460a61234a17bea8743fa Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Thu, 21 Nov 2024 18:36:30 -0500
Subject: [PATCH 2/5] move distance_impl to details namespace

---
 clang/lib/Headers/hlsl/hlsl_detail.h     | 12 +++++++++++
 clang/lib/Headers/hlsl/hlsl_intrinsics.h | 26 ++++++++----------------
 2 files changed, 20 insertions(+), 18 deletions(-)

diff --git a/clang/lib/Headers/hlsl/hlsl_detail.h b/clang/lib/Headers/hlsl/hlsl_detail.h
index 99cb3fa4a6d2e8..f54f0037cbd066 100644
--- a/clang/lib/Headers/hlsl/hlsl_detail.h
+++ b/clang/lib/Headers/hlsl/hlsl_detail.h
@@ -41,6 +41,18 @@ constexpr enable_if_t<sizeof(U) == sizeof(T), U> bit_cast(T F) {
   return __builtin_bit_cast(U, F);
 }
 
+template <typename T>
+constexpr enable_if_t<is_same<float, T>::value || is_same<half, T>::value, T>
+distance_impl(T X, T Y) {
+  return __builtin_elementwise_abs(X - Y);
+}
+
+template <typename T, int N>
+constexpr enable_if_t<is_same<float, T>::value || is_same<half, T>::value, T>
+distance_vec_impl(vector<T, N> X, vector<T, N> Y) {
+  return __builtin_hlsl_length(X - Y);
+}
+
 } // namespace __detail
 } // namespace hlsl
 #endif //_HLSL_HLSL_DETAILS_H_
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index bd66c73db27ee9..a7301aa8d8ce72 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -880,34 +880,24 @@ float4 degrees(float4);
 /// \param X The X input value.
 /// \param Y The Y input value.
 
-template <typename T>
-constexpr __detail::enable_if_t<
-    __detail::is_same<float, T>::value || __detail::is_same<half, T>::value, T>
-distance_impl(T X, T Y) {
-  return __builtin_elementwise_abs(X - Y);
-}
-
-template <typename T, int N>
-constexpr __detail::enable_if_t<
-    __detail::is_same<float, T>::value || __detail::is_same<half, T>::value, T>
-distance_vec_impl(vector<T, N> X, vector<T, N> Y) {
-  return __builtin_hlsl_length(X - Y);
-}
-
 _HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
-const inline half distance(half X, half Y) { return distance_impl(X, Y); }
+const inline half distance(half X, half Y) {
+  return __detail::distance_impl(X, Y);
+}
 
 template <int N>
 _HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
 const inline half distance(vector<half, N> X, vector<half, N> Y) {
-  return distance_vec_impl(X, Y);
+  return __detail::distance_vec_impl(X, Y);
 }
 
-const inline float distance(float X, float Y) { return distance_impl(X, Y); }
+const inline float distance(float X, float Y) {
+  return __detail::distance_impl(X, Y);
+}
 
 template <int N>
 const inline float distance(vector<float, N> X, vector<float, N> Y) {
-  return distance_vec_impl(X, Y);
+  return __detail::distance_vec_impl(X, Y);
 }
 
 //===----------------------------------------------------------------------===//

>From 1d439edbd45541672f7ebeb92d1d2fa107010022 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Thu, 21 Nov 2024 21:01:15 -0500
Subject: [PATCH 3/5] [HLSL] Move length to the header

---
 clang/include/clang/Basic/Builtins.td         |  12 +-
 clang/lib/CodeGen/CGBuiltin.cpp               |  29 +--
 clang/lib/Headers/hlsl/hlsl_detail.h          |  22 ++-
 clang/lib/Headers/hlsl/hlsl_intrinsics.h      |  33 ++--
 clang/lib/Sema/SemaHLSL.cpp                   |   5 +-
 clang/test/CodeGenHLSL/builtins/distance.hlsl |  36 ++--
 clang/test/CodeGenHLSL/builtins/length.hlsl   | 184 +++++++++++-------
 .../test/SemaHLSL/BuiltIns/length-errors.hlsl |  51 +++--
 llvm/include/llvm/IR/IntrinsicsDirectX.td     |   1 -
 .../Target/DirectX/DXILIntrinsicExpansion.cpp |  69 ++++---
 llvm/test/CodeGen/DirectX/length.ll           | 178 +++++++++--------
 llvm/test/CodeGen/DirectX/length_error.ll     |  10 -
 12 files changed, 367 insertions(+), 263 deletions(-)
 delete mode 100644 llvm/test/CodeGen/DirectX/length_error.ll

diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 83c90b3d6e681b..82acd520c227e9 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -4756,6 +4756,12 @@ def HLSLAsDouble : LangBuiltin<"HLSL_LANG"> {
   let Prototype = "void(...)";
 }
 
+def HLSLReduceAdd : LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_reduce_add"];
+  let Attributes = [NoThrow, Const, Constexpr];
+  let Prototype = "void(...)";
+}
+
 def HLSLWaveActiveAnyTrue : LangBuiltin<"HLSL_LANG"> {
   let Spellings = ["__builtin_hlsl_wave_active_any_true"];
   let Attributes = [NoThrow, Const];
@@ -4840,12 +4846,6 @@ def HLSLIsinf : LangBuiltin<"HLSL_LANG"> {
   let Prototype = "void(...)";
 }
 
-def HLSLLength : LangBuiltin<"HLSL_LANG"> {
-  let Spellings = ["__builtin_hlsl_length"];
-  let Attributes = [NoThrow, Const];
-  let Prototype = "void(...)";
-}
-
 def HLSLLerp : LangBuiltin<"HLSL_LANG"> {
   let Spellings = ["__builtin_hlsl_lerp"];
   let Attributes = [NoThrow, Const];
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 8f754953d28998..b81d1ba8f6ecce 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -19174,20 +19174,6 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
         /*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getLerpIntrinsic(),
         ArrayRef<Value *>{X, Y, S}, nullptr, "hlsl.lerp");
   }
-  case Builtin::BI__builtin_hlsl_length: {
-    Value *X = EmitScalarExpr(E->getArg(0));
-
-    assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
-           "length operand must have a float representation");
-    // if the operand is a scalar, we can use the fabs llvm intrinsic directly
-    if (!E->getArg(0)->getType()->isVectorType())
-      return EmitFAbs(*this, X);
-
-    return Builder.CreateIntrinsic(
-        /*ReturnType=*/X->getType()->getScalarType(),
-        CGM.getHLSLRuntime().getLengthIntrinsic(), ArrayRef<Value *>{X},
-        nullptr, "hlsl.length");
-  }
   case Builtin::BI__builtin_hlsl_normalize: {
     Value *X = EmitScalarExpr(E->getArg(0));
 
@@ -19289,6 +19275,21 @@ case Builtin::BI__builtin_hlsl_elementwise_isinf: {
         CGM.getHLSLRuntime().getSaturateIntrinsic(), ArrayRef<Value *>{Op0},
         nullptr, "hlsl.saturate");
   }
+  case Builtin::BI__builtin_hlsl_reduce_add: {
+    Value *X = EmitScalarExpr(E->getArg(0));
+    auto EltTy = X->getType()->getScalarType();
+    if (E->getArg(0)->getType()->hasFloatingRepresentation()) {
+      Value *Seed = ConstantFP::get(EltTy, 0);
+      return Builder.CreateIntrinsic(
+          /*ReturnType=*/EltTy, llvm::Intrinsic::vector_reduce_fadd,
+          ArrayRef<Value *>{Seed, X}, nullptr, "rdx.fadd");
+    } else {
+      assert(E->getArg(0)->getType()->hasIntegerRepresentation());
+      return Builder.CreateIntrinsic(
+          /*ReturnType=*/EltTy, llvm::Intrinsic::vector_reduce_add,
+          ArrayRef<Value *>{X}, nullptr, "rdx.add");
+    }
+  }
   case Builtin::BI__builtin_hlsl_select: {
     Value *OpCond = EmitScalarExpr(E->getArg(0));
     RValue RValTrue = EmitAnyExpr(E->getArg(1));
diff --git a/clang/lib/Headers/hlsl/hlsl_detail.h b/clang/lib/Headers/hlsl/hlsl_detail.h
index f54f0037cbd066..85ed08a1af06e9 100644
--- a/clang/lib/Headers/hlsl/hlsl_detail.h
+++ b/clang/lib/Headers/hlsl/hlsl_detail.h
@@ -41,16 +41,34 @@ constexpr enable_if_t<sizeof(U) == sizeof(T), U> bit_cast(T F) {
   return __builtin_bit_cast(U, F);
 }
 
+template <typename T>
+constexpr enable_if_t<is_same<float, T>::value || is_same<half, T>::value, T>
+length_impl(T X) {
+  return __builtin_elementwise_abs(X);
+}
+
+template <typename T, int N>
+constexpr enable_if_t<is_same<float, T>::value || is_same<half, T>::value, T>
+length_vec_impl(vector<T, N> X) {
+  vector<T, N> XSquared = X * X;
+  T XSquaredSum = __builtin_hlsl_reduce_add(XSquared);
+  /*T  XSquaredSum = 0;
+  for(int I = 0; I < N; I++) {
+    XSquaredSum += XSquared[I];
+  }*/
+  return __builtin_elementwise_sqrt(XSquaredSum);
+}
+
 template <typename T>
 constexpr enable_if_t<is_same<float, T>::value || is_same<half, T>::value, T>
 distance_impl(T X, T Y) {
-  return __builtin_elementwise_abs(X - Y);
+  return length_impl(X - Y);
 }
 
 template <typename T, int N>
 constexpr enable_if_t<is_same<float, T>::value || is_same<half, T>::value, T>
 distance_vec_impl(vector<T, N> X, vector<T, N> Y) {
-  return __builtin_hlsl_length(X - Y);
+  return length_vec_impl(X - Y);
 }
 
 } // namespace __detail
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index a7301aa8d8ce72..588335d06ab17f 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -1327,26 +1327,19 @@ float4 lerp(float4, float4, float4);
 /// Length is based on the following formula: sqrt(x[0]^2 + x[1]^2 + ...).
 
 _HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
-half length(half);
-_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
-half length(half2);
-_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
-half length(half3);
-_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
-half length(half4);
-
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
-float length(float);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
-float length(float2);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
-float length(float3);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
-float length(float4);
+const inline half length(half X) { return __detail::length_impl(X); }
+
+template <int N>
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+const inline half length(vector<half, N> X) {
+  return __detail::length_vec_impl(X);
+}
+
+const inline float length(float X) { return __detail::length_impl(X); }
+
+template <int N> const inline float length(vector<float, N> X) {
+  return __detail::length_vec_impl(X);
+}
 
 //===----------------------------------------------------------------------===//
 // log builtins
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 2bc93e4ec1181f..de2ff83d63fee4 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -2030,12 +2030,9 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
       return true;
     break;
   }
-  case Builtin::BI__builtin_hlsl_length: {
-    if (CheckFloatOrHalfRepresentations(&SemaRef, TheCall))
-      return true;
+  case Builtin::BI__builtin_hlsl_reduce_add: {
     if (SemaRef.checkArgCount(TheCall, 1))
       return true;
-
     ExprResult A = TheCall->getArg(0);
     QualType ArgTyA = A.get()->getType();
     QualType RetTy;
diff --git a/clang/test/CodeGenHLSL/builtins/distance.hlsl b/clang/test/CodeGenHLSL/builtins/distance.hlsl
index 2ff2947ac49095..ea7988122ae6d4 100644
--- a/clang/test/CodeGenHLSL/builtins/distance.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/distance.hlsl
@@ -16,8 +16,10 @@ half test_distance_half(half X, half Y) { return distance(X, Y); }
 // CHECK-SAME: <2 x half> noundef [[X:%.*]], <2 x half> noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[SUB_I:%.*]] = fsub <2 x half> [[X]], [[Y]]
-// CHECK-NEXT:    [[HLSL_LENGTH_I:%.*]] = tail call noundef half @llvm.dx.length.v2f16(<2 x half> [[SUB_I]])
-// CHECK-NEXT:    ret half [[HLSL_LENGTH_I]]
+// CHECK-NEXT:    [[MUL_I:%.*]] = fmul <2 x half> [[SUB_I]], [[SUB_I]]
+// CHECK-NEXT:    [[RDX_FADD_I:%.*]] = tail call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> [[MUL_I]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef half @llvm.sqrt.f16(half [[RDX_FADD_I]])
+// CHECK-NEXT:    ret half [[TMP0]]
 //
 half test_distance_half2(half2 X, half2 Y) { return distance(X, Y); }
 
@@ -25,8 +27,10 @@ half test_distance_half2(half2 X, half2 Y) { return distance(X, Y); }
 // CHECK-SAME: <3 x half> noundef [[X:%.*]], <3 x half> noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[SUB_I:%.*]] = fsub <3 x half> [[X]], [[Y]]
-// CHECK-NEXT:    [[HLSL_LENGTH_I:%.*]] = tail call noundef half @llvm.dx.length.v3f16(<3 x half> [[SUB_I]])
-// CHECK-NEXT:    ret half [[HLSL_LENGTH_I]]
+// CHECK-NEXT:    [[MUL_I:%.*]] = fmul <3 x half> [[SUB_I]], [[SUB_I]]
+// CHECK-NEXT:    [[RDX_FADD_I:%.*]] = tail call half @llvm.vector.reduce.fadd.v3f16(half 0xH0000, <3 x half> [[MUL_I]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef half @llvm.sqrt.f16(half [[RDX_FADD_I]])
+// CHECK-NEXT:    ret half [[TMP0]]
 //
 half test_distance_half3(half3 X, half3 Y) { return distance(X, Y); }
 
@@ -34,8 +38,10 @@ half test_distance_half3(half3 X, half3 Y) { return distance(X, Y); }
 // CHECK-SAME: <4 x half> noundef [[X:%.*]], <4 x half> noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[SUB_I:%.*]] = fsub <4 x half> [[X]], [[Y]]
-// CHECK-NEXT:    [[HLSL_LENGTH_I:%.*]] = tail call noundef half @llvm.dx.length.v4f16(<4 x half> [[SUB_I]])
-// CHECK-NEXT:    ret half [[HLSL_LENGTH_I]]
+// CHECK-NEXT:    [[MUL_I:%.*]] = fmul <4 x half> [[SUB_I]], [[SUB_I]]
+// CHECK-NEXT:    [[RDX_FADD_I:%.*]] = tail call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> [[MUL_I]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef half @llvm.sqrt.f16(half [[RDX_FADD_I]])
+// CHECK-NEXT:    ret half [[TMP0]]
 //
 half test_distance_half4(half4 X, half4 Y) { return distance(X, Y); }
 
@@ -52,8 +58,10 @@ float test_distance_float(float X, float Y) { return distance(X, Y); }
 // CHECK-SAME: <2 x float> noundef [[X:%.*]], <2 x float> noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[SUB_I:%.*]] = fsub <2 x float> [[X]], [[Y]]
-// CHECK-NEXT:    [[HLSL_LENGTH_I:%.*]] = tail call noundef float @llvm.dx.length.v2f32(<2 x float> [[SUB_I]])
-// CHECK-NEXT:    ret float [[HLSL_LENGTH_I]]
+// CHECK-NEXT:    [[MUL_I:%.*]] = fmul <2 x float> [[SUB_I]], [[SUB_I]]
+// CHECK-NEXT:    [[RDX_FADD_I:%.*]] = tail call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> [[MUL_I]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[RDX_FADD_I]])
+// CHECK-NEXT:    ret float [[TMP0]]
 //
 float test_distance_float2(float2 X, float2 Y) { return distance(X, Y); }
 
@@ -61,8 +69,10 @@ float test_distance_float2(float2 X, float2 Y) { return distance(X, Y); }
 // CHECK-SAME: <3 x float> noundef [[X:%.*]], <3 x float> noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[SUB_I:%.*]] = fsub <3 x float> [[X]], [[Y]]
-// CHECK-NEXT:    [[HLSL_LENGTH_I:%.*]] = tail call noundef float @llvm.dx.length.v3f32(<3 x float> [[SUB_I]])
-// CHECK-NEXT:    ret float [[HLSL_LENGTH_I]]
+// CHECK-NEXT:    [[MUL_I:%.*]] = fmul <3 x float> [[SUB_I]], [[SUB_I]]
+// CHECK-NEXT:    [[RDX_FADD_I:%.*]] = tail call float @llvm.vector.reduce.fadd.v3f32(float 0.000000e+00, <3 x float> [[MUL_I]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[RDX_FADD_I]])
+// CHECK-NEXT:    ret float [[TMP0]]
 //
 float test_distance_float3(float3 X, float3 Y) { return distance(X, Y); }
 
@@ -70,7 +80,9 @@ float test_distance_float3(float3 X, float3 Y) { return distance(X, Y); }
 // CHECK-SAME: <4 x float> noundef [[X:%.*]], <4 x float> noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[SUB_I:%.*]] = fsub <4 x float> [[X]], [[Y]]
-// CHECK-NEXT:    [[HLSL_LENGTH_I:%.*]] = tail call noundef float @llvm.dx.length.v4f32(<4 x float> [[SUB_I]])
-// CHECK-NEXT:    ret float [[HLSL_LENGTH_I]]
+// CHECK-NEXT:    [[MUL_I:%.*]] = fmul <4 x float> [[SUB_I]], [[SUB_I]]
+// CHECK-NEXT:    [[RDX_FADD_I:%.*]] = tail call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[MUL_I]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[RDX_FADD_I]])
+// CHECK-NEXT:    ret float [[TMP0]]
 //
 float test_distance_float4(float4 X, float4 Y) { return distance(X, Y); }
diff --git a/clang/test/CodeGenHLSL/builtins/length.hlsl b/clang/test/CodeGenHLSL/builtins/length.hlsl
index 1c23b0df04df98..af9713bfd7628c 100644
--- a/clang/test/CodeGenHLSL/builtins/length.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/length.hlsl
@@ -1,73 +1,111 @@
-// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
-// RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
-// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ 
-// RUN:   --check-prefixes=CHECK,NATIVE_HALF
-// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
-// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
-// RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF
-
-// NATIVE_HALF: define noundef half @
-// NATIVE_HALF: call half @llvm.fabs.f16(half
-// NO_HALF: call float @llvm.fabs.f32(float
-// NATIVE_HALF: ret half
-// NO_HALF: ret float
-half test_length_half(half p0)
-{
-  return length(p0);
-}
-// NATIVE_HALF: define noundef half @
-// NATIVE_HALF: %hlsl.length = call half @llvm.dx.length.v2f16
-// NO_HALF: %hlsl.length = call float @llvm.dx.length.v2f32(
-// NATIVE_HALF: ret half %hlsl.length
-// NO_HALF: ret float %hlsl.length
-half test_length_half2(half2 p0)
-{
-  return length(p0);
-}
-// NATIVE_HALF: define noundef half @
-// NATIVE_HALF: %hlsl.length = call half @llvm.dx.length.v3f16
-// NO_HALF: %hlsl.length = call float @llvm.dx.length.v3f32(
-// NATIVE_HALF: ret half %hlsl.length
-// NO_HALF: ret float %hlsl.length
-half test_length_half3(half3 p0)
-{
-  return length(p0);
-}
-// NATIVE_HALF: define noundef half @
-// NATIVE_HALF: %hlsl.length = call half @llvm.dx.length.v4f16
-// NO_HALF: %hlsl.length = call float @llvm.dx.length.v4f32(
-// NATIVE_HALF: ret half %hlsl.length
-// NO_HALF: ret float %hlsl.length
-half test_length_half4(half4 p0)
-{
-  return length(p0);
-}
-
-// CHECK: define noundef float @
-// CHECK: call float @llvm.fabs.f32(float
-// CHECK: ret float
-float test_length_float(float p0)
-{
-  return length(p0);
-}
-// CHECK: define noundef float @
-// CHECK: %hlsl.length = call float @llvm.dx.length.v2f32(
-// CHECK: ret float %hlsl.length
-float test_length_float2(float2 p0)
-{
-  return length(p0);
-}
-// CHECK: define noundef float @
-// CHECK: %hlsl.length = call float @llvm.dx.length.v3f32(
-// CHECK: ret float %hlsl.length
-float test_length_float3(float3 p0)
-{
-  return length(p0);
-}
-// CHECK: define noundef float @
-// CHECK: %hlsl.length = call float @llvm.dx.length.v4f32(
-// CHECK: ret float %hlsl.length
-float test_length_float4(float4 p0)
-{
-  return length(p0);
-}
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN:   -emit-llvm -O1 -o - | FileCheck %s
+
+
+// CHECK-LABEL: define noundef half @_Z16test_length_halfDh(
+// CHECK-SAME: half noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[ELT_ABS_I:%.*]] = tail call noundef half @llvm.fabs.f16(half [[P0]])
+// CHECK-NEXT:    ret half [[ELT_ABS_I]]
+//
+half test_length_half(half p0)
+{
+  return length(p0);
+}
+
+// CHECK-LABEL: define noundef half @_Z17test_length_half2Dv2_Dh(
+// CHECK-SAME: <2 x half> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[MUL_I:%.*]] = fmul <2 x half> [[P0]], [[P0]]
+// CHECK-NEXT:    [[RDX_FADD_I:%.*]] = tail call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> [[MUL_I]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef half @llvm.sqrt.f16(half [[RDX_FADD_I]])
+// CHECK-NEXT:    ret half [[TMP0]]
+//
+half test_length_half2(half2 p0)
+{
+  return length(p0);
+}
+
+// CHECK-LABEL: define noundef half @_Z17test_length_half3Dv3_Dh(
+// CHECK-SAME: <3 x half> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[MUL_I:%.*]] = fmul <3 x half> [[P0]], [[P0]]
+// CHECK-NEXT:    [[RDX_FADD_I:%.*]] = tail call half @llvm.vector.reduce.fadd.v3f16(half 0xH0000, <3 x half> [[MUL_I]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef half @llvm.sqrt.f16(half [[RDX_FADD_I]])
+// CHECK-NEXT:    ret half [[TMP0]]
+//
+half test_length_half3(half3 p0)
+{
+  return length(p0);
+}
+
+// CHECK-LABEL: define noundef half @_Z17test_length_half4Dv4_Dh(
+// CHECK-SAME: <4 x half> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[MUL_I:%.*]] = fmul <4 x half> [[P0]], [[P0]]
+// CHECK-NEXT:    [[RDX_FADD_I:%.*]] = tail call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> [[MUL_I]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef half @llvm.sqrt.f16(half [[RDX_FADD_I]])
+// CHECK-NEXT:    ret half [[TMP0]]
+//
+half test_length_half4(half4 p0)
+{
+  return length(p0);
+}
+
+
+// CHECK-LABEL: define noundef float @_Z17test_length_floatf(
+// CHECK-SAME: float noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[ELT_ABS_I:%.*]] = tail call noundef float @llvm.fabs.f32(float [[P0]])
+// CHECK-NEXT:    ret float [[ELT_ABS_I]]
+//
+float test_length_float(float p0)
+{
+  return length(p0);
+}
+
+// CHECK-LABEL: define noundef float @_Z18test_length_float2Dv2_f(
+// CHECK-SAME: <2 x float> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[MUL_I:%.*]] = fmul <2 x float> [[P0]], [[P0]]
+// CHECK-NEXT:    [[RDX_FADD_I:%.*]] = tail call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> [[MUL_I]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[RDX_FADD_I]])
+// CHECK-NEXT:    ret float [[TMP0]]
+//
+float test_length_float2(float2 p0)
+{
+  return length(p0);
+}
+
+// CHECK-LABEL: define noundef float @_Z18test_length_float3Dv3_f(
+// CHECK-SAME: <3 x float> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[MUL_I:%.*]] = fmul <3 x float> [[P0]], [[P0]]
+// CHECK-NEXT:    [[RDX_FADD_I:%.*]] = tail call float @llvm.vector.reduce.fadd.v3f32(float 0.000000e+00, <3 x float> [[MUL_I]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[RDX_FADD_I]])
+// CHECK-NEXT:    ret float [[TMP0]]
+//
+float test_length_float3(float3 p0)
+{
+  return length(p0);
+}
+
+// CHECK-LABEL: define noundef float @_Z18test_length_float4Dv4_f(
+// CHECK-SAME: <4 x float> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[MUL_I:%.*]] = fmul <4 x float> [[P0]], [[P0]]
+// CHECK-NEXT:    [[RDX_FADD_I:%.*]] = tail call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[MUL_I]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[RDX_FADD_I]])
+// CHECK-NEXT:    ret float [[TMP0]]
+//
+loat3 p0)
+{
+  return length(p0);
+}
+
+float test_length_float4(float4 p0)
+{
+  return length(p0);
+}
diff --git a/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl
index 281faada6f5e94..a191f0419fbbaf 100644
--- a/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl
@@ -1,32 +1,53 @@
-// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -disable-llvm-passes -verify -verify-ignore-unexpected
-
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify
 
 void test_too_few_arg()
 {
-  return __builtin_hlsl_length();
-  // expected-error at -1 {{too few arguments to function call, expected 1, have 0}}
+  return length();
+  // expected-error at -1 {{no matching function for call to 'length'}}
+  // expected-note at hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires single argument 'X', but no arguments were provided}}
+  // expected-note at hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires single argument 'X', but no arguments were provided}}
+  // expected-note at hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires single argument 'X', but no arguments were provided}}
+  // expected-note at hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires single argument 'X', but no arguments were provided}}
 }
 
 void test_too_many_arg(float2 p0)
 {
-  return __builtin_hlsl_length(p0, p0);
-  // expected-error at -1 {{too many arguments to function call, expected 1, have 2}}
+  return length(p0, p0);
+  // expected-error at -1 {{no matching function for call to 'length'}}
+  // expected-note at hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires single argument 'X', but 2 arguments were provided}}
+  // expected-note at hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires single argument 'X', but 2 arguments were provided}}
+  // expected-note at hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires single argument 'X', but 2 arguments were provided}}
+  // expected-note at hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires single argument 'X', but 2 arguments were provided}}
+}
+
+float double_to_float_type(double p0) {
+  return length(p0);
+  // expected-error at -1  {{call to 'length' is ambiguous}}
+  // expected-note at hlsl/hlsl_intrinsics.h:* {{candidate function}}
+  // expected-note at hlsl/hlsl_intrinsics.h:* {{candidate function}}
 }
 
-bool builtin_bool_to_float_type_promotion(bool p1)
+
+float bool_to_float_type_promotion(bool p1)
 {
-  return __builtin_hlsl_length(p1);
-  // expected-error at -1 {passing 'bool' to parameter of incompatible type 'float'}}
+  return length(p1);
+  // expected-error at -1  {{call to 'length' is ambiguous}}
+  // expected-note at hlsl/hlsl_intrinsics.h:* {{candidate function}}
+  // expected-note at hlsl/hlsl_intrinsics.h:* {{candidate function}}
 }
 
-bool builtin_length_int_to_float_promotion(int p1)
+float length_int_to_float_promotion(int p1)
 {
-  return __builtin_hlsl_length(p1);
-  // expected-error at -1 {{passing 'int' to parameter of incompatible type 'float'}}
+  return length(p1);
+  // expected-error at -1  {{call to 'length' is ambiguous}}
+  // expected-note at hlsl/hlsl_intrinsics.h:* {{candidate function}}
+  // expected-note at hlsl/hlsl_intrinsics.h:* {{candidate function}}
 }
 
-bool2 builtin_length_int2_to_float2_promotion(int2 p1)
+float2 length_int2_to_float2_promotion(int2 p1)
 {
-  return __builtin_hlsl_length(p1);
-  // expected-error at -1 {{passing 'int2' (aka 'vector<int, 2>') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(float)))) float' (vector of 2 'float' values)}}
+  return length(p1);
+  // expected-error at -1  {{call to 'length' is ambiguous}}
+  // expected-note at hlsl/hlsl_intrinsics.h:* {{candidate function}}
+  // expected-note at hlsl/hlsl_intrinsics.h:* {{candidate function}}
 }
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index dad60a2535cf4d..e5ac98430f5ced 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -89,7 +89,6 @@ def int_dx_isinf : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1
 def int_dx_lerp : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>,LLVMMatchType<0>],
     [IntrNoMem]>;
 
-def int_dx_length : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], [llvm_anyfloat_ty], [IntrNoMem]>;
 def int_dx_imad : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
 def int_dx_umad : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
 def int_dx_normalize : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty], [IntrNoMem]>;
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index d2bfca1fada559..37c20459fa4bf4 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -59,7 +59,6 @@ static bool isIntrinsicExpansion(Function &F) {
   case Intrinsic::dx_nclamp:
   case Intrinsic::dx_degrees:
   case Intrinsic::dx_lerp:
-  case Intrinsic::dx_length:
   case Intrinsic::dx_normalize:
   case Intrinsic::dx_fdot:
   case Intrinsic::dx_sdot:
@@ -67,11 +66,44 @@ static bool isIntrinsicExpansion(Function &F) {
   case Intrinsic::dx_sign:
   case Intrinsic::dx_step:
   case Intrinsic::dx_radians:
+  case Intrinsic::vector_reduce_add:
+  case Intrinsic::vector_reduce_fadd:
     return true;
   }
   return false;
 }
 
+static Value *expandVecReduceFAdd(CallInst *Orig) {
+  Value *Sum = Orig->getOperand(0);
+  Value *X = Orig->getOperand(1);
+  IRBuilder<> Builder(Orig);
+  Type *Ty = X->getType();
+  auto *XVec = dyn_cast<FixedVectorType>(Ty);
+  unsigned XVecSize = XVec->getNumElements();
+
+  for (unsigned I = 0; I < XVecSize; I++) {
+    Value *Elt = Builder.CreateExtractElement(X, I);
+    Sum = Builder.CreateFAdd(Sum, Elt);
+  }
+  return Sum;
+}
+
+static Value *expandVecReduceAdd(CallInst *Orig) {
+  Value *X = Orig->getOperand(0);
+  IRBuilder<> Builder(Orig);
+  Type *Ty = X->getType();
+  Type *EltTy = Ty->getScalarType();
+  auto *XVec = dyn_cast<FixedVectorType>(Ty);
+  unsigned XVecSize = XVec->getNumElements();
+
+  Value *Sum = ConstantInt::get(EltTy, 0);
+  for (unsigned I = 0; I < XVecSize; I++) {
+    Value *Elt = Builder.CreateExtractElement(X, I);
+    Sum = Builder.CreateAdd(Sum, Elt);
+  }
+  return Sum;
+}
+
 static Value *expandAbs(CallInst *Orig) {
   Value *X = Orig->getOperand(0);
   IRBuilder<> Builder(Orig);
@@ -261,32 +293,6 @@ static Value *expandAnyOrAllIntrinsic(CallInst *Orig,
   return Result;
 }
 
-static Value *expandLengthIntrinsic(CallInst *Orig) {
-  Value *X = Orig->getOperand(0);
-  IRBuilder<> Builder(Orig);
-  Type *Ty = X->getType();
-  Type *EltTy = Ty->getScalarType();
-
-  // Though dx.length does work on scalar type, we can optimize it to just emit
-  // fabs, in CGBuiltin.cpp. We shouldn't see a scalar type here because
-  // CGBuiltin.cpp should have emitted a fabs call.
-  Value *Elt = Builder.CreateExtractElement(X, (uint64_t)0);
-  auto *XVec = dyn_cast<FixedVectorType>(Ty);
-  unsigned XVecSize = XVec->getNumElements();
-  if (!(Ty->isVectorTy() && XVecSize > 1))
-    report_fatal_error(Twine("Invalid input type for length intrinsic"),
-                       /* gen_crash_diag=*/false);
-
-  Value *Sum = Builder.CreateFMul(Elt, Elt);
-  for (unsigned I = 1; I < XVecSize; I++) {
-    Elt = Builder.CreateExtractElement(X, I);
-    Value *Mul = Builder.CreateFMul(Elt, Elt);
-    Sum = Builder.CreateFAdd(Sum, Mul);
-  }
-  return Builder.CreateIntrinsic(EltTy, Intrinsic::sqrt, ArrayRef<Value *>{Sum},
-                                 nullptr, "elt.sqrt");
-}
-
 static Value *expandLerpIntrinsic(CallInst *Orig) {
   Value *X = Orig->getOperand(0);
   Value *Y = Orig->getOperand(1);
@@ -558,9 +564,6 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
   case Intrinsic::dx_lerp:
     Result = expandLerpIntrinsic(Orig);
     break;
-  case Intrinsic::dx_length:
-    Result = expandLengthIntrinsic(Orig);
-    break;
   case Intrinsic::dx_normalize:
     Result = expandNormalizeIntrinsic(Orig);
     break;
@@ -580,6 +583,12 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
   case Intrinsic::dx_radians:
     Result = expandRadiansIntrinsic(Orig);
     break;
+  case Intrinsic::vector_reduce_add:
+    Result = expandVecReduceAdd(Orig);
+    break;
+  case Intrinsic::vector_reduce_fadd:
+    Result = expandVecReduceFAdd(Orig);
+    break;
   }
   if (Result) {
     Orig->replaceAllUsesWith(Result);
diff --git a/llvm/test/CodeGen/DirectX/length.ll b/llvm/test/CodeGen/DirectX/length.ll
index fc5868a7f6e82c..8df5aca6e10df3 100644
--- a/llvm/test/CodeGen/DirectX/length.ll
+++ b/llvm/test/CodeGen/DirectX/length.ll
@@ -3,114 +3,140 @@
 
 ; Make sure dxil operation function calls for length are generated for half/float.
 
-declare half @llvm.fabs.f16(half)
-declare half @llvm.dx.length.v2f16(<2 x half>)
-declare half @llvm.dx.length.v3f16(<3 x half>)
-declare half @llvm.dx.length.v4f16(<4 x half>)
-
-declare float @llvm.fabs.f32(float)
-declare float @llvm.dx.length.v2f32(<2 x float>)
-declare float @llvm.dx.length.v3f32(<3 x float>)
-declare float @llvm.dx.length.v4f32(<4 x float>)
-
 define noundef half @test_length_half2(<2 x half> noundef %p0) {
+; CHECK-LABEL: define noundef half @test_length_half2(
+; CHECK-SAME: <2 x half> noundef [[P0:%.*]]) {
+; CHECK:  [[ENTRY:.*:]]
+; CHECK:    [[MUL_I:%.*]] = fmul <2 x half> [[P0]], [[P0]]
+; CHECK:    [[TMP0:%.*]] = extractelement <2 x half> [[MUL_I]], i64 0
+; CHECK:    [[TMP1:%.*]] = fadd half 0xH0000, [[TMP0]]
+; CHECK:    [[TMP2:%.*]] = extractelement <2 x half> [[MUL_I]], i64 1
+; CHECK:    [[TMP3:%.*]] = fadd half [[TMP1]], [[TMP2]]
+; EXPCHECK: [[HLSL_LENGTH:%.*]] = call half @llvm.sqrt.f16(half [[TMP3]])
+; DOPCHECK: [[HLSL_LENGTH:%.*]] = call half @dx.op.unary.f16(i32 24, half [[TMP3]])
+; CHECK:    ret half [[HLSL_LENGTH]]
+;
 entry:
-  ; CHECK: extractelement <2 x half> %{{.*}}, i64 0
-  ; CHECK: fmul half %{{.*}}, %{{.*}}
-  ; CHECK: extractelement <2 x half> %{{.*}}, i64 1
-  ; CHECK: fmul half %{{.*}}, %{{.*}}
-  ; CHECK: fadd half %{{.*}}, %{{.*}}
-  ; EXPCHECK: call half @llvm.sqrt.f16(half %{{.*}})
-  ; DOPCHECK: call half @dx.op.unary.f16(i32 24, half %{{.*}})
 
-  %hlsl.length = call half @llvm.dx.length.v2f16(<2 x half> %p0)
+  %mul.i = fmul <2 x half> %p0, %p0
+  %rdx.fadd.i = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> %mul.i)
+  %hlsl.length = call half @llvm.sqrt.f16(half %rdx.fadd.i)
   ret half %hlsl.length
 }
 
 define noundef half @test_length_half3(<3 x half> noundef %p0) {
+; CHECK-LABEL: define noundef half @test_length_half3(
+; CHECK-SAME: <3 x half> noundef [[P0:%.*]]) {
+; CHECK:  [[ENTRY:.*:]]
+; CHECK:    [[MUL_I:%.*]] = fmul <3 x half> [[P0]], [[P0]]
+; CHECK:    [[TMP0:%.*]] = extractelement <3 x half> [[MUL_I]], i64 0
+; CHECK:    [[TMP1:%.*]] = fadd half 0xH0000, [[TMP0]]
+; CHECK:    [[TMP2:%.*]] = extractelement <3 x half> [[MUL_I]], i64 1
+; CHECK:    [[TMP3:%.*]] = fadd half [[TMP1]], [[TMP2]]
+; CHECK:    [[TMP4:%.*]] = extractelement <3 x half> [[MUL_I]], i64 2
+; CHECK:    [[TMP5:%.*]] = fadd half [[TMP3]], [[TMP4]]
+; EXPCHECK: [[HLSL_LENGTH:%.*]] = call half @llvm.sqrt.f16(half [[TMP5]])
+; DOPCHECK: [[HLSL_LENGTH:%.*]] = call half @dx.op.unary.f16(i32 24, half [[TMP5]])
+; CHECK:    ret half [[HLSL_LENGTH]]
+;
 entry:
-  ; CHECK: extractelement <3 x half> %{{.*}}, i64 0
-  ; CHECK: fmul half %{{.*}}, %{{.*}}
-  ; CHECK: extractelement <3 x half> %{{.*}}, i64 1
-  ; CHECK: fmul half %{{.*}}, %{{.*}}
-  ; CHECK: fadd half %{{.*}}, %{{.*}}
-  ; CHECK: extractelement <3 x half> %{{.*}}, i64 2
-  ; CHECK: fmul half %{{.*}}, %{{.*}}
-  ; CHECK: fadd half %{{.*}}, %{{.*}}
-  ; EXPCHECK: call half @llvm.sqrt.f16(half %{{.*}})
-  ; DOPCHECK: call half @dx.op.unary.f16(i32 24, half %{{.*}})
 
-  %hlsl.length = call half @llvm.dx.length.v3f16(<3 x half> %p0)
+  %mul.i = fmul <3 x half> %p0, %p0
+  %rdx.fadd.i = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <3 x half> %mul.i)
+  %hlsl.length = call half @llvm.sqrt.f16(half %rdx.fadd.i)
   ret half %hlsl.length
 }
 
 define noundef half @test_length_half4(<4 x half> noundef %p0) {
+; CHECK-LABEL: define noundef half @test_length_half4(
+; CHECK-SAME: <4 x half> noundef [[P0:%.*]]) {
+; CHECK:  [[ENTRY:.*:]]
+; CHECK:    [[MUL_I:%.*]] = fmul <4 x half> [[P0]], [[P0]]
+; CHECK:    [[TMP0:%.*]] = extractelement <4 x half> [[MUL_I]], i64 0
+; CHECK:    [[TMP1:%.*]] = fadd half 0xH0000, [[TMP0]]
+; CHECK:    [[TMP2:%.*]] = extractelement <4 x half> [[MUL_I]], i64 1
+; CHECK:    [[TMP3:%.*]] = fadd half [[TMP1]], [[TMP2]]
+; CHECK:    [[TMP4:%.*]] = extractelement <4 x half> [[MUL_I]], i64 2
+; CHECK:    [[TMP5:%.*]] = fadd half [[TMP3]], [[TMP4]]
+; CHECK:    [[TMP6:%.*]] = extractelement <4 x half> [[MUL_I]], i64 3
+; CHECK:    [[TMP7:%.*]] = fadd half [[TMP5]], [[TMP6]]
+; EXPCHECK: [[HLSL_LENGTH:%.*]] = call half @llvm.sqrt.f16(half [[TMP7]])
+; DOPCHECK: [[HLSL_LENGTH:%.*]] = call half @dx.op.unary.f16(i32 24, half [[TMP7]])
+; CHECK:    ret half [[HLSL_LENGTH]]
+;
 entry:
-  ; CHECK: extractelement <4 x half> %{{.*}}, i64 0
-  ; CHECK: fmul half %{{.*}}, %{{.*}}
-  ; CHECK: extractelement <4 x half> %{{.*}}, i64 1
-  ; CHECK: fmul half %{{.*}}, %{{.*}}
-  ; CHECK: fadd half %{{.*}}, %{{.*}}
-  ; CHECK: extractelement <4 x half> %{{.*}}, i64 2
-  ; CHECK: fmul half %{{.*}}, %{{.*}}
-  ; CHECK: fadd half %{{.*}}, %{{.*}}
-  ; CHECK: extractelement <4 x half> %{{.*}}, i64 3
-  ; CHECK: fmul half %{{.*}}, %{{.*}}
-  ; CHECK: fadd half %{{.*}}, %{{.*}}
-  ; EXPCHECK: call half @llvm.sqrt.f16(half %{{.*}})
-  ; DOPCHECK:  call half @dx.op.unary.f16(i32 24, half %{{.*}})
 
-  %hlsl.length = call half @llvm.dx.length.v4f16(<4 x half> %p0)
+  %mul.i = fmul <4 x half> %p0, %p0
+  %rdx.fadd.i = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <4 x half> %mul.i)
+  %hlsl.length = call half @llvm.sqrt.f16(half %rdx.fadd.i)
   ret half %hlsl.length
 }
 
 define noundef float @test_length_float2(<2 x float> noundef %p0) {
+; CHECK-LABEL: define noundef float @test_length_float2(
+; CHECK-SAME: <2 x float> noundef [[P0:%.*]]) {
+; CHECK:  [[ENTRY:.*:]]
+; CHECK:    [[MUL_I:%.*]] = fmul <2 x float> [[P0]], [[P0]]
+; CHECK:    [[TMP0:%.*]] = extractelement <2 x float> [[MUL_I]], i64 0
+; CHECK:    [[TMP1:%.*]] = fadd float 0.000000e+00, [[TMP0]]
+; CHECK:    [[TMP2:%.*]] = extractelement <2 x float> [[MUL_I]], i64 1
+; CHECK:    [[TMP3:%.*]] = fadd float [[TMP1]], [[TMP2]]
+; EXPCHECK: [[HLSL_LENGTH:%.*]] = call float @llvm.sqrt.f32(float [[TMP3]])
+; DOPCHECK: [[HLSL_LENGTH:%.*]] = call float @dx.op.unary.f32(i32 24, float [[TMP3]])
+; CHECK:    ret float [[HLSL_LENGTH]]
+;
 entry:
-  ; CHECK: extractelement <2 x float> %{{.*}}, i64 0
-  ; CHECK: fmul float %{{.*}}, %{{.*}}
-  ; CHECK: extractelement <2 x float> %{{.*}}, i64 1
-  ; CHECK: fmul float %{{.*}}, %{{.*}}
-  ; CHECK: fadd float %{{.*}}, %{{.*}}
-  ; EXPCHECK: call float @llvm.sqrt.f32(float %{{.*}})
-  ; DOPCHECK: call float @dx.op.unary.f32(i32 24, float %{{.*}})
 
-  %hlsl.length = call float @llvm.dx.length.v2f32(<2 x float> %p0)
+  %mul.i = fmul <2 x float> %p0, %p0
+  %rdx.fadd.i = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> %mul.i)
+  %hlsl.length = call float @llvm.sqrt.f32(float %rdx.fadd.i)
   ret float %hlsl.length
 }
 
 define noundef float @test_length_float3(<3 x float> noundef %p0) {
+; CHECK-LABEL: define noundef float @test_length_float3(
+; CHECK-SAME: <3 x float> noundef [[P0:%.*]]) {
+; CHECK:  [[ENTRY:.*:]]
+; CHECK:    [[MUL_I:%.*]] = fmul <3 x float> [[P0]], [[P0]]
+; CHECK:    [[TMP0:%.*]] = extractelement <3 x float> [[MUL_I]], i64 0
+; CHECK:    [[TMP1:%.*]] = fadd float 0.000000e+00, [[TMP0]]
+; CHECK:    [[TMP2:%.*]] = extractelement <3 x float> [[MUL_I]], i64 1
+; CHECK:    [[TMP3:%.*]] = fadd float [[TMP1]], [[TMP2]]
+; CHECK:    [[TMP4:%.*]] = extractelement <3 x float> [[MUL_I]], i64 2
+; CHECK:    [[TMP5:%.*]] = fadd float [[TMP3]], [[TMP4]]
+; EXPCHECK: [[HLSL_LENGTH:%.*]] = call float @llvm.sqrt.f32(float [[TMP5]])
+; DOPCHECK: [[HLSL_LENGTH:%.*]] = call float @dx.op.unary.f32(i32 24, float [[TMP5]])
+; CHECK:    ret float [[HLSL_LENGTH]]
+;
 entry:
-  ; CHECK: extractelement <3 x float> %{{.*}}, i64 0
-  ; CHECK: fmul float %{{.*}}, %{{.*}}
-  ; CHECK: extractelement <3 x float> %{{.*}}, i64 1
-  ; CHECK: fmul float %{{.*}}, %{{.*}}
-  ; CHECK: fadd float %{{.*}}, %{{.*}}
-  ; CHECK: extractelement <3 x float> %{{.*}}, i64 2
-  ; CHECK: fmul float %{{.*}}, %{{.*}}
-  ; CHECK: fadd float %{{.*}}, %{{.*}}
-  ; EXPCHECK: call float @llvm.sqrt.f32(float %{{.*}})
-  ; DOPCHECK: call float @dx.op.unary.f32(i32 24, float %{{.*}})
 
-  %hlsl.length = call float @llvm.dx.length.v3f32(<3 x float> %p0)
+  %mul.i = fmul <3 x float> %p0, %p0
+  %rdx.fadd.i = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <3 x float> %mul.i)
+  %hlsl.length = call float @llvm.sqrt.f32(float %rdx.fadd.i)
   ret float %hlsl.length
 }
 
 define noundef float @test_length_float4(<4 x float> noundef %p0) {
+; CHECK-LABEL: define noundef float @test_length_float4(
+; CHECK-SAME: <4 x float> noundef [[P0:%.*]]) {
+; CHECK:  [[ENTRY:.*:]]
+; CHECK:    [[MUL_I:%.*]] = fmul <4 x float> [[P0]], [[P0]]
+; CHECK:    [[TMP0:%.*]] = extractelement <4 x float> [[MUL_I]], i64 0
+; CHECK:    [[TMP1:%.*]] = fadd float 0.000000e+00, [[TMP0]]
+; CHECK:    [[TMP2:%.*]] = extractelement <4 x float> [[MUL_I]], i64 1
+; CHECK:    [[TMP3:%.*]] = fadd float [[TMP1]], [[TMP2]]
+; CHECK:    [[TMP4:%.*]] = extractelement <4 x float> [[MUL_I]], i64 2
+; CHECK:    [[TMP5:%.*]] = fadd float [[TMP3]], [[TMP4]]
+; CHECK:    [[TMP6:%.*]] = extractelement <4 x float> [[MUL_I]], i64 3
+; CHECK:    [[TMP7:%.*]] = fadd float [[TMP5]], [[TMP6]]
+; EXPCHECK: [[HLSL_LENGTH:%.*]] = call float @llvm.sqrt.f32(float [[TMP7]])
+; DOPCHECK: [[HLSL_LENGTH:%.*]] = call float @dx.op.unary.f32(i32 24, float [[TMP7]])
+; CHECK:    ret float [[HLSL_LENGTH]]
+;
 entry:
-  ; CHECK: extractelement <4 x float> %{{.*}}, i64 0
-  ; CHECK: fmul float %{{.*}}, %{{.*}}
-  ; CHECK: extractelement <4 x float> %{{.*}}, i64 1
-  ; CHECK: fmul float %{{.*}}, %{{.*}}
-  ; CHECK: fadd float %{{.*}}, %{{.*}}
-  ; CHECK: extractelement <4 x float> %{{.*}}, i64 2
-  ; CHECK: fmul float %{{.*}}, %{{.*}}
-  ; CHECK: fadd float %{{.*}}, %{{.*}}
-  ; CHECK: extractelement <4 x float> %{{.*}}, i64 3
-  ; CHECK: fmul float %{{.*}}, %{{.*}}
-  ; CHECK: fadd float %{{.*}}, %{{.*}}
-  ; EXPCHECK: call float @llvm.sqrt.f32(float %{{.*}})
-  ; DOPCHECK:  call float @dx.op.unary.f32(i32 24, float %{{.*}})
 
-  %hlsl.length = call float @llvm.dx.length.v4f32(<4 x float> %p0)
+  %mul.i = fmul <4 x float> %p0, %p0
+  %rdx.fadd.i = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <4 x float> %mul.i)
+  %hlsl.length = call float @llvm.sqrt.f32(float %rdx.fadd.i)
   ret float %hlsl.length
 }
diff --git a/llvm/test/CodeGen/DirectX/length_error.ll b/llvm/test/CodeGen/DirectX/length_error.ll
deleted file mode 100644
index 143b41fc506e1d..00000000000000
--- a/llvm/test/CodeGen/DirectX/length_error.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: not opt -S -dxil-intrinsic-expansion -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s
-
-; DXIL operation length does not support double overload type
-; CHECK: Cannot create Sqrt operation: Invalid overload type
-
-define noundef double @test_length_double2(<2 x double> noundef %p0) {
-entry:
-  %hlsl.length = call double @llvm.dx.length.v2f32(<2 x double> %p0)
-  ret double %hlsl.length
-}

>From c95a9ccd9c61980a95cda99446d38b13440f3e54 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Thu, 21 Nov 2024 21:27:51 -0500
Subject: [PATCH 4/5] add a small optimization to remove the extra add zero

---
 clang/lib/CodeGen/CGHLSLRuntime.h             |   1 -
 clang/test/CodeGenHLSL/builtins/length.hlsl   |  11 +-
 .../Target/DirectX/DXILIntrinsicExpansion.cpp |  12 +-
 llvm/test/CodeGen/DirectX/length.ll           | 239 +++++++++++-------
 4 files changed, 164 insertions(+), 99 deletions(-)

diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index a8e0ed42b79a35..8001e8a4c957e0 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -77,7 +77,6 @@ class CGHLSLRuntime {
   GENERATE_HLSL_INTRINSIC_FUNCTION(Cross, cross)
   GENERATE_HLSL_INTRINSIC_FUNCTION(Degrees, degrees)
   GENERATE_HLSL_INTRINSIC_FUNCTION(Frac, frac)
-  GENERATE_HLSL_INTRINSIC_FUNCTION(Length, length)
   GENERATE_HLSL_INTRINSIC_FUNCTION(Lerp, lerp)
   GENERATE_HLSL_INTRINSIC_FUNCTION(Normalize, normalize)
   GENERATE_HLSL_INTRINSIC_FUNCTION(Rsqrt, rsqrt)
diff --git a/clang/test/CodeGenHLSL/builtins/length.hlsl b/clang/test/CodeGenHLSL/builtins/length.hlsl
index af9713bfd7628c..2dad0e18a4a888 100644
--- a/clang/test/CodeGenHLSL/builtins/length.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/length.hlsl
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
-// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
-// RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
-// RUN:   -emit-llvm -O1 -o - | FileCheck %s
+// RUN: %clang_cc1 -finclude-default-header -triple \
+// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN: -emit-llvm -O1 -o - | FileCheck %s
 
 
 // CHECK-LABEL: define noundef half @_Z16test_length_halfDh(
@@ -100,11 +100,6 @@ float test_length_float3(float3 p0)
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[RDX_FADD_I]])
 // CHECK-NEXT:    ret float [[TMP0]]
 //
-loat3 p0)
-{
-  return length(p0);
-}
-
 float test_length_float4(float4 p0)
 {
   return length(p0);
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index 37c20459fa4bf4..eb89f6a2f95da7 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -28,6 +28,7 @@
 #include "llvm/Pass.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
+#include <cstdint>
 
 #define DEBUG_TYPE "dxil-intrinsic-expansion"
 
@@ -74,14 +75,15 @@ static bool isIntrinsicExpansion(Function &F) {
 }
 
 static Value *expandVecReduceFAdd(CallInst *Orig) {
-  Value *Sum = Orig->getOperand(0);
+  // Note: vector_reduce_fadd first argument is a starting value
+  // Our use doesn't need it, so ignoring argument zero.
   Value *X = Orig->getOperand(1);
   IRBuilder<> Builder(Orig);
   Type *Ty = X->getType();
   auto *XVec = dyn_cast<FixedVectorType>(Ty);
   unsigned XVecSize = XVec->getNumElements();
-
-  for (unsigned I = 0; I < XVecSize; I++) {
+  Value *Sum = Builder.CreateExtractElement(X, static_cast<uint64_t>(0));
+  for (unsigned I = 1; I < XVecSize; I++) {
     Value *Elt = Builder.CreateExtractElement(X, I);
     Sum = Builder.CreateFAdd(Sum, Elt);
   }
@@ -96,8 +98,8 @@ static Value *expandVecReduceAdd(CallInst *Orig) {
   auto *XVec = dyn_cast<FixedVectorType>(Ty);
   unsigned XVecSize = XVec->getNumElements();
 
-  Value *Sum = ConstantInt::get(EltTy, 0);
-  for (unsigned I = 0; I < XVecSize; I++) {
+  Value *Sum = Builder.CreateExtractElement(X, static_cast<uint64_t>(0));
+  for (unsigned I = 1; I < XVecSize; I++) {
     Value *Elt = Builder.CreateExtractElement(X, I);
     Sum = Builder.CreateAdd(Sum, Elt);
   }
diff --git a/llvm/test/CodeGen/DirectX/length.ll b/llvm/test/CodeGen/DirectX/length.ll
index 8df5aca6e10df3..3feea03321b838 100644
--- a/llvm/test/CodeGen/DirectX/length.ll
+++ b/llvm/test/CodeGen/DirectX/length.ll
@@ -1,23 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt -S  -dxil-intrinsic-expansion  < %s | FileCheck %s --check-prefixes=CHECK,EXPCHECK
-; RUN: opt -S  -dxil-intrinsic-expansion -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library < %s | FileCheck %s --check-prefixes=CHECK,DOPCHECK
+; RUN: opt -S  -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library < %s | FileCheck %s --check-prefixes=CHECK,DOPCHECK
 
 ; Make sure dxil operation function calls for length are generated for half/float.
 
 define noundef half @test_length_half2(<2 x half> noundef %p0) {
-; CHECK-LABEL: define noundef half @test_length_half2(
-; CHECK-SAME: <2 x half> noundef [[P0:%.*]]) {
-; CHECK:  [[ENTRY:.*:]]
-; CHECK:    [[MUL_I:%.*]] = fmul <2 x half> [[P0]], [[P0]]
-; CHECK:    [[TMP0:%.*]] = extractelement <2 x half> [[MUL_I]], i64 0
-; CHECK:    [[TMP1:%.*]] = fadd half 0xH0000, [[TMP0]]
-; CHECK:    [[TMP2:%.*]] = extractelement <2 x half> [[MUL_I]], i64 1
-; CHECK:    [[TMP3:%.*]] = fadd half [[TMP1]], [[TMP2]]
-; EXPCHECK: [[HLSL_LENGTH:%.*]] = call half @llvm.sqrt.f16(half [[TMP3]])
-; DOPCHECK: [[HLSL_LENGTH:%.*]] = call half @dx.op.unary.f16(i32 24, half [[TMP3]])
-; CHECK:    ret half [[HLSL_LENGTH]]
+; EXPCHECK-LABEL: define noundef half @test_length_half2(
+; EXPCHECK-SAME: <2 x half> noundef [[P0:%.*]]) {
+; EXPCHECK-NEXT:  [[ENTRY:.*:]]
+; EXPCHECK-NEXT:    [[MUL_I:%.*]] = fmul <2 x half> [[P0]], [[P0]]
+; EXPCHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x half> [[MUL_I]], i64 0
+; EXPCHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x half> [[MUL_I]], i64 1
+; EXPCHECK-NEXT:    [[TMP2:%.*]] = fadd half [[TMP0]], [[TMP1]]
+; EXPCHECK-NEXT:    [[HLSL_LENGTH:%.*]] = call half @llvm.sqrt.f16(half [[TMP2]])
+; EXPCHECK-NEXT:    ret half [[HLSL_LENGTH]]
+;
+; DOPCHECK-LABEL: define noundef half @test_length_half2(
+; DOPCHECK-SAME: <2 x half> noundef [[P0:%.*]]) {
+; DOPCHECK-NEXT:  [[ENTRY:.*:]]
+; DOPCHECK-NEXT:    [[P0_I0:%.*]] = extractelement <2 x half> [[P0]], i64 0
+; DOPCHECK-NEXT:    [[MUL_I_I0:%.*]] = fmul half [[P0_I0]], [[P0_I0]]
+; DOPCHECK-NEXT:    [[P0_I1:%.*]] = extractelement <2 x half> [[P0]], i64 1
+; DOPCHECK-NEXT:    [[MUL_I_I1:%.*]] = fmul half [[P0_I1]], [[P0_I1]]
+; DOPCHECK-NEXT:    [[TMP0:%.*]] = fadd half [[MUL_I_I0]], [[MUL_I_I1]]
+; DOPCHECK-NEXT:    [[HLSL_LENGTH1:%.*]] = call half @dx.op.unary.f16(i32 24, half [[TMP0]])
+; DOPCHECK-NEXT:    ret half [[HLSL_LENGTH1]]
 ;
 entry:
-
   %mul.i = fmul <2 x half> %p0, %p0
   %rdx.fadd.i = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> %mul.i)
   %hlsl.length = call half @llvm.sqrt.f16(half %rdx.fadd.i)
@@ -25,22 +34,33 @@ entry:
 }
 
 define noundef half @test_length_half3(<3 x half> noundef %p0) {
-; CHECK-LABEL: define noundef half @test_length_half3(
-; CHECK-SAME: <3 x half> noundef [[P0:%.*]]) {
-; CHECK:  [[ENTRY:.*:]]
-; CHECK:    [[MUL_I:%.*]] = fmul <3 x half> [[P0]], [[P0]]
-; CHECK:    [[TMP0:%.*]] = extractelement <3 x half> [[MUL_I]], i64 0
-; CHECK:    [[TMP1:%.*]] = fadd half 0xH0000, [[TMP0]]
-; CHECK:    [[TMP2:%.*]] = extractelement <3 x half> [[MUL_I]], i64 1
-; CHECK:    [[TMP3:%.*]] = fadd half [[TMP1]], [[TMP2]]
-; CHECK:    [[TMP4:%.*]] = extractelement <3 x half> [[MUL_I]], i64 2
-; CHECK:    [[TMP5:%.*]] = fadd half [[TMP3]], [[TMP4]]
-; EXPCHECK: [[HLSL_LENGTH:%.*]] = call half @llvm.sqrt.f16(half [[TMP5]])
-; DOPCHECK: [[HLSL_LENGTH:%.*]] = call half @dx.op.unary.f16(i32 24, half [[TMP5]])
-; CHECK:    ret half [[HLSL_LENGTH]]
+; EXPCHECK-LABEL: define noundef half @test_length_half3(
+; EXPCHECK-SAME: <3 x half> noundef [[P0:%.*]]) {
+; EXPCHECK-NEXT:  [[ENTRY:.*:]]
+; EXPCHECK-NEXT:    [[MUL_I:%.*]] = fmul <3 x half> [[P0]], [[P0]]
+; EXPCHECK-NEXT:    [[TMP0:%.*]] = extractelement <3 x half> [[MUL_I]], i64 0
+; EXPCHECK-NEXT:    [[TMP1:%.*]] = extractelement <3 x half> [[MUL_I]], i64 1
+; EXPCHECK-NEXT:    [[TMP2:%.*]] = fadd half [[TMP0]], [[TMP1]]
+; EXPCHECK-NEXT:    [[TMP3:%.*]] = extractelement <3 x half> [[MUL_I]], i64 2
+; EXPCHECK-NEXT:    [[TMP4:%.*]] = fadd half [[TMP2]], [[TMP3]]
+; EXPCHECK-NEXT:    [[HLSL_LENGTH:%.*]] = call half @llvm.sqrt.f16(half [[TMP4]])
+; EXPCHECK-NEXT:    ret half [[HLSL_LENGTH]]
+;
+; DOPCHECK-LABEL: define noundef half @test_length_half3(
+; DOPCHECK-SAME: <3 x half> noundef [[P0:%.*]]) {
+; DOPCHECK-NEXT:  [[ENTRY:.*:]]
+; DOPCHECK-NEXT:    [[P0_I0:%.*]] = extractelement <3 x half> [[P0]], i64 0
+; DOPCHECK-NEXT:    [[MUL_I_I0:%.*]] = fmul half [[P0_I0]], [[P0_I0]]
+; DOPCHECK-NEXT:    [[P0_I1:%.*]] = extractelement <3 x half> [[P0]], i64 1
+; DOPCHECK-NEXT:    [[MUL_I_I1:%.*]] = fmul half [[P0_I1]], [[P0_I1]]
+; DOPCHECK-NEXT:    [[P0_I2:%.*]] = extractelement <3 x half> [[P0]], i64 2
+; DOPCHECK-NEXT:    [[MUL_I_I2:%.*]] = fmul half [[P0_I2]], [[P0_I2]]
+; DOPCHECK-NEXT:    [[TMP0:%.*]] = fadd half [[MUL_I_I0]], [[MUL_I_I1]]
+; DOPCHECK-NEXT:    [[TMP1:%.*]] = fadd half [[TMP0]], [[MUL_I_I2]]
+; DOPCHECK-NEXT:    [[HLSL_LENGTH1:%.*]] = call half @dx.op.unary.f16(i32 24, half [[TMP1]])
+; DOPCHECK-NEXT:    ret half [[HLSL_LENGTH1]]
 ;
 entry:
-
   %mul.i = fmul <3 x half> %p0, %p0
   %rdx.fadd.i = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <3 x half> %mul.i)
   %hlsl.length = call half @llvm.sqrt.f16(half %rdx.fadd.i)
@@ -48,24 +68,38 @@ entry:
 }
 
 define noundef half @test_length_half4(<4 x half> noundef %p0) {
-; CHECK-LABEL: define noundef half @test_length_half4(
-; CHECK-SAME: <4 x half> noundef [[P0:%.*]]) {
-; CHECK:  [[ENTRY:.*:]]
-; CHECK:    [[MUL_I:%.*]] = fmul <4 x half> [[P0]], [[P0]]
-; CHECK:    [[TMP0:%.*]] = extractelement <4 x half> [[MUL_I]], i64 0
-; CHECK:    [[TMP1:%.*]] = fadd half 0xH0000, [[TMP0]]
-; CHECK:    [[TMP2:%.*]] = extractelement <4 x half> [[MUL_I]], i64 1
-; CHECK:    [[TMP3:%.*]] = fadd half [[TMP1]], [[TMP2]]
-; CHECK:    [[TMP4:%.*]] = extractelement <4 x half> [[MUL_I]], i64 2
-; CHECK:    [[TMP5:%.*]] = fadd half [[TMP3]], [[TMP4]]
-; CHECK:    [[TMP6:%.*]] = extractelement <4 x half> [[MUL_I]], i64 3
-; CHECK:    [[TMP7:%.*]] = fadd half [[TMP5]], [[TMP6]]
-; EXPCHECK: [[HLSL_LENGTH:%.*]] = call half @llvm.sqrt.f16(half [[TMP7]])
-; DOPCHECK: [[HLSL_LENGTH:%.*]] = call half @dx.op.unary.f16(i32 24, half [[TMP7]])
-; CHECK:    ret half [[HLSL_LENGTH]]
+; EXPCHECK-LABEL: define noundef half @test_length_half4(
+; EXPCHECK-SAME: <4 x half> noundef [[P0:%.*]]) {
+; EXPCHECK-NEXT:  [[ENTRY:.*:]]
+; EXPCHECK-NEXT:    [[MUL_I:%.*]] = fmul <4 x half> [[P0]], [[P0]]
+; EXPCHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x half> [[MUL_I]], i64 0
+; EXPCHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x half> [[MUL_I]], i64 1
+; EXPCHECK-NEXT:    [[TMP2:%.*]] = fadd half [[TMP0]], [[TMP1]]
+; EXPCHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x half> [[MUL_I]], i64 2
+; EXPCHECK-NEXT:    [[TMP4:%.*]] = fadd half [[TMP2]], [[TMP3]]
+; EXPCHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x half> [[MUL_I]], i64 3
+; EXPCHECK-NEXT:    [[TMP6:%.*]] = fadd half [[TMP4]], [[TMP5]]
+; EXPCHECK-NEXT:    [[HLSL_LENGTH:%.*]] = call half @llvm.sqrt.f16(half [[TMP6]])
+; EXPCHECK-NEXT:    ret half [[HLSL_LENGTH]]
+;
+; DOPCHECK-LABEL: define noundef half @test_length_half4(
+; DOPCHECK-SAME: <4 x half> noundef [[P0:%.*]]) {
+; DOPCHECK-NEXT:  [[ENTRY:.*:]]
+; DOPCHECK-NEXT:    [[P0_I0:%.*]] = extractelement <4 x half> [[P0]], i64 0
+; DOPCHECK-NEXT:    [[MUL_I_I0:%.*]] = fmul half [[P0_I0]], [[P0_I0]]
+; DOPCHECK-NEXT:    [[P0_I1:%.*]] = extractelement <4 x half> [[P0]], i64 1
+; DOPCHECK-NEXT:    [[MUL_I_I1:%.*]] = fmul half [[P0_I1]], [[P0_I1]]
+; DOPCHECK-NEXT:    [[P0_I2:%.*]] = extractelement <4 x half> [[P0]], i64 2
+; DOPCHECK-NEXT:    [[MUL_I_I2:%.*]] = fmul half [[P0_I2]], [[P0_I2]]
+; DOPCHECK-NEXT:    [[P0_I3:%.*]] = extractelement <4 x half> [[P0]], i64 3
+; DOPCHECK-NEXT:    [[MUL_I_I3:%.*]] = fmul half [[P0_I3]], [[P0_I3]]
+; DOPCHECK-NEXT:    [[TMP0:%.*]] = fadd half [[MUL_I_I0]], [[MUL_I_I1]]
+; DOPCHECK-NEXT:    [[TMP1:%.*]] = fadd half [[TMP0]], [[MUL_I_I2]]
+; DOPCHECK-NEXT:    [[TMP2:%.*]] = fadd half [[TMP1]], [[MUL_I_I3]]
+; DOPCHECK-NEXT:    [[HLSL_LENGTH1:%.*]] = call half @dx.op.unary.f16(i32 24, half [[TMP2]])
+; DOPCHECK-NEXT:    ret half [[HLSL_LENGTH1]]
 ;
 entry:
-
   %mul.i = fmul <4 x half> %p0, %p0
   %rdx.fadd.i = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <4 x half> %mul.i)
   %hlsl.length = call half @llvm.sqrt.f16(half %rdx.fadd.i)
@@ -73,20 +107,28 @@ entry:
 }
 
 define noundef float @test_length_float2(<2 x float> noundef %p0) {
-; CHECK-LABEL: define noundef float @test_length_float2(
-; CHECK-SAME: <2 x float> noundef [[P0:%.*]]) {
-; CHECK:  [[ENTRY:.*:]]
-; CHECK:    [[MUL_I:%.*]] = fmul <2 x float> [[P0]], [[P0]]
-; CHECK:    [[TMP0:%.*]] = extractelement <2 x float> [[MUL_I]], i64 0
-; CHECK:    [[TMP1:%.*]] = fadd float 0.000000e+00, [[TMP0]]
-; CHECK:    [[TMP2:%.*]] = extractelement <2 x float> [[MUL_I]], i64 1
-; CHECK:    [[TMP3:%.*]] = fadd float [[TMP1]], [[TMP2]]
-; EXPCHECK: [[HLSL_LENGTH:%.*]] = call float @llvm.sqrt.f32(float [[TMP3]])
-; DOPCHECK: [[HLSL_LENGTH:%.*]] = call float @dx.op.unary.f32(i32 24, float [[TMP3]])
-; CHECK:    ret float [[HLSL_LENGTH]]
+; EXPCHECK-LABEL: define noundef float @test_length_float2(
+; EXPCHECK-SAME: <2 x float> noundef [[P0:%.*]]) {
+; EXPCHECK-NEXT:  [[ENTRY:.*:]]
+; EXPCHECK-NEXT:    [[MUL_I:%.*]] = fmul <2 x float> [[P0]], [[P0]]
+; EXPCHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x float> [[MUL_I]], i64 0
+; EXPCHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x float> [[MUL_I]], i64 1
+; EXPCHECK-NEXT:    [[TMP2:%.*]] = fadd float [[TMP0]], [[TMP1]]
+; EXPCHECK-NEXT:    [[HLSL_LENGTH:%.*]] = call float @llvm.sqrt.f32(float [[TMP2]])
+; EXPCHECK-NEXT:    ret float [[HLSL_LENGTH]]
+;
+; DOPCHECK-LABEL: define noundef float @test_length_float2(
+; DOPCHECK-SAME: <2 x float> noundef [[P0:%.*]]) {
+; DOPCHECK-NEXT:  [[ENTRY:.*:]]
+; DOPCHECK-NEXT:    [[P0_I0:%.*]] = extractelement <2 x float> [[P0]], i64 0
+; DOPCHECK-NEXT:    [[MUL_I_I0:%.*]] = fmul float [[P0_I0]], [[P0_I0]]
+; DOPCHECK-NEXT:    [[P0_I1:%.*]] = extractelement <2 x float> [[P0]], i64 1
+; DOPCHECK-NEXT:    [[MUL_I_I1:%.*]] = fmul float [[P0_I1]], [[P0_I1]]
+; DOPCHECK-NEXT:    [[TMP0:%.*]] = fadd float [[MUL_I_I0]], [[MUL_I_I1]]
+; DOPCHECK-NEXT:    [[HLSL_LENGTH1:%.*]] = call float @dx.op.unary.f32(i32 24, float [[TMP0]])
+; DOPCHECK-NEXT:    ret float [[HLSL_LENGTH1]]
 ;
 entry:
-
   %mul.i = fmul <2 x float> %p0, %p0
   %rdx.fadd.i = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> %mul.i)
   %hlsl.length = call float @llvm.sqrt.f32(float %rdx.fadd.i)
@@ -94,22 +136,33 @@ entry:
 }
 
 define noundef float @test_length_float3(<3 x float> noundef %p0) {
-; CHECK-LABEL: define noundef float @test_length_float3(
-; CHECK-SAME: <3 x float> noundef [[P0:%.*]]) {
-; CHECK:  [[ENTRY:.*:]]
-; CHECK:    [[MUL_I:%.*]] = fmul <3 x float> [[P0]], [[P0]]
-; CHECK:    [[TMP0:%.*]] = extractelement <3 x float> [[MUL_I]], i64 0
-; CHECK:    [[TMP1:%.*]] = fadd float 0.000000e+00, [[TMP0]]
-; CHECK:    [[TMP2:%.*]] = extractelement <3 x float> [[MUL_I]], i64 1
-; CHECK:    [[TMP3:%.*]] = fadd float [[TMP1]], [[TMP2]]
-; CHECK:    [[TMP4:%.*]] = extractelement <3 x float> [[MUL_I]], i64 2
-; CHECK:    [[TMP5:%.*]] = fadd float [[TMP3]], [[TMP4]]
-; EXPCHECK: [[HLSL_LENGTH:%.*]] = call float @llvm.sqrt.f32(float [[TMP5]])
-; DOPCHECK: [[HLSL_LENGTH:%.*]] = call float @dx.op.unary.f32(i32 24, float [[TMP5]])
-; CHECK:    ret float [[HLSL_LENGTH]]
+; EXPCHECK-LABEL: define noundef float @test_length_float3(
+; EXPCHECK-SAME: <3 x float> noundef [[P0:%.*]]) {
+; EXPCHECK-NEXT:  [[ENTRY:.*:]]
+; EXPCHECK-NEXT:    [[MUL_I:%.*]] = fmul <3 x float> [[P0]], [[P0]]
+; EXPCHECK-NEXT:    [[TMP0:%.*]] = extractelement <3 x float> [[MUL_I]], i64 0
+; EXPCHECK-NEXT:    [[TMP1:%.*]] = extractelement <3 x float> [[MUL_I]], i64 1
+; EXPCHECK-NEXT:    [[TMP2:%.*]] = fadd float [[TMP0]], [[TMP1]]
+; EXPCHECK-NEXT:    [[TMP3:%.*]] = extractelement <3 x float> [[MUL_I]], i64 2
+; EXPCHECK-NEXT:    [[TMP4:%.*]] = fadd float [[TMP2]], [[TMP3]]
+; EXPCHECK-NEXT:    [[HLSL_LENGTH:%.*]] = call float @llvm.sqrt.f32(float [[TMP4]])
+; EXPCHECK-NEXT:    ret float [[HLSL_LENGTH]]
+;
+; DOPCHECK-LABEL: define noundef float @test_length_float3(
+; DOPCHECK-SAME: <3 x float> noundef [[P0:%.*]]) {
+; DOPCHECK-NEXT:  [[ENTRY:.*:]]
+; DOPCHECK-NEXT:    [[P0_I0:%.*]] = extractelement <3 x float> [[P0]], i64 0
+; DOPCHECK-NEXT:    [[MUL_I_I0:%.*]] = fmul float [[P0_I0]], [[P0_I0]]
+; DOPCHECK-NEXT:    [[P0_I1:%.*]] = extractelement <3 x float> [[P0]], i64 1
+; DOPCHECK-NEXT:    [[MUL_I_I1:%.*]] = fmul float [[P0_I1]], [[P0_I1]]
+; DOPCHECK-NEXT:    [[P0_I2:%.*]] = extractelement <3 x float> [[P0]], i64 2
+; DOPCHECK-NEXT:    [[MUL_I_I2:%.*]] = fmul float [[P0_I2]], [[P0_I2]]
+; DOPCHECK-NEXT:    [[TMP0:%.*]] = fadd float [[MUL_I_I0]], [[MUL_I_I1]]
+; DOPCHECK-NEXT:    [[TMP1:%.*]] = fadd float [[TMP0]], [[MUL_I_I2]]
+; DOPCHECK-NEXT:    [[HLSL_LENGTH1:%.*]] = call float @dx.op.unary.f32(i32 24, float [[TMP1]])
+; DOPCHECK-NEXT:    ret float [[HLSL_LENGTH1]]
 ;
 entry:
-
   %mul.i = fmul <3 x float> %p0, %p0
   %rdx.fadd.i = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <3 x float> %mul.i)
   %hlsl.length = call float @llvm.sqrt.f32(float %rdx.fadd.i)
@@ -117,26 +170,42 @@ entry:
 }
 
 define noundef float @test_length_float4(<4 x float> noundef %p0) {
-; CHECK-LABEL: define noundef float @test_length_float4(
-; CHECK-SAME: <4 x float> noundef [[P0:%.*]]) {
-; CHECK:  [[ENTRY:.*:]]
-; CHECK:    [[MUL_I:%.*]] = fmul <4 x float> [[P0]], [[P0]]
-; CHECK:    [[TMP0:%.*]] = extractelement <4 x float> [[MUL_I]], i64 0
-; CHECK:    [[TMP1:%.*]] = fadd float 0.000000e+00, [[TMP0]]
-; CHECK:    [[TMP2:%.*]] = extractelement <4 x float> [[MUL_I]], i64 1
-; CHECK:    [[TMP3:%.*]] = fadd float [[TMP1]], [[TMP2]]
-; CHECK:    [[TMP4:%.*]] = extractelement <4 x float> [[MUL_I]], i64 2
-; CHECK:    [[TMP5:%.*]] = fadd float [[TMP3]], [[TMP4]]
-; CHECK:    [[TMP6:%.*]] = extractelement <4 x float> [[MUL_I]], i64 3
-; CHECK:    [[TMP7:%.*]] = fadd float [[TMP5]], [[TMP6]]
-; EXPCHECK: [[HLSL_LENGTH:%.*]] = call float @llvm.sqrt.f32(float [[TMP7]])
-; DOPCHECK: [[HLSL_LENGTH:%.*]] = call float @dx.op.unary.f32(i32 24, float [[TMP7]])
-; CHECK:    ret float [[HLSL_LENGTH]]
+; EXPCHECK-LABEL: define noundef float @test_length_float4(
+; EXPCHECK-SAME: <4 x float> noundef [[P0:%.*]]) {
+; EXPCHECK-NEXT:  [[ENTRY:.*:]]
+; EXPCHECK-NEXT:    [[MUL_I:%.*]] = fmul <4 x float> [[P0]], [[P0]]
+; EXPCHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[MUL_I]], i64 0
+; EXPCHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[MUL_I]], i64 1
+; EXPCHECK-NEXT:    [[TMP2:%.*]] = fadd float [[TMP0]], [[TMP1]]
+; EXPCHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[MUL_I]], i64 2
+; EXPCHECK-NEXT:    [[TMP4:%.*]] = fadd float [[TMP2]], [[TMP3]]
+; EXPCHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[MUL_I]], i64 3
+; EXPCHECK-NEXT:    [[TMP6:%.*]] = fadd float [[TMP4]], [[TMP5]]
+; EXPCHECK-NEXT:    [[HLSL_LENGTH:%.*]] = call float @llvm.sqrt.f32(float [[TMP6]])
+; EXPCHECK-NEXT:    ret float [[HLSL_LENGTH]]
+;
+; DOPCHECK-LABEL: define noundef float @test_length_float4(
+; DOPCHECK-SAME: <4 x float> noundef [[P0:%.*]]) {
+; DOPCHECK-NEXT:  [[ENTRY:.*:]]
+; DOPCHECK-NEXT:    [[P0_I0:%.*]] = extractelement <4 x float> [[P0]], i64 0
+; DOPCHECK-NEXT:    [[MUL_I_I0:%.*]] = fmul float [[P0_I0]], [[P0_I0]]
+; DOPCHECK-NEXT:    [[P0_I1:%.*]] = extractelement <4 x float> [[P0]], i64 1
+; DOPCHECK-NEXT:    [[MUL_I_I1:%.*]] = fmul float [[P0_I1]], [[P0_I1]]
+; DOPCHECK-NEXT:    [[P0_I2:%.*]] = extractelement <4 x float> [[P0]], i64 2
+; DOPCHECK-NEXT:    [[MUL_I_I2:%.*]] = fmul float [[P0_I2]], [[P0_I2]]
+; DOPCHECK-NEXT:    [[P0_I3:%.*]] = extractelement <4 x float> [[P0]], i64 3
+; DOPCHECK-NEXT:    [[MUL_I_I3:%.*]] = fmul float [[P0_I3]], [[P0_I3]]
+; DOPCHECK-NEXT:    [[TMP0:%.*]] = fadd float [[MUL_I_I0]], [[MUL_I_I1]]
+; DOPCHECK-NEXT:    [[TMP1:%.*]] = fadd float [[TMP0]], [[MUL_I_I2]]
+; DOPCHECK-NEXT:    [[TMP2:%.*]] = fadd float [[TMP1]], [[MUL_I_I3]]
+; DOPCHECK-NEXT:    [[HLSL_LENGTH1:%.*]] = call float @dx.op.unary.f32(i32 24, float [[TMP2]])
+; DOPCHECK-NEXT:    ret float [[HLSL_LENGTH1]]
 ;
 entry:
-
   %mul.i = fmul <4 x float> %p0, %p0
   %rdx.fadd.i = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <4 x float> %mul.i)
   %hlsl.length = call float @llvm.sqrt.f32(float %rdx.fadd.i)
   ret float %hlsl.length
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}

>From 160e0743286c66a4caaf9ba7b578921fc1db7e2a Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Sat, 23 Nov 2024 04:08:27 -0500
Subject: [PATCH 5/5] add a target lowering pass through

---
 clang/include/clang/Basic/Builtins.td         |  12 +
 clang/lib/CodeGen/CGBuiltin.cpp               |  25 ++
 clang/lib/CodeGen/CGHLSLRuntime.h             |  88 ++++--
 clang/lib/Headers/hlsl/hlsl_detail.h          |  38 ++-
 clang/lib/Headers/hlsl/hlsl_intrinsics.h      |  13 +-
 clang/lib/Sema/SemaHLSL.cpp                   |  50 ++-
 clang/test/CodeGenHLSL/builtins/distance.hlsl |  53 ++++
 clang/test/CodeGenHLSL/builtins/length.hlsl   |  57 +++-
 llvm/include/llvm/IR/IntrinsicsSPIRV.td       |   1 +
 .../Target/DirectX/DXILIntrinsicExpansion.cpp |   1 -
 .../Target/SPIRV/SPIRVInstructionSelector.cpp |   2 +
 .../DirectX/length_invalid_intrinsic_error.ll |  10 -
 .../length_invalid_intrinsic_error_scalar.ll  |  10 -
 .../test/CodeGen/DirectX/vector_reduce_add.ll | 293 ++++++++++++++++++
 .../CodeGen/SPIRV/hlsl-intrinsics/cross.ll    |   8 +-
 .../CodeGen/SPIRV/hlsl-intrinsics/distance.ll |  33 ++
 .../CodeGen/SPIRV/hlsl-intrinsics/length.ll   |  14 +-
 17 files changed, 621 insertions(+), 87 deletions(-)
 delete mode 100644 llvm/test/CodeGen/DirectX/length_invalid_intrinsic_error.ll
 delete mode 100644 llvm/test/CodeGen/DirectX/length_invalid_intrinsic_error_scalar.ll
 create mode 100644 llvm/test/CodeGen/DirectX/vector_reduce_add.ll
 create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-intrinsics/distance.ll

diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 82acd520c227e9..bbe7adc05e1baa 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -4810,6 +4810,12 @@ def HLSLDegrees : LangBuiltin<"HLSL_LANG"> {
   let Prototype = "void(...)";
 }
 
+def HLSLDistance : LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_distance"];
+  let Attributes = [NoThrow, Const];
+  let Prototype = "void(...)";
+}
+
 def HLSLDotProduct : LangBuiltin<"HLSL_LANG"> {
   let Spellings = ["__builtin_hlsl_dot"];
   let Attributes = [NoThrow, Const];
@@ -4846,6 +4852,12 @@ def HLSLIsinf : LangBuiltin<"HLSL_LANG"> {
   let Prototype = "void(...)";
 }
 
+def HLSLLength : LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_length"];
+  let Attributes = [NoThrow, Const];
+  let Prototype = "void(...)";
+}
+
 def HLSLLerp : LangBuiltin<"HLSL_LANG"> {
   let Spellings = ["__builtin_hlsl_lerp"];
   let Attributes = [NoThrow, Const];
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index b81d1ba8f6ecce..134b80822a206c 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -19097,6 +19097,20 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
         /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getCrossIntrinsic(),
         ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.cross");
   }
+  case Builtin::BI__builtin_hlsl_distance: {
+    Value *X = EmitScalarExpr(E->getArg(0));
+    Value *Y = EmitScalarExpr(E->getArg(1));
+    assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
+           E->getArg(1)->getType()->hasFloatingRepresentation() &&
+           "Distance operands must have a float representation");
+    assert(E->getArg(0)->getType()->isVectorType() &&
+           E->getArg(1)->getType()->isVectorType() &&
+           "Distance operands must be a vector");
+    return Builder.CreateIntrinsic(
+        /*ReturnType=*/X->getType()->getScalarType(),
+        CGM.getHLSLRuntime().getDistanceIntrinsic(), ArrayRef<Value *>{X, Y},
+        nullptr, "hlsl.distance");
+  }
   case Builtin::BI__builtin_hlsl_dot: {
     Value *Op0 = EmitScalarExpr(E->getArg(0));
     Value *Op1 = EmitScalarExpr(E->getArg(1));
@@ -19174,6 +19188,17 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
         /*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getLerpIntrinsic(),
         ArrayRef<Value *>{X, Y, S}, nullptr, "hlsl.lerp");
   }
+  case Builtin::BI__builtin_hlsl_length: {
+    Value *X = EmitScalarExpr(E->getArg(0));
+    assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
+           "length operand must have a float representation");
+    assert(E->getArg(0)->getType()->isVectorType() &&
+           "length operand must be a vector");
+    return Builder.CreateIntrinsic(
+        /*ReturnType=*/X->getType()->getScalarType(),
+        CGM.getHLSLRuntime().getLengthIntrinsic(), ArrayRef<Value *>{X},
+        nullptr, "hlsl.length");
+  }
   case Builtin::BI__builtin_hlsl_normalize: {
     Value *X = EmitScalarExpr(E->getArg(0));
 
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index 8001e8a4c957e0..34a06583789976 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -30,22 +30,36 @@
 #include <optional>
 #include <vector>
 
+#define GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(FunctionName,                 \
+                                                 IntrinsicPostfix)             \
+  GENERATE_HLSL_INTRINSIC_FUNCTION(FunctionName, IntrinsicPostfix, 1, 1)
+
 // A function generator macro for picking the right intrinsic
 // for the target backend
-#define GENERATE_HLSL_INTRINSIC_FUNCTION(FunctionName, IntrinsicPostfix)       \
+#define GENERATE_HLSL_INTRINSIC_FUNCTION(FunctionName, IntrinsicPostfix,       \
+                                         IncludeDXIL, IncludeSPIRV)            \
   llvm::Intrinsic::ID get##FunctionName##Intrinsic() {                         \
     llvm::Triple::ArchType Arch = getArch();                                   \
     switch (Arch) {                                                            \
-    case llvm::Triple::dxil:                                                   \
-      return llvm::Intrinsic::dx_##IntrinsicPostfix;                           \
-    case llvm::Triple::spirv:                                                  \
-      return llvm::Intrinsic::spv_##IntrinsicPostfix;                          \
+      /* Include DXIL case only if IncludeDXIL is true */                      \
+      IF_INCLUDE(IncludeDXIL, case llvm::Triple::dxil                          \
+                 : return llvm::Intrinsic::dx_##IntrinsicPostfix;)             \
+      /* Include SPIRV case only if IncludeSPIRV is true */                    \
+      IF_INCLUDE(IncludeSPIRV, case llvm::Triple::spirv                        \
+                 : return llvm::Intrinsic::spv_##IntrinsicPostfix;)            \
+                                                                               \
     default:                                                                   \
       llvm_unreachable("Intrinsic " #IntrinsicPostfix                          \
                        " not supported by target architecture");               \
     }                                                                          \
   }
 
+#define IF_INCLUDE(Condition, Code) IF_INCLUDE_IMPL(Condition, Code)
+#define IF_INCLUDE_IMPL(Condition, Code) IF_INCLUDE_##Condition(Code)
+
+#define IF_INCLUDE_1(Code) Code
+#define IF_INCLUDE_0(Code)
+
 namespace llvm {
 class GlobalVariable;
 class Function;
@@ -72,35 +86,41 @@ class CGHLSLRuntime {
   // Start of reserved area for HLSL intrinsic getters.
   //===----------------------------------------------------------------------===//
 
-  GENERATE_HLSL_INTRINSIC_FUNCTION(All, all)
-  GENERATE_HLSL_INTRINSIC_FUNCTION(Any, any)
-  GENERATE_HLSL_INTRINSIC_FUNCTION(Cross, cross)
-  GENERATE_HLSL_INTRINSIC_FUNCTION(Degrees, degrees)
-  GENERATE_HLSL_INTRINSIC_FUNCTION(Frac, frac)
-  GENERATE_HLSL_INTRINSIC_FUNCTION(Lerp, lerp)
-  GENERATE_HLSL_INTRINSIC_FUNCTION(Normalize, normalize)
-  GENERATE_HLSL_INTRINSIC_FUNCTION(Rsqrt, rsqrt)
-  GENERATE_HLSL_INTRINSIC_FUNCTION(Saturate, saturate)
-  GENERATE_HLSL_INTRINSIC_FUNCTION(Sign, sign)
-  GENERATE_HLSL_INTRINSIC_FUNCTION(Step, step)
-  GENERATE_HLSL_INTRINSIC_FUNCTION(Radians, radians)
-  GENERATE_HLSL_INTRINSIC_FUNCTION(ThreadId, thread_id)
-  GENERATE_HLSL_INTRINSIC_FUNCTION(FDot, fdot)
-  GENERATE_HLSL_INTRINSIC_FUNCTION(SDot, sdot)
-  GENERATE_HLSL_INTRINSIC_FUNCTION(UDot, udot)
-  GENERATE_HLSL_INTRINSIC_FUNCTION(Dot4AddI8Packed, dot4add_i8packed)
-  GENERATE_HLSL_INTRINSIC_FUNCTION(Dot4AddU8Packed, dot4add_u8packed)
-  GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveAnyTrue, wave_any)
-  GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveCountBits, wave_active_countbits)
-  GENERATE_HLSL_INTRINSIC_FUNCTION(WaveIsFirstLane, wave_is_first_lane)
-  GENERATE_HLSL_INTRINSIC_FUNCTION(WaveReadLaneAt, wave_readlane)
-  GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitUHigh, firstbituhigh)
-  GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitSHigh, firstbitshigh)
-  GENERATE_HLSL_INTRINSIC_FUNCTION(NClamp, nclamp)
-  GENERATE_HLSL_INTRINSIC_FUNCTION(SClamp, sclamp)
-  GENERATE_HLSL_INTRINSIC_FUNCTION(UClamp, uclamp)
-
-  GENERATE_HLSL_INTRINSIC_FUNCTION(CreateHandleFromBinding, handle_fromBinding)
+  GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(All, all)
+  GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(Any, any)
+  GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(Cross, cross)
+  GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(Degrees, degrees)
+  GENERATE_HLSL_INTRINSIC_FUNCTION(Distance, distance, /*IncludeDXIL*/ 0,
+                                   /*IncludeSPIRV*/ 1)
+  GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(Frac, frac)
+  GENERATE_HLSL_INTRINSIC_FUNCTION(Length, length, /*IncludeDXIL*/ 0,
+                                   /*IncludeSPIRV*/ 1)
+  GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(Lerp, lerp)
+  GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(Normalize, normalize)
+  GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(Rsqrt, rsqrt)
+  GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(Saturate, saturate)
+  GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(Sign, sign)
+  GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(Step, step)
+  GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(Radians, radians)
+  GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(ThreadId, thread_id)
+  GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(FDot, fdot)
+  GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(SDot, sdot)
+  GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(UDot, udot)
+  GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(Dot4AddI8Packed, dot4add_i8packed)
+  GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(Dot4AddU8Packed, dot4add_u8packed)
+  GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(WaveActiveAnyTrue, wave_any)
+  GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(WaveActiveCountBits,
+                                           wave_active_countbits)
+  GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(WaveIsFirstLane, wave_is_first_lane)
+  GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(WaveReadLaneAt, wave_readlane)
+  GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(FirstBitUHigh, firstbituhigh)
+  GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(FirstBitSHigh, firstbitshigh)
+  GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(NClamp, nclamp)
+  GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(SClamp, sclamp)
+  GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(UClamp, uclamp)
+
+  GENERATE_HLSL_INTRINSIC_FUNCTION_DEFAULT(CreateHandleFromBinding,
+                                           handle_fromBinding)
 
   //===----------------------------------------------------------------------===//
   // End of reserved area for HLSL intrinsic getters.
diff --git a/clang/lib/Headers/hlsl/hlsl_detail.h b/clang/lib/Headers/hlsl/hlsl_detail.h
index 85ed08a1af06e9..e2a5f85b04bb47 100644
--- a/clang/lib/Headers/hlsl/hlsl_detail.h
+++ b/clang/lib/Headers/hlsl/hlsl_detail.h
@@ -9,6 +9,34 @@
 #ifndef _HLSL_HLSL_DETAILS_H_
 #define _HLSL_HLSL_DETAILS_H_
 
+#if __is_target_arch(dxil)
+#define IS_ARCH_DXIL 1
+#else
+#define IS_ARCH_DXIL 0
+#endif
+
+#if __is_target_arch(spirv)
+#define IS_ARCH_SPIRV 1
+#else
+#define IS_ARCH_SPIRV 0
+#endif
+
+#define ARCH_CONDITION(arch)                                                   \
+  if (IS_ARCH_##arch)                                                          \
+    return true;
+
+// Note: arch is used to bypass
+// the generic implementation
+#define EXPAND_ARCH_CONDITIONS(arch)                                           \
+  ARCH_CONDITION(arch)                                                         \
+  /* Add more architectures as needed */
+
+#define DEFINE_TARGET_LOWERING(function_name, ...)                             \
+  constexpr bool Has##function_name##Lowering() {                              \
+    EXPAND_ARCH_CONDITIONS(__VA_ARGS__)                                        \
+    return false; /* Default case if no match */                               \
+  }
+
 namespace hlsl {
 
 namespace __detail {
@@ -41,6 +69,7 @@ constexpr enable_if_t<sizeof(U) == sizeof(T), U> bit_cast(T F) {
   return __builtin_bit_cast(U, F);
 }
 
+DEFINE_TARGET_LOWERING(Length, SPIRV)
 template <typename T>
 constexpr enable_if_t<is_same<float, T>::value || is_same<half, T>::value, T>
 length_impl(T X) {
@@ -50,15 +79,14 @@ length_impl(T X) {
 template <typename T, int N>
 constexpr enable_if_t<is_same<float, T>::value || is_same<half, T>::value, T>
 length_vec_impl(vector<T, N> X) {
+  if (HasLengthLowering())
+    return __builtin_hlsl_length(X);
   vector<T, N> XSquared = X * X;
   T XSquaredSum = __builtin_hlsl_reduce_add(XSquared);
-  /*T  XSquaredSum = 0;
-  for(int I = 0; I < N; I++) {
-    XSquaredSum += XSquared[I];
-  }*/
   return __builtin_elementwise_sqrt(XSquaredSum);
 }
 
+DEFINE_TARGET_LOWERING(Distance, SPIRV)
 template <typename T>
 constexpr enable_if_t<is_same<float, T>::value || is_same<half, T>::value, T>
 distance_impl(T X, T Y) {
@@ -68,6 +96,8 @@ distance_impl(T X, T Y) {
 template <typename T, int N>
 constexpr enable_if_t<is_same<float, T>::value || is_same<half, T>::value, T>
 distance_vec_impl(vector<T, N> X, vector<T, N> Y) {
+  if (HasDistanceLowering())
+    return __builtin_hlsl_distance(X, Y);
   return length_vec_impl(X - Y);
 }
 
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index 588335d06ab17f..f51e5d4afbd6c9 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -879,22 +879,21 @@ float4 degrees(float4);
 /// \brief Returns a distance scalar between two vectors of \a X and \a Y.
 /// \param X The X input value.
 /// \param Y The Y input value.
-
 _HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
 const inline half distance(half X, half Y) {
   return __detail::distance_impl(X, Y);
 }
 
+const inline float distance(float X, float Y) {
+  return __detail::distance_impl(X, Y);
+}
+
 template <int N>
 _HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
 const inline half distance(vector<half, N> X, vector<half, N> Y) {
   return __detail::distance_vec_impl(X, Y);
 }
 
-const inline float distance(float X, float Y) {
-  return __detail::distance_impl(X, Y);
-}
-
 template <int N>
 const inline float distance(vector<float, N> X, vector<float, N> Y) {
   return __detail::distance_vec_impl(X, Y);
@@ -1325,9 +1324,9 @@ float4 lerp(float4, float4, float4);
 /// \param x [in] The vector of floats, or a scalar float.
 ///
 /// Length is based on the following formula: sqrt(x[0]^2 + x[1]^2 + ...).
-
 _HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
 const inline half length(half X) { return __detail::length_impl(X); }
+const inline float length(float X) { return __detail::length_impl(X); }
 
 template <int N>
 _HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
@@ -1335,8 +1334,6 @@ const inline half length(vector<half, N> X) {
   return __detail::length_vec_impl(X);
 }
 
-const inline float length(float X) { return __detail::length_impl(X); }
-
 template <int N> const inline float length(vector<float, N> X) {
   return __detail::length_vec_impl(X);
 }
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index de2ff83d63fee4..d108e6e130b01d 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -1943,6 +1943,20 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
     TheCall->setType(ArgTyA);
     break;
   }
+  case Builtin::BI__builtin_hlsl_distance: {
+    if (CheckFloatOrHalfRepresentations(&SemaRef, TheCall))
+      return true;
+    if (SemaRef.checkArgCount(TheCall, 2))
+      return true;
+    if (CheckVectorElementCallArgs(&SemaRef, TheCall))
+      return true;
+    ExprResult A = TheCall->getArg(0);
+    QualType ArgTyA = A.get()->getType();
+    auto *VTy = ArgTyA->getAs<VectorType>();
+    QualType RetTy = VTy->getElementType();
+    TheCall->setType(RetTy);
+    break;
+  }
   case Builtin::BI__builtin_hlsl_dot: {
     if (SemaRef.checkArgCount(TheCall, 2))
       return true;
@@ -2030,18 +2044,23 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
       return true;
     break;
   }
-  case Builtin::BI__builtin_hlsl_reduce_add: {
+  case Builtin::BI__builtin_hlsl_length: {
+    if (CheckFloatOrHalfRepresentations(&SemaRef, TheCall))
+      return true;
     if (SemaRef.checkArgCount(TheCall, 1))
       return true;
     ExprResult A = TheCall->getArg(0);
     QualType ArgTyA = A.get()->getType();
-    QualType RetTy;
-
-    if (auto *VTy = ArgTyA->getAs<VectorType>())
-      RetTy = VTy->getElementType();
-    else
-      RetTy = TheCall->getArg(0)->getType();
-
+    auto *VTy = ArgTyA->getAs<VectorType>();
+    if (VTy == nullptr) {
+      SemaRef.Diag(A.get()->getBeginLoc(),
+                   diag::err_typecheck_convert_incompatible)
+          << ArgTyA
+          << SemaRef.Context.getVectorType(ArgTyA, 2, VectorKind::Generic) << 1
+          << 0 << 0;
+      return true;
+    }
+    QualType RetTy = VTy->getElementType();
     TheCall->setType(RetTy);
     break;
   }
@@ -2068,6 +2087,21 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
     TheCall->setType(ArgTyA);
     break;
   }
+  case Builtin::BI__builtin_hlsl_reduce_add: {
+    if (SemaRef.checkArgCount(TheCall, 1))
+      return true;
+    ExprResult A = TheCall->getArg(0);
+    QualType ArgTyA = A.get()->getType();
+    QualType RetTy;
+
+    if (auto *VTy = ArgTyA->getAs<VectorType>())
+      RetTy = VTy->getElementType();
+    else
+      RetTy = TheCall->getArg(0)->getType();
+
+    TheCall->setType(RetTy);
+    break;
+  }
   case Builtin::BI__builtin_hlsl_elementwise_sign: {
     if (CheckFloatingOrIntRepresentation(&SemaRef, TheCall))
       return true;
diff --git a/clang/test/CodeGenHLSL/builtins/distance.hlsl b/clang/test/CodeGenHLSL/builtins/distance.hlsl
index ea7988122ae6d4..99b95114e51d29 100644
--- a/clang/test/CodeGenHLSL/builtins/distance.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/distance.hlsl
@@ -2,6 +2,9 @@
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
 // RUN:   -emit-llvm -O1 -o - | FileCheck %s
+// RUN: %clang_cc1 -finclude-default-header -triple \
+// RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \
+// RUN: -emit-llvm -O1 -o - | FileCheck %s --check-prefix=SPVCHECK
 
 // CHECK-LABEL: define noundef half @_Z18test_distance_halfDhDh(
 // CHECK-SAME: half noundef [[X:%.*]], half noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
@@ -10,6 +13,13 @@
 // CHECK-NEXT:    [[ELT_ABS_I:%.*]] = tail call noundef half @llvm.fabs.f16(half [[SUB_I]])
 // CHECK-NEXT:    ret half [[ELT_ABS_I]]
 //
+// SPVCHECK-LABEL: define spir_func noundef half @_Z18test_distance_halfDhDh(
+// SPVCHECK-SAME: half noundef [[X:%.*]], half noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// SPVCHECK-NEXT:  [[ENTRY:.*:]]
+// SPVCHECK-NEXT:    [[SUB_I:%.*]] = fsub half [[X]], [[Y]]
+// SPVCHECK-NEXT:    [[ELT_ABS_I:%.*]] = tail call noundef half @llvm.fabs.f16(half [[SUB_I]])
+// SPVCHECK-NEXT:    ret half [[ELT_ABS_I]]
+//
 half test_distance_half(half X, half Y) { return distance(X, Y); }
 
 // CHECK-LABEL: define noundef half @_Z19test_distance_half2Dv2_DhS_(
@@ -21,6 +31,12 @@ half test_distance_half(half X, half Y) { return distance(X, Y); }
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef half @llvm.sqrt.f16(half [[RDX_FADD_I]])
 // CHECK-NEXT:    ret half [[TMP0]]
 //
+// SPVCHECK-LABEL: define spir_func noundef half @_Z19test_distance_half2Dv2_DhS_(
+// SPVCHECK-SAME: <2 x half> noundef [[X:%.*]], <2 x half> noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// SPVCHECK-NEXT:  [[ENTRY:.*:]]
+// SPVCHECK-NEXT:    [[HLSL_DISTANCE_I:%.*]] = tail call noundef half @llvm.spv.distance.v2f16(<2 x half> [[X]], <2 x half> [[Y]])
+// SPVCHECK-NEXT:    ret half [[HLSL_DISTANCE_I]]
+//
 half test_distance_half2(half2 X, half2 Y) { return distance(X, Y); }
 
 // CHECK-LABEL: define noundef half @_Z19test_distance_half3Dv3_DhS_(
@@ -32,6 +48,12 @@ half test_distance_half2(half2 X, half2 Y) { return distance(X, Y); }
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef half @llvm.sqrt.f16(half [[RDX_FADD_I]])
 // CHECK-NEXT:    ret half [[TMP0]]
 //
+// SPVCHECK-LABEL: define spir_func noundef half @_Z19test_distance_half3Dv3_DhS_(
+// SPVCHECK-SAME: <3 x half> noundef [[X:%.*]], <3 x half> noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// SPVCHECK-NEXT:  [[ENTRY:.*:]]
+// SPVCHECK-NEXT:    [[HLSL_DISTANCE_I:%.*]] = tail call noundef half @llvm.spv.distance.v3f16(<3 x half> [[X]], <3 x half> [[Y]])
+// SPVCHECK-NEXT:    ret half [[HLSL_DISTANCE_I]]
+//
 half test_distance_half3(half3 X, half3 Y) { return distance(X, Y); }
 
 // CHECK-LABEL: define noundef half @_Z19test_distance_half4Dv4_DhS_(
@@ -43,6 +65,12 @@ half test_distance_half3(half3 X, half3 Y) { return distance(X, Y); }
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef half @llvm.sqrt.f16(half [[RDX_FADD_I]])
 // CHECK-NEXT:    ret half [[TMP0]]
 //
+// SPVCHECK-LABEL: define spir_func noundef half @_Z19test_distance_half4Dv4_DhS_(
+// SPVCHECK-SAME: <4 x half> noundef [[X:%.*]], <4 x half> noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// SPVCHECK-NEXT:  [[ENTRY:.*:]]
+// SPVCHECK-NEXT:    [[HLSL_DISTANCE_I:%.*]] = tail call noundef half @llvm.spv.distance.v4f16(<4 x half> [[X]], <4 x half> [[Y]])
+// SPVCHECK-NEXT:    ret half [[HLSL_DISTANCE_I]]
+//
 half test_distance_half4(half4 X, half4 Y) { return distance(X, Y); }
 
 // CHECK-LABEL: define noundef float @_Z19test_distance_floatff(
@@ -52,6 +80,13 @@ half test_distance_half4(half4 X, half4 Y) { return distance(X, Y); }
 // CHECK-NEXT:    [[ELT_ABS_I:%.*]] = tail call noundef float @llvm.fabs.f32(float [[SUB_I]])
 // CHECK-NEXT:    ret float [[ELT_ABS_I]]
 //
+// SPVCHECK-LABEL: define spir_func noundef float @_Z19test_distance_floatff(
+// SPVCHECK-SAME: float noundef [[X:%.*]], float noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// SPVCHECK-NEXT:  [[ENTRY:.*:]]
+// SPVCHECK-NEXT:    [[SUB_I:%.*]] = fsub float [[X]], [[Y]]
+// SPVCHECK-NEXT:    [[ELT_ABS_I:%.*]] = tail call noundef float @llvm.fabs.f32(float [[SUB_I]])
+// SPVCHECK-NEXT:    ret float [[ELT_ABS_I]]
+//
 float test_distance_float(float X, float Y) { return distance(X, Y); }
 
 // CHECK-LABEL: define noundef float @_Z20test_distance_float2Dv2_fS_(
@@ -63,6 +98,12 @@ float test_distance_float(float X, float Y) { return distance(X, Y); }
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[RDX_FADD_I]])
 // CHECK-NEXT:    ret float [[TMP0]]
 //
+// SPVCHECK-LABEL: define spir_func noundef float @_Z20test_distance_float2Dv2_fS_(
+// SPVCHECK-SAME: <2 x float> noundef [[X:%.*]], <2 x float> noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// SPVCHECK-NEXT:  [[ENTRY:.*:]]
+// SPVCHECK-NEXT:    [[HLSL_DISTANCE_I:%.*]] = tail call noundef float @llvm.spv.distance.v2f32(<2 x float> [[X]], <2 x float> [[Y]])
+// SPVCHECK-NEXT:    ret float [[HLSL_DISTANCE_I]]
+//
 float test_distance_float2(float2 X, float2 Y) { return distance(X, Y); }
 
 // CHECK-LABEL: define noundef float @_Z20test_distance_float3Dv3_fS_(
@@ -74,6 +115,12 @@ float test_distance_float2(float2 X, float2 Y) { return distance(X, Y); }
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[RDX_FADD_I]])
 // CHECK-NEXT:    ret float [[TMP0]]
 //
+// SPVCHECK-LABEL: define spir_func noundef float @_Z20test_distance_float3Dv3_fS_(
+// SPVCHECK-SAME: <3 x float> noundef [[X:%.*]], <3 x float> noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// SPVCHECK-NEXT:  [[ENTRY:.*:]]
+// SPVCHECK-NEXT:    [[HLSL_DISTANCE_I:%.*]] = tail call noundef float @llvm.spv.distance.v3f32(<3 x float> [[X]], <3 x float> [[Y]])
+// SPVCHECK-NEXT:    ret float [[HLSL_DISTANCE_I]]
+//
 float test_distance_float3(float3 X, float3 Y) { return distance(X, Y); }
 
 // CHECK-LABEL: define noundef float @_Z20test_distance_float4Dv4_fS_(
@@ -85,4 +132,10 @@ float test_distance_float3(float3 X, float3 Y) { return distance(X, Y); }
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[RDX_FADD_I]])
 // CHECK-NEXT:    ret float [[TMP0]]
 //
+// SPVCHECK-LABEL: define spir_func noundef float @_Z20test_distance_float4Dv4_fS_(
+// SPVCHECK-SAME: <4 x float> noundef [[X:%.*]], <4 x float> noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// SPVCHECK-NEXT:  [[ENTRY:.*:]]
+// SPVCHECK-NEXT:    [[HLSL_DISTANCE_I:%.*]] = tail call noundef float @llvm.spv.distance.v4f32(<4 x float> [[X]], <4 x float> [[Y]])
+// SPVCHECK-NEXT:    ret float [[HLSL_DISTANCE_I]]
+//
 float test_distance_float4(float4 X, float4 Y) { return distance(X, Y); }
diff --git a/clang/test/CodeGenHLSL/builtins/length.hlsl b/clang/test/CodeGenHLSL/builtins/length.hlsl
index 2dad0e18a4a888..fe80c37df6434f 100644
--- a/clang/test/CodeGenHLSL/builtins/length.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/length.hlsl
@@ -2,6 +2,9 @@
 // RUN: %clang_cc1 -finclude-default-header -triple \
 // RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
 // RUN: -emit-llvm -O1 -o - | FileCheck %s
+// RUN: %clang_cc1 -finclude-default-header -triple \
+// RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \
+// RUN: -emit-llvm -O1 -o - | FileCheck %s --check-prefix=SPVCHECK
 
 
 // CHECK-LABEL: define noundef half @_Z16test_length_halfDh(
@@ -10,6 +13,12 @@
 // CHECK-NEXT:    [[ELT_ABS_I:%.*]] = tail call noundef half @llvm.fabs.f16(half [[P0]])
 // CHECK-NEXT:    ret half [[ELT_ABS_I]]
 //
+// SPVCHECK-LABEL: define spir_func noundef half @_Z16test_length_halfDh(
+// SPVCHECK-SAME: half noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// SPVCHECK-NEXT:  [[ENTRY:.*:]]
+// SPVCHECK-NEXT:    [[ELT_ABS_I:%.*]] = tail call noundef half @llvm.fabs.f16(half [[P0]])
+// SPVCHECK-NEXT:    ret half [[ELT_ABS_I]]
+//
 half test_length_half(half p0)
 {
   return length(p0);
@@ -23,6 +32,12 @@ half test_length_half(half p0)
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef half @llvm.sqrt.f16(half [[RDX_FADD_I]])
 // CHECK-NEXT:    ret half [[TMP0]]
 //
+// SPVCHECK-LABEL: define spir_func noundef half @_Z17test_length_half2Dv2_Dh(
+// SPVCHECK-SAME: <2 x half> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// SPVCHECK-NEXT:  [[ENTRY:.*:]]
+// SPVCHECK-NEXT:    [[HLSL_LENGTH_I:%.*]] = tail call noundef half @llvm.spv.length.v2f16(<2 x half> [[P0]])
+// SPVCHECK-NEXT:    ret half [[HLSL_LENGTH_I]]
+//
 half test_length_half2(half2 p0)
 {
   return length(p0);
@@ -36,6 +51,12 @@ half test_length_half2(half2 p0)
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef half @llvm.sqrt.f16(half [[RDX_FADD_I]])
 // CHECK-NEXT:    ret half [[TMP0]]
 //
+// SPVCHECK-LABEL: define spir_func noundef half @_Z17test_length_half3Dv3_Dh(
+// SPVCHECK-SAME: <3 x half> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// SPVCHECK-NEXT:  [[ENTRY:.*:]]
+// SPVCHECK-NEXT:    [[HLSL_LENGTH_I:%.*]] = tail call noundef half @llvm.spv.length.v3f16(<3 x half> [[P0]])
+// SPVCHECK-NEXT:    ret half [[HLSL_LENGTH_I]]
+//
 half test_length_half3(half3 p0)
 {
   return length(p0);
@@ -49,6 +70,12 @@ half test_length_half3(half3 p0)
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef half @llvm.sqrt.f16(half [[RDX_FADD_I]])
 // CHECK-NEXT:    ret half [[TMP0]]
 //
+// SPVCHECK-LABEL: define spir_func noundef half @_Z17test_length_half4Dv4_Dh(
+// SPVCHECK-SAME: <4 x half> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// SPVCHECK-NEXT:  [[ENTRY:.*:]]
+// SPVCHECK-NEXT:    [[HLSL_LENGTH_I:%.*]] = tail call noundef half @llvm.spv.length.v4f16(<4 x half> [[P0]])
+// SPVCHECK-NEXT:    ret half [[HLSL_LENGTH_I]]
+//
 half test_length_half4(half4 p0)
 {
   return length(p0);
@@ -61,6 +88,12 @@ half test_length_half4(half4 p0)
 // CHECK-NEXT:    [[ELT_ABS_I:%.*]] = tail call noundef float @llvm.fabs.f32(float [[P0]])
 // CHECK-NEXT:    ret float [[ELT_ABS_I]]
 //
+// SPVCHECK-LABEL: define spir_func noundef float @_Z17test_length_floatf(
+// SPVCHECK-SAME: float noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// SPVCHECK-NEXT:  [[ENTRY:.*:]]
+// SPVCHECK-NEXT:    [[ELT_ABS_I:%.*]] = tail call noundef float @llvm.fabs.f32(float [[P0]])
+// SPVCHECK-NEXT:    ret float [[ELT_ABS_I]]
+//
 float test_length_float(float p0)
 {
   return length(p0);
@@ -74,6 +107,12 @@ float test_length_float(float p0)
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[RDX_FADD_I]])
 // CHECK-NEXT:    ret float [[TMP0]]
 //
+// SPVCHECK-LABEL: define spir_func noundef float @_Z18test_length_float2Dv2_f(
+// SPVCHECK-SAME: <2 x float> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// SPVCHECK-NEXT:  [[ENTRY:.*:]]
+// SPVCHECK-NEXT:    [[HLSL_LENGTH_I:%.*]] = tail call noundef float @llvm.spv.length.v2f32(<2 x float> [[P0]])
+// SPVCHECK-NEXT:    ret float [[HLSL_LENGTH_I]]
+//
 float test_length_float2(float2 p0)
 {
   return length(p0);
@@ -87,6 +126,12 @@ float test_length_float2(float2 p0)
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[RDX_FADD_I]])
 // CHECK-NEXT:    ret float [[TMP0]]
 //
+// SPVCHECK-LABEL: define spir_func noundef float @_Z18test_length_float3Dv3_f(
+// SPVCHECK-SAME: <3 x float> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// SPVCHECK-NEXT:  [[ENTRY:.*:]]
+// SPVCHECK-NEXT:    [[HLSL_LENGTH_I:%.*]] = tail call noundef float @llvm.spv.length.v3f32(<3 x float> [[P0]])
+// SPVCHECK-NEXT:    ret float [[HLSL_LENGTH_I]]
+//
 float test_length_float3(float3 p0)
 {
   return length(p0);
@@ -98,9 +143,17 @@ float test_length_float3(float3 p0)
 // CHECK-NEXT:    [[MUL_I:%.*]] = fmul <4 x float> [[P0]], [[P0]]
 // CHECK-NEXT:    [[RDX_FADD_I:%.*]] = tail call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[MUL_I]])
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[RDX_FADD_I]])
-// CHECK-NEXT:    ret float [[TMP0]]
+// CHECK-NEXT:    [[ADD:%.*]] = fadd float [[TMP0]], [[TMP0]]
+// CHECK-NEXT:    ret float [[ADD]]
+//
+// SPVCHECK-LABEL: define spir_func noundef float @_Z18test_length_float4Dv4_f(
+// SPVCHECK-SAME: <4 x float> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// SPVCHECK-NEXT:  [[ENTRY:.*:]]
+// SPVCHECK-NEXT:    [[HLSL_LENGTH_I:%.*]] = tail call noundef float @llvm.spv.length.v4f32(<4 x float> [[P0]])
+// SPVCHECK-NEXT:    [[ADD:%.*]] = fadd float [[HLSL_LENGTH_I]], [[HLSL_LENGTH_I]]
+// SPVCHECK-NEXT:    ret float [[ADD]]
 //
 float test_length_float4(float4 p0)
 {
-  return length(p0);
+  return length(p0) + length(p0);
 }
diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
index e1157085832866..9e8977634a5812 100644
--- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td
+++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
@@ -63,6 +63,7 @@ let TargetPrefix = "spv" in {
   def int_spv_any : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty], [IntrNoMem]>;
   def int_spv_cross : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
   def int_spv_degrees : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty], [IntrNoMem]>;
+  def int_spv_distance : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>], [IntrNoMem]>;
   def int_spv_frac : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty], [IntrNoMem]>;
   def int_spv_lerp : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>,LLVMMatchType<0>],
     [IntrNoMem] >;
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index eb89f6a2f95da7..204dd9dfb85b26 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -94,7 +94,6 @@ static Value *expandVecReduceAdd(CallInst *Orig) {
   Value *X = Orig->getOperand(0);
   IRBuilder<> Builder(Orig);
   Type *Ty = X->getType();
-  Type *EltTy = Ty->getScalarType();
   auto *XVec = dyn_cast<FixedVectorType>(Ty);
   unsigned XVecSize = XVec->getNumElements();
 
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 9905691d412bf8..dc8ead88f59a42 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -2796,6 +2796,8 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
     return selectAny(ResVReg, ResType, I);
   case Intrinsic::spv_cross:
     return selectExtInst(ResVReg, ResType, I, CL::cross, GL::Cross);
+  case Intrinsic::spv_distance:
+    return selectExtInst(ResVReg, ResType, I, CL::distance, GL::Distance);
   case Intrinsic::spv_lerp:
     return selectExtInst(ResVReg, ResType, I, CL::mix, GL::FMix);
   case Intrinsic::spv_length:
diff --git a/llvm/test/CodeGen/DirectX/length_invalid_intrinsic_error.ll b/llvm/test/CodeGen/DirectX/length_invalid_intrinsic_error.ll
deleted file mode 100644
index f722de2f9029e7..00000000000000
--- a/llvm/test/CodeGen/DirectX/length_invalid_intrinsic_error.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: not opt -S -dxil-intrinsic-expansion -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s
-
-; DXIL operation length does not support 1-element vector types.
-; CHECK: LLVM ERROR: Invalid input type for length intrinsic
-
-define noundef float @test_length_float(<1 x float> noundef %p0) {
-entry:
-  %hlsl.length = call float @llvm.dx.length.v1f32(<1 x float> %p0)
-  ret float %hlsl.length
-}
diff --git a/llvm/test/CodeGen/DirectX/length_invalid_intrinsic_error_scalar.ll b/llvm/test/CodeGen/DirectX/length_invalid_intrinsic_error_scalar.ll
deleted file mode 100644
index ac3a0513eb6b27..00000000000000
--- a/llvm/test/CodeGen/DirectX/length_invalid_intrinsic_error_scalar.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: not opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s
-
-; DXIL operation length does not support scalar types
-; CHECK: error: invalid intrinsic signature
-
-define noundef float @test_length_float(float noundef %p0) {
-entry:
-  %hlsl.length = call float @llvm.dx.length.f32(float %p0)
-  ret float %hlsl.length
-}
diff --git a/llvm/test/CodeGen/DirectX/vector_reduce_add.ll b/llvm/test/CodeGen/DirectX/vector_reduce_add.ll
new file mode 100644
index 00000000000000..e7000027fc6e43
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/vector_reduce_add.ll
@@ -0,0 +1,293 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S  -dxil-intrinsic-expansion  < %s | FileCheck %s
+
+; Make sure dxil operation function calls for lvm.vector.reduce.fadd and lvm.vector.reduce.add are generate.
+
+define noundef half @test_length_half2(<2 x half> noundef %p0) {
+; CHECK-LABEL: define noundef half @test_length_half2(
+; CHECK-SAME: <2 x half> noundef [[P0:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x half> [[P0]], i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x half> [[P0]], i64 1
+; CHECK-NEXT:    [[TMP2:%.*]] = fadd half [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    ret half [[TMP2]]
+;
+entry:
+  %rdx.fadd = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> %p0)
+  ret half %rdx.fadd
+}
+
+define noundef half @test_length_half3(<3 x half> noundef %p0) {
+; CHECK-LABEL: define noundef half @test_length_half3(
+; CHECK-SAME: <3 x half> noundef [[P0:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <3 x half> [[P0]], i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <3 x half> [[P0]], i64 1
+; CHECK-NEXT:    [[TMP2:%.*]] = fadd half [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <3 x half> [[P0]], i64 2
+; CHECK-NEXT:    [[TMP4:%.*]] = fadd half [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    ret half [[TMP4]]
+;
+entry:
+  %rdx.fadd = call half @llvm.vector.reduce.fadd.v3f16(half 0xH0000, <3 x half> %p0)
+  ret half %rdx.fadd
+}
+
+define noundef half @test_length_half4(<4 x half> noundef %p0) {
+; CHECK-LABEL: define noundef half @test_length_half4(
+; CHECK-SAME: <4 x half> noundef [[P0:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x half> [[P0]], i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x half> [[P0]], i64 1
+; CHECK-NEXT:    [[TMP2:%.*]] = fadd half [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x half> [[P0]], i64 2
+; CHECK-NEXT:    [[TMP4:%.*]] = fadd half [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x half> [[P0]], i64 3
+; CHECK-NEXT:    [[TMP6:%.*]] = fadd half [[TMP4]], [[TMP5]]
+; CHECK-NEXT:    ret half [[TMP6]]
+;
+entry:
+  %rdx.fadd = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> %p0)
+  ret half %rdx.fadd
+}
+
+define noundef float @test_length_float2(<2 x float> noundef %p0) {
+; CHECK-LABEL: define noundef float @test_length_float2(
+; CHECK-SAME: <2 x float> noundef [[P0:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x float> [[P0]], i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x float> [[P0]], i64 1
+; CHECK-NEXT:    [[TMP2:%.*]] = fadd float [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    ret float [[TMP2]]
+;
+entry:
+  %rdx.fadd = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> %p0)
+  ret float %rdx.fadd
+}
+
+define noundef float @test_length_float3(<3 x float> noundef %p0) {
+; CHECK-LABEL: define noundef float @test_length_float3(
+; CHECK-SAME: <3 x float> noundef [[P0:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <3 x float> [[P0]], i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <3 x float> [[P0]], i64 1
+; CHECK-NEXT:    [[TMP2:%.*]] = fadd float [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <3 x float> [[P0]], i64 2
+; CHECK-NEXT:    [[TMP4:%.*]] = fadd float [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    ret float [[TMP4]]
+;
+entry:
+  %rdx.fadd = call float @llvm.vector.reduce.fadd.v3f32(float 0.000000e+00, <3 x float> %p0)
+  ret float %rdx.fadd
+}
+
+define noundef float @test_length_float4(<4 x float> noundef %p0) {
+; CHECK-LABEL: define noundef float @test_length_float4(
+; CHECK-SAME: <4 x float> noundef [[P0:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[P0]], i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[P0]], i64 1
+; CHECK-NEXT:    [[TMP2:%.*]] = fadd float [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[P0]], i64 2
+; CHECK-NEXT:    [[TMP4:%.*]] = fadd float [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[P0]], i64 3
+; CHECK-NEXT:    [[TMP6:%.*]] = fadd float [[TMP4]], [[TMP5]]
+; CHECK-NEXT:    ret float [[TMP6]]
+;
+entry:
+  %rdx.fadd = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> %p0)
+  ret float %rdx.fadd
+}
+
+define noundef double @test_length_double2(<2 x double> noundef %p0) {
+; CHECK-LABEL: define noundef double @test_length_double2(
+; CHECK-SAME: <2 x double> noundef [[P0:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x double> [[P0]], i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[P0]], i64 1
+; CHECK-NEXT:    [[TMP2:%.*]] = fadd double [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    ret double [[TMP2]]
+;
+entry:
+  %rdx.fadd = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> %p0)
+  ret double %rdx.fadd
+}
+
+define noundef double @test_length_double3(<3 x double> noundef %p0) {
+; CHECK-LABEL: define noundef double @test_length_double3(
+; CHECK-SAME: <3 x double> noundef [[P0:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <3 x double> [[P0]], i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <3 x double> [[P0]], i64 1
+; CHECK-NEXT:    [[TMP2:%.*]] = fadd double [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <3 x double> [[P0]], i64 2
+; CHECK-NEXT:    [[TMP4:%.*]] = fadd double [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    ret double [[TMP4]]
+;
+entry:
+  %rdx.fadd = call double @llvm.vector.reduce.fadd.v3f64(double 0.000000e+00, <3 x double> %p0)
+  ret double %rdx.fadd
+}
+
+define noundef double @test_length_double4(<4 x double> noundef %p0) {
+; CHECK-LABEL: define noundef double @test_length_double4(
+; CHECK-SAME: <4 x double> noundef [[P0:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x double> [[P0]], i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x double> [[P0]], i64 1
+; CHECK-NEXT:    [[TMP2:%.*]] = fadd double [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x double> [[P0]], i64 2
+; CHECK-NEXT:    [[TMP4:%.*]] = fadd double [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x double> [[P0]], i64 3
+; CHECK-NEXT:    [[TMP6:%.*]] = fadd double [[TMP4]], [[TMP5]]
+; CHECK-NEXT:    ret double [[TMP6]]
+;
+entry:
+  %rdx.fadd = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> %p0)
+  ret double %rdx.fadd
+}
+
+define noundef i16 @test_length_short2(<2 x i16> noundef %p0) {
+; CHECK-LABEL: define noundef i16 @test_length_short2(
+; CHECK-SAME: <2 x i16> noundef [[P0:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i16> [[P0]], i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i16> [[P0]], i64 1
+; CHECK-NEXT:    [[TMP2:%.*]] = add i16 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    ret i16 [[TMP2]]
+;
+entry:
+  %rdx.add = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %p0)
+  ret i16 %rdx.add
+}
+
+define noundef i16 @test_length_short3(<3 x i16> noundef %p0) {
+; CHECK-LABEL: define noundef i16 @test_length_short3(
+; CHECK-SAME: <3 x i16> noundef [[P0:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <3 x i16> [[P0]], i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <3 x i16> [[P0]], i64 1
+; CHECK-NEXT:    [[TMP2:%.*]] = add i16 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <3 x i16> [[P0]], i64 2
+; CHECK-NEXT:    [[TMP4:%.*]] = add i16 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    ret i16 [[TMP4]]
+;
+entry:
+  %rdx.fadd = call i16 @llvm.vector.reduce.add.v3i16(<3 x i16> %p0)
+  ret i16 %rdx.fadd
+}
+
+define noundef i16 @test_length_short4(<4 x i16> noundef %p0) {
+; CHECK-LABEL: define noundef i16 @test_length_short4(
+; CHECK-SAME: <4 x i16> noundef [[P0:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x i16> [[P0]], i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i16> [[P0]], i64 1
+; CHECK-NEXT:    [[TMP2:%.*]] = add i16 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i16> [[P0]], i64 2
+; CHECK-NEXT:    [[TMP4:%.*]] = add i16 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i16> [[P0]], i64 3
+; CHECK-NEXT:    [[TMP6:%.*]] = add i16 [[TMP4]], [[TMP5]]
+; CHECK-NEXT:    ret i16 [[TMP6]]
+;
+entry:
+  %rdx.fadd = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %p0)
+  ret i16 %rdx.fadd
+}
+
+define noundef i32 @test_length_int2(<2 x i32> noundef %p0) {
+; CHECK-LABEL: define noundef i32 @test_length_int2(
+; CHECK-SAME: <2 x i32> noundef [[P0:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i32> [[P0]], i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i32> [[P0]], i64 1
+; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
+entry:
+  %rdx.add = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %p0)
+  ret i32 %rdx.add
+}
+
+define noundef i32 @test_length_int3(<3 x i32> noundef %p0) {
+; CHECK-LABEL: define noundef i32 @test_length_int3(
+; CHECK-SAME: <3 x i32> noundef [[P0:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <3 x i32> [[P0]], i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <3 x i32> [[P0]], i64 1
+; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <3 x i32> [[P0]], i64 2
+; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    ret i32 [[TMP4]]
+;
+entry:
+  %rdx.fadd = call i32 @llvm.vector.reduce.add.v3i32(<3 x i32> %p0)
+  ret i32 %rdx.fadd
+}
+
+define noundef i32 @test_length_int4(<4 x i32> noundef %p0) {
+; CHECK-LABEL: define noundef i32 @test_length_int4(
+; CHECK-SAME: <4 x i32> noundef [[P0:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x i32> [[P0]], i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i32> [[P0]], i64 1
+; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i32> [[P0]], i64 2
+; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i32> [[P0]], i64 3
+; CHECK-NEXT:    [[TMP6:%.*]] = add i32 [[TMP4]], [[TMP5]]
+; CHECK-NEXT:    ret i32 [[TMP6]]
+;
+entry:
+  %rdx.fadd = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %p0)
+  ret i32 %rdx.fadd
+}
+
+define noundef i64 @test_length_int64_2(<2 x i64> noundef %p0) {
+; CHECK-LABEL: define noundef i64 @test_length_int64_2(
+; CHECK-SAME: <2 x i64> noundef [[P0:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[P0]], i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i64> [[P0]], i64 1
+; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    ret i64 [[TMP2]]
+;
+entry:
+  %rdx.add = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %p0)
+  ret i64 %rdx.add
+}
+
+define noundef i64 @test_length_int64_3(<3 x i64> noundef %p0) {
+; CHECK-LABEL: define noundef i64 @test_length_int64_3(
+; CHECK-SAME: <3 x i64> noundef [[P0:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <3 x i64> [[P0]], i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <3 x i64> [[P0]], i64 1
+; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <3 x i64> [[P0]], i64 2
+; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    ret i64 [[TMP4]]
+;
+entry:
+  %rdx.fadd = call i64 @llvm.vector.reduce.add.v3i64(<3 x i64> %p0)
+  ret i64 %rdx.fadd
+}
+
+define noundef i64 @test_length_int64_4(<4 x i64> noundef %p0) {
+; CHECK-LABEL: define noundef i64 @test_length_int64_4(
+; CHECK-SAME: <4 x i64> noundef [[P0:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x i64> [[P0]], i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i64> [[P0]], i64 1
+; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i64> [[P0]], i64 2
+; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i64> [[P0]], i64 3
+; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[TMP4]], [[TMP5]]
+; CHECK-NEXT:    ret i64 [[TMP6]]
+;
+entry:
+  %rdx.fadd = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %p0)
+  ret i64 %rdx.fadd
+}
+
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/cross.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/cross.ll
index 2e0eb8c429ac27..b1625c07111e44 100644
--- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/cross.ll
+++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/cross.ll
@@ -15,7 +15,7 @@ entry:
   ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec3_float_16]]
   ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#vec3_float_16]]
   ; CHECK: %[[#]] = OpExtInst %[[#vec3_float_16]] %[[#op_ext_glsl]] Cross %[[#arg0]] %[[#arg1]]
-  %hlsl.cross = call <3 x half> @llvm.spv.cross.v4f16(<3 x half> %a, <3 x half> %b)
+  %hlsl.cross = call <3 x half> @llvm.spv.cross.v3f16(<3 x half> %a, <3 x half> %b)
   ret <3 x half> %hlsl.cross
 }
 
@@ -25,9 +25,9 @@ entry:
   ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec3_float_32]]
   ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#vec3_float_32]]
   ; CHECK: %[[#]] = OpExtInst %[[#vec3_float_32]] %[[#op_ext_glsl]] Cross %[[#arg0]] %[[#arg1]]
-  %hlsl.cross = call <3 x float> @llvm.spv.cross.v4f32(<3 x float> %a, <3 x float> %b)
+  %hlsl.cross = call <3 x float> @llvm.spv.cross.v3f32(<3 x float> %a, <3 x float> %b)
   ret <3 x float> %hlsl.cross
 }
 
-declare <3 x half> @llvm.spv.cross.v4f16(<3 x half>, <3 x half>)
-declare <3 x float> @llvm.spv.cross.v4f32(<3 x float>, <3 x float>)
+declare <3 x half> @llvm.spv.cross.v3f16(<3 x half>, <3 x half>)
+declare <3 x float> @llvm.spv.cross.v3f32(<3 x float>, <3 x float>)
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/distance.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/distance.ll
new file mode 100644
index 00000000000000..848b0905675549
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/distance.ll
@@ -0,0 +1,33 @@
+; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; Make sure SPIRV operation function calls for distance are lowered correctly.
+
+; CHECK-DAG: %[[#op_ext_glsl:]] = OpExtInstImport "GLSL.std.450"
+; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16
+; CHECK-DAG: %[[#vec4_float_16:]] = OpTypeVector %[[#float_16]] 4
+; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32
+; CHECK-DAG: %[[#vec4_float_32:]] = OpTypeVector %[[#float_32]] 4
+
+define noundef half @distance_half4(<4 x half> noundef %a, <4 x half> noundef %b) {
+entry:
+  ; CHECK: %[[#]] = OpFunction %[[#float_16]] None %[[#]]
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_16]]
+  ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#vec4_float_16]]
+  ; CHECK: %[[#]] = OpExtInst %[[#float_16]] %[[#op_ext_glsl]] Distance %[[#arg0]] %[[#arg1]]
+  %hlsl.distance = call half @llvm.spv.distance.f16(<4 x half> %a, <4 x half> %b)
+  ret half %hlsl.distance
+}
+
+define noundef float @distance_float4(<4 x float> noundef %a, <4 x float> noundef %b) {
+entry:
+  ; CHECK: %[[#]] = OpFunction %[[#float_32]] None %[[#]]
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_32]]
+  ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#vec4_float_32]]
+  ; CHECK: %[[#]] = OpExtInst %[[#float_32]] %[[#op_ext_glsl]] Distance %[[#arg0]] %[[#arg1]]
+  %hlsl.distance = call float @llvm.spv.distance.f32(<4 x float> %a, <4 x float> %b)
+  ret float %hlsl.distance
+}
+
+declare half @llvm.spv.distance.f16(<4 x half>, <4 x half>)
+declare float @llvm.spv.distance.f32(<4 x float>, <4 x float>)
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/length.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/length.ll
index b4a9d8e0664b7e..1ac862b79a3fac 100644
--- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/length.ll
+++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/length.ll
@@ -11,19 +11,21 @@
 
 define noundef half @length_half4(<4 x half> noundef %a) {
 entry:
-  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#]] = OpFunction %[[#float_16]] None %[[#]]
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_16]]
   ; CHECK: %[[#]] = OpExtInst %[[#float_16]] %[[#op_ext_glsl]] Length %[[#arg0]]
-  %hlsl.length = call half @llvm.spv.length.v4f16(<4 x half> %a)
+  %hlsl.length = call half @llvm.spv.length.f16(<4 x half> %a)
   ret half %hlsl.length
 }
 
 define noundef float @length_float4(<4 x float> noundef %a) {
 entry:
-  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#]] = OpFunction %[[#float_32]] None %[[#]]
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_32]]
   ; CHECK: %[[#]] = OpExtInst %[[#float_32]] %[[#op_ext_glsl]] Length %[[#arg0]]
-  %hlsl.length = call float @llvm.spv.length.v4f32(<4 x float> %a)
+  %hlsl.length = call float @llvm.spv.length.f32(<4 x float> %a)
   ret float %hlsl.length
 }
 
-declare half @llvm.spv.length.v4f16(<4 x half>)
-declare float @llvm.spv.length.v4f32(<4 x float>)
+declare half @llvm.spv.length.f16(<4 x half>)
+declare float @llvm.spv.length.f32(<4 x float>)