[llvm] da17ced - [DirectX] Use scalar arguments for @llvm.dx.dot intrinsics (#134570)

Mon Apr 14 10:34:20 PDT 2025

Author: Justin Bogner
Date: 2025-04-14T10:34:15-07:00
New Revision: da17ced11b1cf44b433cb2b850978df4b6bff279

URL: https://github.com/llvm/llvm-project/commit/da17ced11b1cf44b433cb2b850978df4b6bff279
DIFF: https://github.com/llvm/llvm-project/commit/da17ced11b1cf44b433cb2b850978df4b6bff279.diff

LOG: [DirectX] Use scalar arguments for @llvm.dx.dot intrinsics (#134570)

The `dx.dot2`, `dot3`, and `dot4` intrinsics exist purely to lower
`dx.fdot`, and they map exactly to the DXIL ops of the same name. Using
vectors for their arguments adds unnecessary complexity and causes us to
have vector operations that are not trivial to lower post-scalarizer.

Similarly, the `dx.dot2add` intrinsic is overly generic for something
that only needs to lower to a single `dot2AddHalf` DXIL op. Update its
signature to match the operation it lowers to.

Fixes #134569.

Added: 
    

Modified: 
    clang/lib/CodeGen/TargetBuiltins/DirectX.cpp
    clang/test/CodeGenDirectX/Builtins/dot2add.c
    clang/test/CodeGenHLSL/builtins/dot2add.hlsl
    llvm/include/llvm/IR/IntrinsicsDirectX.td
    llvm/lib/Target/DirectX/DXIL.td
    llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
    llvm/lib/Target/DirectX/DXILOpLowering.cpp
    llvm/test/CodeGen/DirectX/dot2_error.ll
    llvm/test/CodeGen/DirectX/dot2add.ll
    llvm/test/CodeGen/DirectX/dot3_error.ll
    llvm/test/CodeGen/DirectX/dot4_error.ll
    llvm/test/CodeGen/DirectX/fdot.ll
    llvm/test/CodeGen/DirectX/normalize.ll

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/TargetBuiltins/DirectX.cpp b/clang/lib/CodeGen/TargetBuiltins/DirectX.cpp
index 202601e257036..51202331bb779 100644

--- a/clang/lib/CodeGen/TargetBuiltins/DirectX.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/DirectX.cpp
@@ -25,12 +25,17 @@ Value *CodeGenFunction::EmitDirectXBuiltinExpr(unsigned BuiltinID,
   case DirectX::BI__builtin_dx_dot2add: {
     Value *A = EmitScalarExpr(E->getArg(0));
     Value *B = EmitScalarExpr(E->getArg(1));
-    Value *C = EmitScalarExpr(E->getArg(2));
+    Value *Acc = EmitScalarExpr(E->getArg(2));
+
+    Value *AX = Builder.CreateExtractElement(A, Builder.getSize(0));
+    Value *AY = Builder.CreateExtractElement(A, Builder.getSize(1));
+    Value *BX = Builder.CreateExtractElement(B, Builder.getSize(0));
+    Value *BY = Builder.CreateExtractElement(B, Builder.getSize(1));
 
     Intrinsic::ID ID = llvm ::Intrinsic::dx_dot2add;
     return Builder.CreateIntrinsic(
-        /*ReturnType=*/C->getType(), ID, ArrayRef<Value *>{A, B, C}, nullptr,
-        "dx.dot2add");
+        /*ReturnType=*/Acc->getType(), ID,
+        ArrayRef<Value *>{Acc, AX, AY, BX, BY}, nullptr, "dx.dot2add");
   }
   }
   return nullptr;

diff  --git a/clang/test/CodeGenDirectX/Builtins/dot2add.c b/clang/test/CodeGenDirectX/Builtins/dot2add.c
index 181f61fea1544..47c639b5986ce 100644
--- a/clang/test/CodeGenDirectX/Builtins/dot2add.c
+++ b/clang/test/CodeGenDirectX/Builtins/dot2add.c
@@ -17,7 +17,13 @@ typedef half half2 __attribute__((ext_vector_type(2)));
 // CHECK-NEXT:    [[TMP0:%.*]] = load <2 x half>, ptr [[X_ADDR]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load <2 x half>, ptr [[Y_ADDR]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[Z_ADDR]], align 4
-// CHECK-NEXT:    [[DX_DOT2ADD:%.*]] = call float @llvm.dx.dot2add.v2f16(<2 x half> [[TMP0]], <2 x half> [[TMP1]], float [[TMP2]])
+// CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x half> [[TMP0]], i32 0
+// CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x half> [[TMP0]], i32 1
+// CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x half> [[TMP1]], i32 0
+// CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x half> [[TMP1]], i32 1
+// CHECK-NEXT:    [[DX_DOT2ADD:%.*]] = call float @llvm.dx.dot2add(float [[TMP2]], half [[TMP3]], half [[TMP4]], half [[TMP5]], half [[TMP6]])
 // CHECK-NEXT:    ret float [[DX_DOT2ADD]]
 //
-float test_dot2add(half2 X, half2 Y, float Z) { return __builtin_dx_dot2add(X, Y, Z); }
+float test_dot2add(half2 X, half2 Y, float Z) {
+  return __builtin_dx_dot2add(X, Y, Z);
+}

diff  --git a/clang/test/CodeGenHLSL/builtins/dot2add.hlsl b/clang/test/CodeGenHLSL/builtins/dot2add.hlsl
index 2464607dd636c..c345e17476e08 100644
--- a/clang/test/CodeGenHLSL/builtins/dot2add.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/dot2add.hlsl
@@ -13,7 +13,11 @@ float test_default_parameter_type(half2 p1, half2 p2, float p3) {
   // CHECK-SPIRV:  %[[CONV:.*]] = fpext reassoc nnan ninf nsz arcp afn half %[[MUL]] to float
   // CHECK-SPIRV:  %[[C:.*]] = load float, ptr %c.addr.i, align 4
   // CHECK-SPIRV:  %[[RES:.*]] = fadd reassoc nnan ninf nsz arcp afn float %[[CONV]], %[[C]]
-  // CHECK-DXIL:  %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}}, float %{{.*}})
+  // CHECK-DXIL:  %[[AX:.*]] = extractelement <2 x half> %{{.*}}, i32 0
+  // CHECK-DXIL:  %[[AY:.*]] = extractelement <2 x half> %{{.*}}, i32 1
+  // CHECK-DXIL:  %[[BX:.*]] = extractelement <2 x half> %{{.*}}, i32 0
+  // CHECK-DXIL:  %[[BY:.*]] = extractelement <2 x half> %{{.*}}, i32 1
+  // CHECK-DXIL:  %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add(float %{{.*}}, half %[[AX]], half %[[AY]], half %[[BX]], half %[[BY]])
   // CHECK:  ret float %[[RES]]
   return dot2add(p1, p2, p3);
 }
@@ -25,7 +29,11 @@ float test_float_arg2_type(half2 p1, float2 p2, float p3) {
   // CHECK-SPIRV:  %[[CONV:.*]] = fpext reassoc nnan ninf nsz arcp afn half %[[MUL]] to float
   // CHECK-SPIRV:  %[[C:.*]] = load float, ptr %c.addr.i, align 4
   // CHECK-SPIRV:  %[[RES:.*]] = fadd reassoc nnan ninf nsz arcp afn float %[[CONV]], %[[C]]
-  // CHECK-DXIL:  %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}}, float %{{.*}})
+  // CHECK-DXIL:  %[[AX:.*]] = extractelement <2 x half> %{{.*}}, i32 0
+  // CHECK-DXIL:  %[[AY:.*]] = extractelement <2 x half> %{{.*}}, i32 1
+  // CHECK-DXIL:  %[[BX:.*]] = extractelement <2 x half> %{{.*}}, i32 0
+  // CHECK-DXIL:  %[[BY:.*]] = extractelement <2 x half> %{{.*}}, i32 1
+  // CHECK-DXIL:  %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add(float %{{.*}}, half %[[AX]], half %[[AY]], half %[[BX]], half %[[BY]])
   // CHECK:  ret float %[[RES]]
   return dot2add(p1, p2, p3);
 }
@@ -37,7 +45,11 @@ float test_float_arg1_type(float2 p1, half2 p2, float p3) {
   // CHECK-SPIRV:  %[[CONV:.*]] = fpext reassoc nnan ninf nsz arcp afn half %[[MUL]] to float
   // CHECK-SPIRV:  %[[C:.*]] = load float, ptr %c.addr.i, align 4
   // CHECK-SPIRV:  %[[RES:.*]] = fadd reassoc nnan ninf nsz arcp afn float %[[CONV]], %[[C]]
-  // CHECK-DXIL:  %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}}, float %{{.*}})
+  // CHECK-DXIL:  %[[AX:.*]] = extractelement <2 x half> %{{.*}}, i32 0
+  // CHECK-DXIL:  %[[AY:.*]] = extractelement <2 x half> %{{.*}}, i32 1
+  // CHECK-DXIL:  %[[BX:.*]] = extractelement <2 x half> %{{.*}}, i32 0
+  // CHECK-DXIL:  %[[BY:.*]] = extractelement <2 x half> %{{.*}}, i32 1
+  // CHECK-DXIL:  %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add(float %{{.*}}, half %[[AX]], half %[[AY]], half %[[BX]], half %[[BY]])
   // CHECK:  ret float %[[RES]]
   return dot2add(p1, p2, p3);
 }
@@ -49,7 +61,11 @@ float test_double_arg3_type(half2 p1, half2 p2, double p3) {
   // CHECK-SPIRV:  %[[CONV:.*]] = fpext reassoc nnan ninf nsz arcp afn half %[[MUL]] to float
   // CHECK-SPIRV:  %[[C:.*]] = load float, ptr %c.addr.i, align 4
   // CHECK-SPIRV:  %[[RES:.*]] = fadd reassoc nnan ninf nsz arcp afn float %[[CONV]], %[[C]]
-  // CHECK-DXIL:  %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}}, float %{{.*}})
+  // CHECK-DXIL:  %[[AX:.*]] = extractelement <2 x half> %{{.*}}, i32 0
+  // CHECK-DXIL:  %[[AY:.*]] = extractelement <2 x half> %{{.*}}, i32 1
+  // CHECK-DXIL:  %[[BX:.*]] = extractelement <2 x half> %{{.*}}, i32 0
+  // CHECK-DXIL:  %[[BY:.*]] = extractelement <2 x half> %{{.*}}, i32 1
+  // CHECK-DXIL:  %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add(float %{{.*}}, half %[[AX]], half %[[AY]], half %[[BX]], half %[[BY]])
   // CHECK:  ret float %[[RES]]
   return dot2add(p1, p2, p3);
 }
@@ -62,7 +78,11 @@ float test_float_arg1_arg2_type(float2 p1, float2 p2, float p3) {
   // CHECK-SPIRV:  %[[CONV:.*]] = fpext reassoc nnan ninf nsz arcp afn half %[[MUL]] to float
   // CHECK-SPIRV:  %[[C:.*]] = load float, ptr %c.addr.i, align 4
   // CHECK-SPIRV:  %[[RES:.*]] = fadd reassoc nnan ninf nsz arcp afn float %[[CONV]], %[[C]]
-  // CHECK-DXIL:  %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}}, float %{{.*}})
+  // CHECK-DXIL:  %[[AX:.*]] = extractelement <2 x half> %{{.*}}, i32 0
+  // CHECK-DXIL:  %[[AY:.*]] = extractelement <2 x half> %{{.*}}, i32 1
+  // CHECK-DXIL:  %[[BX:.*]] = extractelement <2 x half> %{{.*}}, i32 0
+  // CHECK-DXIL:  %[[BY:.*]] = extractelement <2 x half> %{{.*}}, i32 1
+  // CHECK-DXIL:  %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add(float %{{.*}}, half %[[AX]], half %[[AY]], half %[[BX]], half %[[BY]])
   // CHECK:  ret float %[[RES]]
   return dot2add(p1, p2, p3);
 }
@@ -75,7 +95,11 @@ float test_double_arg1_arg2_type(double2 p1, double2 p2, float p3) {
   // CHECK-SPIRV:  %[[CONV:.*]] = fpext reassoc nnan ninf nsz arcp afn half %[[MUL]] to float
   // CHECK-SPIRV:  %[[C:.*]] = load float, ptr %c.addr.i, align 4
   // CHECK-SPIRV:  %[[RES:.*]] = fadd reassoc nnan ninf nsz arcp afn float %[[CONV]], %[[C]]
-  // CHECK-DXIL:  %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}}, float %{{.*}})
+  // CHECK-DXIL:  %[[AX:.*]] = extractelement <2 x half> %{{.*}}, i32 0
+  // CHECK-DXIL:  %[[AY:.*]] = extractelement <2 x half> %{{.*}}, i32 1
+  // CHECK-DXIL:  %[[BX:.*]] = extractelement <2 x half> %{{.*}}, i32 0
+  // CHECK-DXIL:  %[[BY:.*]] = extractelement <2 x half> %{{.*}}, i32 1
+  // CHECK-DXIL:  %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add(float %{{.*}}, half %[[AX]], half %[[AY]], half %[[BX]], half %[[BY]])
   // CHECK:  ret float %[[RES]]
   return dot2add(p1, p2, p3);
 }
@@ -88,7 +112,11 @@ float test_int16_arg1_arg2_type(int16_t2 p1, int16_t2 p2, float p3) {
   // CHECK-SPIRV:  %[[CONV:.*]] = fpext reassoc nnan ninf nsz arcp afn half %[[MUL]] to float
   // CHECK-SPIRV:  %[[C:.*]] = load float, ptr %c.addr.i, align 4
   // CHECK-SPIRV:  %[[RES:.*]] = fadd reassoc nnan ninf nsz arcp afn float %[[CONV]], %[[C]]
-  // CHECK-DXIL:  %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}}, float %{{.*}})
+  // CHECK-DXIL:  %[[AX:.*]] = extractelement <2 x half> %{{.*}}, i32 0
+  // CHECK-DXIL:  %[[AY:.*]] = extractelement <2 x half> %{{.*}}, i32 1
+  // CHECK-DXIL:  %[[BX:.*]] = extractelement <2 x half> %{{.*}}, i32 0
+  // CHECK-DXIL:  %[[BY:.*]] = extractelement <2 x half> %{{.*}}, i32 1
+  // CHECK-DXIL:  %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add(float %{{.*}}, half %[[AX]], half %[[AY]], half %[[BX]], half %[[BY]])
   // CHECK:  ret float %[[RES]]
   return dot2add(p1, p2, p3);
 }
@@ -101,7 +129,11 @@ float test_int32_arg1_arg2_type(int32_t2 p1, int32_t2 p2, float p3) {
   // CHECK-SPIRV:  %[[CONV:.*]] = fpext reassoc nnan ninf nsz arcp afn half %[[MUL]] to float
   // CHECK-SPIRV:  %[[C:.*]] = load float, ptr %c.addr.i, align 4
   // CHECK-SPIRV:  %[[RES:.*]] = fadd reassoc nnan ninf nsz arcp afn float %[[CONV]], %[[C]]
-  // CHECK-DXIL:  %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}}, float %{{.*}})
+  // CHECK-DXIL:  %[[AX:.*]] = extractelement <2 x half> %{{.*}}, i32 0
+  // CHECK-DXIL:  %[[AY:.*]] = extractelement <2 x half> %{{.*}}, i32 1
+  // CHECK-DXIL:  %[[BX:.*]] = extractelement <2 x half> %{{.*}}, i32 0
+  // CHECK-DXIL:  %[[BY:.*]] = extractelement <2 x half> %{{.*}}, i32 1
+  // CHECK-DXIL:  %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add(float %{{.*}}, half %[[AX]], half %[[AY]], half %[[BX]], half %[[BY]])
   // CHECK:  ret float %[[RES]]
   return dot2add(p1, p2, p3);
 }
@@ -114,7 +146,11 @@ float test_int64_arg1_arg2_type(int64_t2 p1, int64_t2 p2, float p3) {
   // CHECK-SPIRV:  %[[CONV:.*]] = fpext reassoc nnan ninf nsz arcp afn half %[[MUL]] to float
   // CHECK-SPIRV:  %[[C:.*]] = load float, ptr %c.addr.i, align 4
   // CHECK-SPIRV:  %[[RES:.*]] = fadd reassoc nnan ninf nsz arcp afn float %[[CONV]], %[[C]]
-  // CHECK-DXIL:  %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}}, float %{{.*}})
+  // CHECK-DXIL:  %[[AX:.*]] = extractelement <2 x half> %{{.*}}, i32 0
+  // CHECK-DXIL:  %[[AY:.*]] = extractelement <2 x half> %{{.*}}, i32 1
+  // CHECK-DXIL:  %[[BX:.*]] = extractelement <2 x half> %{{.*}}, i32 0
+  // CHECK-DXIL:  %[[BY:.*]] = extractelement <2 x half> %{{.*}}, i32 1
+  // CHECK-DXIL:  %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add(float %{{.*}}, half %[[AX]], half %[[AY]], half %[[BX]], half %[[BY]])
   // CHECK:  ret float %[[RES]]
   return dot2add(p1, p2, p3);
 }
@@ -129,7 +165,11 @@ float test_bool_arg1_arg2_type(bool2 p1, bool2 p2, float p3) {
   // CHECK-SPIRV:  %[[CONV:.*]] = fpext reassoc nnan ninf nsz arcp afn half %[[MUL]] to float
   // CHECK-SPIRV:  %[[C:.*]] = load float, ptr %c.addr.i, align 4
   // CHECK-SPIRV:  %[[RES:.*]] = fadd reassoc nnan ninf nsz arcp afn float %[[CONV]], %[[C]]
-  // CHECK-DXIL:  %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}}, float %{{.*}})
+  // CHECK-DXIL:  %[[AX:.*]] = extractelement <2 x half> %{{.*}}, i32 0
+  // CHECK-DXIL:  %[[AY:.*]] = extractelement <2 x half> %{{.*}}, i32 1
+  // CHECK-DXIL:  %[[BX:.*]] = extractelement <2 x half> %{{.*}}, i32 0
+  // CHECK-DXIL:  %[[BY:.*]] = extractelement <2 x half> %{{.*}}, i32 1
+  // CHECK-DXIL:  %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add(float %{{.*}}, half %[[AX]], half %[[AY]], half %[[BX]], half %[[BY]])
   // CHECK:  ret float %[[RES]]
   return dot2add(p1, p2, p3);
 }

diff  --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index 775d325feeb14..b1a27311e2a9c 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -76,18 +76,27 @@ def int_dx_nclamp : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>,
 def int_dx_cross : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
 def int_dx_saturate : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
 
-def int_dx_dot2 :
-    DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
-    [llvm_anyfloat_ty, LLVMScalarOrSameVectorWidth<0, LLVMVectorElementType<0>>],
-    [IntrNoMem, Commutative] >;
-def int_dx_dot3 :
-    DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
-    [llvm_anyfloat_ty, LLVMScalarOrSameVectorWidth<0, LLVMVectorElementType<0>>],
-    [IntrNoMem, Commutative] >;
-def int_dx_dot4 :
-    DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
-    [llvm_anyfloat_ty, LLVMScalarOrSameVectorWidth<0, LLVMVectorElementType<0>>],
-    [IntrNoMem, Commutative] >;
+def int_dx_dot2 : DefaultAttrsIntrinsic<[LLVMMatchType<0>],
+                                        [
+                                          llvm_anyfloat_ty, LLVMMatchType<0>,
+                                          LLVMMatchType<0>, LLVMMatchType<0>
+                                        ],
+                                        [IntrNoMem, Commutative]>;
+def int_dx_dot3 : DefaultAttrsIntrinsic<[LLVMMatchType<0>],
+                                        [
+                                          llvm_anyfloat_ty, LLVMMatchType<0>,
+                                          LLVMMatchType<0>, LLVMMatchType<0>,
+                                          LLVMMatchType<0>, LLVMMatchType<0>
+                                        ],
+                                        [IntrNoMem, Commutative]>;
+def int_dx_dot4 : DefaultAttrsIntrinsic<[LLVMMatchType<0>],
+                                        [
+                                          llvm_anyfloat_ty, LLVMMatchType<0>,
+                                          LLVMMatchType<0>, LLVMMatchType<0>,
+                                          LLVMMatchType<0>, LLVMMatchType<0>,
+                                          LLVMMatchType<0>, LLVMMatchType<0>
+                                        ],
+                                        [IntrNoMem, Commutative]>;
 def int_dx_fdot :
     DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
     [llvm_anyfloat_ty, LLVMScalarOrSameVectorWidth<0, LLVMVectorElementType<0>>],
@@ -100,9 +109,9 @@ def int_dx_udot :
     DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
     [llvm_anyint_ty, LLVMScalarOrSameVectorWidth<0, LLVMVectorElementType<0>>],
     [IntrNoMem, Commutative] >;
-def int_dx_dot2add : 
-    DefaultAttrsIntrinsic<[llvm_float_ty], 
-    [llvm_anyfloat_ty, LLVMMatchType<0>, llvm_float_ty], 
+def int_dx_dot2add :
+    DefaultAttrsIntrinsic<[llvm_float_ty],
+    [llvm_float_ty, llvm_half_ty, llvm_half_ty, llvm_half_ty, llvm_half_ty],
     [IntrNoMem, Commutative]>;
 def int_dx_dot4add_i8packed : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
 def int_dx_dot4add_u8packed : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;

diff  --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index b1e7406ead675..645105ade72b6 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -1078,8 +1078,7 @@ def RawBufferStore : DXILOp<140, rawBufferStore> {
 }
 
 def Dot2AddHalf : DXILOp<162, dot2AddHalf> {
-  let Doc = "dot product of 2 vectors of half having size = 2, returns "
-            "float";
+  let Doc = "2D half dot product with accumulate to float";
   let intrinsics = [IntrinSelect<int_dx_dot2add>];
   let arguments = [FloatTy, HalfTy, HalfTy, HalfTy, HalfTy];
   let result = FloatTy;

diff  --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index 70f284e08b250..8ec5ed0e22974 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -185,7 +185,8 @@ static Value *expandFloatDotIntrinsic(CallInst *Orig, Value *A, Value *B) {
   assert(ATy->getScalarType()->isFloatingPointTy());
 
   Intrinsic::ID DotIntrinsic = Intrinsic::dx_dot4;
-  switch (AVec->getNumElements()) {
+  int NumElts = AVec->getNumElements();
+  switch (NumElts) {
   case 2:
     DotIntrinsic = Intrinsic::dx_dot2;
     break;
@@ -201,8 +202,14 @@ static Value *expandFloatDotIntrinsic(CallInst *Orig, Value *A, Value *B) {
         /* gen_crash_diag=*/false);
     return nullptr;
   }
-  return Builder.CreateIntrinsic(ATy->getScalarType(), DotIntrinsic,
-                                 ArrayRef<Value *>{A, B}, nullptr, "dot");
+
+  SmallVector<Value *> Args;
+  for (int I = 0; I < NumElts; ++I)
+    Args.push_back(Builder.CreateExtractElement(A, Builder.getInt32(I)));
+  for (int I = 0; I < NumElts; ++I)
+    Args.push_back(Builder.CreateExtractElement(B, Builder.getInt32(I)));
+  return Builder.CreateIntrinsic(ATy->getScalarType(), DotIntrinsic, Args,
+                                 nullptr, "dot");
 }
 
 // Create the appropriate DXIL float dot intrinsic for the operands of Orig

diff  --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index 41a9426998826..4574e5f7bbd96 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -33,52 +33,6 @@
 using namespace llvm;
 using namespace llvm::dxil;
 
-static bool isVectorArgExpansion(Function &F) {
-  switch (F.getIntrinsicID()) {
-  case Intrinsic::dx_dot2:
-  case Intrinsic::dx_dot3:
-  case Intrinsic::dx_dot4:
-    return true;
-  }
-  return false;
-}
-
-static SmallVector<Value *> populateOperands(Value *Arg, IRBuilder<> &Builder) {
-  SmallVector<Value *> ExtractedElements;
-  auto *VecArg = dyn_cast<FixedVectorType>(Arg->getType());
-  for (unsigned I = 0; I < VecArg->getNumElements(); ++I) {
-    Value *Index = ConstantInt::get(Type::getInt32Ty(Arg->getContext()), I);
-    Value *ExtractedElement = Builder.CreateExtractElement(Arg, Index);
-    ExtractedElements.push_back(ExtractedElement);
-  }
-  return ExtractedElements;
-}
-
-static SmallVector<Value *>
-argVectorFlatten(CallInst *Orig, IRBuilder<> &Builder, unsigned NumOperands) {
-  assert(NumOperands > 0);
-  Value *Arg0 = Orig->getOperand(0);
-  [[maybe_unused]] auto *VecArg0 = dyn_cast<FixedVectorType>(Arg0->getType());
-  assert(VecArg0);
-  SmallVector<Value *> NewOperands = populateOperands(Arg0, Builder);
-  for (unsigned I = 1; I < NumOperands; ++I) {
-    Value *Arg = Orig->getOperand(I);
-    [[maybe_unused]] auto *VecArg = dyn_cast<FixedVectorType>(Arg->getType());
-    assert(VecArg);
-    assert(VecArg0->getElementType() == VecArg->getElementType());
-    assert(VecArg0->getNumElements() == VecArg->getNumElements());
-    auto NextOperandList = populateOperands(Arg, Builder);
-    NewOperands.append(NextOperandList.begin(), NextOperandList.end());
-  }
-  return NewOperands;
-}
-
-static SmallVector<Value *> argVectorFlatten(CallInst *Orig,
-                                             IRBuilder<> &Builder) {
-  // Note: arg[NumOperands-1] is a pointer and is not needed by our flattening.
-  return argVectorFlatten(Orig, Builder, Orig->getNumOperands() - 1);
-}
-
 namespace {
 class OpLowerer {
   Module &M;
@@ -150,9 +104,6 @@ class OpLowerer {
   [[nodiscard]] bool
   replaceFunctionWithOp(Function &F, dxil::OpCode DXILOp,
                         ArrayRef<IntrinArgSelect> ArgSelects) {
-    bool IsVectorArgExpansion = isVectorArgExpansion(F);
-    assert(!(IsVectorArgExpansion && ArgSelects.size()) &&
-           "Cann't do vector arg expansion when using arg selects.");
     return replaceFunction(F, [&](CallInst *CI) -> Error {
       OpBuilder.getIRB().SetInsertPoint(CI);
       SmallVector<Value *> Args;
@@ -170,15 +121,6 @@ class OpLowerer {
             break;
           }
         }
-      } else if (IsVectorArgExpansion) {
-        Args = argVectorFlatten(CI, OpBuilder.getIRB());
-      } else if (F.getIntrinsicID() == Intrinsic::dx_dot2add) {
-        // arg[NumOperands-1] is a pointer and is not needed by our flattening.
-        // arg[NumOperands-2] also does not need to be flattened because it is a
-        // scalar.
-        unsigned NumOperands = CI->getNumOperands() - 2;
-        Args.push_back(CI->getArgOperand(NumOperands));
-        Args.append(argVectorFlatten(CI, OpBuilder.getIRB(), NumOperands));
       } else {
         Args.append(CI->arg_begin(), CI->arg_end());
       }

diff  --git a/llvm/test/CodeGen/DirectX/dot2_error.ll b/llvm/test/CodeGen/DirectX/dot2_error.ll
index 97b025d36f018..f2167aa516057 100644
--- a/llvm/test/CodeGen/DirectX/dot2_error.ll
+++ b/llvm/test/CodeGen/DirectX/dot2_error.ll
@@ -4,8 +4,9 @@
 ; CHECK: in function dot_double2
 ; CHECK-SAME: Cannot create Dot2 operation: Invalid overload type
 
-define noundef double @dot_double2(<2 x double> noundef %a, <2 x double> noundef %b) {
+define noundef double @dot_double2(double noundef %a1, double noundef %a2,
+                                   double noundef %b1, double noundef %b2) {
 entry:
-  %dx.dot = call double @llvm.dx.dot2.v2f64(<2 x double> %a, <2 x double> %b)
+  %dx.dot = call double @llvm.dx.dot2(double %a1, double %a2, double %b1, double %b2)
   ret double %dx.dot
 }

diff  --git a/llvm/test/CodeGen/DirectX/dot2add.ll b/llvm/test/CodeGen/DirectX/dot2add.ll
index 40c6cdafc83da..3a2bbcc074f2d 100644
--- a/llvm/test/CodeGen/DirectX/dot2add.ll
+++ b/llvm/test/CodeGen/DirectX/dot2add.ll
@@ -1,8 +1,13 @@
 ; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-compute %s | FileCheck %s
 
-define noundef float @dot2add_simple(<2 x half> noundef %a, <2 x half> noundef %b, float %c) {
+define noundef float @dot2add_simple(<2 x half> noundef %a, <2 x half> noundef %b, float %acc) {
 entry:
-; CHECK: call float @dx.op.dot2AddHalf(i32 162, float %c, half %0, half %1, half %2, half %3)
-  %ret = call float @llvm.dx.dot2add(<2 x half> %a, <2 x half> %b, float %c)
+  %ax = extractelement <2 x half> %a, i32 0
+  %ay = extractelement <2 x half> %a, i32 1
+  %bx = extractelement <2 x half> %b, i32 0
+  %by = extractelement <2 x half> %b, i32 1
+
+; CHECK: call float @dx.op.dot2AddHalf(i32 162, float %acc, half %ax, half %ay, half %bx, half %by)
+  %ret = call float @llvm.dx.dot2add(float %acc, half %ax, half %ay, half %bx, half %by)
   ret float %ret
 }

diff  --git a/llvm/test/CodeGen/DirectX/dot3_error.ll b/llvm/test/CodeGen/DirectX/dot3_error.ll
index 3b5dc41ebeb6b..69cfb32047f23 100644
--- a/llvm/test/CodeGen/DirectX/dot3_error.ll
+++ b/llvm/test/CodeGen/DirectX/dot3_error.ll
@@ -4,8 +4,10 @@
 ; CHECK: in function dot_double3
 ; CHECK-SAME: Cannot create Dot3 operation: Invalid overload type
 
-define noundef double @dot_double3(<3 x double> noundef %a, <3 x double> noundef %b) {
+define noundef double @dot_double3(double noundef %a1, double noundef %a2,
+                                   double noundef %a3, double noundef %b1,
+                                   double noundef %b2, double noundef %b3) {
 entry:
-  %dx.dot = call double @llvm.dx.dot3.v3f64(<3 x double> %a, <3 x double> %b)
+  %dx.dot = call double @llvm.dx.dot3(double %a1, double %a2, double %a3, double %b1, double %b2, double %b3)
   ret double %dx.dot
 }

diff  --git a/llvm/test/CodeGen/DirectX/dot4_error.ll b/llvm/test/CodeGen/DirectX/dot4_error.ll
index 0a5969616220e..f6c7ad93bd136 100644
--- a/llvm/test/CodeGen/DirectX/dot4_error.ll
+++ b/llvm/test/CodeGen/DirectX/dot4_error.ll
@@ -4,8 +4,11 @@
 ; CHECK: in function dot_double4
 ; CHECK-SAME: Cannot create Dot4 operation: Invalid overload type
 
-define noundef double @dot_double4(<4 x double> noundef %a, <4 x double> noundef %b) {
+define noundef double @dot_double4(double noundef %a1, double noundef %a2,
+                                   double noundef %a3, double noundef %a4,
+                                   double noundef %b1, double noundef %b2,
+                                   double noundef %b3, double noundef %b4) {
 entry:
-  %dx.dot = call double @llvm.dx.dot4.v4f64(<4 x double> %a, <4 x double> %b)
+  %dx.dot = call double @llvm.dx.dot4(double %a1, double %a2, double %a3, double %a4, double %b1, double %b2, double %b3, double %b4)
   ret double %dx.dot
 }

diff  --git a/llvm/test/CodeGen/DirectX/fdot.ll b/llvm/test/CodeGen/DirectX/fdot.ll
index c6f36087ba91d..a623321c2d346 100644
--- a/llvm/test/CodeGen/DirectX/fdot.ll
+++ b/llvm/test/CodeGen/DirectX/fdot.ll
@@ -6,12 +6,12 @@
 ; CHECK-LABEL: dot_half2
 define noundef half @dot_half2(<2 x half> noundef %a, <2 x half> noundef %b) {
 entry:
-; DOPCHECK: extractelement <2 x half> %a, i32 0
-; DOPCHECK: extractelement <2 x half> %a, i32 1
-; DOPCHECK: extractelement <2 x half> %b, i32 0
-; DOPCHECK: extractelement <2 x half> %b, i32 1
-; DOPCHECK: call half @dx.op.dot2.f16(i32 54, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}) #[[#ATTR:]]
-; EXPCHECK: call half @llvm.dx.dot2.v2f16(<2 x half> %a, <2 x half> %b)
+; CHECK: [[A1:%.*]] = extractelement <2 x half> %a, i32 0
+; CHECK: [[A2:%.*]] = extractelement <2 x half> %a, i32 1
+; CHECK: [[B1:%.*]] = extractelement <2 x half> %b, i32 0
+; CHECK: [[B2:%.*]] = extractelement <2 x half> %b, i32 1
+; DOPCHECK: call half @dx.op.dot2.f16(i32 54, half [[A1]], half [[A2]], half [[B1]], half [[B2]]) #[[#ATTR:]]
+; EXPCHECK: call half @llvm.dx.dot2.f16(half [[A1]], half [[A2]], half [[B1]], half [[B2]])
   %dx.dot = call half @llvm.dx.fdot.v2f16(<2 x half> %a, <2 x half> %b)
   ret half %dx.dot
 }
@@ -19,14 +19,14 @@ entry:
 ; CHECK-LABEL: dot_half3
 define noundef half @dot_half3(<3 x half> noundef %a, <3 x half> noundef %b) {
 entry:
-; DOPCHECK: extractelement <3 x half> %a, i32 0
-; DOPCHECK: extractelement <3 x half> %a, i32 1
-; DOPCHECK: extractelement <3 x half> %a, i32 2
-; DOPCHECK: extractelement <3 x half> %b, i32 0
-; DOPCHECK: extractelement <3 x half> %b, i32 1
-; DOPCHECK: extractelement <3 x half> %b, i32 2
+; CHECK: extractelement <3 x half> %a, i32 0
+; CHECK: extractelement <3 x half> %a, i32 1
+; CHECK: extractelement <3 x half> %a, i32 2
+; CHECK: extractelement <3 x half> %b, i32 0
+; CHECK: extractelement <3 x half> %b, i32 1
+; CHECK: extractelement <3 x half> %b, i32 2
 ; DOPCHECK: call half @dx.op.dot3.f16(i32 55, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}) #[[#ATTR]]
-; EXPCHECK: call half @llvm.dx.dot3.v3f16(<3 x half> %a, <3 x half> %b)
+; EXPCHECK: call half @llvm.dx.dot3.f16(half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}})
   %dx.dot = call half @llvm.dx.fdot.v3f16(<3 x half> %a, <3 x half> %b)
   ret half %dx.dot
 }
@@ -34,16 +34,16 @@ entry:
 ; CHECK-LABEL: dot_half4
 define noundef half @dot_half4(<4 x half> noundef %a, <4 x half> noundef %b) {
 entry:
-; DOPCHECK: extractelement <4 x half> %a, i32 0
-; DOPCHECK: extractelement <4 x half> %a, i32 1
-; DOPCHECK: extractelement <4 x half> %a, i32 2
-; DOPCHECK: extractelement <4 x half> %a, i32 3
-; DOPCHECK: extractelement <4 x half> %b, i32 0
-; DOPCHECK: extractelement <4 x half> %b, i32 1
-; DOPCHECK: extractelement <4 x half> %b, i32 2
-; DOPCHECK: extractelement <4 x half> %b, i32 3
+; CHECK: extractelement <4 x half> %a, i32 0
+; CHECK: extractelement <4 x half> %a, i32 1
+; CHECK: extractelement <4 x half> %a, i32 2
+; CHECK: extractelement <4 x half> %a, i32 3
+; CHECK: extractelement <4 x half> %b, i32 0
+; CHECK: extractelement <4 x half> %b, i32 1
+; CHECK: extractelement <4 x half> %b, i32 2
+; CHECK: extractelement <4 x half> %b, i32 3
 ; DOPCHECK: call half @dx.op.dot4.f16(i32 56, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}) #[[#ATTR]]
-; EXPCHECK: call half @llvm.dx.dot4.v4f16(<4 x half> %a, <4 x half> %b)
+; EXPCHECK: call half @llvm.dx.dot4.f16(half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}})
   %dx.dot = call half @llvm.dx.fdot.v4f16(<4 x half> %a, <4 x half> %b)
   ret half %dx.dot
 }
@@ -51,12 +51,12 @@ entry:
 ; CHECK-LABEL: dot_float2
 define noundef float @dot_float2(<2 x float> noundef %a, <2 x float> noundef %b) {
 entry:
-; DOPCHECK: extractelement <2 x float> %a, i32 0
-; DOPCHECK: extractelement <2 x float> %a, i32 1
-; DOPCHECK: extractelement <2 x float> %b, i32 0
-; DOPCHECK: extractelement <2 x float> %b, i32 1
+; CHECK: extractelement <2 x float> %a, i32 0
+; CHECK: extractelement <2 x float> %a, i32 1
+; CHECK: extractelement <2 x float> %b, i32 0
+; CHECK: extractelement <2 x float> %b, i32 1
 ; DOPCHECK: call float @dx.op.dot2.f32(i32 54, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}) #[[#ATTR]]
-; EXPCHECK: call float @llvm.dx.dot2.v2f32(<2 x float> %a, <2 x float> %b)
+; EXPCHECK: call float @llvm.dx.dot2.f32(float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}})
   %dx.dot = call float @llvm.dx.fdot.v2f32(<2 x float> %a, <2 x float> %b)
   ret float %dx.dot
 }
@@ -64,14 +64,14 @@ entry:
 ; CHECK-LABEL: dot_float3
 define noundef float @dot_float3(<3 x float> noundef %a, <3 x float> noundef %b) {
 entry:
-; DOPCHECK: extractelement <3 x float> %a, i32 0
-; DOPCHECK: extractelement <3 x float> %a, i32 1
-; DOPCHECK: extractelement <3 x float> %a, i32 2
-; DOPCHECK: extractelement <3 x float> %b, i32 0
-; DOPCHECK: extractelement <3 x float> %b, i32 1
-; DOPCHECK: extractelement <3 x float> %b, i32 2
+; CHECK: extractelement <3 x float> %a, i32 0
+; CHECK: extractelement <3 x float> %a, i32 1
+; CHECK: extractelement <3 x float> %a, i32 2
+; CHECK: extractelement <3 x float> %b, i32 0
+; CHECK: extractelement <3 x float> %b, i32 1
+; CHECK: extractelement <3 x float> %b, i32 2
 ; DOPCHECK: call float @dx.op.dot3.f32(i32 55, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}) #[[#ATTR]]
-; EXPCHECK: call float @llvm.dx.dot3.v3f32(<3 x float> %a, <3 x float> %b)
+; EXPCHECK: call float @llvm.dx.dot3.f32(float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}})
   %dx.dot = call float @llvm.dx.fdot.v3f32(<3 x float> %a, <3 x float> %b)
   ret float %dx.dot
 }
@@ -79,16 +79,16 @@ entry:
 ; CHECK-LABEL: dot_float4
 define noundef float @dot_float4(<4 x float> noundef %a, <4 x float> noundef %b) {
 entry:
-; DOPCHECK: extractelement <4 x float> %a, i32 0
-; DOPCHECK: extractelement <4 x float> %a, i32 1
-; DOPCHECK: extractelement <4 x float> %a, i32 2
-; DOPCHECK: extractelement <4 x float> %a, i32 3
-; DOPCHECK: extractelement <4 x float> %b, i32 0
-; DOPCHECK: extractelement <4 x float> %b, i32 1
-; DOPCHECK: extractelement <4 x float> %b, i32 2
-; DOPCHECK: extractelement <4 x float> %b, i32 3
+; CHECK: extractelement <4 x float> %a, i32 0
+; CHECK: extractelement <4 x float> %a, i32 1
+; CHECK: extractelement <4 x float> %a, i32 2
+; CHECK: extractelement <4 x float> %a, i32 3
+; CHECK: extractelement <4 x float> %b, i32 0
+; CHECK: extractelement <4 x float> %b, i32 1
+; CHECK: extractelement <4 x float> %b, i32 2
+; CHECK: extractelement <4 x float> %b, i32 3
 ; DOPCHECK: call float @dx.op.dot4.f32(i32 56, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}) #[[#ATTR]]
-; EXPCHECK: call float @llvm.dx.dot4.v4f32(<4 x float> %a, <4 x float> %b)
+; EXPCHECK: call float @llvm.dx.dot4.f32(float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}})
   %dx.dot = call float @llvm.dx.fdot.v4f32(<4 x float> %a, <4 x float> %b)
   ret float %dx.dot
 }

diff  --git a/llvm/test/CodeGen/DirectX/normalize.ll b/llvm/test/CodeGen/DirectX/normalize.ll
index 2aba9d5f74d78..cde09dacf4742 100644
--- a/llvm/test/CodeGen/DirectX/normalize.ll
+++ b/llvm/test/CodeGen/DirectX/normalize.ll
@@ -22,7 +22,7 @@ entry:
 
 define noundef <2 x half> @test_normalize_half2(<2 x half> noundef %p0) {
 entry:
-  ; EXPCHECK: [[doth2:%.*]] = call half @llvm.dx.dot2.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}})
+  ; EXPCHECK: [[doth2:%.*]] = call half @llvm.dx.dot2.f16(half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}})
   ; DOPCHECK: [[doth2:%.*]] = call half @dx.op.dot2.f16(i32 54, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}})
   ; EXPCHECK: [[rsqrt:%.*]] = call half @llvm.dx.rsqrt.f16(half [[doth2]])
   ; DOPCHECK: [[rsqrt:%.*]] = call half @dx.op.unary.f16(i32 25, half [[doth2]])
@@ -36,7 +36,7 @@ entry:
 
 define noundef <3 x half> @test_normalize_half3(<3 x half> noundef %p0) {
 entry:
-  ; EXPCHECK: [[doth3:%.*]] = call half @llvm.dx.dot3.v3f16(<3 x half> %{{.*}}, <3 x half> %{{.*}})
+  ; EXPCHECK: [[doth3:%.*]] = call half @llvm.dx.dot3.f16(half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}})
   ; DOPCHECK: [[doth3:%.*]] = call half @dx.op.dot3.f16(i32 55, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}})
   ; EXPCHECK: [[rsqrt:%.*]] = call half @llvm.dx.rsqrt.f16(half [[doth3]])
   ; DOPCHECK: [[rsqrt:%.*]] = call half @dx.op.unary.f16(i32 25, half [[doth3]])
@@ -50,7 +50,7 @@ entry:
 
 define noundef <4 x half> @test_normalize_half4(<4 x half> noundef %p0) {
 entry:
-  ; EXPCHECK: [[doth4:%.*]] = call half @llvm.dx.dot4.v4f16(<4 x half> %{{.*}}, <4 x half> %{{.*}})
+  ; EXPCHECK: [[doth4:%.*]] = call half @llvm.dx.dot4.f16(half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}})
   ; DOPCHECK: [[doth4:%.*]] = call half @dx.op.dot4.f16(i32 56, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}})
   ; EXPCHECK: [[rsqrt:%.*]] = call half @llvm.dx.rsqrt.f16(half [[doth4]])
   ; DOPCHECK: [[rsqrt:%.*]] = call half @dx.op.unary.f16(i32 25, half [[doth4]])
@@ -71,7 +71,7 @@ entry:
 
 define noundef <2 x float> @test_normalize_float2(<2 x float> noundef %p0) {
 entry:
-  ; EXPCHECK: [[dotf2:%.*]] = call float @llvm.dx.dot2.v2f32(<2 x float> %{{.*}}, <2 x float> %{{.*}})
+  ; EXPCHECK: [[dotf2:%.*]] = call float @llvm.dx.dot2.f32(float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}})
   ; DOPCHECK: [[dotf2:%.*]] = call float @dx.op.dot2.f32(i32 54, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}})
   ; EXPCHECK: [[rsqrt:%.*]] = call float @llvm.dx.rsqrt.f32(float [[dotf2]])
   ; DOPCHECK: [[rsqrt:%.*]] = call float @dx.op.unary.f32(i32 25, float [[dotf2]])
@@ -85,7 +85,7 @@ entry:
 
 define noundef <3 x float> @test_normalize_float3(<3 x float> noundef %p0) {
 entry:
-  ; EXPCHECK: [[dotf3:%.*]] = call float @llvm.dx.dot3.v3f32(<3 x float> %{{.*}}, <3 x float> %{{.*}})
+  ; EXPCHECK: [[dotf3:%.*]] = call float @llvm.dx.dot3.f32(float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}})
   ; DOPCHECK: [[dotf3:%.*]] = call float @dx.op.dot3.f32(i32 55, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}})
   ; EXPCHECK: [[rsqrt:%.*]] = call float @llvm.dx.rsqrt.f32(float [[dotf3]])
   ; DOPCHECK: [[rsqrt:%.*]] = call float @dx.op.unary.f32(i32 25, float [[dotf3]])
@@ -99,7 +99,7 @@ entry:
 
 define noundef <4 x float> @test_normalize_float4(<4 x float> noundef %p0) {
 entry:
-  ; EXPCHECK: [[dotf4:%.*]] = call float @llvm.dx.dot4.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}})
+  ; EXPCHECK: [[dotf4:%.*]] = call float @llvm.dx.dot4.f32(float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}})
   ; DOPCHECK: [[dotf4:%.*]] = call float @dx.op.dot4.f32(i32 56, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}})
   ; EXPCHECK: [[rsqrt:%.*]] = call float @llvm.dx.rsqrt.f32(float [[dotf4]])
   ; DOPCHECK: [[rsqrt:%.*]] = call float @dx.op.unary.f32(i32 25, float [[dotf4]])