[clang] [llvm] [SPIRV][HLSL] Add mad intrinsic lowering for spirv (PR #89130)

Farzon Lotfi via cfe-commits cfe-commits at lists.llvm.org
Wed Apr 17 12:57:34 PDT 2024


https://github.com/farzonl created https://github.com/llvm/llvm-project/pull/89130

- `clang/lib/CodeGen/CGBuiltin.cpp` - Add a generic mul/add implementation and make the DXIL implementation target-specific.
- `clang/lib/CodeGen/CGHLSLRuntime.cpp` - The existing target switching only lets you swap on intrinsics, but there is no SPIR-V intrinsic here, so we needed another way to switch behavior per target. My idea was to use lambdas that capture the variables they need, keeping the call generic (see the sketch after this list).
- `clang/lib/CodeGen/CGHLSLRuntime.h` - Add the function signature for the per-target behavior switching.
- `clang/lib/CodeGen/CodeGenFunction.h` - Add `EmitHLSLMadIntrinsic` so the mad lowering still has access to underlying `CodeGenFunction` members like `Builder`.
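A rough sketch of that dispatch, condensed from the `emitHLSLIntrinsic` hunk in the patch below (written standalone here for illustration; in the patch it is a `CGHLSLRuntime` member that reads the arch from `CGM`):

```cpp
#include "llvm/ADT/STLFunctionalExtras.h" // llvm::function_ref
#include "llvm/IR/Value.h"
#include "llvm/TargetParser/Triple.h"

// Pick the emitter lambda that matches the target architecture.
static llvm::Value *
emitHLSLIntrinsic(llvm::Triple::ArchType Arch,
                  llvm::function_ref<llvm::Value *()> DxilEmitter,
                  llvm::function_ref<llvm::Value *()> SPIRVEmitter,
                  llvm::function_ref<llvm::Value *()> GenericEmitter) {
  switch (Arch) {
  case llvm::Triple::dxil:
    return DxilEmitter();  // e.g. emit llvm.dx.imad / llvm.dx.umad
  case llvm::Triple::spirv:
    return SPIRVEmitter(); // no SPIR-V mad intrinsic: emit plain mul + add
  default:
    return GenericEmitter();
  }
}
```

Because the lambdas capture `Builder` and the already-emitted operands, the dispatcher itself stays generic and does not need to know anything about the mad builtin.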

resolves #88944

>From f1406f8ee7ef0db485186606d5c373322596765a Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Wed, 17 Apr 2024 15:25:33 -0400
Subject: [PATCH] [SPIRV][HLSL] Add mad intrinsic lowering for spirv

---
 clang/lib/CodeGen/CGBuiltin.cpp               |  52 ++-
 clang/lib/CodeGen/CGHLSLRuntime.cpp           |  15 +
 clang/lib/CodeGen/CGHLSLRuntime.h             |   9 +-
 clang/lib/CodeGen/CodeGenFunction.h           |   1 +
 clang/test/CodeGenHLSL/builtins/mad.hlsl      | 240 ++++++++------
 llvm/test/CodeGen/DirectX/fmad.ll             |  12 +-
 .../CodeGen/SPIRV/hlsl-intrinsics/fmad.ll     |  12 +-
 .../CodeGen/SPIRV/hlsl-intrinsics/imad.ll     | 302 ++++++++++++++++++
 8 files changed, 521 insertions(+), 122 deletions(-)
 create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-intrinsics/imad.ll

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index a05874e63c73c2..3f9f5aa9b42ed0 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -18186,6 +18186,40 @@ Intrinsic::ID getDotProductIntrinsic(QualType QT, int elementCount) {
   return Intrinsic::dx_udot;
 }
 
+Value *CodeGenFunction::EmitHLSLMadIntrinsic(const CallExpr *E) {
+  Value *M = EmitScalarExpr(E->getArg(0));
+  Value *A = EmitScalarExpr(E->getArg(1));
+  Value *B = EmitScalarExpr(E->getArg(2));
+  if (E->getArg(0)->getType()->hasFloatingRepresentation())
+    return Builder.CreateIntrinsic(
+        /*ReturnType*/ M->getType(), Intrinsic::fmuladd,
+        ArrayRef<Value *>{M, A, B}, nullptr, "hlsl.fmad");
+
+  auto EmitHLSLIMadDirectX = [E, M, A, B, this]() -> Value * {
+    if (E->getArg(0)->getType()->hasSignedIntegerRepresentation())
+      return Builder.CreateIntrinsic(
+          /*ReturnType*/ M->getType(), Intrinsic::dx_imad,
+          ArrayRef<Value *>{M, A, B}, nullptr, "dx.imad");
+    assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation());
+    return Builder.CreateIntrinsic(
+        /*ReturnType=*/M->getType(), Intrinsic::dx_umad,
+        ArrayRef<Value *>{M, A, B}, nullptr, "dx.umad");
+  };
+
+  auto EmitHLSLIMadGeneric = [E, M, A, B, this]() -> Value * {
+    if (E->getArg(0)->getType()->hasSignedIntegerRepresentation()) {
+      Value *Mul = Builder.CreateNSWMul(M, A);
+      return Builder.CreateNSWAdd(Mul, B);
+    }
+    assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation());
+    Value *Mul = Builder.CreateNUWMul(M, A);
+    return Builder.CreateNUWAdd(Mul, B);
+  };
+
+  return CGM.getHLSLRuntime().emitHLSLIntrinsic(
+      EmitHLSLIMadDirectX, EmitHLSLIMadGeneric, EmitHLSLIMadGeneric);
+}
+
 Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
                                             const CallExpr *E) {
   if (!getLangOpts().HLSL)
@@ -18291,23 +18325,7 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
                                    ArrayRef<Value *>{Op0}, nullptr, "dx.isinf");
   }
   case Builtin::BI__builtin_hlsl_mad: {
-    Value *M = EmitScalarExpr(E->getArg(0));
-    Value *A = EmitScalarExpr(E->getArg(1));
-    Value *B = EmitScalarExpr(E->getArg(2));
-    if (E->getArg(0)->getType()->hasFloatingRepresentation()) {
-      return Builder.CreateIntrinsic(
-          /*ReturnType*/ M->getType(), Intrinsic::fmuladd,
-          ArrayRef<Value *>{M, A, B}, nullptr, "dx.fmad");
-    }
-    if (E->getArg(0)->getType()->hasSignedIntegerRepresentation()) {
-      return Builder.CreateIntrinsic(
-          /*ReturnType*/ M->getType(), Intrinsic::dx_imad,
-          ArrayRef<Value *>{M, A, B}, nullptr, "dx.imad");
-    }
-    assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation());
-    return Builder.CreateIntrinsic(
-        /*ReturnType=*/M->getType(), Intrinsic::dx_umad,
-        ArrayRef<Value *>{M, A, B}, nullptr, "dx.umad");
+    return EmitHLSLMadIntrinsic(E);
   }
   case Builtin::BI__builtin_hlsl_elementwise_rcp: {
     Value *Op0 = EmitScalarExpr(E->getArg(0));
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index 5e6a3dd4878f46..1c801a4d1b06a0 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -119,6 +119,21 @@ llvm::Triple::ArchType CGHLSLRuntime::getArch() {
   return CGM.getTarget().getTriple().getArch();
 }
 
+Value *
+CGHLSLRuntime::emitHLSLIntrinsic(llvm::function_ref<Value *()> DxilEmitter,
+                                 llvm::function_ref<Value *()> SPIRVEmitter,
+                                 llvm::function_ref<Value *()> GenericEmitter) {
+  llvm::Triple::ArchType Arch = getArch();
+  switch (Arch) {
+  case llvm::Triple::dxil:
+    return DxilEmitter();
+  case llvm::Triple::spirv:
+    return SPIRVEmitter();
+  default:
+    return GenericEmitter();
+  }
+}
+
 void CGHLSLRuntime::addConstant(VarDecl *D, Buffer &CB) {
   if (D->getStorageClass() == SC_Static) {
     // For static inside cbuffer, take as global static.
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index 506b364f5b2ec7..923cf2140c13f0 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -50,6 +50,7 @@ namespace llvm {
 class GlobalVariable;
 class Function;
 class StructType;
+class Value;
 } // namespace llvm
 
 namespace clang {
@@ -79,7 +80,13 @@ class CGHLSLRuntime {
   //===----------------------------------------------------------------------===//
   // End of reserved area for HLSL intrinsic getters.
   //===----------------------------------------------------------------------===//
-
+  llvm::Value *emitHLSLIntrinsic(
+      llvm::function_ref<llvm::Value *()> DxilEmitter,
+      llvm::function_ref<llvm::Value *()> SPIRVEmitter,
+      llvm::function_ref<llvm::Value *()> GenericEmitter =
+          []() -> llvm::Value * {
+        llvm_unreachable("Intrinsic not supported by target architecture.");
+      });
   struct BufferResBinding {
     // The ID like 2 in register(b2, space1).
     std::optional<unsigned> Reg;
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index ff1873325d409f..f0e923949f2047 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4548,6 +4548,7 @@ class CodeGenFunction : public CodeGenTypeCache {
   llvm::Value *EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
   llvm::Value *EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
   llvm::Value *EmitHLSLBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
+  llvm::Value *EmitHLSLMadIntrinsic(const CallExpr *E);
   llvm::Value *EmitScalarOrConstFoldImmArg(unsigned ICEArguments, unsigned Idx,
                                            const CallExpr *E);
   llvm::Value *EmitSystemZBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
diff --git a/clang/test/CodeGenHLSL/builtins/mad.hlsl b/clang/test/CodeGenHLSL/builtins/mad.hlsl
index 749eac6d64736d..bd4f38067a5c59 100644
--- a/clang/test/CodeGenHLSL/builtins/mad.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/mad.hlsl
@@ -1,182 +1,238 @@
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
 // RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ 
-// RUN:   --check-prefixes=CHECK,NATIVE_HALF
+// RUN:   --check-prefixes=CHECK,DXIL_CHECK,DXIL_NATIVE_HALF,NATIVE_HALF
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
-// RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF
+// RUN:   -o - | FileCheck %s --check-prefixes=CHECK,DXIL_CHECK,NO_HALF
+
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   spirv-unknown-vulkan-compute %s -fnative-half-type \
+// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ 
+// RUN:   --check-prefixes=CHECK,NATIVE_HALF,SPIR_NATIVE_HALF,SPIR_CHECK
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \
+// RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF,SPIR_CHECK
 
 #ifdef __HLSL_ENABLE_16_BIT
-// NATIVE_HALF: %dx.umad = call i16 @llvm.dx.umad.i16(i16 %0, i16 %1, i16 %2)
-// NATIVE_HALF: ret i16 %dx.umad
+// DXIL_NATIVE_HALF: %dx.umad = call i16 @llvm.dx.umad.i16(i16 %0, i16 %1, i16 %2)
+// DXIL_NATIVE_HALF: ret i16 %dx.umad
+// SPIR_NATIVE_HALF: mul nuw i16 %{{.*}}, %{{.*}}
+// SPIR_NATIVE_HALF: add nuw i16 %{{.*}}, %{{.*}}
 uint16_t test_mad_uint16_t(uint16_t p0, uint16_t p1, uint16_t p2) { return mad(p0, p1, p2); }
 
-// NATIVE_HALF: %dx.umad = call <2 x i16>  @llvm.dx.umad.v2i16(<2 x i16> %0, <2 x i16> %1, <2 x i16> %2)
-// NATIVE_HALF: ret <2 x i16> %dx.umad
+// DXIL_NATIVE_HALF: %dx.umad = call <2 x i16>  @llvm.dx.umad.v2i16(<2 x i16> %0, <2 x i16> %1, <2 x i16> %2)
+// DXIL_NATIVE_HALF: ret <2 x i16> %dx.umad
+// SPIR_NATIVE_HALF: mul nuw <2 x i16>  %{{.*}}, %{{.*}}
+// SPIR_NATIVE_HALF: add nuw <2 x i16>  %{{.*}}, %{{.*}}
 uint16_t2 test_mad_uint16_t2(uint16_t2 p0, uint16_t2 p1, uint16_t2 p2) { return mad(p0, p1, p2); }
 
-// NATIVE_HALF: %dx.umad = call <3 x i16>  @llvm.dx.umad.v3i16(<3 x i16> %0, <3 x i16> %1, <3 x i16> %2)
-// NATIVE_HALF: ret <3 x i16> %dx.umad
+// DXIL_NATIVE_HALF: %dx.umad = call <3 x i16>  @llvm.dx.umad.v3i16(<3 x i16> %0, <3 x i16> %1, <3 x i16> %2)
+// DXIL_NATIVE_HALF: ret <3 x i16> %dx.umad
+// SPIR_NATIVE_HALF: mul nuw <3 x i16>  %{{.*}}, %{{.*}}
+// SPIR_NATIVE_HALF: add nuw <3 x i16>  %{{.*}}, %{{.*}}
 uint16_t3 test_mad_uint16_t3(uint16_t3 p0, uint16_t3 p1, uint16_t3 p2) { return mad(p0, p1, p2); }
 
-// NATIVE_HALF: %dx.umad = call <4 x i16>  @llvm.dx.umad.v4i16(<4 x i16> %0, <4 x i16> %1, <4 x i16> %2)
-// NATIVE_HALF: ret <4 x i16> %dx.umad
+// DXIL_NATIVE_HALF: %dx.umad = call <4 x i16>  @llvm.dx.umad.v4i16(<4 x i16> %0, <4 x i16> %1, <4 x i16> %2)
+// DXIL_NATIVE_HALF: ret <4 x i16> %dx.umad
+// SPIR_NATIVE_HALF: mul nuw <4 x i16>  %{{.*}}, %{{.*}}
+// SPIR_NATIVE_HALF: add nuw <4 x i16>  %{{.*}}, %{{.*}}
 uint16_t4 test_mad_uint16_t4(uint16_t4 p0, uint16_t4 p1, uint16_t4 p2) { return mad(p0, p1, p2); }
 
-// NATIVE_HALF: %dx.imad = call i16 @llvm.dx.imad.i16(i16 %0, i16 %1, i16 %2)
-// NATIVE_HALF: ret i16 %dx.imad
+// DXIL_NATIVE_HALF: %dx.imad = call i16 @llvm.dx.imad.i16(i16 %0, i16 %1, i16 %2)
+// DXIL_NATIVE_HALF: ret i16 %dx.imad
+// SPIR_NATIVE_HALF: mul nsw i16 %{{.*}}, %{{.*}}
+// SPIR_NATIVE_HALF: add nsw i16 %{{.*}}, %{{.*}}
 int16_t test_mad_int16_t(int16_t p0, int16_t p1, int16_t p2) { return mad(p0, p1, p2); }
 
-// NATIVE_HALF: %dx.imad = call <2 x i16>  @llvm.dx.imad.v2i16(<2 x i16> %0, <2 x i16> %1, <2 x i16> %2)
-// NATIVE_HALF: ret <2 x i16> %dx.imad
+// DXIL_NATIVE_HALF: %dx.imad = call <2 x i16>  @llvm.dx.imad.v2i16(<2 x i16> %0, <2 x i16> %1, <2 x i16> %2)
+// DXIL_NATIVE_HALF: ret <2 x i16> %dx.imad
+// SPIR_NATIVE_HALF: mul nsw <2 x i16>  %{{.*}}, %{{.*}}
+// SPIR_NATIVE_HALF: add nsw <2 x i16>  %{{.*}}, %{{.*}}
 int16_t2 test_mad_int16_t2(int16_t2 p0, int16_t2 p1, int16_t2 p2) { return mad(p0, p1, p2); }
 
-// NATIVE_HALF: %dx.imad = call <3 x i16>  @llvm.dx.imad.v3i16(<3 x i16> %0, <3 x i16> %1, <3 x i16> %2)
-// NATIVE_HALF: ret <3 x i16> %dx.imad
+// DXIL_NATIVE_HALF: %dx.imad = call <3 x i16>  @llvm.dx.imad.v3i16(<3 x i16> %0, <3 x i16> %1, <3 x i16> %2)
+// DXIL_NATIVE_HALF: ret <3 x i16> %dx.imad
+// SPIR_NATIVE_HALF: mul nsw <3 x i16>  %{{.*}}, %{{.*}}
+// SPIR_NATIVE_HALF: add nsw <3 x i16>  %{{.*}}, %{{.*}}
 int16_t3 test_mad_int16_t3(int16_t3 p0, int16_t3 p1, int16_t3 p2) { return mad(p0, p1, p2); }
 
-// NATIVE_HALF: %dx.imad = call <4 x i16>  @llvm.dx.imad.v4i16(<4 x i16> %0, <4 x i16> %1, <4 x i16> %2)
-// NATIVE_HALF: ret <4 x i16> %dx.imad
+// DXIL_NATIVE_HALF: %dx.imad = call <4 x i16>  @llvm.dx.imad.v4i16(<4 x i16> %0, <4 x i16> %1, <4 x i16> %2)
+// DXIL_NATIVE_HALF: ret <4 x i16> %dx.imad
+// SPIR_NATIVE_HALF: mul nsw <4 x i16>  %{{.*}}, %{{.*}}
+// SPIR_NATIVE_HALF: add nsw <4 x i16>  %{{.*}}, %{{.*}}
 int16_t4 test_mad_int16_t4(int16_t4 p0, int16_t4 p1, int16_t4 p2) { return mad(p0, p1, p2); }
 #endif // __HLSL_ENABLE_16_BIT
 
-// NATIVE_HALF: %dx.fmad = call half @llvm.fmuladd.f16(half %0, half %1, half %2)
-// NATIVE_HALF: ret half %dx.fmad
-// NO_HALF: %dx.fmad = call float @llvm.fmuladd.f32(float %0, float %1, float %2)
-// NO_HALF: ret float %dx.fmad
+// NATIVE_HALF: %hlsl.fmad = call half @llvm.fmuladd.f16(half %0, half %1, half %2)
+// NATIVE_HALF: ret half %hlsl.fmad
+// NO_HALF: %hlsl.fmad = call float @llvm.fmuladd.f32(float %0, float %1, float %2)
+// NO_HALF: ret float %hlsl.fmad
 half test_mad_half(half p0, half p1, half p2) { return mad(p0, p1, p2); }
 
-// NATIVE_HALF: %dx.fmad = call <2 x half>  @llvm.fmuladd.v2f16(<2 x half> %0, <2 x half> %1, <2 x half> %2)
-// NATIVE_HALF: ret <2 x half> %dx.fmad
-// NO_HALF: %dx.fmad = call <2 x float>  @llvm.fmuladd.v2f32(<2 x float> %0, <2 x float> %1, <2 x float> %2)
-// NO_HALF: ret <2 x float> %dx.fmad
+// NATIVE_HALF: %hlsl.fmad = call <2 x half>  @llvm.fmuladd.v2f16(<2 x half> %0, <2 x half> %1, <2 x half> %2)
+// NATIVE_HALF: ret <2 x half> %hlsl.fmad
+// NO_HALF: %hlsl.fmad = call <2 x float>  @llvm.fmuladd.v2f32(<2 x float> %0, <2 x float> %1, <2 x float> %2)
+// NO_HALF: ret <2 x float> %hlsl.fmad
 half2 test_mad_half2(half2 p0, half2 p1, half2 p2) { return mad(p0, p1, p2); }
 
-// NATIVE_HALF: %dx.fmad = call <3 x half>  @llvm.fmuladd.v3f16(<3 x half> %0, <3 x half> %1, <3 x half> %2)
-// NATIVE_HALF: ret <3 x half> %dx.fmad
-// NO_HALF: %dx.fmad = call <3 x float>  @llvm.fmuladd.v3f32(<3 x float> %0, <3 x float> %1, <3 x float> %2)
-// NO_HALF: ret <3 x float> %dx.fmad
+// NATIVE_HALF: %hlsl.fmad = call <3 x half>  @llvm.fmuladd.v3f16(<3 x half> %0, <3 x half> %1, <3 x half> %2)
+// NATIVE_HALF: ret <3 x half> %hlsl.fmad
+// NO_HALF: %hlsl.fmad = call <3 x float>  @llvm.fmuladd.v3f32(<3 x float> %0, <3 x float> %1, <3 x float> %2)
+// NO_HALF: ret <3 x float> %hlsl.fmad
 half3 test_mad_half3(half3 p0, half3 p1, half3 p2) { return mad(p0, p1, p2); }
 
-// NATIVE_HALF: %dx.fmad = call <4 x half>  @llvm.fmuladd.v4f16(<4 x half> %0, <4 x half> %1, <4 x half> %2)
-// NATIVE_HALF: ret <4 x half> %dx.fmad
-// NO_HALF: %dx.fmad = call <4 x float>  @llvm.fmuladd.v4f32(<4 x float> %0, <4 x float> %1, <4 x float> %2)
-// NO_HALF: ret <4 x float> %dx.fmad
+// NATIVE_HALF: %hlsl.fmad = call <4 x half>  @llvm.fmuladd.v4f16(<4 x half> %0, <4 x half> %1, <4 x half> %2)
+// NATIVE_HALF: ret <4 x half> %hlsl.fmad
+// NO_HALF: %hlsl.fmad = call <4 x float>  @llvm.fmuladd.v4f32(<4 x float> %0, <4 x float> %1, <4 x float> %2)
+// NO_HALF: ret <4 x float> %hlsl.fmad
 half4 test_mad_half4(half4 p0, half4 p1, half4 p2) { return mad(p0, p1, p2); }
 
-// CHECK: %dx.fmad = call float @llvm.fmuladd.f32(float %0, float %1, float %2)
-// CHECK: ret float %dx.fmad
+// CHECK: %hlsl.fmad = call float @llvm.fmuladd.f32(float %0, float %1, float %2)
+// CHECK: ret float %hlsl.fmad
 float test_mad_float(float p0, float p1, float p2) { return mad(p0, p1, p2); }
 
-// CHECK: %dx.fmad = call <2 x float>  @llvm.fmuladd.v2f32(<2 x float> %0, <2 x float> %1, <2 x float> %2)
-// CHECK: ret <2 x float> %dx.fmad
+// CHECK: %hlsl.fmad = call <2 x float>  @llvm.fmuladd.v2f32(<2 x float> %0, <2 x float> %1, <2 x float> %2)
+// CHECK: ret <2 x float> %hlsl.fmad
 float2 test_mad_float2(float2 p0, float2 p1, float2 p2) { return mad(p0, p1, p2); }
 
-// CHECK: %dx.fmad = call <3 x float>  @llvm.fmuladd.v3f32(<3 x float> %0, <3 x float> %1, <3 x float> %2)
-// CHECK: ret <3 x float> %dx.fmad
+// CHECK: %hlsl.fmad = call <3 x float>  @llvm.fmuladd.v3f32(<3 x float> %0, <3 x float> %1, <3 x float> %2)
+// CHECK: ret <3 x float> %hlsl.fmad
 float3 test_mad_float3(float3 p0, float3 p1, float3 p2) { return mad(p0, p1, p2); }
 
-// CHECK: %dx.fmad = call <4 x float>  @llvm.fmuladd.v4f32(<4 x float> %0, <4 x float> %1, <4 x float> %2)
-// CHECK: ret <4 x float> %dx.fmad
+// CHECK: %hlsl.fmad = call <4 x float>  @llvm.fmuladd.v4f32(<4 x float> %0, <4 x float> %1, <4 x float> %2)
+// CHECK: ret <4 x float> %hlsl.fmad
 float4 test_mad_float4(float4 p0, float4 p1, float4 p2) { return mad(p0, p1, p2); }
 
-// CHECK: %dx.fmad = call double @llvm.fmuladd.f64(double %0, double %1, double %2)
-// CHECK: ret double %dx.fmad
+// CHECK: %hlsl.fmad = call double @llvm.fmuladd.f64(double %0, double %1, double %2)
+// CHECK: ret double %hlsl.fmad
 double test_mad_double(double p0, double p1, double p2) { return mad(p0, p1, p2); }
 
-// CHECK: %dx.fmad = call <2 x double>  @llvm.fmuladd.v2f64(<2 x double> %0, <2 x double> %1, <2 x double> %2)
-// CHECK: ret <2 x double> %dx.fmad
+// CHECK: %hlsl.fmad = call <2 x double>  @llvm.fmuladd.v2f64(<2 x double> %0, <2 x double> %1, <2 x double> %2)
+// CHECK: ret <2 x double> %hlsl.fmad
 double2 test_mad_double2(double2 p0, double2 p1, double2 p2) { return mad(p0, p1, p2); }
 
-// CHECK: %dx.fmad = call <3 x double>  @llvm.fmuladd.v3f64(<3 x double> %0, <3 x double> %1, <3 x double> %2)
-// CHECK: ret <3 x double> %dx.fmad
+// CHECK: %hlsl.fmad = call <3 x double>  @llvm.fmuladd.v3f64(<3 x double> %0, <3 x double> %1, <3 x double> %2)
+// CHECK: ret <3 x double> %hlsl.fmad
 double3 test_mad_double3(double3 p0, double3 p1, double3 p2) { return mad(p0, p1, p2); }
 
-// CHECK: %dx.fmad = call <4 x double>  @llvm.fmuladd.v4f64(<4 x double> %0, <4 x double> %1, <4 x double> %2)
-// CHECK: ret <4 x double> %dx.fmad
+// CHECK: %hlsl.fmad = call <4 x double>  @llvm.fmuladd.v4f64(<4 x double> %0, <4 x double> %1, <4 x double> %2)
+// CHECK: ret <4 x double> %hlsl.fmad
 double4 test_mad_double4(double4 p0, double4 p1, double4 p2) { return mad(p0, p1, p2); }
 
-// CHECK: %dx.imad = call i32 @llvm.dx.imad.i32(i32 %0, i32 %1, i32 %2)
-// CHECK: ret i32 %dx.imad
+// DXIL_CHECK: %dx.imad = call i32 @llvm.dx.imad.i32(i32 %0, i32 %1, i32 %2)
+// DXIL_CHECK: ret i32 %dx.imad
+// SPIR_CHECK: mul nsw i32 %{{.*}}, %{{.*}}
+// SPIR_CHECK: add nsw i32 %{{.*}}, %{{.*}}
 int test_mad_int(int p0, int p1, int p2) { return mad(p0, p1, p2); }
 
-// CHECK: %dx.imad = call <2 x i32>  @llvm.dx.imad.v2i32(<2 x i32> %0, <2 x i32> %1, <2 x i32> %2)
-// CHECK: ret <2 x i32> %dx.imad
+// DXIL_CHECK: %dx.imad = call <2 x i32>  @llvm.dx.imad.v2i32(<2 x i32> %0, <2 x i32> %1, <2 x i32> %2)
+// DXIL_CHECK: ret <2 x i32> %dx.imad
+// SPIR_CHECK: mul nsw <2 x i32>  %{{.*}}, %{{.*}}
+// SPIR_CHECK: add nsw <2 x i32>  %{{.*}}, %{{.*}}
 int2 test_mad_int2(int2 p0, int2 p1, int2 p2) { return mad(p0, p1, p2); }
 
-// CHECK: %dx.imad = call <3 x i32>  @llvm.dx.imad.v3i32(<3 x i32> %0, <3 x i32> %1, <3 x i32> %2)
-// CHECK: ret <3 x i32> %dx.imad
+// DXIL_CHECK: %dx.imad = call <3 x i32>  @llvm.dx.imad.v3i32(<3 x i32> %0, <3 x i32> %1, <3 x i32> %2)
+// DXIL_CHECK: ret <3 x i32> %dx.imad
+// SPIR_CHECK: mul nsw <3 x i32>  %{{.*}}, %{{.*}}
+// SPIR_CHECK: add nsw <3 x i32>  %{{.*}}, %{{.*}}
 int3 test_mad_int3(int3 p0, int3 p1, int3 p2) { return mad(p0, p1, p2); }
 
-// CHECK: %dx.imad = call <4 x i32>  @llvm.dx.imad.v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2)
-// CHECK: ret <4 x i32> %dx.imad
+// DXIL_CHECK: %dx.imad = call <4 x i32>  @llvm.dx.imad.v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2)
+// DXIL_CHECK: ret <4 x i32> %dx.imad
+// SPIR_CHECK: mul nsw <4 x i32>  %{{.*}}, %{{.*}}
+// SPIR_CHECK: add nsw <4 x i32>  %{{.*}}, %{{.*}}
 int4 test_mad_int4(int4 p0, int4 p1, int4 p2) { return mad(p0, p1, p2); }
 
-// CHECK: %dx.imad = call i64 @llvm.dx.imad.i64(i64 %0, i64 %1, i64 %2)
-// CHECK: ret i64 %dx.imad
+// DXIL_CHECK: %dx.imad = call i64 @llvm.dx.imad.i64(i64 %0, i64 %1, i64 %2)
+// DXIL_CHECK: ret i64 %dx.imad
+// SPIR_CHECK: mul nsw i64 %{{.*}}, %{{.*}}
+// SPIR_CHECK: add nsw i64 %{{.*}}, %{{.*}}
 int64_t test_mad_int64_t(int64_t p0, int64_t p1, int64_t p2) { return mad(p0, p1, p2); }
 
-// CHECK: %dx.imad = call <2 x i64>  @llvm.dx.imad.v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2)
-// CHECK: ret <2 x i64> %dx.imad
+// DXIL_CHECK: %dx.imad = call <2 x i64>  @llvm.dx.imad.v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2)
+// DXIL_CHECK: ret <2 x i64> %dx.imad
+// SPIR_CHECK: mul nsw <2 x i64>  %{{.*}}, %{{.*}}
+// SPIR_CHECK: add nsw <2 x i64>  %{{.*}}, %{{.*}}
 int64_t2 test_mad_int64_t2(int64_t2 p0, int64_t2 p1, int64_t2 p2) { return mad(p0, p1, p2); }
 
-// CHECK: %dx.imad = call <3 x i64>  @llvm.dx.imad.v3i64(<3 x i64> %0, <3 x i64> %1, <3 x i64> %2)
-// CHECK: ret <3 x i64> %dx.imad
+// DXIL_CHECK: %dx.imad = call <3 x i64>  @llvm.dx.imad.v3i64(<3 x i64> %0, <3 x i64> %1, <3 x i64> %2)
+// DXIL_CHECK: ret <3 x i64> %dx.imad
+// SPIR_CHECK: mul nsw <3 x i64>  %{{.*}}, %{{.*}}
+// SPIR_CHECK: add nsw <3 x i64>  %{{.*}}, %{{.*}}
 int64_t3 test_mad_int64_t3(int64_t3 p0, int64_t3 p1, int64_t3 p2) { return mad(p0, p1, p2); }
 
-// CHECK: %dx.imad = call <4 x i64>  @llvm.dx.imad.v4i64(<4 x i64> %0, <4 x i64> %1, <4 x i64> %2)
-// CHECK: ret <4 x i64> %dx.imad
+// DXIL_CHECK: %dx.imad = call <4 x i64>  @llvm.dx.imad.v4i64(<4 x i64> %0, <4 x i64> %1, <4 x i64> %2)
+// DXIL_CHECK: ret <4 x i64> %dx.imad
+// SPIR_CHECK: mul nsw <4 x i64>  %{{.*}}, %{{.*}}
+// SPIR_CHECK: add nsw <4 x i64>  %{{.*}}, %{{.*}}
 int64_t4 test_mad_int64_t4(int64_t4 p0, int64_t4 p1, int64_t4 p2) { return mad(p0, p1, p2); }
 
-// CHECK: %dx.umad = call i32 @llvm.dx.umad.i32(i32 %0, i32 %1, i32 %2)
-// CHECK: ret i32 %dx.umad
+// DXIL_CHECK: %dx.umad = call i32 @llvm.dx.umad.i32(i32 %0, i32 %1, i32 %2)
+// DXIL_CHECK: ret i32 %dx.umad
+// SPIR_CHECK: mul nuw i32 %{{.*}}, %{{.*}}
+// SPIR_CHECK: add nuw i32 %{{.*}}, %{{.*}}
 uint test_mad_uint(uint p0, uint p1, uint p2) { return mad(p0, p1, p2); }
 
-// CHECK: %dx.umad = call <2 x i32>  @llvm.dx.umad.v2i32(<2 x i32> %0, <2 x i32> %1, <2 x i32> %2)
-// CHECK: ret <2 x i32> %dx.umad
+// DXIL_CHECK: %dx.umad = call <2 x i32>  @llvm.dx.umad.v2i32(<2 x i32> %0, <2 x i32> %1, <2 x i32> %2)
+// DXIL_CHECK: ret <2 x i32> %dx.umad
+// SPIR_CHECK: mul nuw <2 x i32>  %{{.*}}, %{{.*}}
+// SPIR_CHECK: add nuw <2 x i32>  %{{.*}}, %{{.*}}
 uint2 test_mad_uint2(uint2 p0, uint2 p1, uint2 p2) { return mad(p0, p1, p2); }
 
-// CHECK: %dx.umad = call <3 x i32>  @llvm.dx.umad.v3i32(<3 x i32> %0, <3 x i32> %1, <3 x i32> %2)
-// CHECK: ret <3 x i32> %dx.umad
+// DXIL_CHECK: %dx.umad = call <3 x i32>  @llvm.dx.umad.v3i32(<3 x i32> %0, <3 x i32> %1, <3 x i32> %2)
+// DXIL_CHECK: ret <3 x i32> %dx.umad
+// SPIR_CHECK: mul nuw <3 x i32>  %{{.*}}, %{{.*}}
+// SPIR_CHECK: add nuw <3 x i32>  %{{.*}}, %{{.*}}
 uint3 test_mad_uint3(uint3 p0, uint3 p1, uint3 p2) { return mad(p0, p1, p2); }
 
-// CHECK: %dx.umad = call <4 x i32>  @llvm.dx.umad.v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2)
-// CHECK: ret <4 x i32> %dx.umad
+// DXIL_CHECK: %dx.umad = call <4 x i32>  @llvm.dx.umad.v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2)
+// DXIL_CHECK: ret <4 x i32> %dx.umad
+// SPIR_CHECK: mul nuw <4 x i32>  %{{.*}}, %{{.*}}
+// SPIR_CHECK: add nuw <4 x i32>  %{{.*}}, %{{.*}}
 uint4 test_mad_uint4(uint4 p0, uint4 p1, uint4 p2) { return mad(p0, p1, p2); }
 
-// CHECK: %dx.umad = call i64 @llvm.dx.umad.i64(i64 %0, i64 %1, i64 %2)
-// CHECK: ret i64 %dx.umad
+// DXIL_CHECK: %dx.umad = call i64 @llvm.dx.umad.i64(i64 %0, i64 %1, i64 %2)
+// DXIL_CHECK: ret i64 %dx.umad
+// SPIR_CHECK: mul nuw i64 %{{.*}}, %{{.*}}
+// SPIR_CHECK: add nuw i64 %{{.*}}, %{{.*}}
 uint64_t test_mad_uint64_t(uint64_t p0, uint64_t p1, uint64_t p2) { return mad(p0, p1, p2); }
 
-// CHECK: %dx.umad = call <2 x i64>  @llvm.dx.umad.v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2)
-// CHECK: ret <2 x i64> %dx.umad
+// DXIL_CHECK: %dx.umad = call <2 x i64>  @llvm.dx.umad.v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2)
+// DXIL_CHECK: ret <2 x i64> %dx.umad
+// SPIR_CHECK: mul nuw <2 x i64>  %{{.*}}, %{{.*}}
+// SPIR_CHECK: add nuw <2 x i64>  %{{.*}}, %{{.*}}
 uint64_t2 test_mad_uint64_t2(uint64_t2 p0, uint64_t2 p1, uint64_t2 p2) { return mad(p0, p1, p2); }
 
-// CHECK: %dx.umad = call <3 x i64>  @llvm.dx.umad.v3i64(<3 x i64> %0, <3 x i64> %1, <3 x i64> %2)
-// CHECK: ret <3 x i64> %dx.umad
+// DXIL_CHECK: %dx.umad = call <3 x i64>  @llvm.dx.umad.v3i64(<3 x i64> %0, <3 x i64> %1, <3 x i64> %2)
+// DXIL_CHECK: ret <3 x i64> %dx.umad
+// SPIR_CHECK: mul nuw <3 x i64>  %{{.*}}, %{{.*}}
+// SPIR_CHECK: add nuw <3 x i64>  %{{.*}}, %{{.*}}
 uint64_t3 test_mad_uint64_t3(uint64_t3 p0, uint64_t3 p1, uint64_t3 p2) { return mad(p0, p1, p2); }
 
-// CHECK: %dx.umad = call <4 x i64>  @llvm.dx.umad.v4i64(<4 x i64> %0, <4 x i64> %1, <4 x i64> %2)
-// CHECK: ret <4 x i64> %dx.umad
+// DXIL_CHECK: %dx.umad = call <4 x i64>  @llvm.dx.umad.v4i64(<4 x i64> %0, <4 x i64> %1, <4 x i64> %2)
+// DXIL_CHECK: ret <4 x i64> %dx.umad
+// SPIR_CHECK: mul nuw <4 x i64>  %{{.*}}, %{{.*}}
+// SPIR_CHECK: add nuw <4 x i64>  %{{.*}}, %{{.*}}
 uint64_t4 test_mad_uint64_t4(uint64_t4 p0, uint64_t4 p1, uint64_t4 p2) { return mad(p0, p1, p2); }
 
-// CHECK: %dx.fmad = call <2 x float>  @llvm.fmuladd.v2f32(<2 x float> %splat.splat, <2 x float> %1, <2 x float> %2)
-// CHECK: ret <2 x float> %dx.fmad
+// CHECK: %hlsl.fmad = call <2 x float>  @llvm.fmuladd.v2f32(<2 x float> %splat.splat, <2 x float> %1, <2 x float> %2)
+// CHECK: ret <2 x float> %hlsl.fmad
 float2 test_mad_float2_splat(float p0, float2 p1, float2 p2) { return mad(p0, p1, p2); }
 
-// CHECK: %dx.fmad = call <3 x float>  @llvm.fmuladd.v3f32(<3 x float> %splat.splat, <3 x float> %1, <3 x float> %2)
-// CHECK: ret <3 x float> %dx.fmad
+// CHECK: %hlsl.fmad = call <3 x float>  @llvm.fmuladd.v3f32(<3 x float> %splat.splat, <3 x float> %1, <3 x float> %2)
+// CHECK: ret <3 x float> %hlsl.fmad
 float3 test_mad_float3_splat(float p0, float3 p1, float3 p2) { return mad(p0, p1, p2); }
 
-// CHECK:  %dx.fmad = call <4 x float>  @llvm.fmuladd.v4f32(<4 x float> %splat.splat, <4 x float> %1, <4 x float> %2)
-// CHECK:  ret <4 x float> %dx.fmad
+// CHECK:  %hlsl.fmad = call <4 x float>  @llvm.fmuladd.v4f32(<4 x float> %splat.splat, <4 x float> %1, <4 x float> %2)
+// CHECK:  ret <4 x float> %hlsl.fmad
 float4 test_mad_float4_splat(float p0, float4 p1, float4 p2) { return mad(p0, p1, p2); }
 
 // CHECK: %conv = sitofp i32 %2 to float
 // CHECK: %splat.splatinsert = insertelement <2 x float> poison, float %conv, i64 0
 // CHECK: %splat.splat = shufflevector <2 x float> %splat.splatinsert, <2 x float> poison, <2 x i32> zeroinitializer
-// CHECK: %dx.fmad = call <2 x float>  @llvm.fmuladd.v2f32(<2 x float> %0, <2 x float> %1, <2 x float> %splat.splat)
-// CHECK: ret <2 x float> %dx.fmad
+// CHECK: %hlsl.fmad = call <2 x float>  @llvm.fmuladd.v2f32(<2 x float> %0, <2 x float> %1, <2 x float> %splat.splat)
+// CHECK: ret <2 x float> %hlsl.fmad
 float2 test_mad_float2_int_splat(float2 p0, float2 p1, int p2) {
   return mad(p0, p1, p2);
 }
@@ -184,8 +240,8 @@ float2 test_mad_float2_int_splat(float2 p0, float2 p1, int p2) {
 // CHECK: %conv = sitofp i32 %2 to float
 // CHECK: %splat.splatinsert = insertelement <3 x float> poison, float %conv, i64 0
 // CHECK: %splat.splat = shufflevector <3 x float> %splat.splatinsert, <3 x float> poison, <3 x i32> zeroinitializer
-// CHECK:  %dx.fmad = call <3 x float>  @llvm.fmuladd.v3f32(<3 x float> %0, <3 x float> %1, <3 x float> %splat.splat)
-// CHECK: ret <3 x float> %dx.fmad
+// CHECK:  %hlsl.fmad = call <3 x float>  @llvm.fmuladd.v3f32(<3 x float> %0, <3 x float> %1, <3 x float> %splat.splat)
+// CHECK: ret <3 x float> %hlsl.fmad
 float3 test_mad_float3_int_splat(float3 p0, float3 p1, int p2) {
   return mad(p0, p1, p2);
 }
diff --git a/llvm/test/CodeGen/DirectX/fmad.ll b/llvm/test/CodeGen/DirectX/fmad.ll
index 693e237e70dc02..e1f4e5cd50c4f0 100644
--- a/llvm/test/CodeGen/DirectX/fmad.ll
+++ b/llvm/test/CodeGen/DirectX/fmad.ll
@@ -21,8 +21,8 @@ entry:
   %0 = load half, ptr %p0.addr, align 2
   %1 = load half, ptr %p1.addr, align 2
   %2 = load half, ptr %p2.addr, align 2
-  %dx.fmad = call half @llvm.fmuladd.f16(half %0, half %1, half %2)
-  ret half %dx.fmad
+  %hlsl.fmad = call half @llvm.fmuladd.f16(half %0, half %1, half %2)
+  ret half %hlsl.fmad
 }
 
 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
@@ -40,8 +40,8 @@ entry:
   %0 = load float, ptr %p0.addr, align 4
   %1 = load float, ptr %p1.addr, align 4
   %2 = load float, ptr %p2.addr, align 4
-  %dx.fmad = call float @llvm.fmuladd.f32(float %0, float %1, float %2)
-  ret float %dx.fmad
+  %hlsl.fmad = call float @llvm.fmuladd.f32(float %0, float %1, float %2)
+  ret float %hlsl.fmad
 }
 
 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
@@ -59,8 +59,8 @@ entry:
   %0 = load double, ptr %p0.addr, align 8
   %1 = load double, ptr %p1.addr, align 8
   %2 = load double, ptr %p2.addr, align 8
-  %dx.fmad = call double @llvm.fmuladd.f64(double %0, double %1, double %2)
-  ret double %dx.fmad
+  %hlsl.fmad = call double @llvm.fmuladd.f64(double %0, double %1, double %2)
+  ret double %hlsl.fmad
 }
 
 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/fmad.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/fmad.ll
index a3fec10a9e4bc9..ce9b8f09daead1 100644
--- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/fmad.ll
+++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/fmad.ll
@@ -6,22 +6,22 @@
 define noundef half @fmad_half(half noundef %a, half noundef %b, half noundef %c) #0 {
 entry:
 ; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] Fma %[[#]] %[[#]] %[[#]]
-  %dx.fmad = call half @llvm.fmuladd.f16(half %a, half %b, half %c)
-  ret half %dx.fmad
+  %hlsl.fmad = call half @llvm.fmuladd.f16(half %a, half %b, half %c)
+  ret half %hlsl.fmad
 }
 
 define noundef float @fmad_float(float noundef %a, float noundef %b, float noundef %c) #0 {
 entry:
 ; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] Fma %[[#]] %[[#]] %[[#]]
-  %dx.fmad = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
-  ret float %dx.fmad
+  %hlsl.fmad = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
+  ret float %hlsl.fmad
 }
 
 define noundef double @fmad_double(double noundef %a, double noundef %b, double noundef %c) {
 entry:
 ; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] Fma %[[#]] %[[#]] %[[#]]
-  %dx.fmad = call double @llvm.fmuladd.f64(double %a, double %b, double %c)
-  ret double %dx.fmad
+  %hlsl.fmad = call double @llvm.fmuladd.f64(double %a, double %b, double %c)
+  ret double %hlsl.fmad
 }
 
 declare half @llvm.fmuladd.f16(half, half, half)
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/imad.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/imad.ll
new file mode 100644
index 00000000000000..b854412b6ec12a
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/imad.ll
@@ -0,0 +1,302 @@
+; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; CHECK-DAG: %[[#int_16:]] = OpTypeInt 16 0
+; CHECK-DAG: %[[#vec2_16:]] = OpTypeVector %[[#int_16]] 2
+; CHECK-DAG: %[[#vec3_16:]] = OpTypeVector %[[#int_16]] 3
+; CHECK-DAG: %[[#vec4_16:]] = OpTypeVector %[[#int_16]] 4
+; CHECK-DAG: %[[#int_32:]] = OpTypeInt 32 0
+; CHECK-DAG: %[[#vec2_32:]] = OpTypeVector %[[#int_32]] 2
+; CHECK-DAG: %[[#vec3_32:]] = OpTypeVector %[[#int_32]] 3
+; CHECK-DAG: %[[#vec4_32:]] = OpTypeVector %[[#int_32]] 4
+; CHECK-DAG: %[[#int_64:]] = OpTypeInt 64 0
+; CHECK-DAG: %[[#vec2_64:]] = OpTypeVector %[[#int_64]] 2
+; CHECK-DAG: %[[#vec3_64:]] = OpTypeVector %[[#int_64]] 3
+; CHECK-DAG: %[[#vec4_64:]] = OpTypeVector %[[#int_64]] 4
+
+define spir_func noundef i16 @test_mad_uint16_t(i16 noundef %p0, i16 noundef %p1, i16 noundef %p2) {
+entry:
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#mul:]] = OpIMul %[[#int_16]] %[[#arg0]] %[[#arg1]]
+  ; CHECK: OpIAdd %[[#int_16]] %[[#mul]] %[[#arg2]]
+  %3 = mul nuw i16 %p0, %p1
+  %4 = add nuw i16 %3, %p2
+  ret i16 %4
+}
+
+define spir_func noundef <2 x i16> @test_mad_uint16_t2(<2 x i16> noundef %p0, <2 x i16> noundef %p1, <2 x i16> noundef %p2) {
+entry:
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#mul:]] = OpIMul %[[#vec2_16]] %[[#arg0]] %[[#arg1]]
+  ; CHECK: OpIAdd %[[#vec2_16]] %[[#mul]] %[[#arg2]]
+  %3 = mul nuw <2 x i16> %p0, %p1
+  %4 = add nuw <2 x i16> %3, %p2
+  ret <2 x i16> %4
+}
+
+define spir_func noundef <3 x i16> @test_mad_uint16_t3(<3 x i16> noundef %p0, <3 x i16> noundef %p1, <3 x i16> noundef %p2) {
+entry:
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#mul:]] = OpIMul %[[#vec3_16]] %[[#arg0]] %[[#arg1]]
+  ; CHECK: OpIAdd %[[#vec3_16]] %[[#mul]] %[[#arg2]]
+  %3 = mul nuw <3 x i16> %p0, %p1
+  %4 = add nuw <3 x i16> %3, %p2
+  ret <3 x i16> %4
+}
+
+define spir_func noundef <4 x i16> @test_mad_uint16_t4(<4 x i16> noundef %p0, <4 x i16> noundef %p1, <4 x i16> noundef %p2) {
+entry:
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#mul:]] = OpIMul %[[#vec4_16]] %[[#arg0]] %[[#arg1]]
+  ; CHECK: OpIAdd %[[#vec4_16]] %[[#mul]] %[[#arg2]]
+  %3 = mul nuw <4 x i16> %p0, %p1
+  %4 = add nuw <4 x i16> %3, %p2
+  ret <4 x i16> %4
+}
+
+define spir_func noundef i16 @test_mad_int16_t(i16 noundef %p0, i16 noundef %p1, i16 noundef %p2) {
+entry:
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#mul:]] = OpIMul %[[#int_16]] %[[#arg0]] %[[#arg1]]
+  ; CHECK: OpIAdd %[[#int_16]] %[[#mul]] %[[#arg2]]
+  %3 = mul nsw i16 %p0, %p1
+  %4 = add nsw i16 %3, %p2
+  ret i16 %4
+}
+
+define spir_func noundef <2 x i16> @test_mad_int16_t2(<2 x i16> noundef %p0, <2 x i16> noundef %p1, <2 x i16> noundef %p2) {
+entry:
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#mul:]] = OpIMul %[[#vec2_16]] %[[#arg0]] %[[#arg1]]
+  ; CHECK: OpIAdd %[[#vec2_16]] %[[#mul]] %[[#arg2]]
+  %3 = mul nsw <2 x i16> %p0, %p1
+  %4 = add nsw <2 x i16> %3, %p2
+  ret <2 x i16> %4
+}
+
+define spir_func noundef <3 x i16> @test_mad_int16_t3(<3 x i16> noundef %p0, <3 x i16> noundef %p1, <3 x i16> noundef %p2) {
+entry:
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#mul:]] = OpIMul %[[#vec3_16]] %[[#arg0]] %[[#arg1]]
+  ; CHECK: OpIAdd %[[#vec3_16]] %[[#mul]] %[[#arg2]]
+  %3 = mul nsw <3 x i16> %p0, %p1
+  %4 = add nsw <3 x i16> %3, %p2
+  ret <3 x i16> %4
+}
+
+define spir_func noundef <4 x i16> @test_mad_int16_t4(<4 x i16> noundef %p0, <4 x i16> noundef %p1, <4 x i16> noundef %p2) {
+entry:
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#mul:]] = OpIMul %[[#vec4_16]] %[[#arg0]] %[[#arg1]]
+  ; CHECK: OpIAdd %[[#vec4_16]] %[[#mul]] %[[#arg2]]
+  %3 = mul nsw <4 x i16> %p0, %p1
+  %4 = add nsw <4 x i16> %3, %p2
+  ret <4 x i16> %4
+}
+define spir_func noundef i32 @test_mad_int(i32 noundef %p0, i32 noundef %p1, i32 noundef %p2) {
+entry:
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#mul:]] = OpIMul %[[#int_32]] %[[#arg0]] %[[#arg1]]
+  ; CHECK: OpIAdd %[[#int_32]] %[[#mul]] %[[#arg2]]
+  %3 = mul nsw i32 %p0, %p1
+  %4 = add nsw i32 %3, %p2
+  ret i32 %4
+}
+
+define spir_func noundef <2 x i32> @test_mad_int2(<2 x i32> noundef %p0, <2 x i32> noundef %p1, <2 x i32> noundef %p2) {
+entry:
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#mul:]] = OpIMul %[[#vec2_32]] %[[#arg0]] %[[#arg1]]
+  ; CHECK: OpIAdd %[[#vec2_32]] %[[#mul]] %[[#arg2]]
+  %3 = mul nsw <2 x i32> %p0, %p1
+  %4 = add nsw <2 x i32> %3, %p2
+  ret <2 x i32> %4
+}
+
+define spir_func noundef <3 x i32> @test_mad_int3(<3 x i32> noundef %p0, <3 x i32> noundef %p1, <3 x i32> noundef %p2) {
+entry:
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#mul:]] = OpIMul %[[#vec3_32]] %[[#arg0]] %[[#arg1]]
+  ; CHECK: OpIAdd %[[#vec3_32]] %[[#mul]] %[[#arg2]]
+  %3 = mul nsw <3 x i32> %p0, %p1
+  %4 = add nsw <3 x i32> %3, %p2
+  ret <3 x i32> %4
+}
+
+define spir_func noundef <4 x i32> @test_mad_int4(<4 x i32> noundef %p0, <4 x i32> noundef %p1, <4 x i32> noundef %p2) {
+entry:
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#mul:]] = OpIMul %[[#vec4_32]] %[[#arg0]] %[[#arg1]]
+  ; CHECK: OpIAdd %[[#vec4_32]] %[[#mul]] %[[#arg2]]
+  %3 = mul nsw <4 x i32> %p0, %p1
+  %4 = add nsw <4 x i32> %3, %p2
+  ret <4 x i32> %4
+}
+
+define spir_func noundef i64 @test_mad_int64_t(i64 noundef %p0, i64 noundef %p1, i64 noundef %p2) {
+entry:
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#mul:]] = OpIMul %[[#int_64]] %[[#arg0]] %[[#arg1]]
+  ; CHECK: OpIAdd %[[#int_64]] %[[#mul]] %[[#arg2]]
+  %3 = mul nsw i64 %p0, %p1
+  %4 = add nsw i64 %3, %p2
+  ret i64 %4
+}
+
+define spir_func noundef <2 x i64> @test_mad_int64_t2(<2 x i64> noundef %p0, <2 x i64> noundef %p1, <2 x i64> noundef %p2) {
+entry:
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#mul:]] = OpIMul %[[#vec2_64]] %[[#arg0]] %[[#arg1]]
+  ; CHECK: OpIAdd %[[#vec2_64]] %[[#mul]] %[[#arg2]]
+  %3 = mul nsw <2 x i64> %p0, %p1
+  %4 = add nsw <2 x i64> %3, %p2
+  ret <2 x i64> %4
+}
+
+define spir_func noundef <3 x i64> @test_mad_int64_t3(<3 x i64> noundef %p0, <3 x i64> noundef %p1, <3 x i64> noundef %p2) {
+entry:
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#mul:]] = OpIMul %[[#vec3_64]] %[[#arg0]] %[[#arg1]]
+  ; CHECK: OpIAdd %[[#vec3_64]] %[[#mul]] %[[#arg2]]
+  %3 = mul nsw <3 x i64> %p0, %p1
+  %4 = add nsw <3 x i64> %3, %p2
+  ret <3 x i64> %4
+}
+
+define spir_func noundef <4 x i64> @test_mad_int64_t4(<4 x i64> noundef %p0, <4 x i64> noundef %p1, <4 x i64> noundef %p2) {
+entry:
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#mul:]] = OpIMul %[[#vec4_64]] %[[#arg0]] %[[#arg1]]
+  ; CHECK: OpIAdd %[[#vec4_64]] %[[#mul]] %[[#arg2]]
+  %3 = mul nsw <4 x i64> %p0, %p1
+  %4 = add nsw <4 x i64> %3, %p2
+  ret <4 x i64> %4
+}
+
+define spir_func noundef i32 @test_mad_uint(i32 noundef %p0, i32 noundef %p1, i32 noundef %p2) {
+entry:
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#mul:]] = OpIMul %[[#int_32]] %[[#arg0]] %[[#arg1]]
+  ; CHECK: OpIAdd %[[#int_32]] %[[#mul]] %[[#arg2]]
+  %3 = mul nuw i32 %p0, %p1
+  %4 = add nuw i32 %3, %p2
+  ret i32 %4
+}
+
+define spir_func noundef <2 x i32> @test_mad_uint2(<2 x i32> noundef %p0, <2 x i32> noundef %p1, <2 x i32> noundef %p2) {
+entry:
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#mul:]] = OpIMul %[[#vec2_32]] %[[#arg0]] %[[#arg1]]
+  ; CHECK: OpIAdd %[[#vec2_32]] %[[#mul]] %[[#arg2]]
+  %3 = mul nuw <2 x i32> %p0, %p1
+  %4 = add nuw <2 x i32> %3, %p2
+  ret <2 x i32> %4
+}
+
+define spir_func noundef <3 x i32> @test_mad_uint3(<3 x i32> noundef %p0, <3 x i32> noundef %p1, <3 x i32> noundef %p2) {
+entry:
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#mul:]] = OpIMul %[[#vec3_32]] %[[#arg0]] %[[#arg1]]
+  ; CHECK: OpIAdd %[[#vec3_32]] %[[#mul]] %[[#arg2]]
+  %3 = mul nuw <3 x i32> %p0, %p1
+  %4 = add nuw <3 x i32> %3, %p2
+  ret <3 x i32> %4
+}
+
+define spir_func noundef <4 x i32> @test_mad_uint4(<4 x i32> noundef %p0, <4 x i32> noundef %p1, <4 x i32> noundef %p2) {
+entry:
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#mul:]] = OpIMul %[[#vec4_32]] %[[#arg0]] %[[#arg1]]
+  ; CHECK: OpIAdd %[[#vec4_32]] %[[#mul]] %[[#arg2]]
+  %3 = mul nuw <4 x i32> %p0, %p1
+  %4 = add nuw <4 x i32> %3, %p2
+  ret <4 x i32> %4
+}
+
+define spir_func noundef i64 @test_mad_uint64_t(i64 noundef %p0, i64 noundef %p1, i64 noundef %p2) {
+entry:
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#mul:]] = OpIMul %[[#int_64]] %[[#arg0]] %[[#arg1]]
+  ; CHECK: OpIAdd %[[#int_64]] %[[#mul]] %[[#arg2]]
+  %3 = mul nuw i64 %p0, %p1
+  %4 = add nuw i64 %3, %p2
+  ret i64 %4
+}
+
+define spir_func noundef <2 x i64> @test_mad_uint64_t2(<2 x i64> noundef %p0, <2 x i64> noundef %p1, <2 x i64> noundef %p2) {
+entry:
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#mul:]] = OpIMul %[[#vec2_64]] %[[#arg0]] %[[#arg1]]
+  ; CHECK: OpIAdd %[[#vec2_64]] %[[#mul]] %[[#arg2]]
+  %3 = mul nuw <2 x i64> %p0, %p1
+  %4 = add nuw <2 x i64> %3, %p2
+  ret <2 x i64> %4
+}
+
+define spir_func noundef <3 x i64> @test_mad_uint64_t3(<3 x i64> noundef %p0, <3 x i64> noundef %p1, <3 x i64> noundef %p2) {
+entry:
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#mul:]] = OpIMul %[[#vec3_64]] %[[#arg0]] %[[#arg1]]
+  ; CHECK: OpIAdd %[[#vec3_64]] %[[#mul]] %[[#arg2]]
+  %3 = mul nuw <3 x i64> %p0, %p1
+  %4 = add nuw <3 x i64> %3, %p2
+  ret <3 x i64> %4
+}
+
+define spir_func noundef <4 x i64> @test_mad_uint64_t4(<4 x i64> noundef %p0, <4 x i64> noundef %p1, <4 x i64> noundef %p2) {
+entry:
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#mul:]] = OpIMul %[[#vec4_64]] %[[#arg0]] %[[#arg1]]
+  ; CHECK: OpIAdd %[[#vec4_64]] %[[#mul]] %[[#arg2]]
+  %3 = mul nuw <4 x i64> %p0, %p1
+  %4 = add nuw <4 x i64> %3, %p2
+  ret <4 x i64> %4
+}