[clang] [llvm] [SPIRV][HLSL] Add mad intrinsic lowering for spirv (PR #89130)

via cfe-commits cfe-commits at lists.llvm.org
Wed Apr 17 12:58:07 PDT 2024


llvmbot wrote:


@llvm/pr-subscribers-hlsl

Author: Farzon Lotfi (farzonl)

Changes:

- `clang/lib/CodeGen/CGBuiltin.cpp` - Add a generic multiply-add implementation and tie the DXIL implementation to its target.
- `clang/lib/CodeGen/CGHLSLRuntime.cpp` - The existing target switching only lets you swap on intrinsics, but there is no SPIR-V intrinsic here, so we needed another way to switch on behavior. The idea is to use lambdas that capture the values each emitter needs, keeping the dispatch call itself generic (see the sketch after this list).
- `clang/lib/CodeGen/CGHLSLRuntime.h` - Add the function signature for the per-target behavior switching.
- `clang/lib/CodeGen/CodeGenFunction.h` - Add `EmitHLSLMadIntrinsic` so the mad lowering still has access to underlying `CodeGenFunction` members like `Builder`.
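
For readers who want the shape of the change without wading through the diff, here is a condensed sketch of the dispatch pattern (paraphrasing the patch below; the lambda names are shortened, `M`, `A`, `B` are the already-emitted operands, and only the signed-integer path is shown):

```cpp
// Each emitter is a lambda that captures the operands and the Builder,
// so the dispatch helper never needs to know what it is emitting.
auto EmitDirectX = [&]() -> llvm::Value * {
  // DXIL has a dedicated intrinsic for integer mad.
  return Builder.CreateIntrinsic(M->getType(), llvm::Intrinsic::dx_imad,
                                 {M, A, B}, nullptr, "dx.imad");
};
auto EmitGeneric = [&]() -> llvm::Value * {
  // SPIR-V has no mad intrinsic, so expand to mul + add with nsw flags.
  llvm::Value *Mul = Builder.CreateNSWMul(M, A);
  return Builder.CreateNSWAdd(Mul, B);
};
// emitHLSLIntrinsic switches on the target triple: dxil takes the first
// emitter, spirv the second, and anything else the third. SPIR-V simply
// reuses the generic expansion here.
return CGM.getHLSLRuntime().emitHLSLIntrinsic(EmitDirectX, EmitGeneric,
                                              EmitGeneric);
```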

resolves #88944

---

Patch is 42.04 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/89130.diff


8 Files Affected:

- (modified) clang/lib/CodeGen/CGBuiltin.cpp (+35-17) 
- (modified) clang/lib/CodeGen/CGHLSLRuntime.cpp (+15) 
- (modified) clang/lib/CodeGen/CGHLSLRuntime.h (+8-1) 
- (modified) clang/lib/CodeGen/CodeGenFunction.h (+1) 
- (modified) clang/test/CodeGenHLSL/builtins/mad.hlsl (+148-92) 
- (modified) llvm/test/CodeGen/DirectX/fmad.ll (+6-6) 
- (modified) llvm/test/CodeGen/SPIRV/hlsl-intrinsics/fmad.ll (+6-6) 
- (added) llvm/test/CodeGen/SPIRV/hlsl-intrinsics/imad.ll (+302) 


``````````diff
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index a05874e63c73c2..3f9f5aa9b42ed0 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -18186,6 +18186,40 @@ Intrinsic::ID getDotProductIntrinsic(QualType QT, int elementCount) {
   return Intrinsic::dx_udot;
 }
 
+Value *CodeGenFunction::EmitHLSLMadIntrinsic(const CallExpr *E) {
+  Value *M = EmitScalarExpr(E->getArg(0));
+  Value *A = EmitScalarExpr(E->getArg(1));
+  Value *B = EmitScalarExpr(E->getArg(2));
+  if (E->getArg(0)->getType()->hasFloatingRepresentation())
+    return Builder.CreateIntrinsic(
+        /*ReturnType*/ M->getType(), Intrinsic::fmuladd,
+        ArrayRef<Value *>{M, A, B}, nullptr, "hlsl.fmad");
+
+  auto EmitHLSLIMadDirectX = [E, M, A, B, this]() -> Value * {
+    if (E->getArg(0)->getType()->hasSignedIntegerRepresentation())
+      return Builder.CreateIntrinsic(
+          /*ReturnType*/ M->getType(), Intrinsic::dx_imad,
+          ArrayRef<Value *>{M, A, B}, nullptr, "dx.imad");
+    assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation());
+    return Builder.CreateIntrinsic(
+        /*ReturnType=*/M->getType(), Intrinsic::dx_umad,
+        ArrayRef<Value *>{M, A, B}, nullptr, "dx.umad");
+  };
+
+  auto EmitHLSLIMadGeneric = [E, M, A, B, this]() -> Value * {
+    if (E->getArg(0)->getType()->hasSignedIntegerRepresentation()) {
+      Value *Mul = Builder.CreateNSWMul(M, A);
+      return Builder.CreateNSWAdd(Mul, B);
+    }
+    assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation());
+    Value *Mul = Builder.CreateNUWMul(M, A);
+    return Builder.CreateNUWAdd(Mul, B);
+  };
+
+  return CGM.getHLSLRuntime().emitHLSLIntrinsic(
+      EmitHLSLIMadDirectX, EmitHLSLIMadGeneric, EmitHLSLIMadGeneric);
+}
+
 Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
                                             const CallExpr *E) {
   if (!getLangOpts().HLSL)
@@ -18291,23 +18325,7 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
                                    ArrayRef<Value *>{Op0}, nullptr, "dx.isinf");
   }
   case Builtin::BI__builtin_hlsl_mad: {
-    Value *M = EmitScalarExpr(E->getArg(0));
-    Value *A = EmitScalarExpr(E->getArg(1));
-    Value *B = EmitScalarExpr(E->getArg(2));
-    if (E->getArg(0)->getType()->hasFloatingRepresentation()) {
-      return Builder.CreateIntrinsic(
-          /*ReturnType*/ M->getType(), Intrinsic::fmuladd,
-          ArrayRef<Value *>{M, A, B}, nullptr, "dx.fmad");
-    }
-    if (E->getArg(0)->getType()->hasSignedIntegerRepresentation()) {
-      return Builder.CreateIntrinsic(
-          /*ReturnType*/ M->getType(), Intrinsic::dx_imad,
-          ArrayRef<Value *>{M, A, B}, nullptr, "dx.imad");
-    }
-    assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation());
-    return Builder.CreateIntrinsic(
-        /*ReturnType=*/M->getType(), Intrinsic::dx_umad,
-        ArrayRef<Value *>{M, A, B}, nullptr, "dx.umad");
+    return EmitHLSLMadIntrinsic(E);
   }
   case Builtin::BI__builtin_hlsl_elementwise_rcp: {
     Value *Op0 = EmitScalarExpr(E->getArg(0));
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index 5e6a3dd4878f46..1c801a4d1b06a0 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -119,6 +119,21 @@ llvm::Triple::ArchType CGHLSLRuntime::getArch() {
   return CGM.getTarget().getTriple().getArch();
 }
 
+Value *
+CGHLSLRuntime::emitHLSLIntrinsic(llvm::function_ref<Value *()> DxilEmitter,
+                                 llvm::function_ref<Value *()> SPIRVEmitter,
+                                 llvm::function_ref<Value *()> GenericEmitter) {
+  llvm::Triple::ArchType Arch = getArch();
+  switch (Arch) {
+  case llvm::Triple::dxil:
+    return DxilEmitter();
+  case llvm::Triple::spirv:
+    return SPIRVEmitter();
+  default:
+    return GenericEmitter();
+  }
+}
+
 void CGHLSLRuntime::addConstant(VarDecl *D, Buffer &CB) {
   if (D->getStorageClass() == SC_Static) {
     // For static inside cbuffer, take as global static.
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index 506b364f5b2ec7..923cf2140c13f0 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -50,6 +50,7 @@ namespace llvm {
 class GlobalVariable;
 class Function;
 class StructType;
+class Value;
 } // namespace llvm
 
 namespace clang {
@@ -79,7 +80,13 @@ class CGHLSLRuntime {
   //===----------------------------------------------------------------------===//
   // End of reserved area for HLSL intrinsic getters.
   //===----------------------------------------------------------------------===//
-
+  llvm::Value *emitHLSLIntrinsic(
+      llvm::function_ref<llvm::Value *()> DxilEmitter,
+      llvm::function_ref<llvm::Value *()> SPIRVEmitter,
+      llvm::function_ref<llvm::Value *()> GenericEmitter =
+          []() -> llvm::Value * {
+        llvm_unreachable("Intrinsic not supported by target architecture.");
+      });
   struct BufferResBinding {
     // The ID like 2 in register(b2, space1).
     std::optional<unsigned> Reg;
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index ff1873325d409f..f0e923949f2047 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4548,6 +4548,7 @@ class CodeGenFunction : public CodeGenTypeCache {
   llvm::Value *EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
   llvm::Value *EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
   llvm::Value *EmitHLSLBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
+  llvm::Value *EmitHLSLMadIntrinsic(const CallExpr *E);
   llvm::Value *EmitScalarOrConstFoldImmArg(unsigned ICEArguments, unsigned Idx,
                                            const CallExpr *E);
   llvm::Value *EmitSystemZBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
diff --git a/clang/test/CodeGenHLSL/builtins/mad.hlsl b/clang/test/CodeGenHLSL/builtins/mad.hlsl
index 749eac6d64736d..bd4f38067a5c59 100644
--- a/clang/test/CodeGenHLSL/builtins/mad.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/mad.hlsl
@@ -1,182 +1,238 @@
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
 // RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ 
-// RUN:   --check-prefixes=CHECK,NATIVE_HALF
+// RUN:   --check-prefixes=CHECK,DXIL_CHECK,DXIL_NATIVE_HALF,NATIVE_HALF
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
-// RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF
+// RUN:   -o - | FileCheck %s --check-prefixes=CHECK,DXIL_CHECK,NO_HALF
+
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   spirv-unknown-vulkan-compute %s -fnative-half-type \
+// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ 
+// RUN:   --check-prefixes=CHECK,NATIVE_HALF,SPIR_NATIVE_HALF,SPIR_CHECK
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \
+// RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF,SPIR_CHECK
 
 #ifdef __HLSL_ENABLE_16_BIT
-// NATIVE_HALF: %dx.umad = call i16 @llvm.dx.umad.i16(i16 %0, i16 %1, i16 %2)
-// NATIVE_HALF: ret i16 %dx.umad
+// DXIL_NATIVE_HALF: %dx.umad = call i16 @llvm.dx.umad.i16(i16 %0, i16 %1, i16 %2)
+// DXIL_NATIVE_HALF: ret i16 %dx.umad
+// SPIR_NATIVE_HALF: mul nuw i16 %{{.*}}, %{{.*}}
+// SPIR_NATIVE_HALF: add nuw i16 %{{.*}}, %{{.*}}
 uint16_t test_mad_uint16_t(uint16_t p0, uint16_t p1, uint16_t p2) { return mad(p0, p1, p2); }
 
-// NATIVE_HALF: %dx.umad = call <2 x i16>  @llvm.dx.umad.v2i16(<2 x i16> %0, <2 x i16> %1, <2 x i16> %2)
-// NATIVE_HALF: ret <2 x i16> %dx.umad
+// DXIL_NATIVE_HALF: %dx.umad = call <2 x i16>  @llvm.dx.umad.v2i16(<2 x i16> %0, <2 x i16> %1, <2 x i16> %2)
+// DXIL_NATIVE_HALF: ret <2 x i16> %dx.umad
+// SPIR_NATIVE_HALF: mul nuw <2 x i16>  %{{.*}}, %{{.*}}
+// SPIR_NATIVE_HALF: add nuw <2 x i16>  %{{.*}}, %{{.*}}
 uint16_t2 test_mad_uint16_t2(uint16_t2 p0, uint16_t2 p1, uint16_t2 p2) { return mad(p0, p1, p2); }
 
-// NATIVE_HALF: %dx.umad = call <3 x i16>  @llvm.dx.umad.v3i16(<3 x i16> %0, <3 x i16> %1, <3 x i16> %2)
-// NATIVE_HALF: ret <3 x i16> %dx.umad
+// DXIL_NATIVE_HALF: %dx.umad = call <3 x i16>  @llvm.dx.umad.v3i16(<3 x i16> %0, <3 x i16> %1, <3 x i16> %2)
+// DXIL_NATIVE_HALF: ret <3 x i16> %dx.umad
+// SPIR_NATIVE_HALF: mul nuw <3 x i16>  %{{.*}}, %{{.*}}
+// SPIR_NATIVE_HALF: add nuw <3 x i16>  %{{.*}}, %{{.*}}
 uint16_t3 test_mad_uint16_t3(uint16_t3 p0, uint16_t3 p1, uint16_t3 p2) { return mad(p0, p1, p2); }
 
-// NATIVE_HALF: %dx.umad = call <4 x i16>  @llvm.dx.umad.v4i16(<4 x i16> %0, <4 x i16> %1, <4 x i16> %2)
-// NATIVE_HALF: ret <4 x i16> %dx.umad
+// DXIL_NATIVE_HALF: %dx.umad = call <4 x i16>  @llvm.dx.umad.v4i16(<4 x i16> %0, <4 x i16> %1, <4 x i16> %2)
+// DXIL_NATIVE_HALF: ret <4 x i16> %dx.umad
+// SPIR_NATIVE_HALF: mul nuw <4 x i16>  %{{.*}}, %{{.*}}
+// SPIR_NATIVE_HALF: add nuw <4 x i16>  %{{.*}}, %{{.*}}
 uint16_t4 test_mad_uint16_t4(uint16_t4 p0, uint16_t4 p1, uint16_t4 p2) { return mad(p0, p1, p2); }
 
-// NATIVE_HALF: %dx.imad = call i16 @llvm.dx.imad.i16(i16 %0, i16 %1, i16 %2)
-// NATIVE_HALF: ret i16 %dx.imad
+// DXIL_NATIVE_HALF: %dx.imad = call i16 @llvm.dx.imad.i16(i16 %0, i16 %1, i16 %2)
+// DXIL_NATIVE_HALF: ret i16 %dx.imad
+// SPIR_NATIVE_HALF: mul nsw i16 %{{.*}}, %{{.*}}
+// SPIR_NATIVE_HALF: add nsw i16 %{{.*}}, %{{.*}}
 int16_t test_mad_int16_t(int16_t p0, int16_t p1, int16_t p2) { return mad(p0, p1, p2); }
 
-// NATIVE_HALF: %dx.imad = call <2 x i16>  @llvm.dx.imad.v2i16(<2 x i16> %0, <2 x i16> %1, <2 x i16> %2)
-// NATIVE_HALF: ret <2 x i16> %dx.imad
+// DXIL_NATIVE_HALF: %dx.imad = call <2 x i16>  @llvm.dx.imad.v2i16(<2 x i16> %0, <2 x i16> %1, <2 x i16> %2)
+// DXIL_NATIVE_HALF: ret <2 x i16> %dx.imad
+// SPIR_NATIVE_HALF: mul nsw <2 x i16>  %{{.*}}, %{{.*}}
+// SPIR_NATIVE_HALF: add nsw <2 x i16>  %{{.*}}, %{{.*}}
 int16_t2 test_mad_int16_t2(int16_t2 p0, int16_t2 p1, int16_t2 p2) { return mad(p0, p1, p2); }
 
-// NATIVE_HALF: %dx.imad = call <3 x i16>  @llvm.dx.imad.v3i16(<3 x i16> %0, <3 x i16> %1, <3 x i16> %2)
-// NATIVE_HALF: ret <3 x i16> %dx.imad
+// DXIL_NATIVE_HALF: %dx.imad = call <3 x i16>  @llvm.dx.imad.v3i16(<3 x i16> %0, <3 x i16> %1, <3 x i16> %2)
+// DXIL_NATIVE_HALF: ret <3 x i16> %dx.imad
+// SPIR_NATIVE_HALF: mul nsw <3 x i16>  %{{.*}}, %{{.*}}
+// SPIR_NATIVE_HALF: add nsw <3 x i16>  %{{.*}}, %{{.*}}
 int16_t3 test_mad_int16_t3(int16_t3 p0, int16_t3 p1, int16_t3 p2) { return mad(p0, p1, p2); }
 
-// NATIVE_HALF: %dx.imad = call <4 x i16>  @llvm.dx.imad.v4i16(<4 x i16> %0, <4 x i16> %1, <4 x i16> %2)
-// NATIVE_HALF: ret <4 x i16> %dx.imad
+// DXIL_NATIVE_HALF: %dx.imad = call <4 x i16>  @llvm.dx.imad.v4i16(<4 x i16> %0, <4 x i16> %1, <4 x i16> %2)
+// DXIL_NATIVE_HALF: ret <4 x i16> %dx.imad
+// SPIR_NATIVE_HALF: mul nsw <4 x i16>  %{{.*}}, %{{.*}}
+// SPIR_NATIVE_HALF: add nsw <4 x i16>  %{{.*}}, %{{.*}}
 int16_t4 test_mad_int16_t4(int16_t4 p0, int16_t4 p1, int16_t4 p2) { return mad(p0, p1, p2); }
 #endif // __HLSL_ENABLE_16_BIT
 
-// NATIVE_HALF: %dx.fmad = call half @llvm.fmuladd.f16(half %0, half %1, half %2)
-// NATIVE_HALF: ret half %dx.fmad
-// NO_HALF: %dx.fmad = call float @llvm.fmuladd.f32(float %0, float %1, float %2)
-// NO_HALF: ret float %dx.fmad
+// NATIVE_HALF: %hlsl.fmad = call half @llvm.fmuladd.f16(half %0, half %1, half %2)
+// NATIVE_HALF: ret half %hlsl.fmad
+// NO_HALF: %hlsl.fmad = call float @llvm.fmuladd.f32(float %0, float %1, float %2)
+// NO_HALF: ret float %hlsl.fmad
 half test_mad_half(half p0, half p1, half p2) { return mad(p0, p1, p2); }
 
-// NATIVE_HALF: %dx.fmad = call <2 x half>  @llvm.fmuladd.v2f16(<2 x half> %0, <2 x half> %1, <2 x half> %2)
-// NATIVE_HALF: ret <2 x half> %dx.fmad
-// NO_HALF: %dx.fmad = call <2 x float>  @llvm.fmuladd.v2f32(<2 x float> %0, <2 x float> %1, <2 x float> %2)
-// NO_HALF: ret <2 x float> %dx.fmad
+// NATIVE_HALF: %hlsl.fmad = call <2 x half>  @llvm.fmuladd.v2f16(<2 x half> %0, <2 x half> %1, <2 x half> %2)
+// NATIVE_HALF: ret <2 x half> %hlsl.fmad
+// NO_HALF: %hlsl.fmad = call <2 x float>  @llvm.fmuladd.v2f32(<2 x float> %0, <2 x float> %1, <2 x float> %2)
+// NO_HALF: ret <2 x float> %hlsl.fmad
 half2 test_mad_half2(half2 p0, half2 p1, half2 p2) { return mad(p0, p1, p2); }
 
-// NATIVE_HALF: %dx.fmad = call <3 x half>  @llvm.fmuladd.v3f16(<3 x half> %0, <3 x half> %1, <3 x half> %2)
-// NATIVE_HALF: ret <3 x half> %dx.fmad
-// NO_HALF: %dx.fmad = call <3 x float>  @llvm.fmuladd.v3f32(<3 x float> %0, <3 x float> %1, <3 x float> %2)
-// NO_HALF: ret <3 x float> %dx.fmad
+// NATIVE_HALF: %hlsl.fmad = call <3 x half>  @llvm.fmuladd.v3f16(<3 x half> %0, <3 x half> %1, <3 x half> %2)
+// NATIVE_HALF: ret <3 x half> %hlsl.fmad
+// NO_HALF: %hlsl.fmad = call <3 x float>  @llvm.fmuladd.v3f32(<3 x float> %0, <3 x float> %1, <3 x float> %2)
+// NO_HALF: ret <3 x float> %hlsl.fmad
 half3 test_mad_half3(half3 p0, half3 p1, half3 p2) { return mad(p0, p1, p2); }
 
-// NATIVE_HALF: %dx.fmad = call <4 x half>  @llvm.fmuladd.v4f16(<4 x half> %0, <4 x half> %1, <4 x half> %2)
-// NATIVE_HALF: ret <4 x half> %dx.fmad
-// NO_HALF: %dx.fmad = call <4 x float>  @llvm.fmuladd.v4f32(<4 x float> %0, <4 x float> %1, <4 x float> %2)
-// NO_HALF: ret <4 x float> %dx.fmad
+// NATIVE_HALF: %hlsl.fmad = call <4 x half>  @llvm.fmuladd.v4f16(<4 x half> %0, <4 x half> %1, <4 x half> %2)
+// NATIVE_HALF: ret <4 x half> %hlsl.fmad
+// NO_HALF: %hlsl.fmad = call <4 x float>  @llvm.fmuladd.v4f32(<4 x float> %0, <4 x float> %1, <4 x float> %2)
+// NO_HALF: ret <4 x float> %hlsl.fmad
 half4 test_mad_half4(half4 p0, half4 p1, half4 p2) { return mad(p0, p1, p2); }
 
-// CHECK: %dx.fmad = call float @llvm.fmuladd.f32(float %0, float %1, float %2)
-// CHECK: ret float %dx.fmad
+// CHECK: %hlsl.fmad = call float @llvm.fmuladd.f32(float %0, float %1, float %2)
+// CHECK: ret float %hlsl.fmad
 float test_mad_float(float p0, float p1, float p2) { return mad(p0, p1, p2); }
 
-// CHECK: %dx.fmad = call <2 x float>  @llvm.fmuladd.v2f32(<2 x float> %0, <2 x float> %1, <2 x float> %2)
-// CHECK: ret <2 x float> %dx.fmad
+// CHECK: %hlsl.fmad = call <2 x float>  @llvm.fmuladd.v2f32(<2 x float> %0, <2 x float> %1, <2 x float> %2)
+// CHECK: ret <2 x float> %hlsl.fmad
 float2 test_mad_float2(float2 p0, float2 p1, float2 p2) { return mad(p0, p1, p2); }
 
-// CHECK: %dx.fmad = call <3 x float>  @llvm.fmuladd.v3f32(<3 x float> %0, <3 x float> %1, <3 x float> %2)
-// CHECK: ret <3 x float> %dx.fmad
+// CHECK: %hlsl.fmad = call <3 x float>  @llvm.fmuladd.v3f32(<3 x float> %0, <3 x float> %1, <3 x float> %2)
+// CHECK: ret <3 x float> %hlsl.fmad
 float3 test_mad_float3(float3 p0, float3 p1, float3 p2) { return mad(p0, p1, p2); }
 
-// CHECK: %dx.fmad = call <4 x float>  @llvm.fmuladd.v4f32(<4 x float> %0, <4 x float> %1, <4 x float> %2)
-// CHECK: ret <4 x float> %dx.fmad
+// CHECK: %hlsl.fmad = call <4 x float>  @llvm.fmuladd.v4f32(<4 x float> %0, <4 x float> %1, <4 x float> %2)
+// CHECK: ret <4 x float> %hlsl.fmad
 float4 test_mad_float4(float4 p0, float4 p1, float4 p2) { return mad(p0, p1, p2); }
 
-// CHECK: %dx.fmad = call double @llvm.fmuladd.f64(double %0, double %1, double %2)
-// CHECK: ret double %dx.fmad
+// CHECK: %hlsl.fmad = call double @llvm.fmuladd.f64(double %0, double %1, double %2)
+// CHECK: ret double %hlsl.fmad
 double test_mad_double(double p0, double p1, double p2) { return mad(p0, p1, p2); }
 
-// CHECK: %dx.fmad = call <2 x double>  @llvm.fmuladd.v2f64(<2 x double> %0, <2 x double> %1, <2 x double> %2)
-// CHECK: ret <2 x double> %dx.fmad
+// CHECK: %hlsl.fmad = call <2 x double>  @llvm.fmuladd.v2f64(<2 x double> %0, <2 x double> %1, <2 x double> %2)
+// CHECK: ret <2 x double> %hlsl.fmad
 double2 test_mad_double2(double2 p0, double2 p1, double2 p2) { return mad(p0, p1, p2); }
 
-// CHECK: %dx.fmad = call <3 x double>  @llvm.fmuladd.v3f64(<3 x double> %0, <3 x double> %1, <3 x double> %2)
-// CHECK: ret <3 x double> %dx.fmad
+// CHECK: %hlsl.fmad = call <3 x double>  @llvm.fmuladd.v3f64(<3 x double> %0, <3 x double> %1, <3 x double> %2)
+// CHECK: ret <3 x double> %hlsl.fmad
 double3 test_mad_double3(double3 p0, double3 p1, double3 p2) { return mad(p0, p1, p2); }
 
-// CHECK: %dx.fmad = call <4 x double>  @llvm.fmuladd.v4f64(<4 x double> %0, <4 x double> %1, <4 x double> %2)
-// CHECK: ret <4 x double> %dx.fmad
+// CHECK: %hlsl.fmad = call <4 x double>  @llvm.fmuladd.v4f64(<4 x double> %0, <4 x double> %1, <4 x double> %2)
+// CHECK: ret <4 x double> %hlsl.fmad
 double4 test_mad_double4(double4 p0, double4 p1, double4 p2) { return mad(p0, p1, p2); }
 
-// CHECK: %dx.imad = call i32 @llvm.dx.imad.i32(i32 %0, i32 %1, i32 %2)
-// CHECK: ret i32 %dx.imad
+// DXIL_CHECK: %dx.imad = call i32 @llvm.dx.imad.i32(i32 %0, i32 %1, i32 %2)
+// DXIL_CHECK: ret i32 %dx.imad
+// SPIR_CHECK: mul nsw i32 %{{.*}}, %{{.*}}
+// SPIR_CHECK: add nsw i32 %{{.*}}, %{{.*}}
 int test_mad_int(int p0, int p1, int p2) { return mad(p0, p1, p2); }
 
-// CHECK: %dx.imad = call <2 x i32>  @llvm.dx.imad.v2i32(<2 x i32> %0, <2 x i32> %1, <2 x i32> %2)
-// CHECK: ret <2 x i32> %dx.imad
+// DXIL_CHECK: %dx.imad = call <2 x i32>  @llvm.dx.imad.v2i32(<2 x i32> %0, <2 x i32> %1, <2 x i32> %2)
+// DXIL_CHECK: ret <2 x i32> %dx.imad
+// SPIR_CHECK: mul nsw <2 x i32>  %{{.*}}, %{{.*}}
+// SPIR_CHECK: add nsw <2 x i32>  %{{.*}}, %{{.*}}
 int2 test_mad_int2(int2 p0, int2 p1, int2 p2) { return mad(p0, p1, p2); }
 
-// CHECK: %dx.imad = call <3 x i32>  @llvm.dx.imad.v3i32(<3 x i32> %0, <3 x i32> %1, <3 x i32> %2)
-// CHECK: ret <3 x i32> %dx.imad
+// DXIL_CHECK: %dx.imad = call <3 x i32>  @llvm.dx.imad.v3i32(<3 x i32> %0, <3 x i32> %1, <3 x i32> %2)
+// DXIL_CHECK: ret <3 x i32> %dx.imad
+// SPIR_CHECK: mul nsw <3 x i32>  %{{.*}}, %{{.*}}
+// SPIR_CHECK: add nsw <3 x i32>  %{{.*}}, %{{.*}}
 int3 test_mad_int3(int3 p0, int3 p1, int3 p2) { return mad(p0, p1, p2); }
 
-// CHECK: %dx.imad = call <4 x i32>  @llvm.dx.imad.v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2)
-// CHECK: ret <4 x i32> %dx.imad
+// DXIL_CHECK: %dx.imad = call <4 x i32>  @llvm.dx.imad.v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2)
+// DXIL_CHECK: ret <4 x i32> %dx.imad
+// SPIR_CHECK: mul nsw <4 x i32>  %{{.*}}, %{{.*}}
+// SPIR_CHECK: add nsw <4 x i32>  %{{.*}}, %{{.*}}
 int4 test_mad_int4(int4 p0, int4 p1, int4 p2) { return mad(p0, p1, p2); }
 
-// CHECK: %dx.imad = call i64 @llvm.dx.imad.i64(i64 %0, i64 %1, i64 %2)
-// CHECK: ret i64 %dx.imad
+// DXIL_CHECK: %dx.imad = call i64 @llvm.dx.imad.i64(i64 %0, i64 %1, i64 %2)
+// DXIL_CHECK: ret i64 %dx.imad
+// SPIR_CHECK: mul nsw i64 %{{.*}}, %{{.*}}
+// SPIR_CHECK: add nsw i64 %{{.*}}, %{{.*}}
 int64_t test_mad_int64_t(int64_t p0, int64_t p1, int64_t p2) { return mad(p0, p1, p2); }
 
-// CHECK: %dx.imad = call <2 x i64>  @llvm.dx.imad.v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2)
-// CHECK: ret <2 x i64> %dx.imad
+// DXIL_CHECK: %dx.imad = call <2 x i64>  @llvm.dx.imad.v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2)
+// DXIL_CHECK: ret <2 x i64> %dx.imad
+// SPIR_CHECK: mul nsw <2 x i64>  %{{.*}}, %{{.*}}
+// SPIR_CHECK: add nsw <2 x i64>  %{{.*}}, %{{.*}}
 int64_t2 test_mad_int64_t2(int64_t2 p0, int64_t2 p1, int64_t2 p2) { return mad(p0, p1, p2); }
 
-// CHECK: %dx.imad = call <3 x i64>  @llvm.dx.imad.v3i64(<3 x i64> %0, <3 x i64> %1, <3 x i64> %2)
-// CHECK: ret <3 x i64> %dx.imad
+// DXIL_CHECK: %dx.imad = call <3 x i64>  @llvm.dx.imad.v3i64(<3 x i64> %0, <3 x i64> %1, <3 x i64> %2)
+// DXIL_CHECK: ret <3 x i64> %dx.imad
+// SPIR_CHECK: mul nsw <3 x i64>  %{{.*}}, %{{.*}}
+// SPIR_CHECK: add nsw <3 x i64>  %{{.*}}, %{{.*}}
 int64_t3 test_mad_int64_t3(int64_t3 p0, int64_t3 p1, int64_t3 p2) { return mad(p0, p1, p2); }
 
-// CHECK: %dx.imad = call <4 x i64>  @llvm.dx.imad.v4i64(<4 x i64> %0, <4 x i64> %1, <4 x i64> %2)
-// CHECK: ret <4 x i64> %dx.imad
+// DXIL_CHECK: %dx.imad = call <4 x i64>  @llvm.dx.imad.v4i64(<4 x i64> %0, <4 x i64> %1, <4 x i6...
[truncated]

``````````



https://github.com/llvm/llvm-project/pull/89130

