[clang] bc6c068 - [HLSL] Adding HLSL `clip` function. (#114588)

Thu Nov 14 23:34:12 PST 2024

Author: joaosaffran
Date: 2024-11-14T23:34:07-08:00
New Revision: bc6c0681271788ca7078fb679ac67b56944de1a6

URL: https://github.com/llvm/llvm-project/commit/bc6c0681271788ca7078fb679ac67b56944de1a6
DIFF: https://github.com/llvm/llvm-project/commit/bc6c0681271788ca7078fb679ac67b56944de1a6.diff

LOG: [HLSL] Adding HLSL `clip` function. (#114588)

Adding HLSL `clip` function.
 - adding llvm intrinsic
 - adding sema checks
 - adding dxil lowering
 - ading spirv lowering
 - adding sema tests
 - adding codegen tests
 - adding lowering tests

Closes #99093

---------

Co-authored-by: Joao Saffran <jderezende at microsoft.com>

Added: 
    clang/test/CodeGenHLSL/builtins/clip.hlsl
    clang/test/SemaHLSL/BuiltIns/clip-errors.hlsl
    llvm/test/CodeGen/DirectX/discard.ll
    llvm/test/CodeGen/DirectX/discard_error.ll
    llvm/test/CodeGen/SPIRV/hlsl-intrinsics/discard.ll

Modified: 
    clang/include/clang/Basic/Builtins.td
    clang/lib/CodeGen/CGBuiltin.cpp
    clang/lib/Headers/hlsl/hlsl_intrinsics.h
    clang/lib/Sema/SemaHLSL.cpp
    llvm/docs/SPIRVUsage.rst
    llvm/include/llvm/IR/IntrinsicsDirectX.td
    llvm/include/llvm/IR/IntrinsicsSPIRV.td
    llvm/lib/Target/DirectX/DXIL.td
    llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
    llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
    llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
    llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
    llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index e866605ac05c09..8653474744c58f 100644

--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -4895,6 +4895,12 @@ def HLSLSplitDouble: LangBuiltin<"HLSL_LANG"> {
   let Prototype = "void(...)";
 }
 
+def HLSLClip: LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_elementwise_clip"];
+  let Attributes = [NoThrow, Const];
+  let Prototype = "void(...)";
+}
+
 // Builtins for XRay.
 def XRayCustomEvent : Builtin {
   let Spellings = ["__xray_customevent"];

diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 9e0c0bff0125c0..e03cb6e8c3c861 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -99,6 +99,47 @@ static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size,
   I->addAnnotationMetadata("auto-init");
 }
 
+static Value *handleHlslClip(const CallExpr *E, CodeGenFunction *CGF) {
+  Value *Op0 = CGF->EmitScalarExpr(E->getArg(0));
+
+  Constant *FZeroConst = ConstantFP::getZero(CGF->FloatTy);
+  Value *CMP;
+  Value *LastInstr;
+
+  if (const auto *VecTy = E->getArg(0)->getType()->getAs<clang::VectorType>()) {
+    FZeroConst = ConstantVector::getSplat(
+        ElementCount::getFixed(VecTy->getNumElements()), FZeroConst);
+    auto *FCompInst = CGF->Builder.CreateFCmpOLT(Op0, FZeroConst);
+    CMP = CGF->Builder.CreateIntrinsic(
+        CGF->Builder.getInt1Ty(), CGF->CGM.getHLSLRuntime().getAnyIntrinsic(),
+        {FCompInst}, nullptr);
+  } else
+    CMP = CGF->Builder.CreateFCmpOLT(Op0, FZeroConst);
+
+  if (CGF->CGM.getTarget().getTriple().isDXIL())
+    LastInstr = CGF->Builder.CreateIntrinsic(
+        CGF->VoidTy, llvm::Intrinsic::dx_discard, {CMP}, nullptr);
+  else if (CGF->CGM.getTarget().getTriple().isSPIRV()) {
+    BasicBlock *LT0 = CGF->createBasicBlock("lt0", CGF->CurFn);
+    BasicBlock *End = CGF->createBasicBlock("end", CGF->CurFn);
+
+    CGF->Builder.CreateCondBr(CMP, LT0, End);
+
+    CGF->Builder.SetInsertPoint(LT0);
+
+    CGF->Builder.CreateIntrinsic(CGF->VoidTy, llvm::Intrinsic::spv_discard, {},
+                                 nullptr);
+
+    LastInstr = CGF->Builder.CreateBr(End);
+
+    CGF->Builder.SetInsertPoint(End);
+  } else {
+    llvm_unreachable("Backend Codegen not supported.");
+  }
+
+  return LastInstr;
+}
+
 static Value *handleHlslSplitdouble(const CallExpr *E, CodeGenFunction *CGF) {
   Value *Op0 = CGF->EmitScalarExpr(E->getArg(0));
   const auto *OutArg1 = dyn_cast<HLSLOutArgExpr>(E->getArg(1));
@@ -19208,6 +19249,10 @@ case Builtin::BI__builtin_hlsl_elementwise_isinf: {
            "asuint operands types mismatch");
     return handleHlslSplitdouble(E, this);
   }
+  case Builtin::BI__builtin_hlsl_elementwise_clip:
+    assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
+           "clip operands types mismatch");
+    return handleHlslClip(E, this);
   }
   return nullptr;
 }

diff  --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index 2ee3827d720495..f84f48fc1c122d 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -655,6 +655,23 @@ double3 clamp(double3, double3, double3);
 _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
 double4 clamp(double4, double4, double4);
 
+//===----------------------------------------------------------------------===//
+// clip builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn void clip(T Val)
+/// \brief Discards the current pixel if the specified value is less than zero.
+/// \param Val The input value.
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clip)
+void clip(float);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clip)
+void clip(float2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clip)
+void clip(float3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clip)
+void clip(float4);
+
 //===----------------------------------------------------------------------===//
 // cos builtins
 //===----------------------------------------------------------------------===//

diff  --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 92d6436b6073c7..f4fc0f2ddc27a6 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -2133,6 +2133,14 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
       return true;
     break;
   }
+  case Builtin::BI__builtin_hlsl_elementwise_clip: {
+    if (SemaRef.checkArgCount(TheCall, 1))
+      return true;
+
+    if (CheckScalarOrVector(&SemaRef, TheCall, SemaRef.Context.FloatTy, 0))
+      return true;
+    break;
+  }
   case Builtin::BI__builtin_elementwise_acos:
   case Builtin::BI__builtin_elementwise_asin:
   case Builtin::BI__builtin_elementwise_atan:

diff  --git a/clang/test/CodeGenHLSL/builtins/clip.hlsl b/clang/test/CodeGenHLSL/builtins/clip.hlsl
new file mode 100644
index 00000000000000..63843c151e3ac5
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/clip.hlsl
@@ -0,0 +1,39 @@
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-pixel %s -fnative-half-type -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -finclude-default-header -triple spirv-vulkan-pixel %s -fnative-half-type -emit-llvm -o - | FileCheck %s --check-prefix=SPIRV
+
+
+void test_scalar(float Buf) {
+  // CHECK:      define void @{{.*}}test_scalar{{.*}}(float {{.*}} [[VALP:%.*]])
+  // CHECK:      [[LOAD:%.*]] = load float, ptr [[VALP]].addr, align 4
+  // CHECK-NEXT: [[FCMP:%.*]] = fcmp olt float [[LOAD]], 0.000000e+00
+  // CHECK-NO:   call i1 @llvm.dx.any
+  // CHECK-NEXT: call void @llvm.dx.discard(i1 [[FCMP]])
+  //
+  // SPIRV:      define spir_func void @{{.*}}test_scalar{{.*}}(float {{.*}} [[VALP:%.*]])
+  // SPIRV:      [[LOAD:%.*]] = load float, ptr [[VALP]].addr, align 4
+  // SPIRV-NEXT: [[FCMP:%.*]] = fcmp olt float [[LOAD]], 0.000000e+00
+  // SPIRV-NO:   call i1 @llvm.spv.any
+  // SPIRV-NEXT: br i1 [[FCMP]], label %[[LTL:.*]], label %[[ENDL:.*]]
+  // SPIRV:      [[LTL]]: ; preds = %entry
+  // SPIRV-NEXT: call void @llvm.spv.discard()
+  // SPIRV:      br label %[[ENDL]]
+  clip(Buf);
+}
+
+void test_vector4(float4 Buf) {
+  // CHECK:      define void @{{.*}}test_vector{{.*}}(<4 x float> {{.*}} [[VALP:%.*]])
+  // CHECK:      [[LOAD:%.*]] = load <4 x float>, ptr [[VALP]].addr, align 16
+  // CHECK-NEXT: [[FCMP:%.*]] = fcmp olt <4 x float> [[LOAD]], zeroinitializer
+  // CHECK-NEXT: [[ANYC:%.*]] = call i1 @llvm.dx.any.v4i1(<4 x i1> [[FCMP]])
+  // CHECK-NEXT: call void @llvm.dx.discard(i1 [[ANYC]])
+  //
+  // SPIRV:      define spir_func void @{{.*}}test_vector{{.*}}(<4 x float> {{.*}} [[VALP:%.*]])
+  // SPIRV:      [[LOAD:%.*]] = load <4 x float>, ptr [[VALP]].addr, align 16
+  // SPIRV-NEXT: [[FCMP:%.*]] = fcmp olt <4 x float> [[LOAD]], zeroinitializer
+  // SPIRV-NEXT: [[ANYC:%.*]] = call i1 @llvm.spv.any.v4i1(<4 x i1> [[FCMP]]) 
+  // SPIRV-NEXT: br i1 [[ANYC]], label %[[LTL:.*]], label %[[ENDL:.*]]
+  // SPIRV:      [[LTL]]: ; preds = %entry
+  // SPIRV-NEXT: call void @llvm.spv.discard()
+  // SPIRV-NEXT: br label %[[ENDL]]
+  clip(Buf);
+}

diff  --git a/clang/test/SemaHLSL/BuiltIns/clip-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/clip-errors.hlsl
new file mode 100644
index 00000000000000..871e512e128c54
--- /dev/null
+++ b/clang/test/SemaHLSL/BuiltIns/clip-errors.hlsl
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -verify
+
+
+void test_arg_missing() {
+  __builtin_hlsl_elementwise_clip();
+ // expected-error at -1 {{too few arguments to function call, expected 1, have 0}} 
+}
+
+void test_too_many_args(float p1, float p2) {
+  __builtin_hlsl_elementwise_clip(p1, p2);
+ // expected-error at -1 {{too many arguments to function call, expected 1, have 2}} 
+}
+
+void test_first_arg_type_mismatch(bool p) {
+  __builtin_hlsl_elementwise_clip(p);
+ // expected-error at -1 {{invalid operand of type 'bool' where 'float' or a vector of such type is required}} 
+}
+
+void test_first_arg_type_mismatch_3(half3 p) {
+  __builtin_hlsl_elementwise_clip(p);
+ // expected-error at -1 {{invalid operand of type 'half3' (aka 'vector<half, 3>') where 'float' or a vector of such type is required}} 
+}
+
+void test_first_arg_type_mismatch_3(double p) {
+  __builtin_hlsl_elementwise_clip(p);
+ // expected-error at -1 {{invalid operand of type 'double' where 'float' or a vector of such type is required}} 
+}

diff  --git a/llvm/docs/SPIRVUsage.rst b/llvm/docs/SPIRVUsage.rst
index 277b9c15292c53..70eae5b760427b 100644
--- a/llvm/docs/SPIRVUsage.rst
+++ b/llvm/docs/SPIRVUsage.rst
@@ -149,6 +149,8 @@ list of supported SPIR-V extensions, sorted alphabetically by their extension na
      - Adds atomic min and max instruction on floating-point numbers.
    * - ``SPV_EXT_arithmetic_fence``
      - Adds an instruction that prevents fast-math optimizations between its argument and the expression that contains it.
+   * - ``SPV_EXT_demote_to_helper_invocation``
+     - Adds an instruction that demotes a fragment shader invocation to a helper invocation.
    * - ``SPV_INTEL_arbitrary_precision_integers``
      - Allows generating arbitrary width integer types.
    * - ``SPV_INTEL_bfloat16_conversion``

diff  --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index fe6a5524335501..48a9595f844f05 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -102,6 +102,7 @@ def int_dx_step : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, L
 def int_dx_splitdouble : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMMatchType<0>], 
     [LLVMScalarOrSameVectorWidth<0, llvm_double_ty>], [IntrNoMem]>;
 def int_dx_radians : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+def int_dx_discard : DefaultAttrsIntrinsic<[], [llvm_i1_ty], []>;
 def int_dx_firstbituhigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
 def int_dx_firstbitshigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
 }

diff  --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
index f29eb7ee22b2d2..20e5e026b9281f 100644
--- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td
+++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
@@ -91,6 +91,7 @@ let TargetPrefix = "spv" in {
   def int_spv_sign : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_any_ty], [IntrNoMem]>;
   def int_spv_radians : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty], [IntrNoMem]>;
   def int_spv_group_memory_barrier_with_group_sync : DefaultAttrsIntrinsic<[], [], []>;
+  def int_spv_discard : DefaultAttrsIntrinsic<[], [], []>;
   def int_spv_uclamp : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
   def int_spv_sclamp : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
   def int_spv_nclamp : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;

diff  --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 7b0cb8c41b12ae..1a8e110491cc87 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -770,6 +770,14 @@ def CheckAccessFullyMapped : DXILOp<71, checkAccessFullyMapped> {
   let stages = [Stages<DXIL1_0, [all_stages]>];
 }
 
+def Discard : DXILOp<82, discard> {
+  let Doc = "discard the current pixel";
+  let LLVMIntrinsic = int_dx_discard;
+  let arguments = [Int1Ty];
+  let result = VoidTy;
+  let stages = [Stages<DXIL1_0, [pixel]>];
+}
+
 def ThreadId :  DXILOp<93, threadId> {
   let Doc = "Reads the thread ID";
   let LLVMIntrinsic = int_dx_thread_id;

diff  --git a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
index d6a59addaf2d0a..186bccc481a8a3 100644
--- a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
@@ -30,6 +30,8 @@ static const std::map<std::string, SPIRV::Extension::Extension, std::less<>>
          SPIRV::Extension::Extension::SPV_EXT_shader_atomic_float_min_max},
         {"SPV_EXT_arithmetic_fence",
          SPIRV::Extension::Extension::SPV_EXT_arithmetic_fence},
+        {"SPV_EXT_demote_to_helper_invocation",
+         SPIRV::Extension::Extension::SPV_EXT_demote_to_helper_invocation},
         {"SPV_INTEL_arbitrary_precision_integers",
          SPIRV::Extension::Extension::SPV_INTEL_arbitrary_precision_integers},
         {"SPV_INTEL_cache_controls",

diff  --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
index 130b557f9e8484..fde23d9d0ca5f0 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
+++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
@@ -639,6 +639,7 @@ let isReturn = 1, hasDelaySlot = 0, isBarrier = 0, isTerminator = 1, isNotDuplic
 }
 def OpLifetimeStart: Op<256, (outs), (ins ID:$ptr, i32imm:$sz), "OpLifetimeStart $ptr, $sz">;
 def OpLifetimeStop: Op<257, (outs), (ins ID:$ptr, i32imm:$sz), "OpLifetimeStop $ptr, $sz">;
+def OpDemoteToHelperInvocation: SimpleOp<"OpDemoteToHelperInvocation", 5380>;
 
 // 3.42.18 Atomic Instructions
 

diff  --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 158314d23393ae..1a752e44c35018 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -165,6 +165,9 @@ class SPIRVInstructionSelector : public InstructionSelector {
                  unsigned comparisonOpcode, MachineInstr &I) const;
   bool selectCross(Register ResVReg, const SPIRVType *ResType,
                    MachineInstr &I) const;
+  bool selectDiscard(Register ResVReg, const SPIRVType *ResType,
+                     MachineInstr &I) const;
+
   bool selectICmp(Register ResVReg, const SPIRVType *ResType,
                   MachineInstr &I) const;
   bool selectFCmp(Register ResVReg, const SPIRVType *ResType,
@@ -2183,6 +2186,28 @@ bool SPIRVInstructionSelector::selectSplatVector(Register ResVReg,
   return MIB.constrainAllUses(TII, TRI, RBI);
 }
 
+bool SPIRVInstructionSelector::selectDiscard(Register ResVReg,
+                                             const SPIRVType *ResType,
+                                             MachineInstr &I) const {
+
+  unsigned Opcode;
+
+  if (STI.canUseExtension(
+          SPIRV::Extension::SPV_EXT_demote_to_helper_invocation) ||
+      STI.isAtLeastSPIRVVer(llvm::VersionTuple(1, 6))) {
+    Opcode = SPIRV::OpDemoteToHelperInvocation;
+  } else {
+    Opcode = SPIRV::OpKill;
+    // OpKill must be the last operation of any basic block.
+    MachineInstr *NextI = I.getNextNode();
+    NextI->removeFromParent();
+  }
+
+  MachineBasicBlock &BB = *I.getParent();
+  return BuildMI(BB, I, I.getDebugLoc(), TII.get(Opcode))
+      .constrainAllUses(TII, TRI, RBI);
+}
+
 bool SPIRVInstructionSelector::selectCmp(Register ResVReg,
                                          const SPIRVType *ResType,
                                          unsigned CmpOpc,
@@ -2857,6 +2882,9 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
     selectReadImageIntrinsic(ResVReg, ResType, I);
     return true;
   }
+  case Intrinsic::spv_discard: {
+    return selectDiscard(ResVReg, ResType, I);
+  }
   default: {
     std::string DiagMsg;
     raw_string_ostream OS(DiagMsg);

diff  --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
index 0308c56093064c..551c36daa609fe 100644
--- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
@@ -633,7 +633,8 @@ void RequirementHandler::initAvailableCapabilities(const SPIRVSubtarget &ST) {
   if (ST.isAtLeastSPIRVVer(VersionTuple(1, 6)))
     addAvailableCaps({Capability::DotProduct, Capability::DotProductInputAll,
                       Capability::DotProductInput4x8Bit,
-                      Capability::DotProductInput4x8BitPacked});
+                      Capability::DotProductInput4x8BitPacked,
+                      Capability::DemoteToHelperInvocation});
 
   // Add capabilities enabled by extensions.
   for (auto Extension : ST.getAllAvailableExtensions()) {
@@ -1419,6 +1420,19 @@ void addInstrRequirements(const MachineInstr &MI,
       Reqs.addCapability(SPIRV::Capability::SplitBarrierINTEL);
     }
     break;
+  case SPIRV::OpKill: {
+    Reqs.addCapability(SPIRV::Capability::Shader);
+  } break;
+  case SPIRV::OpDemoteToHelperInvocation:
+    Reqs.addCapability(SPIRV::Capability::DemoteToHelperInvocation);
+
+    if (ST.canUseExtension(
+            SPIRV::Extension::SPV_EXT_demote_to_helper_invocation)) {
+      if (!ST.isAtLeastSPIRVVer(llvm::VersionTuple(1, 6)))
+        Reqs.addExtension(
+            SPIRV::Extension::SPV_EXT_demote_to_helper_invocation);
+    }
+    break;
   case SPIRV::OpSDot:
   case SPIRV::OpUDot:
     AddDotProductRequirements(MI, Reqs, ST);

diff  --git a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
index 5a00a7785b7745..b88f6f5766a053 100644
--- a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
+++ b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
@@ -456,6 +456,7 @@ defm VulkanMemoryModelDeviceScopeKHR : CapabilityOperand<5346, 0, 0, [], []>;
 defm ImageFootprintNV : CapabilityOperand<5282, 0, 0, [], []>;
 defm FragmentBarycentricNV : CapabilityOperand<5284, 0, 0, [], []>;
 defm ComputeDerivativeGroupQuadsNV : CapabilityOperand<5288, 0, 0, [], []>;
+defm DemoteToHelperInvocation : CapabilityOperand<5379, 0x10600, 0, [SPV_EXT_demote_to_helper_invocation], []>;
 defm ComputeDerivativeGroupLinearNV : CapabilityOperand<5350, 0, 0, [], []>;
 defm FragmentDensityEXT : CapabilityOperand<5291, 0, 0, [], [Shader]>;
 defm PhysicalStorageBufferAddressesEXT : CapabilityOperand<5347, 0, 0, [], [Shader]>;

diff  --git a/llvm/test/CodeGen/DirectX/discard.ll b/llvm/test/CodeGen/DirectX/discard.ll
new file mode 100644
index 00000000000000..2a9ec59156e7cc
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/discard.ll
@@ -0,0 +1,31 @@
+; RUN: opt -passes='function(scalarizer),module(dxil-op-lower,dxil-intrinsic-expansion)' -S -mtriple=dxil-pc-shadermodel6.3-pixel %s | FileCheck %s
+
+; CHECK-LABEL: define void @test_scalar
+; CHECK: call void @dx.op.discard(i32 82, i1 %0)
+;
+define void @test_scalar(float noundef %p) #0 {
+entry:
+  %0 = fcmp olt float %p, 0.000000e+00
+  call void @llvm.dx.discard(i1 %0)
+  ret void
+}
+
+; CHECK-LABEL: define void @test_vector
+; CHECK:       [[EXTR0:%.*]] = extractelement <4 x i1> [[INP:%.*]], i64 0
+; CHECK-NEXT:  [[EXTR1:%.*]] = extractelement <4 x i1> [[INP:%.*]], i64 1
+; CHECK-NEXT:  [[OR1:%.*]] = or i1 [[EXTR0]], [[EXTR1]]
+; CHECK-NEXT:  [[EXTR2:%.*]] = extractelement <4 x i1> [[INP:%.*]], i64 2
+; CHECK-NEXT:  [[OR2:%.*]] = or i1 [[OR1]], [[EXTR2]]
+; CHECK-NEXT:  [[EXTR3:%.*]] = extractelement <4 x i1> [[INP:%.*]], i64 3
+; CHECK-NEXT:  [[OR3:%.*]] = or i1 [[OR2]], [[EXTR3]]
+; CHECK-NEXT:  call void @dx.op.discard(i32 82, i1 [[OR3]])
+;
+define void @test_vector(<4 x float> noundef %p) #0 {
+entry:
+  %0 = fcmp olt <4 x float> %p, zeroinitializer
+  %1 = call i1 @llvm.dx.any.v4i1(<4 x i1> %0)
+  call void @llvm.dx.discard(i1 %1)
+  ret void
+}
+
+declare void @llvm.dx.discard(i1)

diff  --git a/llvm/test/CodeGen/DirectX/discard_error.ll b/llvm/test/CodeGen/DirectX/discard_error.ll
new file mode 100644
index 00000000000000..f863b39856e6b4
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/discard_error.ll
@@ -0,0 +1,12 @@
+; RUN: not opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s
+
+; DXIL operation discard does not support no bool overload type
+
+; CHECK: invalid intrinsic signature
+; CHECK: call void @llvm.dx.discard(double %p)
+;
+define void @discard_double(double noundef %p) {
+entry:
+  call void @llvm.dx.discard(double %p)
+  ret void
+}

diff  --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/discard.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/discard.ll
new file mode 100644
index 00000000000000..831592610d1067
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/discard.ll
@@ -0,0 +1,77 @@
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv1.5-unknown-unknown %s -o - | FileCheck %s --check-prefixes=CHECK,SPIRV15
+; RUN: llc -verify-machineinstrs -spirv-ext=+SPV_EXT_demote_to_helper_invocation -O0 -mtriple=spirv1.5-unknown-unknown %s -o - | FileCheck %s --check-prefixes=CHECK,SPIRV16,WITH-EXTENSION,WITH-CAPABILITY
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv1.6-unknown-unknown %s -o - | FileCheck %s --check-prefixes=CHECK,SPIRV16,WITH-CAPABILITY
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.5-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+; RUN: %if spirv-tools %{ llc -O0 -spirv-ext=+SPV_EXT_demote_to_helper_invocation -mtriple=spirv1.5-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.6-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+
+; Make sure lowering is correctly generating spirv code.
+
+; WITH-CAPABILITY-DAG: OpCapability DemoteToHelperInvocation
+; WITH-EXTENSION-DAG: OpExtension "SPV_EXT_demote_to_helper_invocation"
+
+; CHECK-DAG:   %[[#float:]] = OpTypeFloat 32
+; CHECK-DAG:   %[[#void:]]  = OpTypeVoid
+; CHECK-DAG:   %[[#bool:]]  = OpTypeBool
+; CHECK-DAG:   %[[#v4bool:]]  = OpTypeVector %[[#bool]] 4
+; CHECK-DAG:   %[[#v4float:]] = OpTypeVector %[[#float]] 4
+; CHECK-DAG:   %[[#fzero:]] = OpConstant %[[#float]] 0
+; CHECK-DAG:   %[[#v4fzero:]] = OpConstantNull %[[#v4float]]
+
+define void @test_scalar(float noundef %Buf) {
+entry:
+; CHECK-LABEL: ; -- Begin function test_scalar
+; CHECK:       %[[#load:]] = OpFunctionParameter %[[#float]]
+; CHECK:       %[[#cmplt:]] = OpFOrdLessThan %[[#bool]] %[[#load]] %[[#fzero]]
+; CHECK:       OpBranchConditional %[[#cmplt]] %[[#truel:]] %[[#endl:]]
+; CHECK:       %[[#truel]] = OpLabel
+; SPIRV15:     OpKill
+; SPIRV16-NO:  OpKill
+; SPIRV15-NO:  OpBranch %[[#endl]]
+; SPIRV16:     OpDemoteToHelperInvocation
+; SPIRV16:     OpBranch %[[#endl]]
+; CHECK:       %[[#endl]] = OpLabel
+  %Buf.addr = alloca float, align 4
+  store float %Buf, ptr %Buf.addr, align 4
+  %1 = load float, ptr %Buf.addr, align 4
+  %2 = fcmp olt float %1, 0.000000e+00
+  br i1 %2, label %lt0, label %end
+
+lt0:                                              ; preds = %entry
+  call void @llvm.spv.discard()
+  br label %end
+
+end:                                              ; preds = %lt0, %entry
+  ret void
+}
+declare void @llvm.spv.discard()
+
+define void @test_vector(<4 x float> noundef %Buf) {
+entry:
+; CHECK-LABEL: ; -- Begin function test_vector
+; CHECK:       %[[#loadvec:]] = OpFunctionParameter %[[#v4float]]
+; CHECK:       %[[#cmplt:]] = OpFOrdLessThan %[[#v4bool]] %[[#loadvec]] %[[#v4fzero]]
+; CHECK:       %[[#opany:]] = OpAny %[[#bool]] %[[#cmplt]]
+; CHECK:       OpBranchConditional %[[#opany]]  %[[#truel:]] %[[#endl:]]
+; CHECK:       %[[#truel]] = OpLabel
+; SPIRV15:     OpKill
+; SPIRV16-NO:  OpKill
+; SPIRV15-NO:  OpBranch %[[#endl]]
+; SPIRV16:     OpDemoteToHelperInvocation
+; SPIRV16:     OpBranch %[[#endl]]
+; CHECK:       %[[#endl]] = OpLabel
+  %Buf.addr = alloca <4 x float>, align 16
+  store <4 x float> %Buf, ptr %Buf.addr, align 16
+  %1 = load <4 x float>, ptr %Buf.addr, align 16
+  %2 = fcmp olt <4 x float> %1, zeroinitializer
+  %3 = call i1 @llvm.spv.any.v4i1(<4 x i1> %2)
+  br i1 %3, label %lt0, label %end
+
+lt0:                                              ; preds = %entry
+  call void @llvm.spv.discard()
+  br label %end
+
+end:                                              ; preds = %lt0, %entry
+  ret void
+}