[llvm] 73244e8 - [VP] Add vp.icmp comparison intrinsic and docs

Wed Mar 30 09:16:50 PDT 2022

Author: Fraser Cormack
Date: 2022-03-30T17:05:11+01:00
New Revision: 73244e8f8568fb0ebf0be5943c5d290f5b91d0c1

URL: https://github.com/llvm/llvm-project/commit/73244e8f8568fb0ebf0be5943c5d290f5b91d0c1
DIFF: https://github.com/llvm/llvm-project/commit/73244e8f8568fb0ebf0be5943c5d290f5b91d0c1.diff

LOG: [VP] Add vp.icmp comparison intrinsic and docs

This patch mostly follows up on D121292 which introduced the vp.fcmp
intrinsic.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D122729

Added: 
    

Modified: 
    llvm/docs/LangRef.rst
    llvm/include/llvm/IR/Intrinsics.td
    llvm/include/llvm/IR/VPIntrinsics.def
    llvm/lib/IR/IntrinsicInst.cpp
    llvm/lib/IR/Verifier.cpp
    llvm/test/Verifier/invalid-vp-intrinsics.ll
    llvm/test/Verifier/vp-intrinsics.ll
    llvm/unittests/IR/VPIntrinsicTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 51dd530c5e19b..8c06c18e46079 100644

--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -11126,6 +11126,8 @@ The '``icmp``' instruction takes three operands. The first operand is
 the condition code indicating the kind of comparison to perform. It is
 not a value, just a keyword. The possible condition codes are:
 
+.. _icmp_md_cc:
+
 #. ``eq``: equal
 #. ``ne``: not equal
 #. ``ugt``: unsigned greater than
@@ -11148,6 +11150,8 @@ The '``icmp``' compares ``op1`` and ``op2`` according to the condition
 code given as ``cond``. The comparison performed always yields either an
 :ref:`i1 <t_integer>` or vector of ``i1`` result, as follows:
 
+.. _icmp_md_cc_sem:
+
 #. ``eq``: yields ``true`` if the operands are equal, ``false``
    otherwise. No sign interpretation is necessary or performed.
 #. ``ne``: yields ``true`` if the operands are unequal, ``false``
@@ -20341,6 +20345,64 @@ Examples:
       %also.r = select <4 x i1> %mask, <4 x i1> %t, <4 x i1> undef
 
 
+.. _int_vp_icmp:
+
+'``llvm.vp.icmp.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+      declare <32 x i1> @llvm.vp.icmp.v32i32(<32 x i32> <left_op>, <32 x i32> <right_op>, metadata <condition code>, <32 x i1> <mask>, i32 <vector_length>)
+      declare <vscale x 2 x i1> @llvm.vp.icmp.nxv2i32(<vscale x 2 x i32> <left_op>, <vscale x 2 x i32> <right_op>, metadata <condition code>, <vscale x 2 x i1> <mask>, i32 <vector_length>)
+      declare <128 x i1> @llvm.vp.icmp.v128i8(<128 x i8> <left_op>, <128 x i8> <right_op>, metadata <condition code>, <128 x i1> <mask>, i32 <vector_length>)
+
+Overview:
+"""""""""
+
+The '``llvm.vp.icmp``' intrinsic returns a vector of boolean values based on
+the comparison of its operands. The operation has a mask and an explicit vector
+length parameter.
+
+
+Arguments:
+""""""""""
+
+The '``llvm.vp.icmp``' intrinsic takes the two values to compare as its first
+and second operands. These two values must be vectors of :ref:`integer
+<t_integer>` types.
+The return value is the result of the comparison. The return type must be a
+vector of :ref:`i1 <t_integer>` type. The fourth operand is the vector mask.
+The return type, the values to compare, and the vector mask have the same
+number of elements. The third operand is the condition code indicating the kind
+of comparison to perform. It must be a metadata string with :ref:`one of the
+supported integer condition code values <icmp_md_cc>`. The fifth operand is the
+explicit vector length of the operation.
+
+Semantics:
+""""""""""
+
+The '``llvm.vp.icmp``' compares its first two operands according to the
+condition code given as the third operand. The operands are compared element by
+element on each enabled lane, where the semantics of the comparison are
+defined :ref:`according to the condition code <icmp_md_cc_sem>`. Masked-off
+lanes are undefined.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+      %r = call <4 x i1> @llvm.vp.icmp.v4i32(<4 x i32> %a, <4 x i32> %b, metadata !"ne", <4 x i1> %mask, i32 %evl)
+      ;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+
+      %t = icmp ne <4 x i32> %a, %b
+      %also.r = select <4 x i1> %mask, <4 x i1> %t, <4 x i1> undef
+
+
 .. _int_mload_mstore:
 
 Masked Vector Load and Store Intrinsics

diff  --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 16c5b7bdd73e4..40f1c8fd8f272 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1575,6 +1575,13 @@ let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in {
                                   llvm_metadata_ty,
                                   LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
                                   llvm_i32_ty]>;
+
+  def int_vp_icmp : DefaultAttrsIntrinsic<[ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty> ],
+                                [ llvm_anyvector_ty,
+                                  LLVMMatchType<0>,
+                                  llvm_metadata_ty,
+                                  LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                                  llvm_i32_ty]>;
 }
 
 // Reductions

diff  --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def
index 12314f8e2bb17..9b3e7f72fe66f 100644
--- a/llvm/include/llvm/IR/VPIntrinsics.def
+++ b/llvm/include/llvm/IR/VPIntrinsics.def
@@ -267,6 +267,12 @@ VP_PROPERTY_CMP(2, true)
 VP_PROPERTY_CONSTRAINEDFP(0, 1, experimental_constrained_fcmp)
 END_REGISTER_VP(vp_fcmp, VP_FCMP)
 
+// llvm.vp.icmp(x,y,cc,mask,vlen)
+BEGIN_REGISTER_VP(vp_icmp, 3, 4, VP_ICMP, -1)
+VP_PROPERTY_FUNCTIONAL_OPC(ICmp)
+VP_PROPERTY_CMP(2, false)
+END_REGISTER_VP(vp_icmp, VP_ICMP)
+
 ///// } Comparisons
 
 ///// Memory Operations {

diff  --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp
index eeffbaf41bbf6..67bcb116ed12f 100644
--- a/llvm/lib/IR/IntrinsicInst.cpp
+++ b/llvm/lib/IR/IntrinsicInst.cpp
@@ -576,6 +576,24 @@ bool VPCmpIntrinsic::isVPCmp(Intrinsic::ID ID) {
   return false;
 }
 
+static ICmpInst::Predicate getIntPredicateFromMD(const Value *Op) {
+  Metadata *MD = cast<MetadataAsValue>(Op)->getMetadata();
+  if (!MD || !isa<MDString>(MD))
+    return ICmpInst::BAD_ICMP_PREDICATE;
+  return StringSwitch<ICmpInst::Predicate>(cast<MDString>(MD)->getString())
+      .Case("eq", ICmpInst::ICMP_EQ)
+      .Case("ne", ICmpInst::ICMP_NE)
+      .Case("ugt", ICmpInst::ICMP_UGT)
+      .Case("uge", ICmpInst::ICMP_UGE)
+      .Case("ult", ICmpInst::ICMP_ULT)
+      .Case("ule", ICmpInst::ICMP_ULE)
+      .Case("sgt", ICmpInst::ICMP_SGT)
+      .Case("sge", ICmpInst::ICMP_SGE)
+      .Case("slt", ICmpInst::ICMP_SLT)
+      .Case("sle", ICmpInst::ICMP_SLE)
+      .Default(ICmpInst::BAD_ICMP_PREDICATE);
+}
+
 CmpInst::Predicate VPCmpIntrinsic::getPredicate() const {
   bool IsFP = true;
   Optional<unsigned> CCArgIdx;
@@ -590,9 +608,9 @@ CmpInst::Predicate VPCmpIntrinsic::getPredicate() const {
 #define END_REGISTER_VP_INTRINSIC(VPID) break;
 #include "llvm/IR/VPIntrinsics.def"
   }
-  assert(CCArgIdx.hasValue() && IsFP &&
-         "Unexpected vector-predicated comparison");
-  return getFPPredicateFromMD(getArgOperand(*CCArgIdx));
+  assert(CCArgIdx.hasValue() && "Unexpected vector-predicated comparison");
+  return IsFP ? getFPPredicateFromMD(getArgOperand(*CCArgIdx))
+              : getIntPredicateFromMD(getArgOperand(*CCArgIdx));
 }
 
 unsigned VPReductionIntrinsic::getVectorParamPos() const {

diff  --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index bbd1d4950d000..d4f221fe878b3 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -5606,6 +5606,11 @@ void Verifier::visitVPIntrinsic(VPIntrinsic &VPI) {
     Assert(CmpInst::isFPPredicate(Pred),
            "invalid predicate for VP FP comparison intrinsic", &VPI);
   }
+  if (VPI.getIntrinsicID() == Intrinsic::vp_icmp) {
+    auto Pred = cast<VPCmpIntrinsic>(&VPI)->getPredicate();
+    Assert(CmpInst::isIntPredicate(Pred),
+           "invalid predicate for VP integer comparison intrinsic", &VPI);
+  }
 }
 
 void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {

diff  --git a/llvm/test/Verifier/invalid-vp-intrinsics.ll b/llvm/test/Verifier/invalid-vp-intrinsics.ll
index 4a24f32f894e2..08639352c3ea9 100644
--- a/llvm/test/Verifier/invalid-vp-intrinsics.ll
+++ b/llvm/test/Verifier/invalid-vp-intrinsics.ll
@@ -2,6 +2,7 @@
 
 declare <4 x i32> @llvm.vp.fptosi.v4i32.v8f32(<8 x float>, <4 x i1>, i32)
 declare <4 x i1> @llvm.vp.fcmp.v4f32(<4 x float>, <4 x float>, metadata, <4 x i1>, i32)
+declare <4 x i1> @llvm.vp.icmp.v4i32(<4 x i32>, <4 x i32>, metadata, <4 x i1>, i32)
 
 ; CHECK: VP cast intrinsic first argument and result vector lengths must be equal
 ; CHECK-NEXT: %r0 = call <4 x i32>
@@ -21,3 +22,14 @@ define void @test_vp_fcmp(<4 x float> %a, <4 x float> %b, <4 x i1> %m, i32 %n) {
   %r1 = call <4 x i1> @llvm.vp.fcmp.v4f32(<4 x float> %a, <4 x float> %b, metadata !"eq", <4 x i1> %m, i32 %n)
   ret void
 }
+
+; CHECK: invalid predicate for VP integer comparison intrinsic
+; CHECK-NEXT: %r0 = call <4 x i1> @llvm.vp.icmp.v4i32
+; CHECK: invalid predicate for VP integer comparison intrinsic
+; CHECK-NEXT: %r1 = call <4 x i1> @llvm.vp.icmp.v4i32
+
+define void @test_vp_icmp(<4 x i32> %a, <4 x i32> %b, <4 x i1> %m, i32 %n) {
+  %r0 = call <4 x i1> @llvm.vp.icmp.v4i32(<4 x i32> %a, <4 x i32> %b, metadata !"bad", <4 x i1> %m, i32 %n)
+  %r1 = call <4 x i1> @llvm.vp.icmp.v4i32(<4 x i32> %a, <4 x i32> %b, metadata !"oeq", <4 x i1> %m, i32 %n)
+  ret void
+}

diff  --git a/llvm/test/Verifier/vp-intrinsics.ll b/llvm/test/Verifier/vp-intrinsics.ll
index 1156eafbee5ac..4ff99745029cc 100644
--- a/llvm/test/Verifier/vp-intrinsics.ll
+++ b/llvm/test/Verifier/vp-intrinsics.ll
@@ -63,8 +63,9 @@ define void @test_vp_int_fp_conversions(<8 x i32> %i0, <8 x float> %f0, <8 x i1>
   ret void
 }
 
-define void @test_vp_comparisons(<8 x float> %f0, <8 x float> %f1, <8 x i1> %mask, i32 %evl) {
+define void @test_vp_comparisons(<8 x float> %f0, <8 x float> %f1, <8 x i32> %i0, <8 x i32> %i1, <8 x i1> %mask, i32 %evl) {
   %r0 = call <8 x i1> @llvm.vp.fcmp.v8f32(<8 x float> %f0, <8 x float> %f1, metadata !"oeq", <8 x i1> %mask, i32 %evl)
+  %r1 = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %i0, <8 x i32> %i1, metadata !"eq", <8 x i1> %mask, i32 %evl)
   ret void
 }
 
@@ -108,6 +109,7 @@ declare <8 x float> @llvm.vp.sitofp.v8f32.v8i32(<8 x i32>, <8 x i1>, i32)
 declare <8 x i32> @llvm.vp.fptosi.v8i32.v8f32(<8 x float>, <8 x i1>, i32)
 ; compares
 declare <8 x i1> @llvm.vp.fcmp.v8f32(<8 x float>, <8 x float>, metadata, <8 x i1>, i32)
+declare <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32>, <8 x i32>, metadata, <8 x i1>, i32)
 ; shuffles
 declare <8 x i32> @llvm.experimental.vp.splice.v8i32(<8 x i32>, <8 x i32>, i32, <8 x i1>, i32, i32)
 declare <vscale x 8 x i32> @llvm.experimental.vp.splice.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, i32, <vscale x 8 x i1>, i32, i32)

diff  --git a/llvm/unittests/IR/VPIntrinsicTest.cpp b/llvm/unittests/IR/VPIntrinsicTest.cpp
index fe508e76c167e..5dcabc49faf04 100644
--- a/llvm/unittests/IR/VPIntrinsicTest.cpp
+++ b/llvm/unittests/IR/VPIntrinsicTest.cpp
@@ -99,6 +99,8 @@ class VPIntrinsicTest : public testing::Test {
 
     Str << " declare <8 x i1> @llvm.vp.fcmp.v8f32"
         << "(<8 x float>, <8 x float>, metadata, <8 x i1>, i32) ";
+    Str << " declare <8 x i1> @llvm.vp.icmp.v8i16"
+        << "(<8 x i16>, <8 x i16>, metadata, <8 x i1>, i32) ";
 
     return parseAssemblyString(Str.str(), Err, C);
   }