[llvm] 676af12 - [VE] SHL,SRA,SRL v256i32|64 isel and tests

Simon Moll via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 15 02:33:18 PST 2021


Author: Simon Moll
Date: 2021-12-15T11:32:18+01:00
New Revision: 676af1272b07426c80be5806cef830e6f6e469e0

URL: https://github.com/llvm/llvm-project/commit/676af1272b07426c80be5806cef830e6f6e469e0
DIFF: https://github.com/llvm/llvm-project/commit/676af1272b07426c80be5806cef830e6f6e469e0.diff

LOG: [VE] SHL,SRA,SRL v256i32|64 isel and tests

Reviewed By: kaz7

Differential Revision: https://reviews.llvm.org/D115734

Added: 
    llvm/test/CodeGen/VE/Vector/vp_sra.ll
    llvm/test/CodeGen/VE/Vector/vp_srl.ll

Modified: 
    llvm/lib/Target/VE/VEISelLowering.cpp
    llvm/lib/Target/VE/VVPInstrInfo.td
    llvm/lib/Target/VE/VVPInstrPatternsVec.td
    llvm/lib/Target/VE/VVPNodes.def
    llvm/test/CodeGen/VE/Vector/vp_ashr.ll
    llvm/test/CodeGen/VE/Vector/vp_lshr.ll
    llvm/test/CodeGen/VE/Vector/vp_shl.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
index 32315543826a1..5ef223d6030b9 100644
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -1720,7 +1720,7 @@ SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::EXTRACT_VECTOR_ELT:
     return lowerEXTRACT_VECTOR_ELT(Op, DAG);
 
-#define ADD_BINARY_VVP_OP(VVP_NAME, ISD_NAME) case ISD::ISD_NAME:
+#define ADD_BINARY_VVP_OP(VVP_NAME, VP_NAME, ISD_NAME) case ISD::ISD_NAME:
 #include "VVPNodes.def"
     return lowerToVVP(Op, DAG);
   }

diff --git a/llvm/lib/Target/VE/VVPInstrInfo.td b/llvm/lib/Target/VE/VVPInstrInfo.td
index eb68e321ba321..ff72ea9accfad 100644
--- a/llvm/lib/Target/VE/VVPInstrInfo.td
+++ b/llvm/lib/Target/VE/VVPInstrInfo.td
@@ -54,4 +54,8 @@ def c_vvp_or   : vvp_commutative<vvp_or>;
 def vvp_xor    : SDNode<"VEISD::VVP_XOR",  SDTIntBinOpVVP>;
 def c_vvp_xor  : vvp_commutative<vvp_xor>;
 
+def vvp_srl    : SDNode<"VEISD::VVP_SRL",  SDTIntBinOpVVP>;
+def vvp_sra    : SDNode<"VEISD::VVP_SRA",  SDTIntBinOpVVP>;
+def vvp_shl    : SDNode<"VEISD::VVP_SHL",  SDTIntBinOpVVP>;
+
 // } Binary Operators

diff --git a/llvm/lib/Target/VE/VVPInstrPatternsVec.td b/llvm/lib/Target/VE/VVPInstrPatternsVec.td
index a36049f24f0c4..8e519a3f8265b 100644
--- a/llvm/lib/Target/VE/VVPInstrPatternsVec.td
+++ b/llvm/lib/Target/VE/VVPInstrPatternsVec.td
@@ -39,6 +39,28 @@ multiclass Binary_rv<SDPatternOperator OpNode,
                 ScalarVT:$sx, $vy, $mask, $avl)>;
 }
 
+multiclass Binary_vr<SDPatternOperator OpNode,
+    ValueType ScalarVT, ValueType DataVT,
+    ValueType MaskVT, string OpBaseName> {
+  // Masked with select, broadcast.
+  // TODO
+
+  // Unmasked, broadcast.
+  def : Pat<(OpNode
+                DataVT:$vx, (any_broadcast ScalarVT:$sy),
+                (MaskVT true_mask),
+                i32:$avl),
+            (!cast<Instruction>(OpBaseName#"vrl")
+                $vx, ScalarVT:$sy, $avl)>;
+  // Masked, broadcast.
+  def : Pat<(OpNode
+                DataVT:$vx, (any_broadcast ScalarVT:$sy),
+                MaskVT:$mask,
+                i32:$avl),
+            (!cast<Instruction>(OpBaseName#"vrml")
+                $vx, ScalarVT:$sy, $mask, $avl)>;
+}
+
 multiclass Binary_vv<SDPatternOperator OpNode,
     ValueType DataVT,
     ValueType MaskVT, string OpBaseName> {
@@ -70,6 +92,14 @@ multiclass Binary_rv_vv<
   defm : Binary_vv<OpNode, DataVT, MaskVT, OpBaseName>;
 }
 
+multiclass Binary_vr_vv<
+    SDPatternOperator OpNode,
+    ValueType ScalarVT, ValueType DataVT, ValueType MaskVT,
+    string OpBaseName> {
+  defm : Binary_vr<OpNode, ScalarVT, DataVT, MaskVT, OpBaseName>;
+  defm : Binary_vv<OpNode, DataVT, MaskVT, OpBaseName>;
+}
+
 // Expand both 64bit and 32 bit variant (256 elements)
 multiclass Binary_rv_vv_ShortLong<
     SDPatternOperator OpNode,
@@ -83,6 +113,17 @@ multiclass Binary_rv_vv_ShortLong<
                       ShortOpBaseName>;
 }
 
+multiclass Binary_vr_vv_ShortLong<
+    SDPatternOperator OpNode,
+    ValueType LongScalarVT, ValueType LongDataVT, string LongOpBaseName,
+    ValueType ShortScalarVT, ValueType ShortDataVT, string ShortOpBaseName> {
+  defm : Binary_vr_vv<OpNode,
+                      LongScalarVT, LongDataVT, v256i1,
+                      LongOpBaseName>;
+  defm : Binary_vr_vv<OpNode,
+                      ShortScalarVT, ShortDataVT, v256i1,
+                      ShortOpBaseName>;
+}
 
 defm : Binary_rv_vv_ShortLong<c_vvp_add,
                               i64, v256i64, "VADDSL",
@@ -102,3 +143,12 @@ defm : Binary_rv_vv_ShortLong<c_vvp_or,
 defm : Binary_rv_vv_ShortLong<c_vvp_xor,
                               i64, v256i64, "VXOR",
                               i32, v256i32, "PVXORLO">;
+defm : Binary_vr_vv_ShortLong<vvp_shl,
+                              i64, v256i64, "VSLL",
+                              i32, v256i32, "PVSLLLO">;
+defm : Binary_vr_vv_ShortLong<vvp_sra,
+                              i64, v256i64, "VSRAL",
+                              i32, v256i32, "PVSRALO">;
+defm : Binary_vr_vv_ShortLong<vvp_srl,
+                              i64, v256i64, "VSRL",
+                              i32, v256i32, "PVSRLLO">;

diff --git a/llvm/lib/Target/VE/VVPNodes.def b/llvm/lib/Target/VE/VVPNodes.def
index dc18131573b8c..9141214bdccb9 100644
--- a/llvm/lib/Target/VE/VVPNodes.def
+++ b/llvm/lib/Target/VE/VVPNodes.def
@@ -28,18 +28,30 @@
 /// \p VVPName is a VVP Binary operator.
 /// \p SDNAME is the generic SD opcode corresponding to \p VVPName.
 #ifndef ADD_BINARY_VVP_OP
-#define ADD_BINARY_VVP_OP(X,Y) ADD_VVP_OP(X,Y) HANDLE_VP_TO_VVP(VP_##Y, X)
+#define ADD_BINARY_VVP_OP(VVPNAME,VPNAME,SDNAME) \
+            ADD_VVP_OP(VVPNAME,SDNAME) \
+            HANDLE_VP_TO_VVP(VPNAME, VVPNAME)
+#endif
+
+#ifndef ADD_BINARY_VVP_OP_COMPACT
+#define ADD_BINARY_VVP_OP_COMPACT(NAME) \
+    ADD_BINARY_VVP_OP(VVP_##NAME,VP_##NAME,NAME)
 #endif
 
 // Integer arithmetic.
-ADD_BINARY_VVP_OP(VVP_ADD,ADD)
-ADD_BINARY_VVP_OP(VVP_SUB,SUB)
-ADD_BINARY_VVP_OP(VVP_MUL,MUL)
+ADD_BINARY_VVP_OP_COMPACT(ADD)
+ADD_BINARY_VVP_OP_COMPACT(SUB)
+ADD_BINARY_VVP_OP_COMPACT(MUL)
 
-ADD_BINARY_VVP_OP(VVP_AND,AND)
-ADD_BINARY_VVP_OP(VVP_OR,OR)
-ADD_BINARY_VVP_OP(VVP_XOR,XOR)
+ADD_BINARY_VVP_OP(VVP_SRA,VP_ASHR,SRA)
+ADD_BINARY_VVP_OP(VVP_SRL,VP_LSHR,SRL)
+ADD_BINARY_VVP_OP_COMPACT(SHL)
+
+ADD_BINARY_VVP_OP_COMPACT(AND)
+ADD_BINARY_VVP_OP_COMPACT(OR)
+ADD_BINARY_VVP_OP_COMPACT(XOR)
 
-#undef HANDLE_VP_TO_VVP
 #undef ADD_BINARY_VVP_OP
+#undef ADD_BINARY_VVP_OP_COMPACT
 #undef ADD_VVP_OP
+#undef HANDLE_VP_TO_VVP

diff --git a/llvm/test/CodeGen/VE/Vector/vp_ashr.ll b/llvm/test/CodeGen/VE/Vector/vp_ashr.ll
index 234432bfde1a9..ef82ed5c9d19b 100644
--- a/llvm/test/CodeGen/VE/Vector/vp_ashr.ll
+++ b/llvm/test/CodeGen/VE/Vector/vp_ashr.ll
@@ -1,16 +1,29 @@
-; REQUIRES: asserts
-; RUN: not --crash llc < %s -march=ve -mattr=+vpu -o /dev/null 2>&1 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
 
-; CHECK:  t{{[0-9]+}}: v256i32 = vp_ashr [[A:t[0-9]+]], [[B:t[0-9]+]], [[MASK:t[0-9]+]], [[EVL:t[0-9]+]] 
-; CHECK:  [[A]]: v256i32
-; CHECK:  [[B]]: v256i32
-; CHECK:  [[MASK]]: v256i1
-; CHECK:  [[EVL]]: i32
+declare <256 x i32> @llvm.vp.ashr.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)
 
-define <256 x i32> @test_vp_int(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
+define fastcc <256 x i32> @test_vp_int(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_int:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    pvsra.lo %v0, %v0, %v1, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
   %r0 = call <256 x i32> @llvm.vp.ashr.v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n)
   ret <256 x i32> %r0
 }
 
-; integer arith
-declare <256 x i32> @llvm.vp.ashr.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)
+
+declare <256 x i64> @llvm.vp.ashr.v256i64(<256 x i64>, <256 x i64>, <256 x i1>, i32)
+
+define fastcc <256 x i64> @test_vp_v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_v256i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vsra.l %v0, %v0, %v1, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %r0 = call <256 x i64> @llvm.vp.ashr.v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n)
+  ret <256 x i64> %r0
+}

diff --git a/llvm/test/CodeGen/VE/Vector/vp_lshr.ll b/llvm/test/CodeGen/VE/Vector/vp_lshr.ll
index 0cbacef892ad1..f071c88448900 100644
--- a/llvm/test/CodeGen/VE/Vector/vp_lshr.ll
+++ b/llvm/test/CodeGen/VE/Vector/vp_lshr.ll
@@ -1,16 +1,30 @@
-; REQUIRES: asserts
-; RUN: not --crash llc < %s -march=ve -mattr=+vpu -o /dev/null 2>&1 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
 
-; CHECK:  t{{[0-9]+}}: v256i32 = vp_lshr [[A:t[0-9]+]], [[B:t[0-9]+]], [[MASK:t[0-9]+]], [[EVL:t[0-9]+]] 
-; CHECK:  [[A]]: v256i32
-; CHECK:  [[B]]: v256i32
-; CHECK:  [[MASK]]: v256i1
-; CHECK:  [[EVL]]: i32
+declare <256 x i32> @llvm.vp.lshr.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)
 
-define <256 x i32> @test_vp_int(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
+define fastcc <256 x i32> @test_vp_int(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_int:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    pvsrl.lo %v0, %v0, %v1, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
   %r0 = call <256 x i32> @llvm.vp.lshr.v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n)
   ret <256 x i32> %r0
 }
 
-; integer arith
-declare <256 x i32> @llvm.vp.lshr.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)
+
+declare <256 x i64> @llvm.vp.lshr.v256i64(<256 x i64>, <256 x i64>, <256 x i1>, i32)
+
+define fastcc <256 x i64> @test_vp_v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_v256i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vsrl %v0, %v0, %v1, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %r0 = call <256 x i64> @llvm.vp.lshr.v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n)
+  ret <256 x i64> %r0
+}
+

diff --git a/llvm/test/CodeGen/VE/Vector/vp_shl.ll b/llvm/test/CodeGen/VE/Vector/vp_shl.ll
index 553bf0596dfcd..94a4af3eb4c2d 100644
--- a/llvm/test/CodeGen/VE/Vector/vp_shl.ll
+++ b/llvm/test/CodeGen/VE/Vector/vp_shl.ll
@@ -1,16 +1,29 @@
-; REQUIRES: asserts
-; RUN: not --crash llc < %s -march=ve -mattr=+vpu -o /dev/null 2>&1 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
 
-; CHECK:  t{{[0-9]+}}: v256i32 = vp_shl [[A:t[0-9]+]], [[B:t[0-9]+]], [[MASK:t[0-9]+]], [[EVL:t[0-9]+]] 
-; CHECK:  [[A]]: v256i32
-; CHECK:  [[B]]: v256i32
-; CHECK:  [[MASK]]: v256i1
-; CHECK:  [[EVL]]: i32
+declare <256 x i32> @llvm.vp.shl.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)
 
-define <256 x i32> @test_vp_int(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
+define fastcc <256 x i32> @test_vp_v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_v256i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    pvsll.lo %v0, %v0, %v1, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
   %r0 = call <256 x i32> @llvm.vp.shl.v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n)
   ret <256 x i32> %r0
 }
 
-; integer arith
-declare <256 x i32> @llvm.vp.shl.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)
+
+declare <256 x i64> @llvm.vp.shl.v256i64(<256 x i64>, <256 x i64>, <256 x i1>, i32)
+
+define fastcc <256 x i64> @test_vp_v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_v256i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vsll %v0, %v0, %v1, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %r0 = call <256 x i64> @llvm.vp.shl.v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n)
+  ret <256 x i64> %r0
+}

diff --git a/llvm/test/CodeGen/VE/Vector/vp_sra.ll b/llvm/test/CodeGen/VE/Vector/vp_sra.ll
new file mode 100644
index 0000000000000..3cdb424dc27e9
--- /dev/null
+++ b/llvm/test/CodeGen/VE/Vector/vp_sra.ll
@@ -0,0 +1,30 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
+
+declare <256 x i32> @llvm.vp.ashr.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)
+
+define fastcc <256 x i32> @test_vp_ashr_v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_ashr_v256i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    pvsra.lo %v0, %v0, %v1, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %r0 = call <256 x i32> @llvm.vp.ashr.v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n)
+  ret <256 x i32> %r0
+}
+
+
+declare <256 x i64> @llvm.vp.ashr.v256i64(<256 x i64>, <256 x i64>, <256 x i1>, i32)
+
+define fastcc <256 x i64> @test_vp_int_v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_int_v256i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vsra.l %v0, %v0, %v1, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %r0 = call <256 x i64> @llvm.vp.ashr.v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n)
+  ret <256 x i64> %r0
+}
+

diff --git a/llvm/test/CodeGen/VE/Vector/vp_srl.ll b/llvm/test/CodeGen/VE/Vector/vp_srl.ll
new file mode 100644
index 0000000000000..c41aa1928213c
--- /dev/null
+++ b/llvm/test/CodeGen/VE/Vector/vp_srl.ll
@@ -0,0 +1,30 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
+
+declare <256 x i32> @llvm.vp.lshr.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)
+
+define fastcc <256 x i32> @test_vp_lshr_v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_lshr_v256i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    pvsrl.lo %v0, %v0, %v1, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %r0 = call <256 x i32> @llvm.vp.lshr.v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n)
+  ret <256 x i32> %r0
+}
+
+
+declare <256 x i64> @llvm.vp.lshr.v256i64(<256 x i64>, <256 x i64>, <256 x i1>, i32)
+
+define fastcc <256 x i64> @test_vp_int_v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_int_v256i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vsrl %v0, %v0, %v1, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %r0 = call <256 x i64> @llvm.vp.lshr.v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n)
+  ret <256 x i64> %r0
+}
+


        


More information about the llvm-commits mailing list